Source code for kinmatch.search

# -*- coding:utf-8 -*-
from __future__ import unicode_literals
import sys
import time
import threading
import pandas as pd
import numpy as np

from kinmatch import config
from kinmatch.utils import (apply_by_multiprocessing, decode_marker, 
        decode_alleles)
from kinmatch import db
from kinmatch.utils import DataContainer
from kinmatch import NoValueException, ParameterException

## dataframe apply functions
def kinship_index(row, query=None, calculator=None, active_markers=None):
    """Score one target row against the query for every relationship.

    Intended as a row-wise ``DataFrame.apply`` callback. For each name in
    ``calculator.relationships`` it calls
    ``calculator.get_cumulative_index(query, row, name)``.

    ``active_markers`` is accepted for signature parity with the other
    apply helpers but is not used here.

    Returns a ``pd.Series`` indexed by the relationship names.
    """
    names = calculator.relationships
    scores = []
    for name in names:
        scores.append(calculator.get_cumulative_index(query, row, name))
    return pd.Series(scores, index=names)
def paternities_match_sum(row, query=None, active_markers=None):
    """Count paternity mismatches between ``query`` and one target ``row``.

    Row-wise ``DataFrame.apply`` callback. A marker is a mismatch when the
    query and target genotypes share no allele. Active markers for which
    the target has no genotype (null) are also counted as mismatches.

    Args:
        row: target genotypes, one entry per marker (iterable of alleles
            or null).
        query: query genotypes indexed by marker.
        active_markers: markers to compare.

    Returns:
        ``pd.Series`` with ``markers`` (number of active markers) and
        ``mismatches``.
    """
    # Use the explicit set operation: ``Index & Index`` is not a set
    # intersection on current pandas releases.
    typed_markers = row.index[row.notnull()].intersection(active_markers)
    untyped_count = len(active_markers) - len(typed_markers)
    unshared_count = sum(
        1
        for query_alleles, target_alleles in zip(query[typed_markers],
                                                 row[typed_markers])
        if not set(query_alleles) & set(target_alleles))
    return pd.Series([len(active_markers), unshared_count + untyped_count],
                     index=['markers', 'mismatches'])
def paternities_match_sum_partial(row, query=None, active_markers=None):
    """Count paternity mismatches over markers typed in the target only.

    Row-wise ``DataFrame.apply`` callback. Only active markers for which
    ``row`` has a non-null genotype are compared; a marker is a mismatch
    when the query and target genotypes share no allele.

    Returns:
        ``pd.Series`` with ``markers`` (number of markers actually
        compared) and ``mismatches``.
    """
    # Use the explicit set operation: ``Index & Index`` is not a set
    # intersection on current pandas releases.
    typed_markers = row.index[row.notnull()].intersection(active_markers)
    unshared_count = sum(
        1
        for query_alleles, target_alleles in zip(query[typed_markers],
                                                 row[typed_markers])
        if not set(query_alleles) & set(target_alleles))
    return pd.Series([len(typed_markers), unshared_count],
                     index=['markers', 'mismatches'])
def identities_match_sum(row, query=None, active_markers=None):
    """Count active markers whose genotype differs between row and query.

    Row-wise ``DataFrame.apply`` callback. Every marker in
    ``active_markers`` is compared with ``!=``, element by element.

    Returns:
        ``pd.Series`` with ``markers`` (``len(active_markers)``) and
        ``mismatches``.
    """
    target_genotypes = row[active_markers]
    query_genotypes = query[active_markers]
    mismatch_count = (target_genotypes != query_genotypes).sum()
    return pd.Series([len(active_markers), mismatch_count],
                     index=['markers', 'mismatches'])
def identities_match_sum_partial(row, query=None, active_markers=None):
    """Count genotype differences over markers typed in the target only.

    Row-wise ``DataFrame.apply`` callback. Only active markers for which
    ``row`` has a non-null genotype are compared with ``!=``.

    Returns:
        ``pd.Series`` with ``markers`` (number of markers actually
        compared) and ``mismatches``.
    """
    # Use the explicit set operation: ``Index & Index`` is not a set
    # intersection on current pandas releases.
    typed_markers = row.index[row.notnull()].intersection(active_markers)
    mismatch_count = (row[typed_markers] != query[typed_markers]).sum()
    return pd.Series([len(typed_markers), mismatch_count],
                     index=['markers', 'mismatches'])
class IndexCalculator:
    """Base class for index calculators backed by an allele-frequency table.

    Subclasses provide ``get_cumulative_index`` and, where applicable,
    ``get_index`` / ``find_formula``.
    """

    formula = {}

    def __init__(self, af_table):
        # Table is read as ``af_table[marker][allele]`` by ``get_af``.
        self.af_table = af_table

    def get_af(self, marker, allele):
        """Return the frequency of ``allele`` at ``marker``.

        Returns NaN when ``marker`` is not a column of the table, and the
        fallback value 0.001 when the allele is not found (``KeyError``)
        or its stored frequency is NaN.
        """
        # NOTE(review): the NaN check is read as applying only to looked-up
        # values (the source's indentation was lost) -- confirm.
        fallback = 0.001
        try:
            if marker not in self.af_table.columns:
                return np.nan
            frequency = self.af_table[marker][allele]
            if np.isnan(frequency):
                return fallback
            return frequency
        except KeyError:
            return fallback

    def get_cumulative_index(self, markers, query, target):
        """Combine per-marker indices; implemented by subclasses."""
        raise NotImplementedError

    def get_index(self, marker, a1, a2, b1, b2):
        """Compute the index of a single marker; implemented by subclasses."""
        raise NotImplementedError

    def find_formula(self, query_alleles, target_alleles):
        """Select the formula for a genotype pair; implemented by subclasses."""
        raise NotImplementedError
class IdentityIndexCalculator(IndexCalculator):
    """Index calculator for identical genotypes."""

    def get_cumulative_index(self, markers, query, target):
        """Return the reciprocal of the combined genotype probability.

        Only markers that are not in ``config.SEX_MARKERS`` and whose query
        and target genotypes compare equal contribute. A contributing
        marker multiplies the running product by ``pa * pb`` when both
        alleles are equal and by ``2 * pa * pb`` otherwise.
        """
        genotype_probability = 1
        for marker in markers:
            if marker in config.SEX_MARKERS:
                continue
            if not (query[marker] == target[marker]):
                continue
            first, second = query[marker]
            freq_first = self.get_af(marker, first)
            freq_second = self.get_af(marker, second)
            if first == second:
                locus_probability = freq_first * freq_second
            elif first != second:
                locus_probability = 2 * freq_first * freq_second
            genotype_probability *= locus_probability
        return 1 / genotype_probability
class PaternityIndexCalculator(IndexCalculator):
    """Index calculator for pairs that share at least one allele per marker."""

    # Formula number (as chosen by ``find_formula``) -> function of the
    # frequencies of the two aligned query alleles.
    formula = {
        1: (lambda pa, pb: 0.25 / pa),
        2: (lambda pa, pb: (pa + pb) / (4 * pa * pb)),
        3: (lambda pa, pb: 0.5 / pa),
        4: (lambda pa, pb: 0.5 / pa),
        5: (lambda pa, pb: 1 / pa),
    }

    def get_cumulative_index(self, markers, query, target):
        """Multiply the per-marker indices of all usable markers.

        A marker is used when it is not in ``config.SEX_MARKERS``, both
        genotypes are lists, and the two genotypes share an allele.
        """
        combined = 1
        for marker in markers:
            if marker in config.SEX_MARKERS:
                continue
            if type(query[marker]) != list:
                continue
            if type(target[marker]) != list:
                continue
            if not set(query[marker]) & set(target[marker]):
                continue
            a1, a2 = query[marker]
            b1, b2 = target[marker]
            combined *= self.get_index(marker, a1, a2, b1, b2)
        return combined

    def get_index(self, marker, a1, a2, b1, b2):
        """Return the index of one marker, or 1 when no formula applies."""
        try:
            number, allele_a, allele_b = self.find_formula([a1, a2], [b1, b2])
        except AssertionError:
            return 1
        freq_a = self.get_af(marker, allele_a)
        freq_b = self.get_af(marker, allele_b)
        return self.formula[number](freq_a, freq_b)

    def find_formula(self, query_alleles, target_alleles):
        """Pick the formula number for a query/target genotype pair.

        The alleles are first reordered so that a shared allele, if any,
        sits in first position of both genotypes.

        Returns ``(formula_number, a1, a2)`` with the reordered query
        alleles. Raises ``AssertionError`` when no formula matches.
        """
        a1, a2 = query_alleles
        b1, b2 = target_alleles
        if a1 != b1:
            if a1 == b2:
                b1, b2 = b2, b1
            if a2 == b1:
                a1, a2 = a2, a1
            if a2 == b2:
                a1, a2, b1, b2 = a2, a1, b2, b1

        query_het = a1 != a2
        target_het = b1 != b2
        number = None
        if a1 == b1:
            if a2 == b2:
                # Identical genotypes: heterozygous (2) or homozygous (5).
                number = 2 if query_het else 5
            elif query_het and target_het:
                number = 1
            elif not target_het:
                number = 3
            elif not query_het:
                number = 4
        if number is None:
            raise AssertionError(
                'No formualr for this allele combination %s %s %s %s' % (
                    a1, a2, b1, b2))
        return number, a1, a2
class KinshipIndexCalculator(IndexCalculator):
    """Per-relationship index calculator for a pair of genotypes.

    For every relationship in ``coefficients`` the calculator multiplies,
    over the target's typed markers, the value of the formula selected by
    ``find_formula`` for that marker.
    """

    # Relationship name -> 3-tuple ``phi`` consumed by ``formulas``.
    # NOTE(review): presumably the probabilities of sharing 0, 1 and 2
    # alleles identical by descent (k0, k1, k2) -- confirm.
    coefficients = {
            'parent-child': (0, 1, 0),
            'full-sib': (1./4, 1./2, 1./4),
            'half-sib': (1./2, 1./2, 0),
            'first cousin': (3./4, 1./4, 0),
            'unrelated': (1, 0, 0),
            '5 chon': (7./8, 1./8, 0),
            '6 chon': (15./16, 1./16, 0),
            '7 chon': (31./32, 1./32, 0),
            '8 chon': (63./64, 1./64, 0),
            }
    # Sorted relationship names; also the column order of search results.
    relationships = list(sorted(coefficients.keys()))
    # Human-readable (Korean) description of each relationship.
    relationships_explanation = {
            'parent-child': '부모-자식관계',
            'full-sib': '형제관계 (부모가 모두 같을 경우)',
            'half-sib': '이복/이부 형제, 삼촌-조카, 조부-손자',
            'first cousin': '4촌 형제관계',
            'unrelated': '무관한 관계',
            '5 chon': '5촌',
            '6 chon': '6촌 형제',
            '7 chon': '7촌',
            '8 chon': '8촌 형제',
            }
    # Formula number (as chosen by ``find_formula``) -> function of ``phi``
    # and the frequencies of the alleles A, B, C, D returned alongside it.
    # The lambda parameter named ``pd`` is a frequency, not pandas.
    formulas = {
            1: (lambda phi, pa, pb, pc, pd: phi[2] + 0.5*phi[1]*(pa + pb) + 2*phi[0]*pa*pb ),
            2: (lambda phi, pa, pb, pc, pd: phi[2] + phi[1]*pa + phi[0]*pa*pa ),
            3: (lambda phi, pa, pb, pc, pd: phi[1]*pb + 2*phi[0]*pa*pb ),
            4: (lambda phi, pa, pb, pc, pd: 0.5*phi[1]*pa + phi[0]*pa*pa ),
            5: (lambda phi, pa, pb, pc, pd: 0.5*phi[1]*pc + 2*phi[0]*pa*pc ),
            6: (lambda phi, pa, pb, pc, pd: 2*phi[0]*pc*pd ),
            7: (lambda phi, pa, pb, pc, pd: phi[0]*pb*pb ),
            8: (lambda phi, pa, pb, pc, pd: 2*phi[0]*pb*pc ),
            }

    def get_cumulative_index(self, query, target, relationship):
        # Product of the per-marker values over every marker in
        # ``target.index`` whose target genotype is a list; other markers
        # are skipped. Note the argument order differs from the base class.
        cpi = 1
        """
        ## comprehension is slower
        markers = target.index[target.notnull()]
        return np.product([self.get_index(marker, relationship,
            query[marker], target[marker]) for marker in markers ])
        """
        markers = target.index
        for i, marker in enumerate(markers):
            target_alleles = target[marker]
            if type(target_alleles) != list:
                continue
            pi = self.get_index(marker, relationship, query[marker],
                    target_alleles)
            cpi *= pi
        return cpi

    def get_index(self, marker, relationship, query_alleles, target_alleles):
        # Value of the selected formula for one marker, or 1 when
        # ``find_formula`` selects nothing.
        f, A, B, C, D = self.find_formula(query_alleles, target_alleles)
        if f is None:
            return 1
        pa = self.get_af(marker, A)
        pb = self.get_af(marker, B)
        pc = self.get_af(marker, C)
        # Local ``pd`` shadows the pandas alias inside this method only.
        pd = self.get_af(marker, D)
        phi = self.coefficients[relationship]
        #print(marker, f, A, B, C, D, pa, pb, pc, pd, query_alleles,\
        #        target_alleles, pa, pb, self.formulas[f](phi, pa, pb, pc, pd))
        return self.formulas[f](phi, pa, pb, pc, pd)

    def find_formula(self, query_alleles, target_alleles):
        # Classify the genotype pair and return ``(f, A, B, C, D)``: the
        # formula number and the alleles whose frequencies it needs
        # (unused slots are None). Returns a list of five None when the
        # target genotype is not a list or no branch matches. Branch order
        # matters: the first matching condition wins.
        if type(target_alleles) != list:
            return [None] * 5
        a1, a2 = query_alleles
        b1, b2 = target_alleles
        # f1: identical heterozygous genotypes (either allele order)
        if a1 == b1 and a2 == b2 and a1 != a2:
            f = 1; A = a1; B = a2; C = None; D = None
        elif a1 == b2 and a2 == b1 and a1 != a2:
            f = 1; A = a1; B = a2; C = None; D = None
        # f2: identical homozygous genotypes
        elif a1 == a2 == b1 == b2:
            f = 2; A = a1; B = None; C = None; D = None
        # f3: homozygous query, heterozygous target carrying the query allele
        elif a1 == a2 and a1 == b1 and b1 != b2:
            f = 3; A = a1; B = b2; C = None; D = None
        elif a1 == a2 and a1 == b2 and b1 != b2:
            f = 3; A = a1; B = b1; C = None; D = None
        #f4
        elif a1 != a2 and a1 == b1 and b1 == b2:
            f = 4; A = a1; B = a2; C = None; D = None
        elif a1 != a2 and a2 == b1 and b1 == b2:
            f = 4; A = a2; B = a1; C = None; D = None
        #f5
        elif a1 != a2 and a1 == b1 and a2 != b2:
            f = 5; A = a1; B = a2; C = b2; D = None
        elif a1 != a2 and a2 == b1 and b1 != b2:
            f = 5; A = a2; B = a1; C = b2; D = None
        elif a1 != a2 and a2 == b2 and b1 != b2:
            f = 5; A = a2; B = a1; C = b1; D = None
        elif a1 != a2 and a1 == b2 and b1 != b2:
            f = 5; A = a1; B = a2; C = b1; D = None
        #f6,7,8
        elif a1 != a2 and a1 != b1 and b1 != b2:
            f = 6; A = a1; B = a2; C = b1; D = b2
        elif a1 == a2 and a1 != b1 and b1 == b2:
            f = 7; A = a1; B = b1; C = None; D = None
        elif a1 == a2 and a1 != b1 and b1 != b2:
            f = 8; A = a1; B = b1; C = b2; D = None
        elif a1 != a2 and a1 != b1 and b1 == b2:
            f = 8; A = a1; B = a2; C = b1; D = None
        else:
            return [None] * 5
        return f, A, B, C, D
class BaseSearcher(object):
    """Common state for searchers: a query and the genotype table to scan."""

    def __init__(self, query=None, df_genotypes=None):
        self.set_params(query, df_genotypes)

    def set_params(self, query, df_genotypes):
        """Store the query and genotype table and derive the identifier.

        ``identifier`` is the query's ``name`` when the query is a pandas
        Series (or a DataFrame exposing such an attribute), otherwise
        ``None``.
        """
        self.genotypes = df_genotypes
        self.query = query
        if isinstance(query, (pd.Series, pd.DataFrame)):
            # A DataFrame has no ``name`` attribute unless it has a column
            # of that name; fall back to None instead of raising.
            self.identifier = getattr(query, 'name', None)
        else:
            self.identifier = None

    def search_identities(self):
        """Search for identical profiles; implemented by subclasses."""
        raise NotImplementedError
class MtdnaSearcher(BaseSearcher):
    """Identity search over mtDNA profiles.

    ``self.query`` is a Series indexed by marker and ``self.genotypes`` a
    DataFrame with one row per sample and one column per marker.
    """

    def exclude_markers_in_cstretch(self, columns):
        """Return ``columns`` without markers at excluded positions.

        A marker is excluded when its decoded position lies in
        16279-16282 or 16384-16393 (inclusive).
        """
        kept = []
        for column in columns:
            position = float(decode_marker(column, 'mtDNA'))
            if (16279 <= position <= 16282) or (16384 <= position <= 16393):
                continue
            kept.append(column)
        return pd.Index(kept)

    def _query_columns(self, exclude_cstretches):
        """Return the query markers, optionally filtered by position."""
        if exclude_cstretches:
            return self.exclude_markers_in_cstretch(self.query.index)
        return self.query.index

    def _drop_query_row(self, targets):
        """Return ``targets`` without the query's own row, if present."""
        if self.identifier is None:
            return targets
        try:
            return targets.drop(self.identifier)
        except KeyError:
            # The query sample is not part of the target table.
            return targets

    def search_identities_by_query_index(self, permit_mismatches=2,
            exclude_cstretches=False):
        """Compare every target on all markers of the query.

        Query markers missing from the genotype table are treated as NaN
        for every target, so they count as mismatches wherever the query
        has a value.

        Returns a DataFrame with ``markers`` and ``mismatches`` columns,
        restricted to rows with at most ``permit_mismatches`` mismatches.
        """
        columns = self._query_columns(exclude_cstretches)
        # ``reindex`` pads markers absent from the table with NaN. The
        # query's own row is dropped from the padded frame, so it is
        # removed even when some markers are missing from the table.
        targets = self._drop_query_row(self.genotypes.reindex(columns=columns))
        query_t = self.query[columns]
        result_compare = targets.apply(self._compare_by_query_index,
                query=query_t, axis=1)
        result = pd.DataFrame(result_compare, columns=['mismatches'])
        result['markers'] = query_t.notnull().sum()
        return result.loc[result['mismatches'] <= permit_mismatches,
                ['markers', 'mismatches']]

    def _compare_by_query_index(self, row, query):
        """Count mismatches over the markers where the query has a value."""
        columns = query.notnull()
        return (row[columns] != query[columns]).sum()

    def search_identities_intersection(self, permit_mismatches=2,
            exclude_cstretches=False):
        """Compare every target on the markers typed in both profiles.

        Returns a DataFrame with ``markers`` (markers compared for that
        target) and ``mismatches`` columns, restricted to rows with at
        most ``permit_mismatches`` mismatches.
        """
        columns = self._query_columns(exclude_cstretches).intersection(
                self.genotypes.columns)
        targets = self._drop_query_row(self.genotypes[columns])
        if targets.empty:
            # Nothing to compare; ``apply`` would not produce the
            # expected columns on an empty frame.
            return pd.DataFrame(columns=['markers', 'mismatches'])
        query_t = self.query[columns]
        result = targets.apply(self._compare_intersection,
                query=query_t, axis=1)
        return result.loc[result['mismatches'] <= permit_mismatches,
                ['markers', 'mismatches']]

    def _compare_intersection(self, row, query):
        """Return compared-marker and mismatch counts for one target row."""
        columns = row.notnull() & query.notnull()
        return pd.Series([columns.sum(),
                          (row[columns] != query[columns]).sum()],
                         index=['markers', 'mismatches'])

    def _compare_intersection_columns(self, row, query):
        """Count the markers typed in both the row and the query."""
        return (row.notnull() & query.notnull()).sum()

    def search_identities(self, permit_mismatches=2,
            exclude_cstretches=False, partial=False,
            is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Run the identity search.

        With ``partial`` only markers typed in both profiles are compared;
        otherwise all query markers are. ``is_multiprocess`` is accepted
        for signature parity with the STR searcher and is not used.
        """
        if partial:
            return self.search_identities_intersection(permit_mismatches,
                    exclude_cstretches)
        return self.search_identities_by_query_index(permit_mismatches,
                exclude_cstretches)
class StrSearcher(BaseSearcher):
    """Identity, paternity and kinship search over STR profiles.

    ``active_markers`` holds the query markers whose genotype is a list
    with at least one non-null allele; ``active_genotypes`` is the
    genotype table restricted to those markers.
    """

    def __init__(self, query, df_genotypes):
        super(StrSearcher, self).__init__(query, df_genotypes)
        active = []
        for marker in self.query.index:
            genotype = self.query.loc[marker]
            if type(genotype) == list and not pd.isnull(genotype).all():
                # Marker names containing '.' are looked up with '__'.
                if '.' in marker:
                    marker = marker.replace('.', '__')
                active.append(marker)
        self.active_markers = pd.Index(active)
        self.active_genotypes = self.genotypes[self.active_markers]

    def _drop_query_row(self, targets):
        """Return ``targets`` without the query's own row, if present."""
        if self.identifier is None:
            return targets
        try:
            return targets.drop(self.identifier)
        except KeyError:
            # The query sample is not part of the target table.
            return targets

    def search_identities(self, permit_mismatches=2, af_table=None,
            partial=False, is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Find targets whose genotypes equal the query's.

        With ``partial`` only markers typed in the target are compared.
        When a non-empty ``af_table`` is given an ``index`` column is
        added. Returns a DataFrame with ``markers`` and ``mismatches``.
        """
        calculator = None
        if af_table is not None and not af_table.empty:
            calculator = IdentityIndexCalculator(af_table)
        match_sum_func = (identities_match_sum_partial if partial
                          else identities_match_sum)
        if len(self.active_markers) == 0:
            return pd.DataFrame(columns=['markers', 'mismatches'])
        return self._search_by_match_sum(match_sum_func, calculator,
                permit_mismatches=permit_mismatches,
                is_multiprocess=is_multiprocess)

    def search_paternities(self, permit_mismatches=2, af_table=None,
            partial=False, is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Find targets sharing an allele with the query at each marker.

        Same options and return shape as ``search_identities``.
        """
        calculator = None
        if af_table is not None and not af_table.empty:
            calculator = PaternityIndexCalculator(af_table)
        match_sum_func = (paternities_match_sum_partial if partial
                          else paternities_match_sum)
        return self._search_by_match_sum(match_sum_func, calculator,
                permit_mismatches=permit_mismatches,
                is_multiprocess=is_multiprocess)

    def _search_by_match_sum(self, match_sum_func, calculator,
            permit_mismatches=2,
            is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Apply ``match_sum_func`` to every target and filter the result.

        Returns a DataFrame with ``markers`` and ``mismatches`` columns
        (plus ``index`` when ``calculator`` is given), sorted by ascending
        mismatches. An empty frame with those two columns is returned
        when there is nothing to compare.
        """
        targets = self._drop_query_row(self.active_genotypes)
        if targets.empty:
            # Keep the return type stable so callers can rename columns.
            return pd.DataFrame(columns=['markers', 'mismatches'])

        query = self.query[self.active_markers]
        if is_multiprocess:
            result = apply_by_multiprocessing(targets, match_sum_func,
                    query=query, active_markers=self.active_markers, axis=1)
        else:
            result = targets.apply(match_sum_func, query=query,
                    active_markers=self.active_markers, axis=1)

        result = result.loc[result['mismatches'] <= permit_mismatches]
        # The sort must be assigned back: it returns a new frame.
        result = result.sort_values('mismatches', ascending=True)
        result = result.reindex(columns=['markers', 'mismatches'])
        if not result.empty and calculator:
            result['index'] = targets.loc[result.index].apply(
                    self._identity_index, calculator=calculator, axis=1)
        return result

    def search_kinships(self, af_table,
            min_kinship_index=config.DEFAULT_MIN_KINSHIP_INDEX,
            is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Score every target for each relationship and keep likely kin.

        ``max_index`` is the largest relationship value divided by the
        ``unrelated`` value and ``max_relative`` the relationship reaching
        it. Rows with ``max_index`` above ``min_kinship_index`` are
        returned, sorted in descending order. A falsy
        ``min_kinship_index`` falls back to the configured default.
        """
        calculator = KinshipIndexCalculator(af_table)
        empty_columns = ['5 chon', '6 chon', '7 chon',
                '8 chon', 'first cousin', 'full-sib', 'half-sib',
                'parent-child', 'unrelated', 'max_index', 'max_relative']
        active_autosomal_markers = [m for m in self.active_markers
                                    if m not in config.SEX_MARKERS]
        if not active_autosomal_markers:
            return pd.DataFrame(columns=empty_columns)

        targets = self._drop_query_row(
                self.active_genotypes[active_autosomal_markers])
        if targets.empty:
            # ``apply`` on an empty frame yields no relationship columns.
            return pd.DataFrame(columns=empty_columns)

        if is_multiprocess:
            result = apply_by_multiprocessing(targets, kinship_index,
                    query=self.query, calculator=calculator, axis=1)
        else:
            result = targets.apply(self._kinship_index,
                    calculator=calculator, axis=1)

        if not min_kinship_index:
            min_kinship_index = config.DEFAULT_MIN_KINSHIP_INDEX
        if result is None:
            return pd.DataFrame(columns=['max_index', 'max_relative'])

        # Compute both summaries before adding them as columns so they
        # only see the relationship columns.
        max_index = result.max(axis=1) / result['unrelated']
        max_relative = result.idxmax(axis=1)
        result['max_index'] = max_index
        result['max_relative'] = max_relative
        result = result[result['max_index'] > min_kinship_index]
        return result.sort_values(['max_index', 'max_relative'],
                ascending=False)

    def _identity_index(self, row, calculator=None):
        """Row callback: cumulative index of the query against ``row``."""
        if calculator:
            return calculator.get_cumulative_index(self.active_markers,
                    self.query, row)

    def _kinship_index(self, row, calculator=None):
        """Row callback binding ``self.query`` to ``kinship_index``."""
        return kinship_index(row, query=self.query, calculator=calculator)
class SearchIntegrator:
    """Run A-STR, Y-STR and mtDNA searches for one query and merge them.

    ``query`` is a mapping holding an ``'identifier'`` entry and,
    optionally, ``'A-STR'``, ``'Y-STR'`` and ``'mtDNA'`` profiles.
    """

    def __init__(self, query):
        self.query = query
        self.identifier = query['identifier']

    def _join_prefixed(self, results, result, prefix):
        """Join ``result`` onto ``results`` with prefixed column names."""
        if result is None:
            return results
        result.columns = ['{} {}'.format(prefix, c) for c in result.columns]
        return results.join(result)

    def _filter_by_mismatches(self, results, prefix, kwargs, partial):
        """Apply the caller's ``permit_mismatches`` limit for one type.

        Nothing is filtered when the type produced no columns, when no
        finite limit was requested, or when ``kwargs`` has no limit.
        With ``partial`` rows lacking this type's data are kept.
        """
        markers_column = '{} markers'.format(prefix)
        mismatches_column = '{} mismatches'.format(prefix)
        if markers_column not in results.columns:
            return results
        if not kwargs or 'permit_mismatches' not in kwargs:
            return results
        limit = kwargs['permit_mismatches']
        if np.isinf(limit):
            return results
        keep = results[mismatches_column] <= limit
        if partial:
            keep = pd.isnull(results[markers_column]) | keep
        return results.loc[keep]

    def search_relationship(self, relationship_type, astrs=None, ystrs=None,
            mtdnas=None, astr_kwargs=None, ystr_kwargs=None,
            mtdna_kwargs=None, partial=False,
            is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Search all supplied genotype tables and merge the results.

        Args:
            relationship_type: ``'identity'``, ``'paternity'`` or
                ``'kinship'``; selects the A-STR search. Y-STR and mtDNA
                always use the identity search.
            astrs, ystrs, mtdnas: genotype DataFrames, or ``None`` to skip.
            astr_kwargs, ystr_kwargs, mtdna_kwargs: extra keyword
                arguments for the respective searcher. They are copied,
                never modified.
            partial: when filtering on ``permit_mismatches``, keep rows
                that have no data for the filtered type.
            is_multiprocess: forwarded to the searchers.

        Returns:
            DataFrame indexed by sample, with columns prefixed by
            ``'A-STR '``, ``'Y-STR '`` or ``'mtDNA '``.

        Raises:
            ValueError: the A-STR search runs and ``relationship_type``
                is not one of the three supported values.
        """
        astr_kwargs = astr_kwargs or {}
        ystr_kwargs = ystr_kwargs or {}
        mtdna_kwargs = mtdna_kwargs or {}

        # The individual searches run without a mismatch limit; the
        # caller's limits are applied after joining so that ``partial``
        # can take the other marker types into account.
        astr_kwargs_all = dict(astr_kwargs, is_multiprocess=is_multiprocess)
        if relationship_type != 'kinship':
            astr_kwargs_all['permit_mismatches'] = float('inf')
        ystr_kwargs_all = dict(ystr_kwargs,
                permit_mismatches=float('inf'),
                is_multiprocess=is_multiprocess)
        mtdna_kwargs_all = dict(mtdna_kwargs,
                permit_mismatches=float('inf'),
                is_multiprocess=is_multiprocess)

        # Union of all sample identifiers (``Index + Index`` is not a set
        # union on current pandas releases).
        index_total = pd.Index([])
        for genotypes in (astrs, ystrs, mtdnas):
            if genotypes is not None:
                index_total = index_total.union(genotypes.index)
        if self.identifier in index_total:
            index_total = index_total.drop(self.identifier)
        results = pd.DataFrame(index=index_total)

        if astrs is not None and ('A-STR' in self.query):
            ss = StrSearcher(pd.Series(self.query['A-STR'],
                name=self.identifier), astrs)
            if relationship_type == 'identity':
                result = ss.search_identities(**astr_kwargs_all)
            elif relationship_type == 'paternity':
                result = ss.search_paternities(**astr_kwargs_all)
            elif relationship_type == 'kinship':
                result = ss.search_kinships(**astr_kwargs_all)
            else:
                raise ValueError(
                    'unknown relationship_type: %r' % (relationship_type,))
            results = self._join_prefixed(results, result, 'A-STR')

        if ystrs is not None and ('Y-STR' in self.query):
            ss = StrSearcher(pd.Series(self.query['Y-STR'],
                name=self.identifier), ystrs)
            result = ss.search_identities(**ystr_kwargs_all)
            results = self._join_prefixed(results, result, 'Y-STR')

        if mtdnas is not None and ('mtDNA' in self.query):
            ss = MtdnaSearcher(pd.Series(self.query['mtDNA'],
                name=self.identifier), mtdnas)
            result = ss.search_identities(**mtdna_kwargs_all)
            results = self._join_prefixed(results, result, 'mtDNA')

        if astrs is not None and relationship_type == 'kinship':
            results = results.loc[results['A-STR max_index'].notnull()]

        results = self._filter_by_mismatches(results, 'A-STR',
                astr_kwargs, partial)
        results = self._filter_by_mismatches(results, 'Y-STR',
                ystr_kwargs, partial)
        results = self._filter_by_mismatches(results, 'mtDNA',
                mtdna_kwargs, partial)

        # (column, ascending) pairs, most significant first.
        sort_columns = []
        if 'A-STR max_index' in results.columns and (
                not results['A-STR max_index'].isnull().all()):
            sort_columns.append(('A-STR max_index', False))
        elif 'A-STR mismatches' in results.columns and (
                results['A-STR mismatches'].notnull().sum() > 1):
            sort_columns.append(('A-STR mismatches', True))
            sort_columns.append(('A-STR index', False))
        for prefix in ('Y-STR', 'mtDNA'):
            column = '{} mismatches'.format(prefix)
            if column in results.columns and (
                    results[column].notnull().sum() > 1):
                sort_columns.append((column, True))

        if sort_columns and not results.empty:
            if len(sort_columns) == 1 and astrs is not None:
                # Historical workaround: add a second key because sorting
                # on a single column ignored ``ascending``.
                sort_columns.append(('A-STR max_relative', True))
            # 'A-STR index' and 'A-STR max_relative' only exist for some
            # searches; sort on the keys that are actually present.
            sort_columns = [sc for sc in sort_columns
                            if sc[0] in results.columns]
            results = results.sort_values(
                    by=[sc[0] for sc in sort_columns],
                    ascending=[sc[1] for sc in sort_columns])
        return results

    def search_kinships(self, astrs=None, ystrs=None, astr_kwargs=None,
            ystr_kwargs=None):
        """Run the kinship search on A-STR and Y-STR tables.

        Each table is searched when it is not ``None`` and the query has
        the matching profile. The per-type results are concatenated side
        by side with prefixed column names. An empty DataFrame is
        returned when no search ran.
        """
        searches = (
            ('A-STR', astrs, astr_kwargs),
            ('Y-STR', ystrs, ystr_kwargs),
        )
        results = []
        for key, genotypes, kwargs in searches:
            if key not in self.query or genotypes is None:
                continue
            # The searcher needs the profile itself, named after the
            # query sample, as in ``search_relationship``.
            ss = StrSearcher(pd.Series(self.query[key],
                name=self.identifier), genotypes)
            result = ss.search_kinships(**(kwargs or {}))
            result.columns = ['{} {}'.format(key, c) for c in result.columns]
            results.append(result)
        if not results:
            return pd.DataFrame()
        return pd.concat(results, axis=1)
[문서]class CheckerBetween(): def __init__(self, query_a, query_b, type_, af_table=None): self.query_a = query_a self.query_b = query_b self.type_ = type_ self.active_markers = self.query_a.index[self.query_a.notnull()] self.af_table = af_table
[문서] def arrange_result(self, result, true_string): samples = [self.query_a.name, self.query_b.name] result_re = result.copy() total_row = pd.Series(name='total') for column in result_re.columns: if column == 'status': match_count = sum(result_re['status'] == true_string) total_row = total_row.set_value('status',match_count) elif (column not in samples) and (column != 'shared_allele'): total_index = np.product(list( result_re.loc[result_re[column].notnull(),column])) total_row = total_row.set_value(column,total_index) result_re[column] = result_re[[column]].astype(object) allele_columns = [self.query_a.name, self.query_b.name] if 'shared_allele' in result_re.columns: allele_columns.append('shared_allele') for query in allele_columns: for marker in result_re[query].index: cleaned_marker = decode_marker(marker, self.type_) alleles = decode_alleles(result_re.loc[marker, query], self.type_, marker, null=np.nan) if alleles != '-': result_re.loc[marker, query] = alleles result_re = result_re.append(total_row) return result_re
[문서] def check_identities(self): result = pd.DataFrame(index=self.query_a.index) result[self.query_a.name] = self.query_a result[self.query_b.name] = self.query_b result = result.loc[result[self.query_a.name].notnull() | result[self.query_b.name].notnull()] result['status'] = result.apply(self._check_identities, axis=1) calculator=None if self.af_table is not None and not self.af_table.empty: calculator = IdentityIndexCalculator(self.af_table) if calculator: index_ = [] for marker in self.active_markers: if marker not in self.query_b: self.query_b[marker] = None index_.append(calculator.get_cumulative_index( pd.Index([marker]), self.query_a, self.query_b)) index_ = pd.Series(index_, index=self.active_markers) result['index'] = index_ return result
def _check_identities(self, row): if row[0] == row[1]: return 'matched' elif (type(row[0]) != list and pd.isnull(row[0])) or \ (type(row[1]) != list and pd.isnull(row[1])): return 'partial' else: return 'unmatched'
[문서] def check_paternities(self, add_shared_allele=False): result = pd.DataFrame(index=self.query_a.index) result[self.query_a.name] = self.query_a result[self.query_b.name] = self.query_b result = result.loc[result[self.query_a.name].notnull() | result[self.query_b.name].notnull()] result['status'] = result.apply(self._check_paternities,axis=1) calculator=None if self.af_table is not None and not self.af_table.empty: calculator = PaternityIndexCalculator(self.af_table) if calculator: index_ = [] for marker in self.active_markers: if marker not in self.query_b: self.query_b[marker] = None index_.append(calculator.get_cumulative_index( pd.Index([marker]), self.query_a, self.query_b)) index_ = pd.Series(index_, index=self.active_markers) result['index'] = index_ if add_shared_allele: result = self.check_paternities_add_shared_allele(result) return result
def _check_paternities(self, row):
    """Classify one marker row as 'partial', 'shared' or 'unshared'.

    Args:
        row: Series whose first two positions hold the genotypes of the
            two samples (a list of alleles or a null value).

    Returns:
        'partial' when at least one side is null, 'shared' when the two
        allele lists have an allele in common, otherwise 'unshared'.
    """
    # The row is labelled by sample name, so use explicit positional
    # access; plain row[0] is a label lookup on current pandas.
    first, second = row.iloc[0], row.iloc[1]

    def is_missing(value):
        # pd.isnull on a list would return an array, hence the guard.
        return not isinstance(value, list) and pd.isnull(value)

    if is_missing(first) or is_missing(second):
        return 'partial'
    if set(first) & set(second):
        return 'shared'
    return 'unshared'
def check_paternities_add_shared_allele(self, result):
    """Add a 'shared_allele' column listing the alleles both samples carry.

    Args:
        result: DataFrame whose first two columns are the sample genotypes
            (lists of alleles) and which has a 'status' column.

    Returns:
        The same DataFrame, modified in place: rows with status 'shared'
        get the list of common alleles, all other rows are left null.
    """
    shared = {}
    # The removed ``.ix`` indexer mixed positional and label access; the
    # two sample columns are taken by position and walked together with
    # the index labels.
    rows = zip(result.index, result['status'],
               result.iloc[:, 0], result.iloc[:, 1])
    for marker, status, alleles_a, alleles_b in rows:
        if status == 'shared':
            shared[marker] = list(set(alleles_a) & set(alleles_b))
    # dtype=object keeps an empty mapping from producing a float column.
    result['shared_allele'] = pd.Series(shared, name='shared_allele',
                                        dtype=object)
    return result
def check_kinships(self):
    """Compute per-marker kinship indices for every known relationship.

    Returns:
        DataFrame indexed like ``query_a`` with one column per sample and
        one column per entry of ``KinshipIndexCalculator.relationships``.
        Relationship columns are filled for the active markers that are
        not listed in ``config.SEX_MARKERS``.
    """
    result = pd.DataFrame(index=self.query_a.index)
    result[self.query_a.name] = self.query_a
    result[self.query_b.name] = self.query_b

    autosomal_markers = [marker for marker in self.active_markers
                         if marker not in config.SEX_MARKERS]
    calculator = KinshipIndexCalculator(self.af_table)

    def single_marker(genotype, marker):
        # One-element series for ``marker``; a marker the sample does not
        # have is represented as None.
        try:
            value = genotype[marker]
        except KeyError:
            value = None
        return pd.Series([value], index=[marker])

    for relationship in calculator.relationships:
        # The original evaluated every index twice and threw the first
        # result away; each index is now computed exactly once.
        indices = [
            calculator.get_cumulative_index(
                single_marker(self.query_a, marker),
                single_marker(self.query_b, marker),
                relationship)
            for marker in autosomal_markers
        ]
        result[relationship] = pd.Series(indices, index=autosomal_markers)
    return result
class ManageSearcher():
    """Holds the options for searching one identifier against a group.

    The constructor only stores its arguments and opens the genotype and
    allele-frequency managers for ``dbname``.
    """

    def __init__(self, relationship_type, identifier, target_group,
                 partial=True,
                 check_astr=True, astr_partial=True,
                 astr_permit_mismatches=2,
                 af_table=None, min_kinship_index=1,
                 check_ystr=False, ystr_partial=True,
                 ystr_permit_mismatches=2,
                 check_mtdna=False, mtdna_partial=True,
                 mtdna_permit_mismatches=2,
                 mtdna_exclude_cstretches=False,
                 dbname=config.DEFAULT_MONGODB_NAME):
        # What to search for and where.
        self.relationship_type = relationship_type
        self.identifier = identifier
        self.target_group = target_group
        self.partial = partial

        # A-STR options.
        self.check_astr = check_astr
        self.astr_partial = astr_partial
        self.astr_permit_mismatches = astr_permit_mismatches
        self.af_table = af_table
        self.min_kinship_index = min_kinship_index

        # Y-STR options.
        self.check_ystr = check_ystr
        self.ystr_partial = ystr_partial
        self.ystr_permit_mismatches = ystr_permit_mismatches

        # mtDNA options.
        self.check_mtdna = check_mtdna
        self.mtdna_partial = mtdna_partial
        self.mtdna_permit_mismatches = mtdna_permit_mismatches
        self.mtdna_exclude_cstretches = mtdna_exclude_cstretches

        # Database access.
        self.dbname = dbname
        self.gm = db.GenotypeManager(self.dbname)
        self.afm = db.AlleleFrequenciesManager(self.dbname)
def get_target_group_info(self, row, group=None):
    """Look up descriptive attributes of the individual behind ``row``.

    Args:
        row: object whose ``name`` is the sample identifier.
        group: key into ``self.TARGET_GROUP_INFO_COLUMNS`` selecting which
            attributes to return.

    Returns:
        Series indexed by the selected column names. 'sex' is taken from
        ``individual.get_sex_display()``; every other column is read as an
        attribute of the individual.
    """
    # NOTE(review): neither ``Sample`` nor ``TARGET_GROUP_INFO_COLUMNS`` is
    # defined in the part of the module visible here - confirm they exist.
    individual = Sample.objects.get(identifier=row.name).get_individual()
    columns = self.TARGET_GROUP_INFO_COLUMNS[group]
    values = [
        individual.get_sex_display() if column == 'sex'
        else getattr(individual, column)
        for column in columns
    ]
    return pd.Series(values, index=columns)
def search(self):
    """Run the search against every enabled genotype type.

    Returns:
        ``DataContainer`` with status 'Success' whose ``items`` is the
        frame returned by ``self.run_search``. Spaces and dashes in its
        column names are replaced by underscores when it is non-empty.
    """
    enabled_types = (
        ('A-STR', self.check_astr),
        ('Y-STR', self.check_ystr),
        ('mtDNA', self.check_mtdna),
    )
    targets = {}
    for type_, enabled in enabled_types:
        if enabled:
            targets[type_] = self.gm.get_genotypes_by_group(
                type_, self.target_group)

    # NOTE(review): ``run_search`` is not defined in the visible part of
    # this class - confirm where it comes from.
    result_df = self.run_search(targets)

    if result_df is not None and not result_df.empty:
        # (A disabled step that joined target-group info columns via
        # get_target_group_info used to sit here.)
        result_df.columns = [
            column.replace(' ', '_').replace('-', '_')
            for column in result_df.columns
        ]
    return DataContainer(status='Success', message='Search',
                         items=result_df)
class ManageMultipleSearch():
    """Holds the options for searching a set of queries against a group.

    The constructor validates ``query_type`` / ``target_type``, stores its
    arguments and opens the genotype, allele-frequency, task and group
    managers for ``dbname``.

    Raises:
        ParameterException: when ``query_type`` or ``target_type`` is not
            one of 'all', 'range', 'keyword', 'manual'.
    """

    def __init__(self, query_group='soldier', query_type='all',
                 query_first=None, query_second=None,
                 target_group='customer', target_type='all',
                 target_first=None, target_second=None,
                 relationship_type='kinship', partial=True,
                 check_astr=True, astr_partial=True,
                 astr_permit_mismatches=2,
                 af_table=None, min_kinship_index=1,
                 check_ystr=False, ystr_partial=True,
                 ystr_permit_mismatches=2,
                 check_mtdna=False, mtdna_partial=True,
                 mtdna_permit_mismatches=2,
                 mtdna_exclude_cstretches=False,
                 dbname=config.DEFAULT_MONGODB_NAME,
                 description=None):
        selection_types = ['all', 'range', 'keyword', 'manual']
        if query_type not in selection_types:
            message = '{} is not proper value for query_type'
            raise ParameterException(message.format(query_type))
        if target_type not in selection_types:
            message = '{} is not proper value for target_type'
            raise ParameterException(message.format(target_type))

        # Query side.
        self.query_group = query_group
        self.query_type = query_type
        self.query_first = query_first
        self.query_second = query_second

        # Target side.
        self.target_group = target_group
        self.target_type = target_type
        self.target_first = target_first
        self.target_second = target_second

        self.relationship_type = relationship_type
        self.partial = partial

        # A-STR options.
        self.check_astr = check_astr
        self.astr_partial = astr_partial
        self.astr_permit_mismatches = astr_permit_mismatches
        self.af_table = af_table
        self.min_kinship_index = min_kinship_index

        # Y-STR options.
        self.check_ystr = check_ystr
        self.ystr_partial = ystr_partial
        self.ystr_permit_mismatches = ystr_permit_mismatches

        # mtDNA options.
        self.check_mtdna = check_mtdna
        self.mtdna_partial = mtdna_partial
        self.mtdna_permit_mismatches = mtdna_permit_mismatches
        self.mtdna_exclude_cstretches = mtdna_exclude_cstretches

        self.dbname = dbname
        self.description = description

        # Database access.
        self.gm = db.GenotypeManager(self.dbname)
        self.afm = db.AlleleFrequenciesManager(self.dbname)
        self.tm = db.TaskManager(self.dbname)
        self.grm = db.GroupManager(self.dbname)
def get_filtered_identifiers(self, group, query_type, queries):
    """Select identifiers of ``group`` according to ``query_type``.

    Args:
        group: group name handed to ``self.grm.ids_belong_group``.
        query_type: 'all', 'range', 'keyword' or 'manual'.
        queries: two-element sequence; its meaning depends on
            ``query_type`` (range bounds, keyword, or a
            whitespace-separated list of ids in the first element).

    Returns:
        The sorted group ids ('all' and any unrecognised type), the
        inclusive slice between the two bounds ('range'), the ids
        containing the keyword ('keyword'), or the ids split out of
        ``queries[0]`` ('manual').

    Raises:
        NoValueException: for 'range' when a bound is not a group id.
    """
    ids = self.grm.ids_belong_group(group)
    ids.sort()

    if query_type == 'range':
        lower, upper = queries[0], queries[1]
        for bound in (lower, upper):
            if bound not in ids:
                raise NoValueException('There is no id {}'.format(bound))
        return ids[ids.index(lower):ids.index(upper) + 1]

    if query_type == 'keyword':
        keyword = queries[0]
        return [id_ for id_ in ids if keyword in id_]

    if query_type == 'manual':
        # Manual ids are taken as given; they are not checked against
        # the group.
        return queries[0].strip().split()

    return ids
def search(self):
    """Register a search task and start it on a background thread.

    The task identifier is ``<YYYYMMDD>-<n>`` where ``n`` is one more
    than the number of tasks ``self.tm`` reports for today.

    Returns:
        ``DataContainer`` with status 'Success' and
        ``items={'taskId': <task identifier>}``. The search itself runs in
        ``search_multiple_async`` and has not finished when this returns.
    """
    # Take the date once so the lookup and the generated id cannot
    # disagree when the call straddles midnight.
    today = time.strftime("%Y%m%d")
    today_tasks = self.tm.get_alltasks('identifier', today)
    counts = today_tasks.count() if today_tasks else 0
    task_id = '{}-{}'.format(today, str(counts + 1))

    # Options stay empty dicts for genotype types that are switched off.
    astr_option = {}
    ystr_option = {}
    mtdna_option = {}
    if self.check_astr:
        astr_option = {
            'permit_mismatches': self.astr_permit_mismatches,
            'partial': self.astr_partial,
            'min_kinship_index': self.min_kinship_index,
            'af_table': self.af_table,
        }
    if self.check_ystr:
        ystr_option = {
            'permit_mismatches': self.ystr_permit_mismatches,
            'partial': self.ystr_partial,
        }
    if self.check_mtdna:
        mtdna_option = {
            'permit_mismatches': self.mtdna_permit_mismatches,
            'partial': self.mtdna_partial,
            'exclude_cstretches': self.mtdna_exclude_cstretches,
        }

    queries = self.get_filtered_identifiers(
        self.query_group, self.query_type,
        [self.query_first, self.query_second])
    targets = self.get_filtered_identifiers(
        self.target_group, self.target_type,
        [self.target_first, self.target_second])

    # Persist the task record; the returned object is not needed here.
    self.tm.create(
        identifier=task_id,
        query_group=self.query_group,
        query_type=self.query_type,
        query_count=len(queries),
        query_first=self.query_first,
        query_second=self.query_second,
        target_group=self.target_group,
        target_type=self.target_type,
        target_count=len(targets),
        target_first=self.target_first,
        target_second=self.target_second,
        relationship_type=self.relationship_type,
        description=self.description,
        partial=self.partial,
        astr_option=astr_option,
        ystr_option=ystr_option,
        mtdna_option=mtdna_option,
        dbname=self.dbname,
    )

    threading.Thread(
        target=self.search_multiple_async,
        args=(task_id, queries, astr_option, ystr_option, mtdna_option,
              targets),
    ).start()

    return DataContainer(status='Success', message='Search',
                         items={'taskId': task_id})
def search_multiple_async(self, task_id, queries, astr_option=None,
                          ystr_option=None, mtdna_option=None,
                          targets=None):
    """Search every query against the target group and store the result.

    Progress and the final table are written to the task ``task_id``
    through ``self.tm.update``: 'status' goes 'Searching' -> 'Finished',
    'progress' is updated roughly every 10% of the queries, and 'result'
    receives the transposed result table as JSON.

    Args:
        task_id: identifier of the task record to update.
        queries: identifiers to search for.
        astr_option, ystr_option, mtdna_option: option dicts; a falsy
            value disables that genotype type.
        targets: identifiers the target genotypes are restricted to, or
            None for the whole target group.
    """
    self.tm.update(task_id, 'status', 'Searching')
    percentage_to_record = 10

    astrs = ystrs = mtdnas = af_table = None
    if astr_option:
        astrs = self.gm.get_genotypes_by_group('A-STR', self.target_group)
        if targets is not None:
            astrs = astrs.loc[targets]
        af_table = self.afm.get(astr_option['af_table'], 'A-STR')
    if ystr_option:
        ystrs = self.gm.get_genotypes_by_group('Y-STR', self.target_group)
        if targets is not None:
            ystrs = ystrs.loc[targets]
    if mtdna_option:
        mtdnas = self.gm.get_genotypes_by_group('mtDNA', self.target_group)
        if targets is not None:
            mtdnas = mtdnas.loc[targets]

    if not astr_option:
        # A-STR disabled: hand the empty options through. The kinship
        # branch used to index into them and fail with KeyError.
        astr_kwargs = astr_option
    elif self.relationship_type == 'kinship':
        astr_kwargs = {
            'af_table': af_table,
            'min_kinship_index': astr_option['min_kinship_index'],
        }
    else:
        astr_kwargs = {
            'permit_mismatches': astr_option['permit_mismatches'],
            'af_table': af_table,
            'partial': astr_option['partial'],
        }

    total = len(queries)
    # At least 1, otherwise fewer than 10 queries means "modulo zero".
    query_count_to_record = max(
        1, int(total * percentage_to_record / 100))

    results = []
    for i, identifier in enumerate(queries):
        print('search...', identifier)
        query = self.gm.get_genotype(identifier)
        # Drop genotype types for which the query has no value at all.
        for type_ in list(query.keys()):
            if type_ in ['A-STR', 'Y-STR', 'mtDNA']:
                if all(genotype is None
                       for genotype in query[type_].values()):
                    del query[type_]

        si = SearchIntegrator(query)
        result = si.search_relationship(
            self.relationship_type,
            partial=self.partial,
            astrs=astrs, ystrs=ystrs, mtdnas=mtdnas,
            astr_kwargs=astr_kwargs,
            ystr_kwargs=ystr_option,
            mtdna_kwargs=mtdna_option,
        )
        result['Identifier A'] = identifier
        result['Identifier B'] = result.index
        results.append(result)

        if ((i + 1) % query_count_to_record) == 0:
            self.tm.update(task_id, 'progress', ((i + 1) * 100 / total))
    self.tm.update(task_id, 'progress', 100)

    # One concatenation instead of a DataFrame.append per query. The
    # list is reversed because the original prepended each new result.
    if results:
        total_result = pd.concat(results[::-1], ignore_index=True)
    else:
        total_result = pd.DataFrame()

    if self.relationship_type == 'kinship':
        columns = ['Identifier A', 'Identifier B', 'A-STR max_index',
                   'A-STR max_relative', 'A-STR parent-child',
                   'A-STR full-sib', 'A-STR half-sib',
                   'A-STR first cousin',
                   'A-STR 5 chon', 'A-STR 6 chon', 'A-STR 7 chon',
                   'A-STR 8 chon', 'A-STR unrelated']
    else:
        columns = ['Identifier A', 'Identifier B']
    # Leading columns first, then whatever else the search produced.
    columns = columns + [column for column in total_result.columns.tolist()
                         if column not in columns]

    # Leading columns the search did not produce are added as all-NaN.
    for column in columns:
        if column not in total_result.columns:
            total_result[column] = np.nan
    total_result = total_result[columns]

    if 'A-STR max_index' in total_result.columns and (
            not total_result['A-STR max_index'].isnull().all()):
        total_result = total_result.sort_values(
            by=['A-STR max_index'], ascending=False)
    total_result.index = list(range(total_result.shape[0]))

    self.tm.update(task_id, 'result', total_result.T.to_json())
    self.tm.update(task_id, 'status', 'Finished')
class ManageCheckBetween():
    """Holds the options for comparing two identifiers with each other.

    The constructor only stores its arguments and opens the genotype and
    allele-frequency managers for ``dbname``.
    """

    def __init__(self, relationship_type, identifier_a, identifier_b,
                 partial=True,
                 check_astr=True, astr_partial=True,
                 astr_permit_mismatches=2,
                 af_table=None, min_kinship_index=1,
                 check_ystr=False, ystr_partial=True,
                 ystr_permit_mismatches=2,
                 check_mtdna=False, mtdna_partial=True,
                 mtdna_permit_mismatches=2,
                 mtdna_exclude_cstretches=False,
                 dbname=config.DEFAULT_MONGODB_NAME):
        # What to compare.
        self.relationship_type = relationship_type
        self.identifier_a = identifier_a
        self.identifier_b = identifier_b
        self.partial = partial

        # A-STR options.
        self.check_astr = check_astr
        self.astr_partial = astr_partial
        self.astr_permit_mismatches = astr_permit_mismatches
        self.af_table = af_table
        self.min_kinship_index = min_kinship_index

        # Y-STR options.
        self.check_ystr = check_ystr
        self.ystr_partial = ystr_partial
        self.ystr_permit_mismatches = ystr_permit_mismatches

        # mtDNA options.
        self.check_mtdna = check_mtdna
        self.mtdna_partial = mtdna_partial
        self.mtdna_permit_mismatches = mtdna_permit_mismatches
        self.mtdna_exclude_cstretches = mtdna_exclude_cstretches

        # Database access.
        self.dbname = dbname
        self.gm = db.GenotypeManager(self.dbname)
        self.afm = db.AlleleFrequenciesManager(self.dbname)
def total_row(self, result, is_kinship=False, is_paternity=False):
    """Build the summary row(s) for a per-marker result table.

    Per column the 'total' value is: the non-null count for the two
    sample columns, NaN for 'shared_allele', the number of rows whose
    status is 'partial' or the hit value ('shared' for paternity,
    'matched' otherwise) for 'status', and the product of the values for
    every other column.

    Args:
        result: per-marker DataFrame.
        is_kinship: also return 'KI' (total divided by the 'unrelated'
            total) and 'probability' (KI * 100 / (KI + 1)) rows.
        is_paternity: count 'shared' instead of 'matched' in 'status'.

    Returns:
        DataFrame with a 'total' row, plus 'KI' and 'probability' rows
        when ``is_kinship`` is set. Those two rows are blanked for the
        column range from ``identifier_a`` to ``identifier_b``.
    """
    sample_columns = (self.identifier_a, self.identifier_b)
    hit_status = 'shared' if is_paternity else 'matched'

    def summarise(column):
        values = result[column]
        if column in sample_columns:
            return values.count()
        if column == 'shared_allele':
            return np.nan
        if column == 'status':
            return sum((values == hit_status) | (values == 'partial'))
        return np.prod(values)

    totals = pd.Series([summarise(column) for column in result.columns],
                       index=result.columns)

    if not is_kinship:
        return pd.DataFrame({'total': totals}).T

    ki = totals / totals['unrelated']
    probability = ki * 100 / (ki + 1)
    summary = pd.DataFrame(
        {'total': totals, 'KI': ki, 'probability': probability},
        columns=['total', 'KI', 'probability'],
    ).T
    # Ratios make no sense for the genotype count columns.
    summary.loc['KI':'probability',
                self.identifier_a:self.identifier_b] = np.nan
    return summary
def check(self):
    """Compare the two identifiers on every enabled genotype type.

    A genotype type is processed only when it is enabled and present in
    both samples. For each processed type the per-marker table from
    ``run_check_between`` is reduced to markers typed in at least one
    sample, extended with the rows from ``total_row`` and re-labelled
    with ``decode_marker``.

    Returns:
        ``DataContainer`` with status 'Success' whose ``items`` dict may
        contain 'astr_result', 'astr_result_summary',
        'ordered_relationship', 'ordered_astr_result' (the last three for
        kinship only), 'ystr_result', 'mtdna_result', and always contains
        'sample_a', 'sample_b' (without their '_id') and 'report'.
    """
    sample_a = self.gm.get_genotype(self.identifier_a)
    sample_b = self.gm.get_genotype(self.identifier_b)
    result = {}

    def both_have(type_):
        return (type_ in sample_a) and (type_ in sample_b)

    def typed_rows(frame):
        # Keep markers that at least one of the two samples has.
        return frame.loc[frame[self.identifier_a].notnull() |
                         frame[self.identifier_b].notnull()]

    if self.check_astr and both_have('A-STR'):
        astr_result = typed_rows(self.run_check_between(
            self.relationship_type, 'A-STR',
            shared_allele=True, partial=self.astr_partial))
        is_kinship = self.relationship_type == 'kinship'
        is_paternity = self.relationship_type == 'paternity'
        astr_result = pd.concat([
            astr_result,
            self.total_row(astr_result, is_kinship, is_paternity)])
        astr_result.index = [decode_marker(marker, 'A-STR')
                             for marker in astr_result.index]
        result['astr_result'] = astr_result

        if is_kinship:
            # .loc / sort_values replace the removed .ix / DataFrame.sort.
            summary = astr_result.loc[
                ['total', 'KI', 'probability'],
                ['parent-child', 'full-sib', 'half-sib',
                 'first cousin', '5 chon', '6 chon',
                 'unrelated']]
            summary = summary.T.sort_values('KI', ascending=False).T
            display_names = {
                'parent-child': 'Parent child',
                'half-sib': 'Half sibling',
                'full-sib': 'Full sibling',
                'first cousin': 'First cousin',
                'unrelated': 'Unrelated',
            }
            summary.columns = [display_names.get(column, column)
                               for column in summary.columns]
            result['astr_result_summary'] = summary

            # Relationship columns start after the two sample columns.
            by_probability = astr_result.iloc[:, 2:].T.sort_values(
                'probability', ascending=False)
            # '7 chon' / '8 chon' are left out of the ordered view; a
            # filter does not fail when they are absent.
            ordered_relationship = [
                relationship for relationship in by_probability.index
                if relationship not in ('7 chon', '8 chon')]
            result['ordered_relationship'] = ordered_relationship
            result['ordered_astr_result'] = astr_result[
                list(astr_result.columns[:2]) + ordered_relationship]

    if self.check_ystr and both_have('Y-STR'):
        ystr_result = typed_rows(self.run_check_between(
            'identity', 'Y-STR', False, self.ystr_partial))
        ystr_result = pd.concat([ystr_result,
                                 self.total_row(ystr_result)])
        ystr_result.index = [decode_marker(marker, 'Y-STR')
                             for marker in ystr_result.index]
        result['ystr_result'] = ystr_result

    if self.check_mtdna and both_have('mtDNA'):
        mtdna_result = typed_rows(self.run_check_between(
            'identity', 'mtDNA', False, self.mtdna_partial))
        # Order rows by the numeric value of the decoded marker, ties
        # broken by the raw marker.
        ordered_markers = sorted(
            mtdna_result.index,
            key=lambda marker: (float(decode_marker(marker, 'mtDNA')),
                                marker))
        mtdna_result = mtdna_result.loc[ordered_markers]
        mtdna_result = pd.concat([mtdna_result,
                                  self.total_row(mtdna_result)])
        mtdna_result.index = [decode_marker(marker, 'mtDNA')
                              for marker in mtdna_result.index]
        result['mtdna_result'] = mtdna_result

    # pop() tolerates a missing '_id', e.g. when both identifiers resolve
    # to the same document object.
    sample_a.pop('_id', None)
    sample_b.pop('_id', None)
    result['sample_a'] = sample_a
    result['sample_b'] = sample_b
    result['report'] = self.make_report(result)
    return DataContainer(status='Success',
                         message='Success checking between search',
                         items=result)
def get_astr_report(self, df):
    """Summarise the A-STR comparison table into options and a verdict.

    Args:
        df: A-STR result table with a 'total' row; for kinship also a
            'KI' row and relationship columns from '5 chon' to
            'unrelated'; otherwise a 'status' column.

    Returns:
        Dict with 'astr_option' (the A-STR settings) and 'astr_report'.
        For kinship the report holds 'markers', 'relation', 'index' and
        'decision' (largest KI >= ``min_kinship_index``). Otherwise it
        holds 'markers', 'matches' and 'decision' (mismatches within
        ``astr_permit_mismatches``).
    """
    result = {
        'astr_option': {
            'permit_mismatches': self.astr_permit_mismatches,
            'partial': self.astr_partial,
            'min_kinship_index': self.min_kinship_index,
            'af_table': self.af_table,
        },
    }
    markers = int(df.loc['total', self.identifier_a])

    if self.relationship_type == 'kinship':
        ci_series = df.loc['KI', '5 chon':'unrelated']
        index = ci_series.max()
        result['astr_report'] = {
            'markers': markers,
            # First relationship that reaches the maximum KI.
            'relation': ci_series[ci_series == index].index[0],
            'index': index,
            'decision': index >= self.min_kinship_index,
        }
        return result

    matches = int(df.loc['total', 'status'])
    # The 'index' column only exists when an allele-frequency table was
    # available and is not part of the report, so it is not read here.
    if result['astr_option']['partial']:
        # With partial matching only explicit mismatches count; markers
        # missing on one side are tolerated.
        if self.relationship_type == 'identity':
            mismatch_status = 'unmatched'
        else:
            mismatch_status = 'unshared'
        mismatches = df.loc[df['status'] == mismatch_status,
                            'status'].count()
    else:
        mismatches = markers - matches
    result['astr_report'] = {
        'markers': markers,
        'matches': matches,
        'decision': mismatches <= self.astr_permit_mismatches,
    }
    return result
def get_ystr_report(self, df):
    """Summarise the Y-STR comparison table into options and a verdict.

    Args:
        df: Y-STR result table with a 'status' column and a 'total' row.

    Returns:
        Dict with 'ystr_option' (the Y-STR settings) and 'ystr_report'
        holding 'markers', 'matches' and 'decision'. With partial
        matching the decision counts rows whose status is 'unmatched';
        otherwise it uses markers minus matches. Either way the count
        must not exceed ``ystr_permit_mismatches``.
    """
    options = {
        'permit_mismatches': self.ystr_permit_mismatches,
        'partial': self.ystr_partial,
    }
    markers = int(df.loc['total', self.identifier_a])
    matches = int(df.loc['total', 'status'])

    if options['partial']:
        mismatches = df.loc[df['status'] == 'unmatched', 'status'].count()
    else:
        mismatches = markers - matches
    decision = mismatches <= self.ystr_permit_mismatches

    return {
        'ystr_option': options,
        'ystr_report': {
            'markers': markers,
            'matches': matches,
            'decision': decision,
        },
    }
def get_mtdna_report(self, df):
    """Summarise the mtDNA comparison table into options and a verdict.

    Args:
        df: mtDNA result table with a 'status' column and a 'total' row.

    Returns:
        Dict with 'mtdna_option' (the mtDNA settings) and 'mtdna_report'
        holding 'markers', 'matches' and 'decision'. With partial
        matching the decision counts rows whose status is 'unmatched';
        otherwise it uses markers minus matches. Either way the count
        must not exceed ``mtdna_permit_mismatches``.
    """
    options = {
        'permit_mismatches': self.mtdna_permit_mismatches,
        'partial': self.mtdna_partial,
        # NOTE(review): key is spelled 'exclude_cstretchs' here while the
        # attribute is 'mtdna_exclude_cstretches'; kept as is because
        # consumers may read this exact key - confirm before renaming.
        'exclude_cstretchs': self.mtdna_exclude_cstretches,
    }
    markers = int(df.loc['total', self.identifier_a])
    matches = int(df.loc['total', 'status'])

    if options['partial']:
        mismatches = df.loc[df['status'] == 'unmatched', 'status'].count()
    else:
        mismatches = markers - matches
    decision = mismatches <= self.mtdna_permit_mismatches

    return {
        'mtdna_option': options,
        'mtdna_report': {
            'markers': markers,
            'matches': matches,
            'decision': decision,
        },
    }
def make_report(self, data):
    """Combine the per-type reports into one report with a verdict.

    Args:
        data: dict that may contain 'astr_result', 'ystr_result' and
            'mtdna_result' tables; each present table is summarised by
            the matching ``get_*_report`` method.

    Returns:
        Dict with 'total_option', the per-type option/report entries and
        'total_decision'. With ``self.partial`` one positive per-type
        decision is enough; otherwise (or when none is positive) all
        per-type decisions must hold.
    """
    report = {
        'total_option': {
            'type': self.relationship_type,
            'partial': self.partial,
        },
    }
    if 'astr_result' in data:
        report.update(self.get_astr_report(data['astr_result']))
    if 'ystr_result' in data:
        report.update(self.get_ystr_report(data['ystr_result']))
    if 'mtdna_result' in data:
        report.update(self.get_mtdna_report(data['mtdna_result']))

    decisions = [report[name]['decision']
                 for name in ('astr_report', 'ystr_report', 'mtdna_report')
                 if name in report]
    any_positive = self.partial and True in decisions
    report['total_decision'] = True if any_positive else all(decisions)
    return report
def run_check_between(self, relationship_type, type_, shared_allele=False,
                      partial=False, format=None):
    """Compare the two identifiers on one genotype type.

    Args:
        relationship_type: 'identity', 'paternity' or 'kinship'.
        type_: genotype type key, e.g. 'A-STR', 'Y-STR' or 'mtDNA'.
        shared_allele: for paternity, also add the 'shared_allele' column.
        partial: accepted for interface compatibility; not used here.
        format: accepted for interface compatibility; not used here.

    Returns:
        The arranged per-marker table produced by ``CheckerBetween``.

    Raises:
        ParameterException: for an unknown ``relationship_type``. The
            original fell through and failed on an unbound local.
    """
    if relationship_type not in ('identity', 'paternity', 'kinship'):
        raise ParameterException(
            '{} is not proper value for relationship_type'.format(
                relationship_type))

    query_a = pd.Series(self.gm.get_genotype(self.identifier_a)[type_])
    query_b = pd.Series(self.gm.get_genotype(self.identifier_b)[type_])
    query_a.name = self.identifier_a
    query_b.name = self.identifier_b

    # The frequency table is optional: any lookup failure falls back to
    # running without one. ``except Exception`` keeps that best-effort
    # behaviour without swallowing KeyboardInterrupt / SystemExit.
    try:
        af_table = (self.afm.get(self.af_table, type_)
                    if self.af_table else None)
    except Exception:
        af_table = None

    cb = CheckerBetween(query_a, query_b, type_, af_table)
    if relationship_type == 'identity':
        return cb.arrange_result(cb.check_identities(), 'matched')
    if relationship_type == 'paternity':
        return cb.arrange_result(
            cb.check_paternities(add_shared_allele=shared_allele),
            'shared')
    return cb.arrange_result(cb.check_kinships(), 'shared')
""" def run_search_multiple(self, relationship_type, group_a, group_b, name, partial=False, permit_mismatches_astr=float('inf'), partial_astr=False, permit_mismatches_ystr=float('inf'), partial_ystr=False, permit_mismatches_mtdna=float('inf'), partial_mtdna=False, exclude_cstretch_mtdna=False): group_a = group_a.split(',') group_b = group_b.split(',') af_astrs = self.afm.get(name,'A-STR') af_ystrs = None identifiers_a = pd.Index([]) try: astrs_a = self.gm.get_genotypes_by_group('A-STR', group_a) identifiers_a = identifiers_a + astrs_a.index except: astrs_a = None try: ystrs_a = self.gm.get_genotypes_by_group('Y-STR', group_a) identifiers_a = identifiers_a + ystrs_a.index except: ystrs_a = None try: mtdnas_a = self.gm.get_genotypes_by_group('mtDNA', group_a) identifiers_a = identifiers_a + mtdnas_a.index except: mtdnas_a = None identifiers_b = pd.Index([]) try: astrs_b = self.gm.get_genotypes_by_group('A-STR', group_b) identifiers_b = identifiers_b + astrs_b.index except: astrs_b = None try: ystrs_b = self.gm.get_genotypes_by_group('Y-STR', group_b) identifiers_b = identifiers_b + ystrs_b.index except: ystrs_b = None try: mtdnas_b = self.gm.get_genotypes_by_group('mtDNA', group_b) identifiers_b = identifiers_b + mtdnas_b.index except: mtdnas_b = None total_result = pd.DataFrame() for identifier in identifiers_a: query = self.gm.get_genotype(identifier) if relationship_type == 'kinship': astr_kwargs={'af_table': af_astrs} else: astr_kwargs={'permit_mismatches': permit_mismatches_astr, 'af_table': af_astrs, 'partial': partial_astr} si = SearchIntegrator(query) result = si.search_relationship(relationship_type, astrs=astrs_b, ystrs=ystrs_b, mtdnas=mtdnas_b, astr_kwargs=astr_kwargs, ystr_kwargs={'permit_mismatches':\ permit_mismatches_ystr, 'af_table': af_ystrs, 'partial': partial_ystr}, mtdna_kwargs={'permit_mismatches': permit_mismatches_mtdna, 'partial': partial_mtdna, 'exclude_cstretches': exclude_cstretch_mtdna,}, partial=partial ) index_length = len(result.index) 
index = pd.MultiIndex(levels=[[identifier], result.index], labels=[[0]*index_length, range(index_length)], names=['group_A','group_B']) result.index = index total_result = total_result.append(result) return DataContainer(status='Success', message='Search', items=total_result) """
def _hml_genotype_series(gm, identifier, type_):
    """Return the ``type_`` genotype of ``identifier`` as a named Series.

    Returns None when the sample or the genotype type is missing.
    """
    try:
        series = pd.Series(gm.get_genotype(identifier)[type_])
    except (TypeError, KeyError):
        return None
    series.name = identifier
    return series


def run_check_hml(customer_identifier, goldstar_identifier):
    """Collect the row fragments for an HML report on two samples.

    Runs the A-STR paternity, Y-STR identity, mtDNA identity and A-STR
    kinship comparisons and renders each with its ``create_*_rows``
    helper. A comparison whose genotype is missing on either side yields
    empty rows.

    Args:
        customer_identifier: identifier of the customer sample.
        goldstar_identifier: identifier of the other sample.

    Returns:
        Dict of template values: the identifiers, placeholder name and
        relationship strings, and ``<section>_rows`` /
        ``<section>_row_count`` for kinship, astr, ystr and mtdna.
    """
    gm = db.GenotypeManager()
    afm = db.AlleleFrequenciesManager()

    def load_pair(type_):
        # NOTE(review): the original read undefined names query_a/query_b
        # and so always raised. Mapping customer -> a, goldstar -> b is an
        # assumption based on the parameter order - confirm.
        # Genotypes are reloaded per section because CheckerBetween may
        # modify the series it is given.
        return (_hml_genotype_series(gm, customer_identifier, type_),
                _hml_genotype_series(gm, goldstar_identifier, type_))

    ## astr
    query_a, query_b = load_pair('A-STR')
    af_table = afm.get('Korean', 'A-STR')
    if query_a is not None and query_b is not None:
        cb = CheckerBetween(query_a, query_b, 'A-STR', af_table)
        astr_result = cb.check_paternities(add_shared_allele=True)
        astr_result = cb.arrange_result(astr_result, 'shared')
        astr_result = astr_result.drop('total')
        astr_rows, astr_row_count = create_astr_rows(
            astr_result, goldstar_identifier, customer_identifier,
            start_row_num=1)
    else:
        astr_rows, astr_row_count = '', 1

    ## ystr
    query_a, query_b = load_pair('Y-STR')
    if query_a is not None and query_b is not None:
        cb = CheckerBetween(query_a, query_b, 'Y-STR', None)
        ystr_result = cb.check_identities()
        ystr_result = cb.arrange_result(ystr_result, 'matched')
        ystr_result = ystr_result.drop('total')
        ystr_rows, ystr_row_count = create_ystr_rows(
            ystr_result, goldstar_identifier, customer_identifier,
            start_row_num=1)
    else:
        ystr_rows, ystr_row_count = '', 1

    ## mtdna
    query_a, query_b = load_pair('mtDNA')
    if query_a is not None and query_b is not None:
        cb = CheckerBetween(query_a, query_b, 'mtDNA', None)
        mtdna_result = cb.check_identities()
        mtdna_result = cb.arrange_result(mtdna_result, 'matched')
        mtdna_result = mtdna_result.drop('total')
        mtdna_rows, mtdna_row_count = create_mtdna_rows(
            mtdna_result, goldstar_identifier, customer_identifier,
            start_row_num=1)
    else:
        mtdna_rows, mtdna_row_count = '', 1

    ## kinship
    query_a, query_b = load_pair('A-STR')
    af_table = afm.get('Korean', 'A-STR')
    if query_a is not None and query_b is not None:
        # Build the checker only once both genotypes are known to exist.
        cb = CheckerBetween(query_a, query_b, 'A-STR', af_table)
        kinship_result = cb.check_kinships()
        kinship_result = cb.arrange_result(kinship_result, 'shared')
        ci = kinship_result.loc['total']
        ci = ci.drop(customer_identifier)
        ci = ci.drop(goldstar_identifier)
        ci = ci.astype(float)
        # Likelihood ratio of each relationship against 'unrelated'.
        ci = ci / ci['unrelated']
        ci = ci.sort_values(ascending=False)
        kinship_df = pd.DataFrame(index=ci.index)
        kinship_df['ratio'] = ci
        kinship_df['probability'] = ci / (ci + 1)
        kinship_rows, kinship_row_count = create_astr_kinship_rows(
            kinship_df, goldstar_identifier, customer_identifier,
            start_row_num=2)
    else:
        kinship_rows, kinship_row_count = '', 2

    key_value = {
        'customer_goldstar_name': 'aaa',
        'goldstar_identifier': goldstar_identifier,
        'customer_relationship': 'bbb',
        'customer_name': 'cc',
        'customer_identifier': customer_identifier,
        'kinship_row_count': kinship_row_count,
        'kinship_rows': kinship_rows,
        'astr_row_count': astr_row_count,
        'astr_rows': astr_rows,
        'ystr_row_count': ystr_row_count,
        'ystr_rows': ystr_rows,
        'mtdna_row_count': mtdna_row_count,
        'mtdna_rows': mtdna_rows,
    }
    return key_value