# -*- coding:utf-8 -*-
from __future__ import unicode_literals
import sys
import time
import threading
import pandas as pd
import numpy as np
from kinmatch import config
from kinmatch.utils import (apply_by_multiprocessing, decode_marker,
decode_alleles)
from kinmatch import db
from kinmatch.utils import DataContainer
from kinmatch import NoValueException, ParameterException
## dataframe apply functions
def kinship_index(row, query=None, calculator=None, active_markers=None):
    """Return the cumulative kinship index of one target for every relationship.

    Written to be used with ``DataFrame.apply(..., axis=1)``.

    :param row: genotype of one target (a single DataFrame row).
    :param query: genotype of the query sample.
    :param calculator: object exposing ``relationships`` and
        ``get_cumulative_index(query, row, relationship)``.
    :param active_markers: accepted for signature parity with the other
        apply helpers; not used here.
    :returns: ``pd.Series`` indexed by ``calculator.relationships``.
    """
    scores = []
    for name in calculator.relationships:
        scores.append(calculator.get_cumulative_index(query, row, name))
    return pd.Series(scores, index=calculator.relationships)
def paternities_match_sum(row, query=None, active_markers=None):
    """Count active markers at which a target shares no allele with the query.

    Written to be used with ``DataFrame.apply(..., axis=1)``. A marker
    that is null in ``row`` is counted as a mismatch.

    :param row: genotype of one target; each non-null cell is an iterable
        of alleles.
    :param query: genotype of the query sample, indexed by marker.
    :param active_markers: markers to compare (``pd.Index`` or list-like).
    :returns: ``pd.Series`` with ``markers`` (number of active markers)
        and ``mismatches`` (markers without a shared allele plus markers
        missing from ``row``).
    """
    # Use the named set operation: the ``&`` operator on an Index is not
    # a set intersection on current pandas releases.
    present = row.index[row.notnull().values].intersection(active_markers)
    missing = len(active_markers) - len(present)
    unshared = sum(
        not (set(query_alleles) & set(target_alleles))
        for query_alleles, target_alleles in zip(query[present], row[present])
    )
    return pd.Series([len(active_markers), unshared + missing],
                     index=['markers', 'mismatches'])
def paternities_match_sum_partial(row, query=None, active_markers=None):
    """Count shared-allele mismatches over the markers present in the target.

    Variant of the paternity comparison for ``DataFrame.apply(...,
    axis=1)`` in which markers that are null in ``row`` are ignored
    rather than counted as mismatches.

    :param row: genotype of one target; each non-null cell is an iterable
        of alleles.
    :param query: genotype of the query sample, indexed by marker.
    :param active_markers: markers to compare (``pd.Index`` or list-like).
    :returns: ``pd.Series`` with ``markers`` (number of markers actually
        compared) and ``mismatches`` (compared markers without a shared
        allele).
    """
    # Use the named set operation: the ``&`` operator on an Index is not
    # a set intersection on current pandas releases.
    present = row.index[row.notnull().values].intersection(active_markers)
    unshared = sum(
        not (set(query_alleles) & set(target_alleles))
        for query_alleles, target_alleles in zip(query[present], row[present])
    )
    return pd.Series([len(present), unshared],
                     index=['markers', 'mismatches'])
def identities_match_sum(row, query=None, active_markers=None):
    """Count active markers at which a target differs from the query.

    Written to be used with ``DataFrame.apply(..., axis=1)``. Every
    active marker is compared, so a null cell in ``row`` counts as a
    difference.

    :param row: genotype of one target.
    :param query: genotype of the query sample, indexed by marker.
    :param active_markers: markers to compare.
    :returns: ``pd.Series`` with ``markers`` (number of active markers)
        and ``mismatches`` (markers whose values are not equal).
    """
    target_values = row[active_markers]
    query_values = query[active_markers]
    differing = target_values != query_values
    return pd.Series([len(active_markers), differing.sum()],
                     index=['markers', 'mismatches'])
def identities_match_sum_partial(row, query=None, active_markers=None):
    """Count differing markers over the markers present in the target.

    Variant of the identity comparison for ``DataFrame.apply(...,
    axis=1)`` in which markers that are null in ``row`` are ignored.

    :param row: genotype of one target.
    :param query: genotype of the query sample, indexed by marker.
    :param active_markers: markers to compare (``pd.Index`` or list-like).
    :returns: ``pd.Series`` with ``markers`` (number of markers actually
        compared) and ``mismatches`` (compared markers whose values are
        not equal).
    """
    # Use the named set operation: the ``&`` operator on an Index is not
    # a set intersection on current pandas releases.
    present = row.index[row.notnull().values].intersection(active_markers)
    differing = row[present] != query[present]
    return pd.Series([len(present), differing.sum()],
                     index=['markers', 'mismatches'])
class IndexCalculator:
    """Base class for likelihood-index calculators backed by an
    allele-frequency table.

    Subclasses implement ``get_cumulative_index`` and ``get_index``.
    """

    formula = {}

    def __init__(self, af_table):
        # Table indexed as ``af_table[marker][allele]``.
        self.af_table = af_table

    def get_af(self, marker, allele):
        """Return the frequency of ``allele`` at ``marker``.

        Falls back to ``0.001`` when the marker is not a column of the
        table, when the allele is not found, or when the stored value is
        NaN.
        """
        fallback = 0.001
        if marker not in self.af_table.columns:
            return fallback
        try:
            frequency = self.af_table[marker][allele]
        except KeyError:
            return fallback
        return fallback if np.isnan(frequency) else frequency

    def get_cumulative_index(self, markers, query, target):
        """Combine per-marker indices; must be provided by subclasses."""
        raise NotImplementedError

    def get_index(self, marker, a1, a2, b1, b2):
        """Compute the index of a single marker; must be provided by
        subclasses."""
        raise NotImplementedError
class IdentityIndexCalculator(IndexCalculator):
    """Index calculator for identity (same-genotype) comparisons."""

    def get_cumulative_index(self, markers, query, target):
        """Return the reciprocal of the product of genotype frequencies.

        Only markers that are not in ``config.SEX_MARKERS`` and whose
        genotype is equal in ``query`` and ``target`` contribute. A
        contributing genotype ``(a, b)`` has frequency ``pa * pb`` when
        both alleles are equal and ``2 * pa * pb`` otherwise.

        :param markers: iterable of marker names to consider.
        :param query: mapping/Series from marker to a two-allele genotype.
        :param target: mapping/Series from marker to a genotype.
        :returns: ``1 / product``; ``1`` when no marker contributes.
        """
        product = 1
        for marker in markers:
            if marker in config.SEX_MARKERS:
                continue
            if not (query[marker] == target[marker]):
                continue
            first, second = query[marker]
            freq_first = self.get_af(marker, first)
            freq_second = self.get_af(marker, second)
            if first != second:
                genotype_freq = 2 * freq_first * freq_second
            else:
                genotype_freq = freq_first * freq_second
            product *= genotype_freq
        return 1 / product
class PaternityIndexCalculator(IndexCalculator):
    """Index calculator for parent-child (shared allele) comparisons."""

    # Formula number -> function of the two allele frequencies chosen by
    # ``find_formula``.
    formula = {
        1: lambda pa, pb: 0.25 / pa,
        2: lambda pa, pb: (pa + pb) / (4 * pa * pb),
        3: lambda pa, pb: 0.5 / pa,
        4: lambda pa, pb: 0.5 / pa,
        5: lambda pa, pb: 1 / pa,
    }

    def get_cumulative_index(self, markers, query, target):
        """Multiply the per-marker paternity indices.

        A marker contributes only when it is not in
        ``config.SEX_MARKERS``, both genotypes are lists, and the two
        genotypes share at least one allele.

        :param markers: iterable of marker names to consider.
        :param query: mapping/Series from marker to a two-allele list.
        :param target: mapping/Series from marker to a two-allele list.
        :returns: the product of the contributing indices (``1`` if none).
        """
        cumulative = 1
        for marker in markers:
            if marker in config.SEX_MARKERS:
                continue
            if type(query[marker]) is not list:
                continue
            if type(target[marker]) is not list:
                continue
            if not (set(query[marker]) & set(target[marker])):
                continue
            q_first, q_second = query[marker]
            t_first, t_second = target[marker]
            cumulative *= self.get_index(marker, q_first, q_second,
                                         t_first, t_second)
        return cumulative

    def get_index(self, marker, a1, a2, b1, b2):
        """Return the paternity index of one marker.

        ``find_formula`` selects the formula number and the two alleles
        whose frequencies feed it; an ``AssertionError`` from it yields a
        neutral index of ``1``.
        """
        # NOTE(review): ``find_formula`` is not defined in the visible
        # part of this module -- confirm where it is provided.
        try:
            number, allele_a, allele_b = self.find_formula([a1, a2], [b1, b2])
        except AssertionError:
            return 1
        freq_a = self.get_af(marker, allele_a)
        freq_b = self.get_af(marker, allele_b)
        compute = self.formula[number]
        return compute(freq_a, freq_b)
class KinshipIndexCalculator(IndexCalculator):
    """Index calculator for a fixed set of kinship relationships."""

    # Relationship name -> three coefficients; the formulas below read
    # them as ``phi[0]``, ``phi[1]`` and ``phi[2]``.
    coefficients = {
        'parent-child': (0, 1, 0),
        'full-sib': (1.0 / 4, 1.0 / 2, 1.0 / 4),
        'half-sib': (1.0 / 2, 1.0 / 2, 0),
        'first cousin': (3.0 / 4, 1.0 / 4, 0),
        'unrelated': (1, 0, 0),
        '5 chon': (7.0 / 8, 1.0 / 8, 0),
        '6 chon': (15.0 / 16, 1.0 / 16, 0),
        '7 chon': (31.0 / 32, 1.0 / 32, 0),
        '8 chon': (63.0 / 64, 1.0 / 64, 0),
    }

    # Relationship names in sorted order.
    relationships = sorted(coefficients)

    # Human-readable (Korean) description of each relationship.
    relationships_explanation = {
        'parent-child': '부모-자식관계',
        'full-sib': '형제관계 (부모가 모두 같을 경우)',
        'half-sib': '이복/이부 형제, 삼촌-조카, 조부-손자',
        'first cousin': '4촌 형제관계',
        'unrelated': '무관한 관계',
        '5 chon': '5촌',
        '6 chon': '6촌 형제',
        '7 chon': '7촌',
        '8 chon': '8촌 형제',
    }

    # Formula number (chosen by ``find_formula``) -> function of the
    # coefficients and up to four allele frequencies.
    formulas = {
        1: (lambda phi, pa, pb, pc, pd:
            phi[2] + 0.5 * phi[1] * (pa + pb) + 2 * phi[0] * pa * pb),
        2: (lambda phi, pa, pb, pc, pd:
            phi[2] + phi[1] * pa + phi[0] * pa * pa),
        3: (lambda phi, pa, pb, pc, pd:
            phi[1] * pb + 2 * phi[0] * pa * pb),
        4: (lambda phi, pa, pb, pc, pd:
            0.5 * phi[1] * pa + phi[0] * pa * pa),
        5: (lambda phi, pa, pb, pc, pd:
            0.5 * phi[1] * pc + 2 * phi[0] * pa * pc),
        6: (lambda phi, pa, pb, pc, pd: 2 * phi[0] * pc * pd),
        7: (lambda phi, pa, pb, pc, pd: phi[0] * pb * pb),
        8: (lambda phi, pa, pb, pc, pd: 2 * phi[0] * pb * pc),
    }

    def get_cumulative_index(self, query, target, relationship):
        """Multiply the per-marker indices for one relationship.

        Iterates over ``target.index``; markers whose target value is not
        a list are skipped.

        :param query: mapping/Series from marker to the query alleles.
        :param target: Series from marker to the target alleles.
        :param relationship: key of ``coefficients``.
        :returns: the product of the per-marker indices (``1`` if none).
        """
        # A list comprehension fed to a product was measured as slower by
        # the original author, hence the explicit loop.
        cumulative = 1
        for marker in target.index:
            alleles = target[marker]
            if type(alleles) is not list:
                continue
            cumulative *= self.get_index(marker, relationship,
                                         query[marker], alleles)
        return cumulative

    def get_index(self, marker, relationship, query_alleles, target_alleles):
        """Return the index of one marker for one relationship.

        ``find_formula`` selects the formula number and four alleles; a
        formula number of ``None`` yields a neutral index of ``1``.
        """
        # NOTE(review): ``find_formula`` is not defined in the visible
        # part of this module -- confirm where it is provided.
        number, allele_a, allele_b, allele_c, allele_d = self.find_formula(
            query_alleles, target_alleles)
        if number is None:
            return 1
        frequencies = [self.get_af(marker, allele)
                       for allele in (allele_a, allele_b, allele_c, allele_d)]
        phi = self.coefficients[relationship]
        return self.formulas[number](phi, *frequencies)
class BaseSearcher(object):
    """Common state for searchers comparing one query against many targets."""

    def __init__(self, query=None, df_genotypes=None):
        self.set_params(query, df_genotypes)

    def set_params(self, query, df_genotypes):
        """Store the query and the target genotypes.

        :param query: genotype of the query sample. When it is a pandas
            ``Series`` or ``DataFrame`` its ``name`` (if any) is kept as
            ``self.identifier``; otherwise the identifier is ``None``.
        :param df_genotypes: target genotypes.
        """
        self.genotypes = df_genotypes
        self.query = query
        if isinstance(query, (pd.Series, pd.DataFrame)):
            # A DataFrame has no ``name`` attribute by default, so read it
            # defensively instead of raising AttributeError.
            self.identifier = getattr(query, 'name', None)
        else:
            self.identifier = None

    def search_identities(self):
        """Search identical targets; must be provided by subclasses."""
        raise NotImplementedError
class MtdnaSearcher(BaseSearcher):
    """Identity search over mtDNA genotypes (one column per marker)."""

    def exclude_markers_in_cstretch(self, columns):
        """Return ``columns`` without the markers in the excluded ranges.

        A marker is dropped when its decoded position lies in
        16279-16282 or 16384-16393 (both inclusive).

        :param columns: iterable of encoded marker names.
        :returns: ``pd.Index`` of the remaining marker names.
        """
        kept = []
        for column in columns:
            position = float(decode_marker(column, 'mtDNA'))
            if 16279 <= position <= 16282 or 16384 <= position <= 16393:
                continue
            kept.append(column)
        return pd.Index(kept)

    def _query_columns(self, exclude_cstretches):
        """Return the query markers, optionally without the excluded ranges."""
        if exclude_cstretches:
            return self.exclude_markers_in_cstretch(self.query.index)
        return self.query.index

    def _without_query_row(self, targets):
        """Return ``targets`` without the row of the query sample, if any."""
        if self.identifier is not None and self.identifier in targets.index:
            return targets.drop(self.identifier)
        return targets

    def search_identities_by_query_index(self, permit_mismatches=2,
                                         exclude_cstretches=False):
        """Compare every target on all non-null markers of the query.

        Markers of the query that are missing from the target table are
        treated as null in every target.

        :param permit_mismatches: largest mismatch count to keep.
        :param exclude_cstretches: drop the markers removed by
            ``exclude_markers_in_cstretch`` before comparing.
        :returns: DataFrame with ``markers`` and ``mismatches`` columns,
            one row per retained target.
        """
        columns = self._query_columns(exclude_cstretches)
        # ``reindex`` pads markers missing from the table with NaN. The
        # padded frame (not ``self.genotypes``) is the one the query row
        # must be removed from; selecting ``self.genotypes[columns]`` again
        # fails on missing markers and used to leave the query row in.
        targets = self._without_query_row(
            self.genotypes.reindex(columns=columns))
        query_t = self.query[columns]
        mismatches = targets.apply(self._compare_by_query_index,
                                   query=query_t, axis=1)
        result = pd.DataFrame(mismatches, columns=['mismatches'])
        result['markers'] = query_t.notnull().sum()
        return result.loc[result['mismatches'] <= permit_mismatches,
                          ['markers', 'mismatches']]

    def _compare_by_query_index(self, row, query):
        """Count differences on the markers that are non-null in ``query``."""
        columns = query.notnull()
        return (row[columns] != query[columns]).sum()

    def search_identities_intersection(self, permit_mismatches=2,
                                       exclude_cstretches=False):
        """Compare every target on the markers non-null on both sides.

        Only markers present in both the query and the target table are
        considered.

        :param permit_mismatches: largest mismatch count to keep.
        :param exclude_cstretches: drop the markers removed by
            ``exclude_markers_in_cstretch`` before comparing.
        :returns: DataFrame with ``markers`` and ``mismatches`` columns,
            one row per retained target.
        """
        columns = self._query_columns(exclude_cstretches).intersection(
            self.genotypes.columns)
        targets = self._without_query_row(self.genotypes[columns])
        if targets.empty:
            # ``apply`` on an empty frame does not produce the two result
            # columns, so return the empty result explicitly.
            return pd.DataFrame(columns=['markers', 'mismatches'])
        query_t = self.query[columns]
        result = targets.apply(self._compare_intersection,
                               query=query_t, axis=1)
        return result.loc[result['mismatches'] <= permit_mismatches,
                          ['markers', 'mismatches']]

    def _compare_intersection(self, row, query):
        """Return compared-marker and mismatch counts for one target."""
        shared = row.notnull() & query.notnull()
        return pd.Series([shared.sum(),
                          (row[shared] != query[shared]).sum()],
                         index=['markers', 'mismatches'])

    def _compare_intersection_columns(self, row, query):
        """Return the number of markers non-null in both ``row`` and ``query``."""
        return (row.notnull() & query.notnull()).sum()

    def search_identities(self, permit_mismatches=2, exclude_cstretches=False,
                          partial=False,
                          is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Search targets whose mtDNA matches the query.

        :param permit_mismatches: largest mismatch count to keep.
        :param exclude_cstretches: see ``exclude_markers_in_cstretch``.
        :param partial: when true, compare only markers that are non-null
            in both the query and the target; otherwise compare all
            non-null markers of the query.
        :param is_multiprocess: accepted for signature parity with the
            other searchers; not used here.
        :returns: DataFrame with ``markers`` and ``mismatches`` columns.
        """
        if partial:
            return self.search_identities_intersection(permit_mismatches,
                                                       exclude_cstretches)
        return self.search_identities_by_query_index(permit_mismatches,
                                                     exclude_cstretches)
class StrSearcher(BaseSearcher):
    """Identity, paternity and kinship search over STR genotypes."""

    def __init__(self, query, df_genotypes):
        """
        :param query: ``pd.Series`` from marker to a list of alleles.
        :param df_genotypes: target genotypes, one column per marker.
        """
        super(StrSearcher, self).__init__(query, df_genotypes)
        active = []
        for marker in self.query.index:
            alleles = self.query.loc[marker]
            if isinstance(alleles, list) and not pd.isnull(alleles).all():
                # '.' in a marker name is replaced by '__' -- presumably
                # to match how the columns are stored; TODO confirm.
                active.append(marker.replace('.', '__'))
        # Markers for which the query carries at least one non-null allele.
        self.active_markers = pd.Index(active)
        self.active_genotypes = self.genotypes[self.active_markers]

    def _without_query_row(self, targets):
        """Return ``targets`` without the row of the query sample, if any."""
        if self.identifier is not None and self.identifier in targets.index:
            return targets.drop(self.identifier)
        return targets

    def search_identities(self, permit_mismatches=2, af_table=None,
                          partial=False,
                          is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Search targets whose genotype equals the query genotype.

        :param permit_mismatches: largest mismatch count to keep.
        :param af_table: allele-frequency table; when given and non-empty
            an ``index`` column is added to the result.
        :param partial: ignore markers that are null in a target instead
            of counting them as mismatches.
        :param is_multiprocess: use ``apply_by_multiprocessing``.
        :returns: DataFrame with ``markers`` and ``mismatches`` columns
            (plus ``index`` when a calculator is available).
        """
        calculator = None
        if af_table is not None and not af_table.empty:
            calculator = IdentityIndexCalculator(af_table)
        match_sum_func = (identities_match_sum_partial if partial
                          else identities_match_sum)
        if len(self.active_markers) == 0:
            return pd.DataFrame(columns=['markers', 'mismatches'])
        return self._search_by_match_sum(
            match_sum_func, calculator,
            permit_mismatches=permit_mismatches,
            is_multiprocess=is_multiprocess)

    def search_paternities(self, permit_mismatches=2, af_table=None,
                           partial=False,
                           is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Search targets sharing at least one allele per marker with the
        query.

        Parameters and return value mirror ``search_identities``.
        """
        calculator = None
        if af_table is not None and not af_table.empty:
            calculator = PaternityIndexCalculator(af_table)
        match_sum_func = (paternities_match_sum_partial if partial
                          else paternities_match_sum)
        if len(self.active_markers) == 0:
            # Same guard as ``search_identities``: nothing to compare.
            return pd.DataFrame(columns=['markers', 'mismatches'])
        return self._search_by_match_sum(
            match_sum_func, calculator,
            permit_mismatches=permit_mismatches,
            is_multiprocess=is_multiprocess)

    def _search_by_match_sum(self, match_sum_func, calculator,
                             permit_mismatches=2,
                             is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Apply ``match_sum_func`` to every target and filter the result.

        :param match_sum_func: one of the module-level ``*_match_sum``
            row functions.
        :param calculator: index calculator or ``None``.
        :param permit_mismatches: largest mismatch count to keep.
        :param is_multiprocess: use ``apply_by_multiprocessing``.
        :returns: DataFrame sorted by ``mismatches`` with ``markers`` and
            ``mismatches`` columns, plus ``index`` when ``calculator`` is
            given and at least one row remains. Empty when there is no
            target.
        """
        targets = self._without_query_row(self.active_genotypes)
        if targets.empty:
            # Callers set ``.columns`` on the result, so hand back an
            # empty frame rather than ``None``.
            return pd.DataFrame(columns=['markers', 'mismatches'])
        query = self.query[self.active_markers]
        if is_multiprocess:
            result = apply_by_multiprocessing(
                targets, match_sum_func, query=query,
                active_markers=self.active_markers, axis=1)
        else:
            result = targets.apply(
                match_sum_func, query=query,
                active_markers=self.active_markers, axis=1)
        result = result.loc[result['mismatches'] <= permit_mismatches]
        # The sorted frame must be re-bound; sorting is not in place.
        result = result.sort_values(by='mismatches', ascending=True)
        result = result.reindex(columns=['markers', 'mismatches'])
        if not result.empty and calculator:
            result['index'] = targets.loc[result.index].apply(
                self._identity_index, calculator=calculator, axis=1)
        return result

    def search_kinships(self, af_table,
                        min_kinship_index=config.DEFAULT_MIN_KINSHIP_INDEX,
                        is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Score every target against the query for each relationship.

        :param af_table: allele-frequency table for the calculator.
        :param min_kinship_index: rows whose ``max_index`` is not greater
            than this value are dropped; a falsy value selects
            ``config.DEFAULT_MIN_KINSHIP_INDEX``.
        :param is_multiprocess: use ``apply_by_multiprocessing``.
        :returns: DataFrame with one column per relationship plus
            ``max_index`` (largest index divided by the ``unrelated``
            index) and ``max_relative`` (relationship with the largest
            index), sorted in descending order.
        """
        calculator = KinshipIndexCalculator(af_table)
        empty = pd.DataFrame(columns=list(calculator.relationships) +
                             ['max_index', 'max_relative'])
        autosomal = [marker for marker in self.active_markers
                     if marker not in config.SEX_MARKERS]
        if not autosomal:
            return empty
        targets = self._without_query_row(self.active_genotypes[autosomal])
        if targets.empty:
            return empty
        if is_multiprocess:
            result = apply_by_multiprocessing(
                targets, kinship_index,
                query=self.query, calculator=calculator, axis=1)
        else:
            result = targets.apply(self._kinship_index,
                                   calculator=calculator, axis=1)
        if not min_kinship_index:
            min_kinship_index = config.DEFAULT_MIN_KINSHIP_INDEX
        if result is None:
            return pd.DataFrame(columns=['max_index', 'max_relative'])
        max_index = result.max(axis=1) / result['unrelated']
        max_relative = result.idxmax(axis=1)
        result['max_index'] = max_index
        result['max_relative'] = max_relative
        result = result[result['max_index'] > min_kinship_index]
        return result.sort_values(by=['max_index', 'max_relative'],
                                  ascending=False)

    def _identity_index(self, row, calculator=None):
        """Row function: cumulative index of one target (``None`` without
        a calculator)."""
        if calculator:
            return calculator.get_cumulative_index(self.active_markers,
                                                   self.query, row)

    def _kinship_index(self, row, calculator=None):
        """Row function: kinship indices of one target against the query."""
        return kinship_index(row, query=self.query, calculator=calculator)
class SearchIntegrator:
    """Run the A-STR, Y-STR and mtDNA searches for one query and merge
    the per-type results into a single table."""

    def __init__(self, query):
        """
        :param query: mapping with an ``'identifier'`` entry and,
            optionally, ``'A-STR'``, ``'Y-STR'`` and ``'mtDNA'`` genotypes.
        """
        self.query = query
        self.identifier = query['identifier']

    def _series_query(self, genotype_type):
        """Return the query genotype of one type as a named Series."""
        return pd.Series(self.query[genotype_type], name=self.identifier)

    def _join_prefixed(self, results, result, prefix):
        """Join ``result`` onto ``results`` with ``prefix``-ed column names."""
        if result is None:
            return results
        result = result.copy()
        result.columns = ['{} {}'.format(prefix, c) for c in result.columns]
        return results.join(result)

    def _filter_by_mismatches(self, results, prefix, kwargs, partial):
        """Keep the rows within the mismatch limit of one genotype type.

        Nothing is filtered when the type produced no ``markers`` column
        or when no finite ``permit_mismatches`` was requested. With
        ``partial`` the rows lacking a result for this type are kept too.
        """
        limit = kwargs.get('permit_mismatches')
        if ('{} markers'.format(prefix) not in results.columns
                or limit is None or np.isinf(limit)):
            return results
        keep = results['{} mismatches'.format(prefix)] <= limit
        if partial:
            keep = pd.isnull(results['{} markers'.format(prefix)]) | keep
        return results.loc[keep]

    def search_relationship(self, relationship_type,
                            astrs=None, ystrs=None, mtdnas=None,
                            astr_kwargs=None, ystr_kwargs=None,
                            mtdna_kwargs=None, partial=False,
                            is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Search every supplied genotype table and merge the results.

        :param relationship_type: ``'identity'``, ``'paternity'`` or
            ``'kinship'``; selects the A-STR search. Y-STR and mtDNA are
            always searched for identity.
        :param astrs, ystrs, mtdnas: target genotype tables or ``None``.
        :param astr_kwargs, ystr_kwargs, mtdna_kwargs: keyword arguments
            for the respective searcher; the given dicts are not modified.
        :param partial: keep rows that have no result for a genotype type
            when filtering on that type's mismatch limit.
        :param is_multiprocess: forwarded to the searchers.
        :returns: DataFrame indexed by target identifier with columns
            prefixed ``'A-STR '``, ``'Y-STR '`` and ``'mtDNA '``.
        :raises ParameterException: for an unknown ``relationship_type``
            when an A-STR search is requested.
        """
        astr_kwargs = dict(astr_kwargs or {})
        ystr_kwargs = dict(ystr_kwargs or {})
        mtdna_kwargs = dict(mtdna_kwargs or {})

        # The searchers run without a mismatch limit; the limits given by
        # the caller are applied after the per-type results are joined.
        astr_call = dict(astr_kwargs)
        astr_call['is_multiprocess'] = is_multiprocess
        if relationship_type != 'kinship':
            astr_call['permit_mismatches'] = float('inf')
        ystr_call = dict(ystr_kwargs)
        ystr_call['permit_mismatches'] = float('inf')
        ystr_call['is_multiprocess'] = is_multiprocess
        mtdna_call = dict(mtdna_kwargs)
        mtdna_call['permit_mismatches'] = float('inf')
        mtdna_call['is_multiprocess'] = is_multiprocess

        # Union of all target identifiers, without the query itself.
        index_total = pd.Index([])
        for genotypes in (astrs, ystrs, mtdnas):
            if genotypes is not None:
                index_total = index_total.union(genotypes.index)
        if self.identifier in index_total:
            index_total = index_total.drop(self.identifier)
        results = pd.DataFrame(index=index_total)

        if astrs is not None and 'A-STR' in self.query:
            searcher = StrSearcher(self._series_query('A-STR'), astrs)
            if relationship_type == 'identity':
                result = searcher.search_identities(**astr_call)
            elif relationship_type == 'paternity':
                result = searcher.search_paternities(**astr_call)
            elif relationship_type == 'kinship':
                result = searcher.search_kinships(**astr_call)
            else:
                raise ParameterException(
                    '{} is not proper value for relationship_type'.format(
                        relationship_type))
            results = self._join_prefixed(results, result, 'A-STR')
        if ystrs is not None and 'Y-STR' in self.query:
            searcher = StrSearcher(self._series_query('Y-STR'), ystrs)
            results = self._join_prefixed(
                results, searcher.search_identities(**ystr_call), 'Y-STR')
        if mtdnas is not None and 'mtDNA' in self.query:
            searcher = MtdnaSearcher(self._series_query('mtDNA'), mtdnas)
            results = self._join_prefixed(
                results, searcher.search_identities(**mtdna_call), 'mtDNA')

        if astrs is not None and relationship_type == 'kinship':
            if 'A-STR max_index' in results.columns:
                results = results.loc[results['A-STR max_index'].notnull()]
            else:
                # No A-STR result for this query: no row can qualify.
                results = results.iloc[0:0]

        results = self._filter_by_mismatches(results, 'A-STR',
                                             astr_kwargs, partial)
        results = self._filter_by_mismatches(results, 'Y-STR',
                                             ystr_kwargs, partial)
        results = self._filter_by_mismatches(results, 'mtDNA',
                                             mtdna_kwargs, partial)

        # (column, ascending) pairs; a column is used only when present.
        sort_columns = []
        if 'A-STR max_index' in results.columns and (
                not results['A-STR max_index'].isnull().all()):
            sort_columns.append(('A-STR max_index', False))
        elif 'A-STR mismatches' in results.columns and (
                results['A-STR mismatches'].notnull().sum() > 1):
            sort_columns.append(('A-STR mismatches', True))
            # 'A-STR index' exists only when an allele-frequency table
            # was supplied to the A-STR search.
            if 'A-STR index' in results.columns:
                sort_columns.append(('A-STR index', False))
        for prefix in ('Y-STR', 'mtDNA'):
            column = '{} mismatches'.format(prefix)
            if column in results.columns and (
                    results[column].notnull().sum() > 1):
                sort_columns.append((column, True))

        if sort_columns and not results.empty:
            if len(sort_columns) == 1 and (
                    'A-STR max_relative' in results.columns):
                # Secondary key kept from the original single-column
                # sorting workaround.
                sort_columns.append(('A-STR max_relative', True))
            results = results.sort_values(
                by=[column for column, _ in sort_columns],
                ascending=[ascending for _, ascending in sort_columns])
        return results

    def search_kinships(self, astrs=None, ystrs=None,
                        astr_kwargs=None, ystr_kwargs=None):
        """Run the kinship search on A-STR and/or Y-STR tables.

        :param astrs, ystrs: target genotype tables or ``None``.
        :param astr_kwargs, ystr_kwargs: keyword arguments for
            ``StrSearcher.search_kinships``.
        :returns: the per-type results concatenated column-wise with
            ``'A-STR '`` / ``'Y-STR '`` prefixes; an empty DataFrame when
            no search could be run.
        """
        frames = []
        searches = (('A-STR', astrs, astr_kwargs),
                    ('Y-STR', ystrs, ystr_kwargs))
        for prefix, genotypes, kwargs in searches:
            # Test for presence explicitly: the truth value of a
            # DataFrame is ambiguous and raises.
            if prefix not in self.query or genotypes is None:
                continue
            if len(genotypes) == 0:
                continue
            searcher = StrSearcher(self._series_query(prefix), genotypes)
            result = searcher.search_kinships(**(kwargs or {}))
            result.columns = ['{} {}'.format(prefix, c)
                              for c in result.columns]
            frames.append(result)
        if not frames:
            return pd.DataFrame()
        return pd.concat(frames, axis=1)
class CheckerBetween():
    """Marker-by-marker comparison of two genotypes."""

    def __init__(self, query_a, query_b, type_, af_table=None):
        """
        :param query_a: named ``pd.Series`` from marker to alleles.
        :param query_b: named ``pd.Series`` from marker to alleles.
        :param type_: genotype type passed to ``decode_alleles``.
        :param af_table: allele-frequency table, or ``None`` to skip the
            index columns of the identity/paternity checks.
        """
        self.query_a = query_a
        self.query_b = query_b
        self.type_ = type_
        # Markers for which ``query_a`` carries a value.
        self.active_markers = self.query_a.index[self.query_a.notnull()]
        self.af_table = af_table

    def arrange_result(self, result, true_string):
        """Return a display copy of ``result`` with a ``total`` row.

        In the ``total`` row the ``status`` column holds the number of
        rows whose status equals ``true_string`` and every index column
        holds the product of its non-null values. Allele cells are
        replaced by ``decode_alleles`` output unless that output is
        ``'-'``.

        :param result: frame produced by one of the ``check_*`` methods.
        :param true_string: status value to count.
        :returns: a new DataFrame; ``result`` is left untouched.
        """
        samples = [self.query_a.name, self.query_b.name]
        arranged = result.copy()
        totals = {}
        for column in arranged.columns:
            if column == 'status':
                totals['status'] = sum(arranged['status'] == true_string)
            elif column not in samples and column != 'shared_allele':
                present = arranged.loc[arranged[column].notnull(), column]
                totals[column] = np.prod(list(present))
                arranged[column] = arranged[column].astype(object)
        allele_columns = list(samples)
        if 'shared_allele' in arranged.columns:
            allele_columns.append('shared_allele')
        for column in allele_columns:
            for marker in arranged[column].index:
                alleles = decode_alleles(arranged.loc[marker, column],
                                         self.type_, marker, null=np.nan)
                if alleles != '-':
                    arranged.loc[marker, column] = alleles
        total_row = pd.Series(totals, name='total', dtype=object)
        # Align the totals on the existing columns so that concatenation
        # neither adds nor reorders columns.
        total_frame = pd.DataFrame([total_row.reindex(arranged.columns)])
        return pd.concat([arranged, total_frame])

    def _paired_frame(self, drop_empty=True):
        """Return a frame holding both genotypes side by side.

        With ``drop_empty`` the markers that are null in both genotypes
        are removed.
        """
        frame = pd.DataFrame(index=self.query_a.index)
        frame[self.query_a.name] = self.query_a
        frame[self.query_b.name] = self.query_b
        if drop_empty:
            present = (frame[self.query_a.name].notnull() |
                       frame[self.query_b.name].notnull())
            frame = frame.loc[present].copy()
        return frame

    def _per_marker_index(self, calculator):
        """Return the index of every active marker as a Series."""
        # Work on a copy so that the caller's Series is not modified when
        # markers missing from ``query_b`` are filled in.
        query_b = self.query_b.copy()
        values = []
        for marker in self.active_markers:
            if marker not in query_b:
                query_b[marker] = None
            values.append(calculator.get_cumulative_index(
                pd.Index([marker]), self.query_a, query_b))
        return pd.Series(values, index=self.active_markers)

    def check_identities(self):
        """Compare the two genotypes for identity.

        :returns: DataFrame with one row per marker holding both
            genotypes, a ``status`` of ``'matched'``, ``'partial'`` or
            ``'unmatched'`` and, when an allele-frequency table is
            available, an ``index`` column.
        """
        result = self._paired_frame()
        result['status'] = result.apply(self._check_identities, axis=1)
        if self.af_table is not None and not self.af_table.empty:
            calculator = IdentityIndexCalculator(self.af_table)
            result['index'] = self._per_marker_index(calculator)
        return result

    def _check_identities(self, row):
        """Classify one marker; ``row`` holds the two genotypes."""
        first, second = row.iloc[0], row.iloc[1]
        if first == second:
            return 'matched'
        if (type(first) != list and pd.isnull(first)) or \
                (type(second) != list and pd.isnull(second)):
            return 'partial'
        return 'unmatched'

    def check_paternities(self, add_shared_allele=False):
        """Compare the two genotypes for a shared allele per marker.

        :param add_shared_allele: add a ``shared_allele`` column listing
            the alleles common to both genotypes.
        :returns: DataFrame with one row per marker holding both
            genotypes, a ``status`` of ``'shared'``, ``'partial'`` or
            ``'unshared'`` and, when an allele-frequency table is
            available, an ``index`` column.
        """
        result = self._paired_frame()
        result['status'] = result.apply(self._check_paternities, axis=1)
        if self.af_table is not None and not self.af_table.empty:
            calculator = PaternityIndexCalculator(self.af_table)
            result['index'] = self._per_marker_index(calculator)
        if add_shared_allele:
            result = self.check_paternities_add_shared_allele(result)
        return result

    def _check_paternities(self, row):
        """Classify one marker; ``row`` holds the two genotypes."""
        first, second = row.iloc[0], row.iloc[1]
        if (type(first) != list and pd.isnull(first)) or \
                (type(second) != list and pd.isnull(second)):
            return 'partial'
        if set(first) & set(second):
            return 'shared'
        return 'unshared'

    def check_paternities_add_shared_allele(self, result):
        """Add a ``shared_allele`` column to ``result`` and return it.

        For every row whose status is ``'shared'`` the column holds the
        list of alleles common to the first two columns.
        """
        shared = {}
        for position, marker in enumerate(result.index):
            if result['status'].iloc[position] == 'shared':
                shared[marker] = list(set(result.iloc[position, 0]) &
                                      set(result.iloc[position, 1]))
        result['shared_allele'] = pd.Series(shared, name='shared_allele',
                                            dtype=object)
        return result

    def _single_marker(self, query, marker):
        """Return a one-element Series with ``query``'s value at
        ``marker`` (``None`` when the marker is missing)."""
        try:
            value = query[marker]
        except KeyError:
            value = None
        return pd.Series([value], index=[marker])

    def check_kinships(self):
        """Compute the per-marker kinship index for every relationship.

        :returns: DataFrame with one row per marker of ``query_a`` holding
            both genotypes and one column per relationship of
            ``KinshipIndexCalculator``; markers in ``config.SEX_MARKERS``
            get no index.
        """
        result = self._paired_frame(drop_empty=False)
        autosomal = [marker for marker in self.active_markers
                     if marker not in config.SEX_MARKERS]
        calculator = KinshipIndexCalculator(self.af_table)
        # The single-marker Series do not depend on the relationship.
        pairs = [(self._single_marker(self.query_a, marker),
                  self._single_marker(self.query_b, marker))
                 for marker in autosomal]
        for relationship in calculator.relationships:
            values = [calculator.get_cumulative_index(a, b, relationship)
                      for a, b in pairs]
            result[relationship] = pd.Series(values, index=autosomal)
        return result
class ManageSearcher():
    """Search one stored sample against the genotypes of a target group."""

    def __init__(self, relationship_type, identifier, target_group,
                 partial=True,
                 check_astr=True, astr_partial=True, astr_permit_mismatches=2,
                 af_table=None, min_kinship_index=1,
                 check_ystr=False, ystr_partial=True, ystr_permit_mismatches=2,
                 check_mtdna=False, mtdna_partial=True,
                 mtdna_permit_mismatches=2,
                 mtdna_exclude_cstretches=False,
                 dbname=config.DEFAULT_MONGODB_NAME):
        """
        :param relationship_type: relationship passed to
            ``SearchIntegrator.search_relationship``.
        :param identifier: identifier of the query sample.
        :param target_group: group whose genotypes are searched.
        :param partial: forwarded to ``search_relationship``.
        :param check_astr, check_ystr, check_mtdna: genotype types to use.
        :param astr_partial, ystr_partial, mtdna_partial: per-type
            ``partial`` option.
        :param astr_permit_mismatches, ystr_permit_mismatches,
            mtdna_permit_mismatches: per-type mismatch limit.
        :param af_table: key of the allele-frequency table to load, or a
            falsy value for none.
        :param min_kinship_index: threshold for kinship searches.
        :param mtdna_exclude_cstretches: forwarded to the mtDNA search.
        :param dbname: database name for the ``db`` managers.
        """
        self.relationship_type = relationship_type
        self.identifier = identifier
        self.target_group = target_group
        self.partial = partial
        self.check_astr = check_astr
        self.astr_partial = astr_partial
        self.astr_permit_mismatches = astr_permit_mismatches
        self.af_table = af_table
        self.min_kinship_index = min_kinship_index
        self.check_ystr = check_ystr
        self.ystr_partial = ystr_partial
        self.ystr_permit_mismatches = ystr_permit_mismatches
        self.check_mtdna = check_mtdna
        self.mtdna_partial = mtdna_partial
        self.mtdna_permit_mismatches = mtdna_permit_mismatches
        self.mtdna_exclude_cstretches = mtdna_exclude_cstretches
        self.dbname = dbname
        self.gm = db.GenotypeManager(self.dbname)
        self.afm = db.AlleleFrequenciesManager(self.dbname)

    def get_target_group_info(self, row, group=None):
        """Return descriptive columns of the individual behind ``row``.

        NOTE(review): ``Sample`` and ``self.TARGET_GROUP_INFO_COLUMNS``
        are not defined in the visible part of this module and the only
        call site in ``search`` is disabled -- confirm whether this
        method is still usable.
        """
        sample = Sample.objects.get(identifier=row.name)
        individual = sample.get_individual()
        columns = self.TARGET_GROUP_INFO_COLUMNS[group]
        result = []
        for column in columns:
            if column == 'sex':
                result.append(individual.get_sex_display())
            else:
                result.append(getattr(individual, column))
        return pd.Series(result, index=columns)

    def search(self):
        """Load the target genotypes, run the search and wrap the result.

        Spaces and hyphens in the result column names are replaced by
        underscores.

        :returns: ``DataContainer`` whose ``items`` is the result frame.
        """
        targets = {}
        if self.check_astr:
            targets['A-STR'] = self.gm.get_genotypes_by_group(
                'A-STR', self.target_group)
        if self.check_ystr:
            targets['Y-STR'] = self.gm.get_genotypes_by_group(
                'Y-STR', self.target_group)
        if self.check_mtdna:
            targets['mtDNA'] = self.gm.get_genotypes_by_group(
                'mtDNA', self.target_group)
        result_df = self.run_search(targets)
        if result_df is not None and not result_df.empty:
            result_df.columns = [
                column.replace(' ', '_').replace('-', '_')
                for column in result_df.columns]
        return DataContainer(status='Success', message='Search',
                             items=result_df)

    def run_search(self, targets):
        """Search the query sample against ``targets``.

        :param targets: dict from genotype type (``'A-STR'``, ``'Y-STR'``,
            ``'mtDNA'``) to a genotype table; when empty, all genotypes
            of every type are loaded instead.
        :returns: the frame produced by
            ``SearchIntegrator.search_relationship``.
        :raises NoValueException: when a requested genotype type is
            missing from the query sample.
        """
        query = self.gm.get_genotype(self.identifier)
        required = (('A-STR', self.check_astr),
                    ('Y-STR', self.check_ystr),
                    ('mtDNA', self.check_mtdna))
        for genotype_type, wanted in required:
            if wanted and genotype_type not in query:
                raise NoValueException(
                    '{} has no {}'.format(self.identifier, genotype_type))
        # Must be bound even without a table: it is passed on below.
        af_table = None
        if self.af_table:
            af_table = self.afm.get(self.af_table, 'A-STR')
        if targets:
            astrs = targets.get('A-STR')
            ystrs = targets.get('Y-STR')
            mtdnas = targets.get('mtDNA')
        else:
            astrs = self.gm.get_genotypes('A-STR')
            ystrs = self.gm.get_genotypes('Y-STR')
            mtdnas = self.gm.get_genotypes('mtDNA')
        if self.relationship_type == 'kinship':
            astr_kwargs = {
                'af_table': af_table,
                'min_kinship_index': self.min_kinship_index,
            }
        else:
            astr_kwargs = {
                'permit_mismatches': self.astr_permit_mismatches,
                'af_table': af_table,
                'partial': self.astr_partial,
            }
        si = SearchIntegrator(query)
        return si.search_relationship(
            self.relationship_type,
            astrs=astrs, ystrs=ystrs, mtdnas=mtdnas,
            astr_kwargs=astr_kwargs,
            ystr_kwargs={
                'permit_mismatches': self.ystr_permit_mismatches,
                'partial': self.ystr_partial,
            },
            mtdna_kwargs={
                'permit_mismatches': self.mtdna_permit_mismatches,
                'partial': self.mtdna_partial,
                'exclude_cstretches': self.mtdna_exclude_cstretches,
            },
            partial=self.partial,
        )
[문서]class ManageMultipleSearch():
def __init__(self, query_group='soldier', query_type='all',\
query_first = None, query_second=None,\
target_group='customer', target_type='all',\
target_first = None, target_second=None,
relationship_type='kinship', partial=True,\
check_astr=True, astr_partial=True, astr_permit_mismatches=2,\
af_table=None, min_kinship_index=1,\
check_ystr=False, ystr_partial=True, ystr_permit_mismatches=2,\
check_mtdna=False, mtdna_partial=True, mtdna_permit_mismatches=2,\
mtdna_exclude_cstretches=False, dbname=config.DEFAULT_MONGODB_NAME,\
description=None):
if query_type not in ['all', 'range', 'keyword', 'manual']:
raise ParameterException('{} is not proper value for query_type'.\
format(query_type))
if target_type not in ['all', 'range', 'keyword', 'manual']:
raise ParameterException('{} is not proper value for target_type'.\
format(target_type))
self.query_group = query_group
self.query_type = query_type
self.query_first = query_first
self.query_second = query_second
self.target_group = target_group
self.target_type = target_type
self.target_first = target_first
self.target_second = target_second
self.relationship_type = relationship_type
self.partial = partial
self.check_astr = check_astr
self.astr_partial = astr_partial
self.astr_permit_mismatches = astr_permit_mismatches
self.af_table = af_table
self.min_kinship_index = min_kinship_index
self.check_ystr = check_ystr
self.ystr_partial = ystr_partial
self.ystr_permit_mismatches = ystr_permit_mismatches
self.check_mtdna = check_mtdna
self.mtdna_partial = mtdna_partial
self.mtdna_permit_mismatches = mtdna_permit_mismatches
self.mtdna_exclude_cstretches = mtdna_exclude_cstretches
self.dbname = dbname
self.description = description
self.gm = db.GenotypeManager(self.dbname)
self.afm = db.AlleleFrequenciesManager(self.dbname)
self.tm = db.TaskManager(self.dbname)
self.grm = db.GroupManager(self.dbname)
[문서] def get_filtered_identifiers(self, group, query_type, queries):
ids = self.grm.ids_belong_group(group)
ids.sort()
if query_type == 'all':
pass
elif query_type == 'range':
query_first = queries[0]
query_second = queries[1]
if not query_first in ids:
raise NoValueException('There is no id {}'.format(query_first))
if not query_second in ids:
raise NoValueException('There is no id {}'.format(query_second))
ids = ids[ids.index(query_first) : ids.index(query_second)+1]
elif query_type == 'keyword':
keyword = queries[0]
ids = [id_ for id_ in ids if id_.find(keyword) != -1]
elif query_type == 'manual':
ids = queries[0].strip().split()
return ids
[문서] def search(self):
## make task & save into db
today_tasks = self.tm.get_alltasks('identifier', time.strftime("%Y%m%d"))
counts = today_tasks.count() if today_tasks else 0
task_id = '{}-{}'.format(time.strftime("%Y%m%d"), str(counts + 1))
astr_option = {}
ystr_option = {}
mtdna_option = {}
if self.check_astr:
astr_option = {
'permit_mismatches': self.astr_permit_mismatches,
'partial': self.astr_partial,
'min_kinship_index': self.min_kinship_index,
'af_table': self.af_table,
}
if self.check_ystr:
ystr_option = {
'permit_mismatches': self.ystr_permit_mismatches,
'partial': self.ystr_partial,
}
if self.check_mtdna:
mtdna_option = {
'permit_mismatches': self.mtdna_permit_mismatches,
'partial': self.mtdna_partial,
'exclude_cstretches': self.mtdna_exclude_cstretches,
}
## get query ids, target ids
queries = self.get_filtered_identifiers(self.query_group,\
self.query_type, [self.query_first, self.query_second])
targets = self.get_filtered_identifiers(self.target_group,\
self.target_type, [self.target_first, self.target_second])
mst = self.tm.create(identifier=task_id,
query_group=self.query_group,
query_type=self.query_type,
query_count=len(queries),
query_first=self.query_first,
query_second=self.query_second,
target_group=self.target_group,
target_type=self.target_type,
target_count=len(targets),
target_first=self.target_first,
target_second=self.target_second,
relationship_type=self.relationship_type,
description=self.description,
partial=self.partial,
astr_option=astr_option,
ystr_option=ystr_option,
mtdna_option=mtdna_option,
dbname=self.dbname
)
## run multiple searching
threading.Thread(target=self.search_multiple_async,\
args=(task_id, queries, astr_option,\
ystr_option, mtdna_option, targets,)).start()
return DataContainer(status='Success', message='Search',
items={'taskId':task_id})
def search_multiple_async(self, task_id, queries, astr_option=None,
                          ystr_option=None, mtdna_option=None, targets=None):
    """Search every query sample against the target group and store the result.

    Intended to run in a worker thread. Status, progress and the final
    result table are written to the task manager (``self.tm``) under
    ``task_id``; nothing is returned.

    Args:
        task_id: Key of the task record to update.
        queries: Iterable (with ``len``) of sample identifiers to search for.
        astr_option: A-STR options dict, or a falsy value to skip A-STR.
            Reads ``'af_table'`` plus ``'min_kinship_index'`` (kinship) or
            ``'permit_mismatches'`` / ``'partial'`` (other types).
        ystr_option: Y-STR options, passed through as ``ystr_kwargs``.
        mtdna_option: mtDNA options, passed through as ``mtdna_kwargs``.
        targets: Optional index labels restricting the target genotypes.
    """
    self.tm.update(task_id, 'status', 'Searching')
    percentage_to_record = 10

    def load_targets(type_):
        """Fetch the target genotypes of one type, narrowed to `targets`."""
        genotypes = self.gm.get_genotypes_by_group(type_, self.target_group)
        if targets is not None:
            genotypes = genotypes.loc[targets]
        return genotypes

    astrs = ystrs = mtdnas = af_table = None
    if astr_option:
        astrs = load_targets('A-STR')
        af_table = self.afm.get(astr_option['af_table'], 'A-STR')
    if ystr_option:
        ystrs = load_targets('Y-STR')
    if mtdna_option:
        mtdnas = load_targets('mtDNA')

    if self.relationship_type == 'kinship':
        # NOTE(review): a kinship search without astr_option fails on the
        # subscript below; confirm whether callers can ever do that.
        astr_kwargs = {
            'af_table': af_table,
            'min_kinship_index': astr_option['min_kinship_index'],
        }
    elif astr_option:
        astr_kwargs = {
            'permit_mismatches': astr_option['permit_mismatches'],
            'af_table': af_table,
            'partial': astr_option['partial'],
        }
    else:
        astr_kwargs = astr_option

    total = len(queries)
    # Record progress about every 10% of the queries. Clamp to 1 so that
    # fewer than ten queries do not produce a modulo-by-zero below.
    query_count_to_record = max(1, int(total * percentage_to_record / 100))

    results = []
    for i, identifier in enumerate(queries):
        print('search...', identifier)
        query = self.gm.get_genotype(identifier)
        # Drop genotype types in which every marker is None.
        for type_ in ('A-STR', 'Y-STR', 'mtDNA'):
            if type_ in query and all(
                    genotype is None for genotype in query[type_].values()):
                del query[type_]
        si = SearchIntegrator(query)
        result = si.search_relationship(
            self.relationship_type, partial=self.partial,
            astrs=astrs, ystrs=ystrs, mtdnas=mtdnas,
            astr_kwargs=astr_kwargs,
            ystr_kwargs=ystr_option,
            mtdna_kwargs=mtdna_option,
        )
        result['Identifier A'] = identifier
        result['Identifier B'] = result.index
        results.append(result)
        if (i + 1) % query_count_to_record == 0:
            self.tm.update(task_id, 'progress', ((i + 1) * 100 / total))
    self.tm.update(task_id, 'progress', 100)

    if results:
        # Newest query first, matching the previous prepend-on-every-step
        # behaviour, but with a single concatenation.
        total_result = pd.concat(results[::-1], ignore_index=True)
    else:
        total_result = pd.DataFrame()

    if self.relationship_type == 'kinship':
        columns = ['Identifier A', 'Identifier B', 'A-STR max_index',
                   'A-STR max_relative', 'A-STR parent-child',
                   'A-STR full-sib', 'A-STR half-sib', 'A-STR first cousin',
                   'A-STR 5 chon', 'A-STR 6 chon', 'A-STR 7 chon',
                   'A-STR 8 chon', 'A-STR unrelated']
    else:
        columns = ['Identifier A', 'Identifier B']
    # Fixed leading columns first, then whatever else the search produced.
    columns = columns + [column for column in total_result.columns
                         if column not in columns]
    # reindex also creates (as NaN) any leading column that is missing.
    total_result = total_result.reindex(columns=columns)

    if 'A-STR max_index' in total_result.columns and (
            not total_result['A-STR max_index'].isnull().all()):
        total_result = total_result.sort_values(
            by=['A-STR max_index'], ascending=False)
    total_result = total_result.reset_index(drop=True)

    self.tm.update(task_id, 'result', total_result.T.to_json())
    self.tm.update(task_id, 'status', 'Finished')
class ManageCheckBetween():
    """Holds the options for comparing two stored samples with each other.

    The constructor only records its arguments and opens a genotype manager
    and an allele-frequency manager on ``dbname``.
    """

    def __init__(self, relationship_type, identifier_a, identifier_b,
                 partial=True,
                 check_astr=True, astr_partial=True, astr_permit_mismatches=2,
                 af_table=None, min_kinship_index=1,
                 check_ystr=False, ystr_partial=True,
                 ystr_permit_mismatches=2,
                 check_mtdna=False, mtdna_partial=True,
                 mtdna_permit_mismatches=2,
                 mtdna_exclude_cstretches=False,
                 dbname=config.DEFAULT_MONGODB_NAME):
        # What is compared, and how the per-type decisions are combined.
        self.relationship_type = relationship_type
        self.identifier_a, self.identifier_b = identifier_a, identifier_b
        self.partial = partial

        # A-STR options.
        self.check_astr = check_astr
        self.astr_partial = astr_partial
        self.astr_permit_mismatches = astr_permit_mismatches
        self.af_table = af_table
        self.min_kinship_index = min_kinship_index

        # Y-STR options.
        self.check_ystr = check_ystr
        self.ystr_partial = ystr_partial
        self.ystr_permit_mismatches = ystr_permit_mismatches

        # mtDNA options.
        self.check_mtdna = check_mtdna
        self.mtdna_partial = mtdna_partial
        self.mtdna_permit_mismatches = mtdna_permit_mismatches
        self.mtdna_exclude_cstretches = mtdna_exclude_cstretches

        # Database access.
        self.dbname = dbname
        self.gm = db.GenotypeManager(self.dbname)
        self.afm = db.AlleleFrequenciesManager(self.dbname)
def total_row(self, result, is_kinship=False, is_paternity=False):
    """Build the summary row(s) that go underneath a per-marker table.

    Per column of ``result`` the total is:
      * the two identifier columns: number of non-null cells;
      * ``'shared_allele'``: NaN;
      * ``'status'``: number of rows that are ``'partial'`` or the positive
        status (``'shared'`` for paternity, ``'matched'`` otherwise);
      * anything else: product of the column.

    Returns:
        A one-row frame indexed ``'total'``; for kinship a three-row frame
        (``'total'``, ``'KI'``, ``'probability'``) where KI is the total
        divided by the ``'unrelated'`` total and probability is
        ``KI * 100 / (KI + 1)``.
    """
    identifier_columns = (self.identifier_a, self.identifier_b)
    positive_status = 'shared' if is_paternity else 'matched'

    totals = []
    for name in result.columns:
        values = result[name]
        if name in identifier_columns:
            cell = values.count()
        elif name == 'shared_allele':
            cell = np.nan
        elif name == 'status':
            cell = sum((values == positive_status) | (values == 'partial'))
        else:
            cell = np.prod(values)
        totals.append(cell)
    totals = pd.Series(totals, index=result.columns)

    if not is_kinship:
        return pd.DataFrame({'total': totals}).T

    ratios = totals / totals['unrelated']
    probabilities = ratios * 100 / (ratios + 1)
    summary = pd.DataFrame(
        {'total': totals, 'KI': ratios, 'probability': probabilities},
        columns=['total', 'KI', 'probability']).T
    # Ratios are meaningless for the genotype columns themselves.
    summary.loc['KI':'probability',
                self.identifier_a:self.identifier_b] = np.nan
    return summary
def check(self):
    """Compare the two samples for every enabled genotype type.

    A type is processed only when it is enabled (``check_astr`` /
    ``check_ystr`` / ``check_mtdna``) and present in both samples. Y-STR and
    mtDNA are always compared as identities; A-STR uses
    ``self.relationship_type``.

    Returns:
        DataContainer whose ``items`` dict may hold ``astr_result``,
        ``astr_result_summary``, ``ordered_relationship``,
        ``ordered_astr_result`` (kinship only), ``ystr_result``,
        ``mtdna_result``, plus ``sample_a``, ``sample_b`` and ``report``.
    """
    def drop_untyped_rows(frame):
        """Keep only the markers typed in at least one of the two samples."""
        return frame.loc[frame[self.identifier_a].notnull()
                         | frame[self.identifier_b].notnull()]

    sample_a = self.gm.get_genotype(self.identifier_a)
    sample_b = self.gm.get_genotype(self.identifier_b)
    result = {}

    if self.check_astr and ('A-STR' in sample_a) and ('A-STR' in sample_b):
        astr_result = self.run_check_between(
            self.relationship_type, 'A-STR', shared_allele=True,
            partial=self.astr_partial)
        astr_result = drop_untyped_rows(astr_result)
        is_kinship = self.relationship_type == 'kinship'
        is_paternity = self.relationship_type == 'paternity'
        astr_result = pd.concat([
            astr_result,
            self.total_row(astr_result, is_kinship, is_paternity)])
        astr_result.index = [decode_marker(marker, 'A-STR')
                             for marker in astr_result.index]
        result['astr_result'] = astr_result

        if is_kinship:
            # `.ix` and `DataFrame.sort` no longer exist in pandas; use the
            # label/positional indexers and `sort_values` instead.
            astr_result_summary = astr_result.loc[
                ['total', 'KI', 'probability'],
                ['parent-child', 'full-sib', 'half-sib',
                 'first cousin', '5 chon', '6 chon',
                 'unrelated']]
            astr_result_summary = astr_result_summary.T.sort_values(
                ['KI'], ascending=False).T
            display_names = {
                'parent-child': 'Parent child',
                'half-sib': 'Half sibling',
                'full-sib': 'Full sibling',
                'first cousin': 'First cousin',
                'unrelated': 'Unrelated',
            }
            astr_result_summary.columns = [
                display_names.get(column, column)
                for column in astr_result_summary.columns]
            result['astr_result_summary'] = astr_result_summary

            # Everything after the two genotype columns is a relationship.
            ordered_relationship = list(
                astr_result.iloc[:, 2:].T.sort_values(
                    'probability', ascending=False).index)
            ordered_relationship.remove('7 chon')
            ordered_relationship.remove('8 chon')
            result['ordered_relationship'] = ordered_relationship
            result['ordered_astr_result'] = astr_result[
                list(astr_result.columns[:2]) + ordered_relationship]

    if self.check_ystr and ('Y-STR' in sample_a) and ('Y-STR' in sample_b):
        ystr_result = self.run_check_between(
            'identity', 'Y-STR', False, self.ystr_partial)
        ystr_result = drop_untyped_rows(ystr_result)
        ystr_result = pd.concat([ystr_result, self.total_row(ystr_result)])
        ystr_result.index = [decode_marker(marker, 'Y-STR')
                             for marker in ystr_result.index]
        result['ystr_result'] = ystr_result

    if self.check_mtdna and ('mtDNA' in sample_a) and ('mtDNA' in sample_b):
        mtdna_result = self.run_check_between(
            'identity', 'mtDNA', False, self.mtdna_partial)
        mtdna_result = drop_untyped_rows(mtdna_result)
        # Order rows by the numeric value of the decoded marker, falling
        # back to the encoded marker to break ties.
        mtdna_index = sorted(
            mtdna_result.index,
            key=lambda marker: (float(decode_marker(marker, 'mtDNA')),
                                marker))
        mtdna_result = mtdna_result.loc[mtdna_index]
        mtdna_result = pd.concat([mtdna_result,
                                  self.total_row(mtdna_result)])
        mtdna_result.index = [decode_marker(marker, 'mtDNA')
                              for marker in mtdna_result.index]
        result['mtdna_result'] = mtdna_result

    # The '_id' entry is stripped from the returned samples; tolerate its
    # absence instead of failing after all the work is done.
    sample_a.pop('_id', None)
    sample_b.pop('_id', None)
    result['sample_a'] = sample_a
    result['sample_b'] = sample_b
    result['report'] = self.make_report(result)
    return DataContainer(status='Success',
                         message='Success checking between search',
                         items=result)
def get_astr_report(self, df):
    """Summarise an A-STR comparison table into options and a decision.

    Args:
        df: Table with a ``'total'`` row and a column named after
            ``self.identifier_a``. Kinship additionally reads the ``'KI'``
            row; the other types read the ``'status'`` column.

    Returns:
        dict with ``'astr_option'`` (the options used) and ``'astr_report'``.
        For kinship the report holds ``markers``, ``relation``, ``index``
        and ``decision``; otherwise ``markers``, ``matches`` and
        ``decision``. ``decision`` is always a plain ``bool``.
    """
    result = {
        'astr_option': {
            'permit_mismatches': self.astr_permit_mismatches,
            'partial': self.astr_partial,
            'min_kinship_index': self.min_kinship_index,
            'af_table': self.af_table,
        },
    }
    markers = int(df.loc['total', self.identifier_a])

    if self.relationship_type == 'kinship':
        # NOTE(review): only the columns from '5 chon' through 'unrelated'
        # are considered; whether closer relationships fall inside that
        # label range depends on the column order - confirm.
        ci_series = df.loc['KI', '5 chon':'unrelated']
        index = ci_series.max()
        result['astr_report'] = {
            'markers': markers,
            # First relationship whose index equals the maximum.
            'relation': ci_series[ci_series == index].index[0],
            'index': index,
            'decision': bool(index >= self.min_kinship_index),
        }
        return result

    matches = int(df.loc['total', 'status'])
    if self.astr_partial:
        # In partial mode only explicit failures count as mismatches.
        if self.relationship_type == 'identity':
            failed_status = 'unmatched'
        else:
            failed_status = 'unshared'
        mismatches = df.loc[df['status'] == failed_status, 'status'].count()
    else:
        mismatches = markers - matches
    result['astr_report'] = {
        'markers': markers,
        'matches': matches,
        'decision': bool(mismatches <= self.astr_permit_mismatches),
    }
    return result
def get_ystr_report(self, df):
    """Summarise a Y-STR comparison table into options and a decision.

    Args:
        df: Table with a ``'total'`` row, a ``'status'`` column and a column
            named after ``self.identifier_a``.

    Returns:
        dict with ``'ystr_option'`` (the options used) and ``'ystr_report'``
        (``markers``, ``matches`` and a plain-``bool`` ``decision``).
    """
    markers = int(df.loc['total', self.identifier_a])
    matches = int(df.loc['total', 'status'])
    if self.ystr_partial:
        # In partial mode only rows marked 'unmatched' count as mismatches.
        mismatches = df.loc[df['status'] == 'unmatched', 'status'].count()
    else:
        mismatches = markers - matches
    return {
        'ystr_option': {
            'permit_mismatches': self.ystr_permit_mismatches,
            'partial': self.ystr_partial,
        },
        'ystr_report': {
            'markers': markers,
            'matches': matches,
            # Cast so both branches yield a builtin bool, not numpy.bool_.
            'decision': bool(mismatches <= self.ystr_permit_mismatches),
        },
    }
def get_mtdna_report(self, df):
    """Summarise an mtDNA comparison table into options and a decision.

    Args:
        df: Table with a ``'total'`` row, a ``'status'`` column and a column
            named after ``self.identifier_a``.

    Returns:
        dict with ``'mtdna_option'`` (the options used) and
        ``'mtdna_report'`` (``markers``, ``matches`` and a plain-``bool``
        ``decision``).
    """
    markers = int(df.loc['total', self.identifier_a])
    matches = int(df.loc['total', 'status'])
    if self.mtdna_partial:
        # In partial mode only rows marked 'unmatched' count as mismatches.
        mismatches = df.loc[df['status'] == 'unmatched', 'status'].count()
    else:
        mismatches = markers - matches
    return {
        'mtdna_option': {
            'permit_mismatches': self.mtdna_permit_mismatches,
            'partial': self.mtdna_partial,
            # Key spelling is part of the emitted report; kept as is.
            'exclude_cstretchs': self.mtdna_exclude_cstretches,
        },
        'mtdna_report': {
            'markers': markers,
            'matches': matches,
            # Cast so both branches yield a builtin bool, not numpy.bool_.
            'decision': bool(mismatches <= self.mtdna_permit_mismatches),
        },
    }
def make_report(self, data):
    """Assemble the per-type reports and the overall decision.

    For each of ``'astr_result'``, ``'ystr_result'`` and ``'mtdna_result'``
    present in ``data`` the matching ``get_*_report`` output is merged in.
    ``'total_decision'`` is True when ``self.partial`` is set and any type
    decided True; otherwise it is True only if every type decided True.
    """
    report = {
        'total_option': {
            'type': self.relationship_type,
            'partial': self.partial,
        },
    }

    builders = (
        ('astr_result', 'get_astr_report'),
        ('ystr_result', 'get_ystr_report'),
        ('mtdna_result', 'get_mtdna_report'),
    )
    for data_key, builder_name in builders:
        if data_key in data:
            report.update(getattr(self, builder_name)(data[data_key]))

    verdicts = [report[name]['decision']
                for name in ('astr_report', 'ystr_report', 'mtdna_report')
                if name in report]
    if self.partial and True in verdicts:
        overall = True
    else:
        overall = all(verdicts)
    report['total_decision'] = overall
    return report
def run_check_between(self, relationship_type, type_, shared_allele=False,
                      partial=False, format=None):
    """Compare the two samples on one genotype type.

    Args:
        relationship_type: ``'identity'``, ``'paternity'`` or ``'kinship'``.
        type_: Genotype key to read from both samples (e.g. ``'A-STR'``).
        shared_allele: Forwarded to the paternity check as
            ``add_shared_allele``.
        partial: Accepted for interface compatibility; not used here.
        format: Accepted for interface compatibility; not used here.

    Returns:
        The arranged result produced by ``CheckerBetween.arrange_result``.

    Raises:
        ValueError: If ``relationship_type`` is not one of the three
            supported values.
    """
    query_a = pd.Series(self.gm.get_genotype(self.identifier_a)[type_])
    query_b = pd.Series(self.gm.get_genotype(self.identifier_b)[type_])
    query_a.name = self.identifier_a
    query_b.name = self.identifier_b

    # The allele-frequency table is best effort: a failed lookup falls back
    # to None, but interpreter-level exits are no longer swallowed.
    af_table = None
    if self.af_table:
        try:
            af_table = self.afm.get(self.af_table, type_)
        except Exception:
            af_table = None

    cb = CheckerBetween(query_a, query_b, type_, af_table)
    if relationship_type == 'identity':
        result = cb.arrange_result(cb.check_identities(), 'matched')
    elif relationship_type == 'paternity':
        result = cb.arrange_result(
            cb.check_paternities(add_shared_allele=shared_allele), 'shared')
    elif relationship_type == 'kinship':
        result = cb.arrange_result(cb.check_kinships(), 'shared')
    else:
        raise ValueError(
            'Unsupported relationship type: {!r}'.format(relationship_type))
    return result
"""
def run_search_multiple(self, relationship_type, group_a, group_b,
name, partial=False,
permit_mismatches_astr=float('inf'), partial_astr=False,
permit_mismatches_ystr=float('inf'), partial_ystr=False,
permit_mismatches_mtdna=float('inf'), partial_mtdna=False,
exclude_cstretch_mtdna=False):
group_a = group_a.split(',')
group_b = group_b.split(',')
af_astrs = self.afm.get(name,'A-STR')
af_ystrs = None
identifiers_a = pd.Index([])
try:
astrs_a = self.gm.get_genotypes_by_group('A-STR', group_a)
identifiers_a = identifiers_a + astrs_a.index
except:
astrs_a = None
try:
ystrs_a = self.gm.get_genotypes_by_group('Y-STR', group_a)
identifiers_a = identifiers_a + ystrs_a.index
except:
ystrs_a = None
try:
mtdnas_a = self.gm.get_genotypes_by_group('mtDNA', group_a)
identifiers_a = identifiers_a + mtdnas_a.index
except:
mtdnas_a = None
identifiers_b = pd.Index([])
try:
astrs_b = self.gm.get_genotypes_by_group('A-STR', group_b)
identifiers_b = identifiers_b + astrs_b.index
except:
astrs_b = None
try:
ystrs_b = self.gm.get_genotypes_by_group('Y-STR', group_b)
identifiers_b = identifiers_b + ystrs_b.index
except:
ystrs_b = None
try:
mtdnas_b = self.gm.get_genotypes_by_group('mtDNA', group_b)
identifiers_b = identifiers_b + mtdnas_b.index
except:
mtdnas_b = None
total_result = pd.DataFrame()
for identifier in identifiers_a:
query = self.gm.get_genotype(identifier)
if relationship_type == 'kinship':
astr_kwargs={'af_table': af_astrs}
else:
astr_kwargs={'permit_mismatches': permit_mismatches_astr,
'af_table': af_astrs, 'partial': partial_astr}
si = SearchIntegrator(query)
result = si.search_relationship(relationship_type,
astrs=astrs_b, ystrs=ystrs_b, mtdnas=mtdnas_b,
astr_kwargs=astr_kwargs,
ystr_kwargs={'permit_mismatches':\
permit_mismatches_ystr,
'af_table': af_ystrs,
'partial': partial_ystr},
mtdna_kwargs={'permit_mismatches':
permit_mismatches_mtdna,
'partial': partial_mtdna,
'exclude_cstretches': exclude_cstretch_mtdna,},
partial=partial
)
index_length = len(result.index)
index = pd.MultiIndex(levels=[[identifier], result.index],
labels=[[0]*index_length, range(index_length)],
names=['group_A','group_B'])
result.index = index
total_result = total_result.append(result)
return DataContainer(status='Success', message='Search',
items=total_result)
"""
def run_check_hml(customer_identifier, goldstar_identifier):
    """Collect the template values for an HML report on two samples.

    Runs four comparisons between the two samples - A-STR paternity, Y-STR
    identity, mtDNA identity and A-STR kinship - and renders each into rows
    with the corresponding ``create_*_rows`` helper. A comparison whose
    genotype is missing for either sample yields empty rows.

    Args:
        customer_identifier: Identifier of the customer sample.
        goldstar_identifier: Identifier of the gold-star sample.

    Returns:
        dict of template keys (identifiers, placeholder names, and the
        ``*_rows`` / ``*_row_count`` pairs for each comparison).
    """
    gm = db.GenotypeManager()
    afm = db.AlleleFrequenciesManager()

    # NOTE(review): the previous body read `query_a` / `query_b` before ever
    # assigning them, so it could not run. The customer is assumed to be
    # sample A and the gold star sample B - confirm the intended order.
    identifier_a = customer_identifier
    identifier_b = goldstar_identifier

    def load_series(identifier, type_):
        """Return the sample's genotype of `type_` as a named Series, or
        None when the sample or the genotype type is missing."""
        try:
            series = pd.Series(gm.get_genotype(identifier)[type_])
            series.name = identifier
        except (TypeError, KeyError):
            return None
        return series

    # A-STR paternity
    query_a = load_series(identifier_a, 'A-STR')
    query_b = load_series(identifier_b, 'A-STR')
    af_table = afm.get('Korean', 'A-STR')
    if query_a is not None and query_b is not None:
        cb = CheckerBetween(query_a, query_b, 'A-STR', af_table)
        astr_result = cb.check_paternities(add_shared_allele=True)
        astr_result = cb.arrange_result(astr_result, 'shared')
        astr_result = astr_result.drop('total')
        astr_rows, astr_row_count = create_astr_rows(
            astr_result, goldstar_identifier, customer_identifier,
            start_row_num=1)
    else:
        astr_rows, astr_row_count = '', 1

    # Y-STR identity
    query_a = load_series(identifier_a, 'Y-STR')
    query_b = load_series(identifier_b, 'Y-STR')
    if query_a is not None and query_b is not None:
        cb = CheckerBetween(query_a, query_b, 'Y-STR', None)
        ystr_result = cb.check_identities()
        ystr_result = cb.arrange_result(ystr_result, 'matched')
        ystr_result = ystr_result.drop('total')
        ystr_rows, ystr_row_count = create_ystr_rows(
            ystr_result, goldstar_identifier, customer_identifier,
            start_row_num=1)
    else:
        ystr_rows, ystr_row_count = '', 1

    # mtDNA identity
    query_a = load_series(identifier_a, 'mtDNA')
    query_b = load_series(identifier_b, 'mtDNA')
    if query_a is not None and query_b is not None:
        cb = CheckerBetween(query_a, query_b, 'mtDNA', None)
        mtdna_result = cb.check_identities()
        mtdna_result = cb.arrange_result(mtdna_result, 'matched')
        mtdna_result = mtdna_result.drop('total')
        mtdna_rows, mtdna_row_count = create_mtdna_rows(
            mtdna_result, goldstar_identifier, customer_identifier,
            start_row_num=1)
    else:
        mtdna_rows, mtdna_row_count = '', 1

    # A-STR kinship
    query_a = load_series(identifier_a, 'A-STR')
    query_b = load_series(identifier_b, 'A-STR')
    af_table = afm.get('Korean', 'A-STR')
    if query_a is not None and query_b is not None:
        # Build the checker only once both genotypes are known to exist.
        cb = CheckerBetween(query_a, query_b, 'A-STR', af_table)
        kinship_result = cb.check_kinships()
        kinship_result = cb.arrange_result(kinship_result, 'shared')
        ci = kinship_result.loc['total']
        ci = ci.drop(customer_identifier)
        ci = ci.drop(goldstar_identifier)
        ci = ci.astype(float)
        # Express every relationship relative to 'unrelated'.
        ci = ci / ci['unrelated']
        # Series.sort (in place) no longer exists in pandas.
        ci = ci.sort_values(ascending=False)
        kinship_df = pd.DataFrame(index=ci.index)
        kinship_df['ratio'] = ci
        kinship_df['probability'] = ci / (ci + 1)
        kinship_rows, kinship_row_count = create_astr_kinship_rows(
            kinship_df, goldstar_identifier, customer_identifier,
            start_row_num=2)
    else:
        kinship_rows, kinship_row_count = '', 2

    key_value = {
        'customer_goldstar_name': 'aaa',
        'goldstar_identifier': goldstar_identifier,
        'customer_relationship': 'bbb',
        'customer_name': 'cc',
        'customer_identifier': customer_identifier,
        'kinship_row_count': kinship_row_count,
        'kinship_rows': kinship_rows,
        'astr_row_count': astr_row_count,
        'astr_rows': astr_rows,
        'ystr_row_count': ystr_row_count,
        'ystr_rows': ystr_rows,
        'mtdna_row_count': mtdna_row_count,
        'mtdna_rows': mtdna_rows,
    }
    return key_value