# Source code for kinmatch.search

# -*- coding:utf-8 -*-
from __future__ import unicode_literals
import sys
import time
import threading
import pandas as pd
import numpy as np

from kinmatch import config
from kinmatch.utils import (apply_by_multiprocessing, decode_marker, 
        decode_alleles)
from kinmatch import db
from kinmatch.utils import DataContainer
from kinmatch import NoValueException, ParameterException

## dataframe apply functions
def kinship_index(row, query=None, calculator=None, active_markers=None):
    """Return the cumulative index of one target row for every relationship.

    Written to be passed to ``DataFrame.apply(..., axis=1)``.

    Args:
        row: genotype of one target, passed through to the calculator.
        query: genotype of the query sample, passed through to the calculator.
        calculator: object exposing ``relationships`` (an ordered list of
            labels) and ``get_cumulative_index(query, row, relationship)``.
        active_markers: not used by this function.

    Returns:
        pd.Series with one value per entry of ``calculator.relationships``,
        indexed by those labels.
    """
    relationships = calculator.relationships
    indices = [calculator.get_cumulative_index(query, row, relationship)
               for relationship in relationships]
    return pd.Series(indices, index=relationships)


def paternities_match_sum(row, query=None, active_markers=None):
    """Count active markers at which a target shares no allele with the query.

    Written to be passed to ``DataFrame.apply(..., axis=1)``.

    Args:
        row: pd.Series of one target; each non-null value is an iterable of
            alleles.
        query: pd.Series of the query; must contain every active marker.
        active_markers: pd.Index (or list) of the markers to compare.

    Returns:
        pd.Series indexed ``['markers', 'mismatches']``: the number of active
        markers, and the number of markers without a shared allele.  Active
        markers that are null (or absent) in ``row`` count as mismatches.
    """
    # Index.intersection is the explicit set operation; the ``&`` operator
    # on Index objects no longer means intersection in current pandas.
    usable_markers = row.index[row.notnull().values].intersection(
        active_markers)
    missing_count = len(active_markers) - len(usable_markers)
    unshared_count = sum(
        not (set(query_alleles) & set(row_alleles))
        for query_alleles, row_alleles in zip(query[usable_markers],
                                              row[usable_markers]))
    return pd.Series([len(active_markers), unshared_count + missing_count],
                     index=['markers', 'mismatches'])
    

def paternities_match_sum_partial(row, query=None, active_markers=None):
    """Count unshared markers, looking only at markers typed in the target.

    Same comparison as ``paternities_match_sum`` except that active markers
    which are null (or absent) in ``row`` are ignored instead of being
    counted as mismatches.

    Args:
        row: pd.Series of one target; each non-null value is an iterable of
            alleles.
        query: pd.Series of the query; must contain every active marker.
        active_markers: pd.Index (or list) of the markers to compare.

    Returns:
        pd.Series indexed ``['markers', 'mismatches']``: the number of
        markers actually compared, and how many of them share no allele.
    """
    # Index.intersection is the explicit set operation; the ``&`` operator
    # on Index objects no longer means intersection in current pandas.
    usable_markers = row.index[row.notnull().values].intersection(
        active_markers)
    unshared_count = sum(
        not (set(query_alleles) & set(row_alleles))
        for query_alleles, row_alleles in zip(query[usable_markers],
                                              row[usable_markers]))
    return pd.Series([len(usable_markers), unshared_count],
                     index=['markers', 'mismatches'])


def identities_match_sum(row, query=None, active_markers=None):
    """Count active markers whose genotype differs from the query.

    Written to be passed to ``DataFrame.apply(..., axis=1)``.

    Args:
        row: pd.Series of one target, containing every active marker.
        query: pd.Series of the query, containing every active marker.
        active_markers: pd.Index (or list) of the markers to compare.

    Returns:
        pd.Series indexed ``['markers', 'mismatches']``: the number of active
        markers, and the number of element-wise unequal values.  A null
        target value compares unequal and therefore counts as a mismatch.
    """
    target_values = row[active_markers]
    query_values = query[active_markers]
    mismatch_count = (target_values != query_values).sum()
    return pd.Series([len(active_markers), mismatch_count],
                     index=['markers', 'mismatches'])


def identities_match_sum_partial(row, query=None, active_markers=None):
    """Count differing genotypes, looking only at markers typed in the target.

    Same comparison as ``identities_match_sum`` except that active markers
    which are null (or absent) in ``row`` are left out of the comparison.

    Args:
        row: pd.Series of one target.
        query: pd.Series of the query; must contain every active marker.
        active_markers: pd.Index (or list) of the markers to compare.

    Returns:
        pd.Series indexed ``['markers', 'mismatches']``: the number of
        markers actually compared, and how many of them are unequal.
    """
    # Index.intersection is the explicit set operation; the ``&`` operator
    # on Index objects no longer means intersection in current pandas.
    usable_markers = row.index[row.notnull().values].intersection(
        active_markers)
    mismatch_count = (row[usable_markers] != query[usable_markers]).sum()
    return pd.Series([len(usable_markers), mismatch_count],
                     index=['markers', 'mismatches'])


class IndexCalculator:
    """Base class for index calculators backed by an allele-frequency table.

    ``af_table`` is indexed as ``af_table[marker][allele]``, i.e. one column
    per marker and one row label per allele.  Subclasses implement the
    ``get_cumulative_index`` / ``get_index`` / ``find_formula`` hooks.
    """
    formula = {}
    # Frequency used when an allele is not listed for a known marker, or is
    # listed with a NaN frequency.
    default_allele_frequency = 0.001

    def __init__(self, af_table):
        self.af_table = af_table

    def get_af(self, marker, allele):
        """Return the frequency of ``allele`` at ``marker``.

        Returns:
            ``np.nan`` when the table has no column for ``marker``;
            ``default_allele_frequency`` when the allele is missing from the
            column or its stored frequency is NaN; the stored value otherwise.
        """
        if marker not in self.af_table.columns:
            return np.nan
        try:
            allele_frequency = self.af_table[marker][allele]
        except KeyError:
            return self.default_allele_frequency
        if np.isnan(allele_frequency):
            return self.default_allele_frequency
        return allele_frequency

    def get_cumulative_index(self, markers, query, target):
        """Combine the per-marker indices; implemented by subclasses."""
        raise NotImplementedError

    def get_index(self, marker, a1, a2, b1, b2):
        """Index of a single marker; implemented by subclasses."""
        raise NotImplementedError

    def find_formula(self, query_alleles, target_alleles):
        """Pick the formula for an allele combination; for subclasses."""
        raise NotImplementedError


class IdentityIndexCalculator(IndexCalculator):
    """Identity index built from the genotype frequencies of matching markers."""

    def get_cumulative_index(self, markers, query, target):
        """Return the reciprocal of the product of matching genotype frequencies.

        For every marker that is not in ``config.SEX_MARKERS`` and whose
        query and target values compare equal, the genotype frequency is
        ``pa * pb`` when both alleles are the same and ``2 * pa * pb``
        otherwise.  Markers that differ are skipped.

        Args:
            markers: iterable of marker names to consider.
            query: mapping/Series marker -> two-element allele list.
            target: mapping/Series marker -> two-element allele list.

        Returns:
            ``1 / product``; ``1`` when no marker contributed.
        """
        genotype_probability = 1
        for marker in markers:
            if marker in config.SEX_MARKERS:
                continue
            if not (query[marker] == target[marker]):
                continue
            allele_a, allele_b = query[marker]
            pa = self.get_af(marker, allele_a)
            pb = self.get_af(marker, allele_b)
            if allele_a == allele_b:
                genotype_probability *= pa * pb
            else:
                genotype_probability *= 2 * pa * pb
        return 1 / genotype_probability


class PaternityIndexCalculator(IndexCalculator):
    """Paternity index for query/target pairs that share an allele."""

    # Formula number -> function of (pa, pb), the frequencies of the alleles
    # A and B returned by ``find_formula``.
    formula = {
        1: (lambda pa, pb: 0.25 / pa),
        2: (lambda pa, pb: (pa + pb) / (4 * pa * pb)),
        3: (lambda pa, pb: 0.5 / pa),
        4: (lambda pa, pb: 0.5 / pa),
        5: (lambda pa, pb: 1 / pa),
    }

    def get_cumulative_index(self, markers, query, target):
        """Multiply the per-marker indices of all markers that share an allele.

        Markers in ``config.SEX_MARKERS``, markers whose query or target
        value is not a list, and markers without a common allele are skipped.

        Returns:
            The product of the contributing indices; ``1`` if there are none.
        """
        cumulative = 1
        for marker in markers:
            if marker in config.SEX_MARKERS:
                continue
            query_alleles = query[marker]
            if not isinstance(query_alleles, list):
                continue
            target_alleles = target[marker]
            if not isinstance(target_alleles, list):
                continue
            if not (set(query_alleles) & set(target_alleles)):
                continue
            a1, a2 = query_alleles
            b1, b2 = target_alleles
            cumulative *= self.get_index(marker, a1, a2, b1, b2)
        return cumulative

    def get_index(self, marker, a1, a2, b1, b2):
        """Return the index of one marker, or ``1`` if no formula applies."""
        try:
            formula_id, allele_a, allele_b = self.find_formula(
                [a1, a2], [b1, b2])
        except AssertionError:
            # find_formula signals "no applicable formula" this way; such a
            # marker leaves the cumulative product unchanged.
            return 1
        pa = self.get_af(marker, allele_a)
        pb = self.get_af(marker, allele_b)
        return self.formula[formula_id](pa, pb)

    def find_formula(self, query_alleles, target_alleles):
        """Select the formula for a pair of two-allele genotypes.

        The alleles are first reordered so that a shared allele, if any,
        ends up in first position of both genotypes.

        Returns:
            ``(formula_id, A, B)`` where A is the first and B the second
            query allele after reordering.

        Raises:
            AssertionError: when no formula matches the combination.
        """
        a1, a2 = query_alleles
        b1, b2 = target_alleles

        # The three swaps are deliberately sequential (not elif): each one
        # is evaluated against the result of the previous one.
        if a1 != b1:
            if a1 == b2:
                b1, b2 = b2, b1
            if a2 == b1:
                a1, a2 = a2, a1
            if a2 == b2:
                a1, a2 = a2, a1
                b1, b2 = b2, b1

        if a1 == b1 and a2 != b2 and a1 != a2 and b1 != b2:
            return 1, a1, a2
        if a1 == b1 and a2 == b2 and a1 != a2:
            return 2, a1, a2
        if a1 == b1 and a2 != b2 and b1 == b2:
            return 3, a1, a2
        if a1 == b1 and a2 != b2 and a1 == a2:
            return 4, a1, a2
        if a1 == b1 and a2 == b2 and a1 == a2:
            return 5, a1, a2
        raise AssertionError(
                'No formualr for this allele combination %s %s %s %s' % (
                    a1, a2, b1, b2))


class KinshipIndexCalculator(IndexCalculator):
    """Per-relationship index of a target genotype given the query genotype.

    ``coefficients`` maps a relationship label to a tuple ``phi``.  In
    ``formulas``, ``phi[0]`` weights terms made of two allele frequencies,
    ``phi[1]`` terms made of one, and ``phi[2]`` the constant term.
    NOTE(review): these look like the probabilities of sharing 0, 1 and 2
    alleles identical by descent -- confirm against the method reference.
    """
    coefficients = {
        'parent-child': (0, 1, 0),
        'full-sib': (1./4, 1./2, 1./4),
        'half-sib': (1./2, 1./2, 0),
        'first cousin': (3./4, 1./4, 0),
        'unrelated': (1, 0, 0),
        '5 chon': (7./8, 1./8, 0),
        '6 chon': (15./16, 1./16, 0),
        '7 chon': (31./32, 1./32, 0),
        '8 chon': (63./64, 1./64, 0),
    }
    relationships = list(sorted(coefficients.keys()))
    # User-facing (Korean) description of every relationship label.
    relationships_explanation = {
        'parent-child': '부모-자식관계',
        'full-sib': '형제관계 (부모가 모두 같을 경우)',
        'half-sib': '이복/이부 형제, 삼촌-조카, 조부-손자',
        'first cousin': '4촌 형제관계',
        'unrelated': '무관한 관계',
        '5 chon': '5촌',
        '6 chon': '6촌 형제',
        '7 chon': '7촌',
        '8 chon': '8촌 형제',
    }
    # Formula number -> function of (phi, pa, pb, pc, pd); pa..pd are the
    # frequencies of the alleles A..D returned by ``find_formula``.
    formulas = {
        1: (lambda phi, pa, pb, pc, pd:
                phi[2] + 0.5*phi[1]*(pa + pb) + 2*phi[0]*pa*pb ),
        2: (lambda phi, pa, pb, pc, pd: phi[2] + phi[1]*pa + phi[0]*pa*pa ),
        3: (lambda phi, pa, pb, pc, pd: phi[1]*pb + 2*phi[0]*pa*pb ),
        4: (lambda phi, pa, pb, pc, pd: 0.5*phi[1]*pa + phi[0]*pa*pa ),
        5: (lambda phi, pa, pb, pc, pd: 0.5*phi[1]*pc + 2*phi[0]*pa*pc ),
        6: (lambda phi, pa, pb, pc, pd: 2*phi[0]*pc*pd ),
        7: (lambda phi, pa, pb, pc, pd: phi[0]*pb*pb ),
        8: (lambda phi, pa, pb, pc, pd: 2*phi[0]*pb*pc ),
    }

    def get_cumulative_index(self, query, target, relationship):
        """Multiply the per-marker indices over every typed target marker.

        NOTE: the argument order differs from the base class
        (``markers, query, target``); the markers are taken from
        ``target.index``.

        Args:
            query: Series marker -> two-element allele list.
            target: Series marker -> allele list; non-list values are skipped.
            relationship: a key of ``coefficients``.

        Returns:
            The product of the per-marker indices; ``1`` if none contributed.
        """
        # A plain loop is kept on purpose: the original author measured the
        # comprehension + product variant as slower.
        cumulative = 1
        for marker in target.index:
            target_alleles = target[marker]
            if not isinstance(target_alleles, list):
                continue
            cumulative *= self.get_index(marker, relationship, query[marker],
                    target_alleles)
        return cumulative

    def get_index(self, marker, relationship, query_alleles, target_alleles):
        """Return the index of one marker, or ``1`` if no formula applies."""
        formula_id, A, B, C, D = self.find_formula(query_alleles,
                target_alleles)
        if formula_id is None:
            return 1
        # Local names avoid shadowing the pandas module alias ``pd``.
        p_a = self.get_af(marker, A)
        p_b = self.get_af(marker, B)
        p_c = self.get_af(marker, C)
        p_d = self.get_af(marker, D)
        phi = self.coefficients[relationship]
        return self.formulas[formula_id](phi, p_a, p_b, p_c, p_d)

    def find_formula(self, query_alleles, target_alleles):
        """Select the formula and its alleles for a pair of genotypes.

        Returns:
            ``(formula_id, A, B, C, D)``; unused allele slots are ``None``.
            ``[None] * 5`` when the target is not a list or nothing matches.
        """
        if not isinstance(target_alleles, list):
            return [None] * 5
        a1, a2 = query_alleles
        b1, b2 = target_alleles

        # The branch order matters: later conditions rely on the earlier
        # ones having been ruled out.
        # f1: both heterozygous, same genotype
        if a1 == b1 and a2 == b2 and a1 != a2:
            return 1, a1, a2, None, None
        if a1 == b2 and a2 == b1 and a1 != a2:
            return 1, a1, a2, None, None
        # f2: both homozygous, same allele
        if a1 == a2 == b1 == b2:
            return 2, a1, None, None, None
        # f3: query homozygous, target heterozygous, one allele shared
        if a1 == a2 and a1 == b1 and b1 != b2:
            return 3, a1, b2, None, None
        if a1 == a2 and a1 == b2 and b1 != b2:
            return 3, a1, b1, None, None
        # f4: query heterozygous, target homozygous for a query allele
        if a1 != a2 and a1 == b1 and b1 == b2:
            return 4, a1, a2, None, None
        if a1 != a2 and a2 == b1 and b1 == b2:
            return 4, a2, a1, None, None
        # f5: both heterozygous, exactly one allele shared
        if a1 != a2 and a1 == b1 and a2 != b2:
            return 5, a1, a2, b2, None
        if a1 != a2 and a2 == b1 and b1 != b2:
            return 5, a2, a1, b2, None
        if a1 != a2 and a2 == b2 and b1 != b2:
            return 5, a2, a1, b1, None
        if a1 != a2 and a1 == b2 and b1 != b2:
            return 5, a1, a2, b1, None
        # f6, f7, f8: nothing shared
        if a1 != a2 and a1 != b1 and b1 != b2:
            return 6, a1, a2, b1, b2
        if a1 == a2 and a1 != b1 and b1 == b2:
            return 7, a1, b1, None, None
        if a1 == a2 and a1 != b1 and b1 != b2:
            return 8, a1, b1, b2, None
        if a1 != a2 and a1 != b1 and b1 == b2:
            # Query heterozygous, target homozygous, nothing shared.  Like
            # every other branch the value depends on the *target* genotype,
            # here phi[0] * p(b1)**2, which is formula 7 with B = b1.  This
            # branch previously selected formula 8 with a query allele as B,
            # i.e. 2 * phi[0] * p(a2) * p(b1).  Ratios against 'unrelated'
            # are unaffected because phi[0] is the only relationship term.
            return 7, a1, b1, None, None
        return [None] * 5


class BaseSearcher(object):
    """Common state for searchers: a query and the genotypes to search in."""

    def __init__(self, query=None, df_genotypes=None):
        self.set_params(query, df_genotypes)

    def set_params(self, query, df_genotypes):
        """Store the query and the genotype table.

        ``identifier`` is taken from ``query.name`` when the query is a
        pandas Series or DataFrame, and is ``None`` for any other query.
        """
        self.genotypes = df_genotypes
        self.query = query
        if isinstance(query, (pd.Series, pd.DataFrame)):
            self.identifier = query.name
        else:
            self.identifier = None

    def search_identities(self):
        """Search for identical genotypes; implemented by subclasses."""
        raise NotImplementedError


class MtdnaSearcher(BaseSearcher):
    """Identity search over mtDNA genotypes.

    ``self.query`` is a Series indexed by marker and ``self.genotypes`` a
    DataFrame with one row per sample and one column per marker.
    """

    def exclude_markers_in_cstretch(self, columns):
        """Return ``columns`` without the markers inside the skipped ranges.

        A marker is dropped when its decoded position lies in 16279-16282 or
        16384-16393 (both inclusive).
        """
        kept = []
        for column in columns:
            position = float(decode_marker(column, 'mtDNA'))
            in_cstretch = (16279 <= position <= 16282 or
                           16384 <= position <= 16393)
            if not in_cstretch:
                kept.append(column)
        return pd.Index(kept)

    def _query_columns(self, exclude_cstretches):
        """Markers of the query, optionally without the skipped ranges."""
        if exclude_cstretches:
            return self.exclude_markers_in_cstretch(self.query.index)
        return self.query.index

    def _without_query_row(self, targets):
        """Drop the query's own row from ``targets`` when it is present."""
        if self.identifier is not None and self.identifier in targets.index:
            return targets.drop(self.identifier)
        return targets

    def search_identities_by_query_index(self, permit_mismatches=2,
            exclude_cstretches=False):
        """Compare every target on all markers typed in the query.

        Query markers missing from the genotype table are treated as null
        in every target and therefore count as mismatches.

        Returns:
            DataFrame with columns ``['markers', 'mismatches']`` holding the
            targets with at most ``permit_mismatches`` mismatches.
        """
        columns = self._query_columns(exclude_cstretches)
        # reindex pads query markers that the table lacks with NaN.  The
        # previous implementation built the padded frame but then replaced
        # it with ``self.genotypes[columns].drop(...)`` inside a bare except,
        # so the query's own row was never removed when a marker was missing.
        targets = self._without_query_row(
                self.genotypes.reindex(columns=columns))
        if targets.empty:
            return pd.DataFrame(columns=['markers', 'mismatches'])
        query_t = self.query[columns]
        mismatches = targets.apply(self._compare_by_query_index,
                query=query_t, axis=1)
        result = pd.DataFrame({'mismatches': mismatches})
        result['markers'] = query_t.notnull().sum()
        return result.loc[result['mismatches'] <= permit_mismatches,
                ['markers', 'mismatches']]

    def _compare_by_query_index(self, row, query):
        """Number of markers typed in the query where ``row`` differs."""
        typed = query.notnull()
        return (row[typed] != query[typed]).sum()

    def search_identities_intersection(self, permit_mismatches=2,
            exclude_cstretches=False):
        """Compare every target on the markers typed in both samples.

        Only markers present in both the query and the genotype table are
        used, and per target only those that are non-null on both sides.

        Returns:
            DataFrame with columns ``['markers', 'mismatches']`` holding the
            targets with at most ``permit_mismatches`` mismatches.
        """
        # Index.intersection is the explicit set operation; ``&`` on Index
        # objects no longer means intersection in current pandas.
        columns = self._query_columns(exclude_cstretches).intersection(
                self.genotypes.columns)
        targets = self._without_query_row(self.genotypes[columns])
        if targets.empty:
            return pd.DataFrame(columns=['markers', 'mismatches'])
        query_t = self.query[columns]
        result = targets.apply(self._compare_intersection,
                query=query_t, axis=1)
        return result.loc[result['mismatches'] <= permit_mismatches,
                ['markers', 'mismatches']]

    def _compare_intersection(self, row, query):
        """Markers typed on both sides, and how many of them differ."""
        typed_in_both = row.notnull() & query.notnull()
        return pd.Series([typed_in_both.sum(),
            (row[typed_in_both] != query[typed_in_both]).sum()],
            index=['markers', 'mismatches'])

    def _compare_intersection_columns(self, row, query):
        """Number of markers typed in both ``row`` and ``query``."""
        return (row.notnull() & query.notnull()).sum()

    def search_identities(self, permit_mismatches=2, exclude_cstretches=False,
            partial=False, is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Run the identity search.

        Args:
            permit_mismatches: largest number of mismatches to report.
            exclude_cstretches: skip the markers removed by
                ``exclude_markers_in_cstretch``.
            partial: compare only markers typed on both sides instead of
                every marker typed in the query.
            is_multiprocess: accepted for signature compatibility with the
                other searchers; not used here.
        """
        if partial:
            return self.search_identities_intersection(permit_mismatches,
                    exclude_cstretches)
        return self.search_identities_by_query_index(permit_mismatches,
                exclude_cstretches)


class StrSearcher(BaseSearcher):
    """Identity, paternity and kinship search over STR genotypes.

    ``active_markers`` are the query markers whose value is a list that is
    not entirely null; ``active_genotypes`` is the genotype table restricted
    to those markers.
    """

    def __init__(self, query, df_genotypes):
        super(StrSearcher, self).__init__(query, df_genotypes)
        active = []
        for marker in self.query.index:
            alleles = self.query.loc[marker]
            if isinstance(alleles, list) and not pd.isnull(alleles).all():
                # NOTE(review): the marker is renamed here but the query
                # itself is not; presumably stored column names use '__'
                # in place of '.' -- confirm against the genotype loader.
                if '.' in marker:
                    marker = marker.replace('.', '__')
                active.append(marker)
        self.active_markers = pd.Index(active)
        # reindex (rather than ``[]``) gives an all-NaN column for an active
        # marker the table lacks instead of raising KeyError.
        self.active_genotypes = self.genotypes.reindex(
                columns=self.active_markers)

    def _without_query_row(self, targets):
        """Drop the query's own row from ``targets`` when it is present."""
        if self.identifier is not None and self.identifier in targets.index:
            return targets.drop(self.identifier)
        return targets

    def search_identities(self, permit_mismatches=2, af_table=None,
            partial=False, is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Find targets whose genotypes equal the query.

        Args:
            permit_mismatches: largest number of mismatches to report.
            af_table: optional allele-frequency table; when given and not
                empty an ``index`` column is added to the result.
            partial: ignore markers that are null in a target instead of
                counting them as mismatches.
            is_multiprocess: evaluate the rows with
                ``apply_by_multiprocessing``.

        Returns:
            DataFrame with columns ``markers``, ``mismatches`` and,
            optionally, ``index``.
        """
        calculator = None
        if af_table is not None and not af_table.empty:
            calculator = IdentityIndexCalculator(af_table)
        if len(self.active_markers) == 0:
            return pd.DataFrame(columns=['markers', 'mismatches'])
        if partial:
            match_sum_func = identities_match_sum_partial
        else:
            match_sum_func = identities_match_sum
        return self._search_by_match_sum(match_sum_func,
                calculator, permit_mismatches=permit_mismatches,
                is_multiprocess=is_multiprocess)

    def search_paternities(self, permit_mismatches=2, af_table=None,
            partial=False, is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Find targets that share an allele with the query at each marker.

        Arguments and return value are as for ``search_identities``.
        """
        calculator = None
        if af_table is not None and not af_table.empty:
            calculator = PaternityIndexCalculator(af_table)
        if partial:
            match_sum_func = paternities_match_sum_partial
        else:
            match_sum_func = paternities_match_sum
        return self._search_by_match_sum(match_sum_func,
                calculator, permit_mismatches=permit_mismatches,
                is_multiprocess=is_multiprocess)

    def _search_by_match_sum(self, match_sum_func, calculator,
            permit_mismatches=2, is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Apply ``match_sum_func`` to every target and keep close matches.

        Returns:
            DataFrame with columns ``markers`` and ``mismatches`` sorted by
            ascending mismatches, plus ``index`` when a calculator is given
            and there is at least one match.  An empty DataFrame with the
            same two columns is returned when there is nothing to search.
        """
        targets = self._without_query_row(self.active_genotypes)
        if targets.empty:
            # Always hand back a DataFrame: callers set ``.columns`` on the
            # result, which fails on None.
            return pd.DataFrame(columns=['markers', 'mismatches'])

        query = self.query[self.active_markers]
        if is_multiprocess:
            result = apply_by_multiprocessing(targets, match_sum_func,
                    query=query, active_markers=self.active_markers, axis=1)
        else:
            result = targets.apply(match_sum_func, query=query,
                    active_markers=self.active_markers, axis=1)

        result = result.loc[result['mismatches'] <= permit_mismatches]
        # The sorted frame must be re-assigned (sorting is not in place);
        # mergesort keeps the original order among equal mismatch counts.
        result = result.sort_values('mismatches', kind='mergesort')
        result = result.reindex(columns=['markers', 'mismatches'])
        if not result.empty and calculator:
            result['index'] = targets.loc[result.index].apply(
                    self._identity_index, calculator=calculator, axis=1)
        return result

    def search_kinships(self, af_table,
            min_kinship_index=config.DEFAULT_MIN_KINSHIP_INDEX,
            is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Rank targets by their best kinship index relative to 'unrelated'.

        Args:
            af_table: allele-frequency table for ``KinshipIndexCalculator``.
            min_kinship_index: only rows whose ``max_index`` exceeds this
                value are kept; a falsy value selects
                ``config.DEFAULT_MIN_KINSHIP_INDEX``.
            is_multiprocess: evaluate the rows with
                ``apply_by_multiprocessing``.

        Returns:
            DataFrame with one column per relationship plus ``max_index``
            (largest relationship value divided by the 'unrelated' value)
            and ``max_relative`` (label of that relationship), sorted in
            descending order.
        """
        calculator = KinshipIndexCalculator(af_table)
        result_columns = (list(calculator.relationships) +
                ['max_index', 'max_relative'])
        active_autosomal_markers = [m for m in self.active_markers
                if m not in config.SEX_MARKERS]
        if not active_autosomal_markers:
            return pd.DataFrame(columns=result_columns)

        targets = self._without_query_row(
                self.active_genotypes[active_autosomal_markers])
        if targets.empty:
            return pd.DataFrame(columns=result_columns)

        if is_multiprocess:
            result = apply_by_multiprocessing(targets, kinship_index,
                    query=self.query, calculator=calculator, axis=1)
        else:
            result = targets.apply(self._kinship_index,
                    calculator=calculator, axis=1)
        if result is None:
            return pd.DataFrame(columns=['max_index', 'max_relative'])

        if not min_kinship_index:
            min_kinship_index = config.DEFAULT_MIN_KINSHIP_INDEX

        # Both values are computed before either column is added so that
        # they only see the relationship columns.
        max_index = result.max(axis=1) / result['unrelated']
        max_relative = result.idxmax(axis=1)
        result['max_index'] = max_index
        result['max_relative'] = max_relative
        result = result[result['max_index'] > min_kinship_index]
        return result.sort_values(['max_index', 'max_relative'],
                ascending=False)

    def _identity_index(self, row, calculator=None):
        """Cumulative index of one target row over the active markers."""
        if calculator:
            return calculator.get_cumulative_index(self.active_markers,
                    self.query, row)

    def _kinship_index(self, row, calculator=None):
        """Bind ``self.query`` to the module-level ``kinship_index``."""
        return kinship_index(row, query=self.query, calculator=calculator)


class SearchIntegrator:
    """Run A-STR, Y-STR and mtDNA searches for one query and join the results.

    ``query`` is a mapping with an ``'identifier'`` entry and optional
    ``'A-STR'``, ``'Y-STR'`` and ``'mtDNA'`` genotype entries.
    """

    def __init__(self, query):
        self.query = query
        self.identifier = query['identifier']

    def _series_for(self, key):
        """Genotype of the query for one marker type, named by identifier."""
        return pd.Series(self.query[key], name=self.identifier)

    @staticmethod
    def _join_prefixed(results, result, prefix):
        """Join ``result`` onto ``results`` with '<prefix> ' column names."""
        if result is None:
            return results
        return results.join(result.add_prefix('{} '.format(prefix)))

    @staticmethod
    def _filter_by_mismatches(results, prefix, kwargs, partial):
        """Keep rows within the caller's mismatch limit for one marker type.

        Nothing is filtered when the type was not searched, or when the
        caller gave no finite ``permit_mismatches`` for it.  With
        ``partial`` rows without any data for the type are kept as well.
        """
        markers_column = '{} markers'.format(prefix)
        limit = kwargs.get('permit_mismatches') if kwargs else None
        if (markers_column not in results.columns or limit is None
                or np.isinf(limit)):
            return results
        keep = results['{} mismatches'.format(prefix)] <= limit
        if partial:
            keep = pd.isnull(results[markers_column]) | keep
        return results.loc[keep]

    def search_relationship(self, relationship_type,
            astrs=None, ystrs=None, mtdnas=None,
            astr_kwargs={}, ystr_kwargs={}, mtdna_kwargs={}, partial=False,
            is_multiprocess=config.PYTHON_MULTIPROCESSING):
        """Search every available marker type and combine the results.

        The ``*_kwargs`` defaults are shared dict objects; they are only
        ever copied here, never modified.

        Args:
            relationship_type: ``'identity'``, ``'paternity'`` or
                ``'kinship'``; selects the A-STR search.  Y-STR and mtDNA
                are always searched for identity.
            astrs, ystrs, mtdnas: genotype tables (one row per sample), or
                None to skip that marker type.
            astr_kwargs, ystr_kwargs, mtdna_kwargs: extra keyword arguments
                for the respective search.  ``permit_mismatches`` is applied
                after the results are joined, not by the single searches.
            partial: keep rows that have no data for a marker type when
                applying that type's mismatch limit.
            is_multiprocess: forwarded to every search.

        Returns:
            DataFrame indexed by sample with '<type> <column>' columns,
            sorted by the available index / mismatch columns.

        Raises:
            ValueError: when an A-STR search is requested with an unknown
                ``relationship_type``.
        """
        astr_kwargs_all = dict(astr_kwargs, is_multiprocess=is_multiprocess)
        if relationship_type != 'kinship':
            astr_kwargs_all['permit_mismatches'] = float('inf')
        ystr_kwargs_all = dict(ystr_kwargs,
                permit_mismatches=float('inf'),
                is_multiprocess=is_multiprocess)
        mtdna_kwargs_all = dict(mtdna_kwargs,
                permit_mismatches=float('inf'),
                is_multiprocess=is_multiprocess)

        # Union of all sample identifiers.  Index.union is required: ``+``
        # on Index objects is element-wise in current pandas, and the bare
        # except that wrapped it silently left the index empty.
        index_total = pd.Index([])
        for frame in (astrs, ystrs, mtdnas):
            if frame is not None:
                index_total = index_total.union(frame.index)
        if self.identifier in index_total:
            index_total = index_total.drop(self.identifier)

        results = pd.DataFrame(index=index_total)
        if astrs is not None and ('A-STR' in self.query):
            searcher = StrSearcher(self._series_for('A-STR'), astrs)
            if relationship_type == 'identity':
                result = searcher.search_identities(**astr_kwargs_all)
            elif relationship_type == 'paternity':
                result = searcher.search_paternities(**astr_kwargs_all)
            elif relationship_type == 'kinship':
                result = searcher.search_kinships(**astr_kwargs_all)
            else:
                raise ValueError('unknown relationship_type: {!r}'.format(
                    relationship_type))
            results = self._join_prefixed(results, result, 'A-STR')

        if ystrs is not None and ('Y-STR' in self.query):
            searcher = StrSearcher(self._series_for('Y-STR'), ystrs)
            result = searcher.search_identities(**ystr_kwargs_all)
            results = self._join_prefixed(results, result, 'Y-STR')

        if mtdnas is not None and ('mtDNA' in self.query):
            searcher = MtdnaSearcher(self._series_for('mtDNA'), mtdnas)
            result = searcher.search_identities(**mtdna_kwargs_all)
            results = self._join_prefixed(results, result, 'mtDNA')

        # The column is absent when the query carries no A-STR data.
        if (astrs is not None and relationship_type == 'kinship'
                and 'A-STR max_index' in results.columns):
            results = results.loc[results['A-STR max_index'].notnull()]

        results = self._filter_by_mismatches(
                results, 'A-STR', astr_kwargs, partial)
        results = self._filter_by_mismatches(
                results, 'Y-STR', ystr_kwargs, partial)
        results = self._filter_by_mismatches(
                results, 'mtDNA', mtdna_kwargs, partial)

        # (column, ascending) pairs, most significant first.
        sort_columns = []
        if 'A-STR max_index' in results.columns and (
                not results['A-STR max_index'].isnull().all()):
            sort_columns.append(('A-STR max_index', False))
        elif 'A-STR mismatches' in results.columns and (
                results['A-STR mismatches'].notnull().sum() > 1):
            sort_columns.append(('A-STR mismatches', True))
            # Only present when an allele-frequency table was supplied.
            if 'A-STR index' in results.columns:
                sort_columns.append(('A-STR index', False))
        if 'Y-STR mismatches' in results.columns and (
                results['Y-STR mismatches'].notnull().sum() > 1):
            sort_columns.append(('Y-STR mismatches', True))
        if 'mtDNA mismatches' in results.columns and (
                results['mtDNA mismatches'].notnull().sum() > 1):
            sort_columns.append(('mtDNA mismatches', True))

        if sort_columns and not results.empty:
            # Historical tie-breaker for a single sort column; it is only
            # usable when the kinship search produced that column.
            if (len(sort_columns) == 1 and astrs is not None
                    and 'A-STR max_relative' in results.columns):
                sort_columns.append(('A-STR max_relative', True))
            results = results.sort_values(
                    by=[column for column, _ in sort_columns],
                    ascending=[ascending for _, ascending in sort_columns])
        return results

    def search_kinships(self, astrs=None, ystrs=None,
            astr_kwargs={}, ystr_kwargs={}):
        """Run the kinship search on A-STR and Y-STR data side by side.

        The ``*_kwargs`` defaults are shared dict objects; they are only
        read here.  Each one is forwarded to ``StrSearcher.search_kinships``
        and therefore has to provide ``af_table``.

        Returns:
            The per-type results concatenated column-wise with
            '<type> <column>' names; an empty DataFrame when no type could
            be searched.
        """
        results = []
        searches = (('A-STR', astrs, astr_kwargs),
                    ('Y-STR', ystrs, ystr_kwargs))
        for key, frame, kwargs in searches:
            # ``if frame:`` raises for a DataFrame, so test explicitly.
            if key not in self.query or frame is None or frame.empty:
                continue
            # The searcher needs the genotype Series, not the identifier.
            searcher = StrSearcher(self._series_for(key), frame)
            result = searcher.search_kinships(**kwargs)
            results.append(result.add_prefix('{} '.format(key)))
        if not results:
            return pd.DataFrame()
        return pd.concat(results, axis=1)


class CheckerBetween():
    """Marker-by-marker comparison of two samples.

    ``query_a`` and ``query_b`` are named Series indexed by marker.
    ``active_markers`` are the markers that are not null in ``query_a``.
    """

    def __init__(self, query_a, query_b, type_, af_table=None):
        self.query_a = query_a
        self.query_b = query_b
        self.type_ = type_
        self.active_markers = self.query_a.index[
                self.query_a.notnull().values]
        self.af_table = af_table

    def arrange_result(self, result, true_string):
        """Return a display copy of ``result`` with a trailing 'total' row.

        In the total row ``status`` holds the number of rows whose status
        equals ``true_string``, and every other non-allele column holds the
        product of its non-null values.  The allele columns (both samples
        and, if present, ``shared_allele``) are rewritten with
        ``decode_alleles``.
        """
        samples = [self.query_a.name, self.query_b.name]
        result_re = result.copy()
        totals = {}
        for column in result_re.columns:
            if column == 'status':
                totals['status'] = int(
                        (result_re['status'] == true_string).sum())
            elif (column not in samples) and (column != 'shared_allele'):
                present = result_re.loc[result_re[column].notnull(), column]
                totals[column] = np.prod(list(present))
                result_re[column] = result_re[column].astype(object)

        allele_columns = list(samples)
        if 'shared_allele' in result_re.columns:
            allele_columns.append('shared_allele')
        for column in allele_columns:
            for marker in result_re.index:
                alleles = decode_alleles(result_re.at[marker, column],
                        self.type_, marker, null=np.nan)
                if alleles != '-':
                    result_re.at[marker, column] = alleles

        # Aligning the row to the existing columns first keeps the column
        # order unchanged when the row is concatenated.
        total_row = pd.Series(totals, name='total', dtype=object).reindex(
                result_re.columns)
        return pd.concat([result_re, total_row.to_frame().T])

    def _paired_frame(self):
        """Both samples side by side, indexed by the markers of query_a."""
        result = pd.DataFrame(index=self.query_a.index)
        result[self.query_a.name] = self.query_a
        result[self.query_b.name] = self.query_b
        return result

    def _typed_in_either(self, result):
        """Rows where at least one of the two samples is not null."""
        typed = (result[self.query_a.name].notnull() |
                result[self.query_b.name].notnull())
        return result.loc[typed].copy()

    def _per_marker_index(self, calculator):
        """Index of every active marker, computed one marker at a time."""
        # Work on a copy: padding the markers query_b lacks must not
        # modify the Series the caller passed in.
        query_b = self.query_b.copy()
        for marker in self.active_markers:
            if marker not in query_b:
                query_b[marker] = None
        values = [calculator.get_cumulative_index(
                      pd.Index([marker]), self.query_a, query_b)
                  for marker in self.active_markers]
        return pd.Series(values, index=self.active_markers)

    def check_identities(self):
        """Compare the two samples for identity.

        Returns:
            DataFrame with one column per sample, ``status`` ('matched',
            'partial' or 'unmatched') and, when an allele-frequency table
            is available, ``index``.
        """
        result = self._typed_in_either(self._paired_frame())
        result['status'] = result.apply(self._check_identities, axis=1)
        if self.af_table is not None and not self.af_table.empty:
            calculator = IdentityIndexCalculator(self.af_table)
            result['index'] = self._per_marker_index(calculator)
        return result

    def _check_identities(self, row):
        """Status of one marker; ``row`` holds the two samples' values."""
        value_a, value_b = row.iloc[0], row.iloc[1]
        if value_a == value_b:
            return 'matched'
        elif (type(value_a) != list and pd.isnull(value_a)) or \
            (type(value_b) != list and pd.isnull(value_b)):
            return 'partial'
        else:
            return 'unmatched'

    def check_paternities(self, add_shared_allele=False):
        """Compare the two samples for allele sharing.

        Args:
            add_shared_allele: add a ``shared_allele`` column listing the
                common alleles of every 'shared' marker.

        Returns:
            DataFrame with one column per sample, ``status`` ('shared',
            'partial' or 'unshared') and, when an allele-frequency table
            is available, ``index``.
        """
        result = self._typed_in_either(self._paired_frame())
        result['status'] = result.apply(self._check_paternities, axis=1)
        if self.af_table is not None and not self.af_table.empty:
            calculator = PaternityIndexCalculator(self.af_table)
            result['index'] = self._per_marker_index(calculator)
        if add_shared_allele:
            result = self.check_paternities_add_shared_allele(result)
        return result

    def _check_paternities(self, row):
        """Status of one marker; ``row`` holds the two samples' values."""
        value_a, value_b = row.iloc[0], row.iloc[1]
        if (type(value_a) != list and pd.isnull(value_a)) or \
            (type(value_b) != list and pd.isnull(value_b)):
            return 'partial'
        elif bool(set(value_a) & set(value_b)):
            return 'shared'
        else:
            return 'unshared'

    def check_paternities_add_shared_allele(self, result):
        """Add a ``shared_allele`` column to ``result`` (modified in place).

        The first two columns of ``result`` must be the two samples.
        """
        shared = {}
        rows = zip(result.index, result['status'],
                result.iloc[:, 0], result.iloc[:, 1])
        for marker, status, alleles_a, alleles_b in rows:
            if status == 'shared':
                shared[marker] = list(set(alleles_a) & set(alleles_b))
        result['shared_allele'] = pd.Series(shared, name='shared_allele',
                dtype=object)
        return result

    @staticmethod
    def _single_marker_series(query, marker):
        """One-element Series for ``marker``; None when the marker is absent."""
        try:
            value = query[marker]
        except KeyError:
            value = None
        return pd.Series([value], index=[marker])

    def check_kinships(self):
        """Per-marker kinship index for every relationship.

        Returns:
            DataFrame with one column per sample and one column per
            relationship of ``KinshipIndexCalculator``; sex markers and
            markers that are null in query_a have no index value.
        """
        result = self._paired_frame()
        active_autosomal_markers = [m for m in self.active_markers
                if m not in config.SEX_MARKERS]
        calculator = KinshipIndexCalculator(self.af_table)
        # The single-marker pairs do not depend on the relationship, so
        # they are built once.
        pairs = [(self._single_marker_series(self.query_a, marker),
                  self._single_marker_series(self.query_b, marker))
                 for marker in active_autosomal_markers]
        for relationship in calculator.relationships:
            values = [calculator.get_cumulative_index(aa, bb, relationship)
                      for aa, bb in pairs]
            result[relationship] = pd.Series(values,
                    index=active_autosomal_markers)
        return result


[문서]class ManageSearcher():
    def __init__(self, relationship_type, identifier, target_group,
            partial=True,
            check_astr=True, astr_partial=True, astr_permit_mismatches=2,
            af_table=None, min_kinship_index=1,
            check_ystr=False, ystr_partial=True, ystr_permit_mismatches=2,
            check_mtdna=False, mtdna_partial=True, mtdna_permit_mismatches=2,
            mtdna_exclude_cstretches=False,
            dbname=config.DEFAULT_MONGODB_NAME):
        """Store the options of a search of one sample against a group.

        Every argument is kept on the instance under its own name. A
        genotype manager and an allele-frequency manager are created for
        ``dbname``.
        """
        # Database handles.
        self.dbname = dbname
        self.gm = db.GenotypeManager(self.dbname)
        self.afm = db.AlleleFrequenciesManager(self.dbname)

        # What is searched and against which group.
        self.relationship_type = relationship_type
        self.identifier = identifier
        self.target_group = target_group
        self.partial = partial

        # A-STR options.
        self.check_astr = check_astr
        self.astr_partial = astr_partial
        self.astr_permit_mismatches = astr_permit_mismatches
        self.af_table = af_table
        self.min_kinship_index = min_kinship_index

        # Y-STR options.
        self.check_ystr = check_ystr
        self.ystr_partial = ystr_partial
        self.ystr_permit_mismatches = ystr_permit_mismatches

        # mtDNA options.
        self.check_mtdna = check_mtdna
        self.mtdna_partial = mtdna_partial
        self.mtdna_permit_mismatches = mtdna_permit_mismatches
        self.mtdna_exclude_cstretches = mtdna_exclude_cstretches

[문서]    def get_target_group_info(self, row, group=None):
        sample = Sample.objects.get(identifier=row.name)
        individual = sample.get_individual()
        columns = self.TARGET_GROUP_INFO_COLUMNS[group]
        result = []
        for column in columns:
            if column == 'sex':
                result.append(individual.get_sex_display())
            else:
                result.append(getattr(individual, column))
        return pd.Series(result, index=columns)

[문서]    def search(self):
        targets = {}
        if self.check_astr:
            targets['A-STR'] = self.gm.get_genotypes_by_group('A-STR',\
                    self.target_group)
        if self.check_ystr:
            targets['Y-STR'] = self.gm.get_genotypes_by_group('Y-STR',\
                    self.target_group)
        if self.check_mtdna:
            targets['mtDNA'] = self.gm.get_genotypes_by_group('mtDNA',\
                    self.target_group)
        result_df = self.run_search(targets)

        if result_df is not None and not result_df.empty:
            """
            result_df[self.TARGET_GROUP_INFO_COLUMNS[target_group]] = (
                    result_df.apply(self.get_target_group_info, 
                            group=self.target_group, axis=1)
            )
            """
            new_columns = []
            for column in result_df.columns:
                new_columns.append(column.replace(' ', '_').replace('-', '_'))
            result_df.columns = new_columns

        return DataContainer(status='Success', message='Search',
                                items=result_df)


[문서]    def run_search(self, targets):
        query = self.gm.get_genotype(self.identifier)
        if self.check_astr and 'A-STR' not in query:
            raise NoValueException('{} has no A-STR'.format(self.identifier))
        if self.check_ystr and 'Y-STR' not in query:
            raise NoValueException('{} has no Y-STR'.format(self.identifier))
        if self.check_mtdna and 'mtDNA' not in query:
            raise NoValueException('{} has no mtDNA'.format(self.identifier))
        if self.af_table:
            af_table = self.afm.get(self.af_table, 'A-STR')

        if targets:
            astrs = targets.get('A-STR')
            ystrs = targets.get('Y-STR')
            mtdnas = targets.get('mtDNA')
        else:
            astrs = self.gm.get_genotypes('A-STR')
            ystrs = self.gm.get_genotypes('Y-STR')
            mtdnas = self.gm.get_genotypes('mtDNA')

        if self.relationship_type == 'kinship':
            astr_kwargs = {
                'af_table': af_table,
                'min_kinship_index': self.min_kinship_index,
            } 
        else:
            astr_kwargs = {
                'permit_mismatches': self.astr_permit_mismatches,
                'af_table': af_table, 
                'partial': self.astr_partial,
            }
        si = SearchIntegrator(query)
        result = si.search_relationship(self.relationship_type, 
                astrs=astrs, ystrs=ystrs, mtdnas=mtdnas,
                astr_kwargs=astr_kwargs,
                ystr_kwargs={
                    'permit_mismatches': self.ystr_permit_mismatches,
                    'partial': self.ystr_partial,
                },
                mtdna_kwargs={
                    'permit_mismatches': self.mtdna_permit_mismatches,
                    'partial': self.mtdna_partial,
                    'exclude_cstretches': self.mtdna_exclude_cstretches,
                },
                partial=self.partial,
        )
        return result

[문서]class ManageMultipleSearch():
    def __init__(self, query_group='soldier', query_type='all',
            query_first=None, query_second=None,
            target_group='customer', target_type='all',
            target_first=None, target_second=None,
            relationship_type='kinship', partial=True,
            check_astr=True, astr_partial=True, astr_permit_mismatches=2,
            af_table=None, min_kinship_index=1,
            check_ystr=False, ystr_partial=True, ystr_permit_mismatches=2,
            check_mtdna=False, mtdna_partial=True, mtdna_permit_mismatches=2,
            mtdna_exclude_cstretches=False,
            dbname=config.DEFAULT_MONGODB_NAME,
            description=None):
        """Store the options of a group-against-group search.

        Every argument is kept on the instance under its own name, and the
        genotype, allele-frequency, task and group managers are created
        for ``dbname``.

        Raises:
            ParameterException: ``query_type`` or ``target_type`` is not
                one of 'all', 'range', 'keyword' or 'manual'.
        """
        selection_types = ['all', 'range', 'keyword', 'manual']
        if query_type not in selection_types:
            raise ParameterException(
                '{} is not proper value for query_type'.format(query_type))
        if target_type not in selection_types:
            raise ParameterException(
                '{} is not proper value for target_type'.format(target_type))

        # Query side selection.
        self.query_group = query_group
        self.query_type = query_type
        self.query_first = query_first
        self.query_second = query_second

        # Target side selection.
        self.target_group = target_group
        self.target_type = target_type
        self.target_first = target_first
        self.target_second = target_second

        # Overall comparison settings.
        self.relationship_type = relationship_type
        self.partial = partial
        self.description = description

        # A-STR options.
        self.check_astr = check_astr
        self.astr_partial = astr_partial
        self.astr_permit_mismatches = astr_permit_mismatches
        self.af_table = af_table
        self.min_kinship_index = min_kinship_index

        # Y-STR options.
        self.check_ystr = check_ystr
        self.ystr_partial = ystr_partial
        self.ystr_permit_mismatches = ystr_permit_mismatches

        # mtDNA options.
        self.check_mtdna = check_mtdna
        self.mtdna_partial = mtdna_partial
        self.mtdna_permit_mismatches = mtdna_permit_mismatches
        self.mtdna_exclude_cstretches = mtdna_exclude_cstretches

        # Database handles.
        self.dbname = dbname
        self.gm = db.GenotypeManager(self.dbname)
        self.afm = db.AlleleFrequenciesManager(self.dbname)
        self.tm = db.TaskManager(self.dbname)
        self.grm = db.GroupManager(self.dbname)

[문서]    def get_filtered_identifiers(self, group, query_type, queries):
        ids = self.grm.ids_belong_group(group)
        ids.sort()
        if query_type == 'all':
            pass
        elif query_type == 'range':
            query_first = queries[0]
            query_second = queries[1]
            
            if not query_first in ids:
                raise NoValueException('There is no id {}'.format(query_first))
            if not query_second in ids:
                raise NoValueException('There is no id {}'.format(query_second))
            
            ids = ids[ids.index(query_first) : ids.index(query_second)+1]
        elif query_type == 'keyword':
            keyword = queries[0]
            ids = [id_ for id_ in ids if id_.find(keyword) != -1]
        elif query_type == 'manual':
            ids = queries[0].strip().split()

        return ids

[문서]    def search(self):
        ## make task & save into db
        today_tasks = self.tm.get_alltasks('identifier', time.strftime("%Y%m%d"))
        counts = today_tasks.count() if today_tasks else 0
        task_id = '{}-{}'.format(time.strftime("%Y%m%d"), str(counts + 1))
        astr_option = {}
        ystr_option = {}
        mtdna_option = {}
        if self.check_astr:
            astr_option = {
                'permit_mismatches': self.astr_permit_mismatches,
                'partial': self.astr_partial,
                'min_kinship_index': self.min_kinship_index,
                'af_table': self.af_table,
            }
        if self.check_ystr:
            ystr_option = {
                'permit_mismatches': self.ystr_permit_mismatches,
                'partial': self.ystr_partial,
            }
        if self.check_mtdna:
            mtdna_option = {
                'permit_mismatches': self.mtdna_permit_mismatches,
                'partial': self.mtdna_partial,
                'exclude_cstretches': self.mtdna_exclude_cstretches,
            }
        ## get query ids, target ids
        queries = self.get_filtered_identifiers(self.query_group,\
                self.query_type, [self.query_first, self.query_second])
        targets = self.get_filtered_identifiers(self.target_group,\
                self.target_type, [self.target_first, self.target_second])

        mst = self.tm.create(identifier=task_id,
                query_group=self.query_group,
                query_type=self.query_type,
                query_count=len(queries),
                query_first=self.query_first,
                query_second=self.query_second,
                target_group=self.target_group,
                target_type=self.target_type,
                target_count=len(targets),
                target_first=self.target_first,
                target_second=self.target_second,
                relationship_type=self.relationship_type,
                description=self.description,
                partial=self.partial,
                astr_option=astr_option,
                ystr_option=ystr_option,
                mtdna_option=mtdna_option,
                dbname=self.dbname
        )


        ## run multiple searching
        threading.Thread(target=self.search_multiple_async,\
                args=(task_id, queries, astr_option,\
                ystr_option, mtdna_option, targets,)).start()

        return DataContainer(status='Success', message='Search',
                                items={'taskId':task_id})

[문서]    def search_multiple_async(self, task_id, queries,astr_option=None,\
            ystr_option=None, mtdna_option=None, targets=None):
        self.tm.update(task_id, 'status', 'Searching')
        percentage_to_record = 10
        astrs = ystrs = mtdnas = af_table = None
        if astr_option:
            astrs = self.gm.get_genotypes_by_group('A-STR', self.target_group)
            if targets is not None:
                astrs = astrs.loc[targets]
            af_table = self.afm.get(astr_option['af_table'], 'A-STR')
        if ystr_option:
            ystrs = self.gm.get_genotypes_by_group('Y-STR', self.target_group)
            if targets is not None:
                ystrs = ystrs.loc[targets]
        if mtdna_option:
            mtdnas = self.gm.get_genotypes_by_group('mtDNA', self.target_group)
            if targets is not None:
                mtdnas = mtdnas.loc[targets]

        if self.relationship_type == 'kinship':
            astr_kwargs = {
                'af_table': af_table,
                'min_kinship_index': astr_option['min_kinship_index'],
            }
        else:
            if astr_option:
                astr_kwargs = {
                    'permit_mismatches': astr_option['permit_mismatches'],
                    'af_table': af_table,
                    'partial': astr_option['partial'],
                }
            else:
                astr_kwargs = astr_option

        total_result = pd.DataFrame()
        total = len(queries)
        query_count_to_record = int(total * percentage_to_record / 100)
        for i, identifier in enumerate(queries):
            print('search...', identifier)
            query = self.gm.get_genotype(identifier)
            types = list(query.keys())
            for type_ in types:
                if type_ in ['A-STR', 'Y-STR', 'mtDNA']:
                    is_all_None = [genotype is None for genotype in\
                            list(query[type_].values())]
                    if all(is_all_None):
                        del(query[type_])
            si = SearchIntegrator(query)
            result = si.search_relationship(self.relationship_type, partial=self.partial,
                    astrs=astrs, ystrs=ystrs, mtdnas=mtdnas,
                    astr_kwargs=astr_kwargs,
                    ystr_kwargs=ystr_option,
                    mtdna_kwargs=mtdna_option,
                    #is_multiprocess=False,
            )
            result['Identifier A'] = identifier
            result['Identifier B'] = result.index
            total_result = result.append(total_result, ignore_index=True)
            #total_result = total_result.append(result, ignore_index=True)
            if ((i+1) % query_count_to_record) == 0:
                self.tm.update(task_id, 'progress', ((i + 1) * 100 / total))
        self.tm.update(task_id, 'progress', 100)

        prev_columns = total_result.columns.tolist()
        if self.relationship_type == 'kinship':
            columns = ['Identifier A', 'Identifier B', 'A-STR max_index',\
                    'A-STR max_relative', 'A-STR parent-child',\
                    'A-STR full-sib', 'A-STR half-sib', 'A-STR first cousin',\
                    'A-STR 5 chon', 'A-STR 6 chon', 'A-STR 7 chon',\
                    'A-STR 8 chon', 'A-STR unrelated']
        else:
            columns = ['Identifier A', 'Identifier B']
        for column in columns:
            if column in prev_columns:
                prev_columns.remove(column)
        columns = columns + prev_columns
        """
        if self.target_group == 'customer':
            total_result = total_result.join(pd.DataFrame({'relationship':\
                    {customer_id: Customer.objects.filter(sample__identifier=\
                    customer_id)[0].relationship for customer_id\
                    in total_result['Identifier B']}}), on='Identifier B')
            prev_columns = columns
            columns = ['Identifier A', 'Identifier B', 'relationship']
            for column in columns:
                if column in prev_columns:
                    prev_columns.remove(column)
            columns += prev_columns
        """
        try:
            total_result = total_result[columns]
        except KeyError:
            for column in columns:
                if column not in total_result.columns:
                    total_result[column] = np.nan
            total_result = total_result[columns]
        if 'A-STR max_index' in total_result.columns and (
                not total_result['A-STR max_index'].isnull().all()):
            total_result = total_result.sort_index(by=['A-STR max_index'],\
                    ascending=False)
            total_result.index = list(range(total_result.shape[0]))
        """
        excel = df_to_excel(total_result, index=True)
        msr.result = SimpleUploadedFile('{}.xlsx'.format(msr.identifier),
                excel.read(), 
                content_type='application/vnd.openxmlformats-officedocument'
                             '.spreadsheetml.sheet')
        """
        total_result
        self.tm.update(task_id, 'result', total_result.T.to_json())
        self.tm.update(task_id, 'status', 'Finished')


[문서]class ManageCheckBetween():
    def __init__(self, relationship_type, identifier_a, identifier_b,
            partial=True,
            check_astr=True, astr_partial=True, astr_permit_mismatches=2,
            af_table=None, min_kinship_index=1,
            check_ystr=False, ystr_partial=True, ystr_permit_mismatches=2,
            check_mtdna=False, mtdna_partial=True, mtdna_permit_mismatches=2,
            mtdna_exclude_cstretches=False,
            dbname=config.DEFAULT_MONGODB_NAME):
        """Store the options of a comparison between two samples.

        Every argument is kept on the instance under its own name. A
        genotype manager and an allele-frequency manager are created for
        ``dbname``.
        """
        # Database handles.
        self.dbname = dbname
        self.gm = db.GenotypeManager(self.dbname)
        self.afm = db.AlleleFrequenciesManager(self.dbname)

        # The two samples and the relationship being tested.
        self.relationship_type = relationship_type
        self.identifier_a = identifier_a
        self.identifier_b = identifier_b
        self.partial = partial

        # A-STR options.
        self.check_astr = check_astr
        self.astr_partial = astr_partial
        self.astr_permit_mismatches = astr_permit_mismatches
        self.af_table = af_table
        self.min_kinship_index = min_kinship_index

        # Y-STR options.
        self.check_ystr = check_ystr
        self.ystr_partial = ystr_partial
        self.ystr_permit_mismatches = ystr_permit_mismatches

        # mtDNA options.
        self.check_mtdna = check_mtdna
        self.mtdna_partial = mtdna_partial
        self.mtdna_permit_mismatches = mtdna_permit_mismatches
        self.mtdna_exclude_cstretches = mtdna_exclude_cstretches

[문서]    def total_row(self, result, is_kinship=False, is_paternity=False):
        last_row = []
        for column in result.columns:
            if column in [self.identifier_a, self.identifier_b]:
                last_row.append(result[column].count())
            elif column == 'shared_allele':
                last_row.append(np.nan)
            elif column == 'status':
                if is_paternity:
                    last_row.append(sum((result[column]=='shared') |\
                            (result[column]=='partial')))
                else:
                    last_row.append(sum((result[column]=='matched') |\
                            (result[column]=='partial')))
            else:
                last_row.append(np.prod(result[column]))

        last_row = pd.Series(last_row, index=result.columns)
        if is_kinship:
            ci_row = last_row / last_row['unrelated']
            probability_row = ci_row * 100 / (ci_row + 1)
            df = pd.DataFrame({
                    'total': last_row,
                    'KI': ci_row, 
                    'probability': probability_row, 
                    }, columns=['total', 'KI', 'probability']).T
            df.loc['KI':'probability', self.identifier_a:self.identifier_b] =\
                    np.nan
            return df
        else:
            return pd.DataFrame({'total': last_row}).T

[문서]    def check(self):
        sample_a = self.gm.get_genotype(self.identifier_a)
        sample_b = self.gm.get_genotype(self.identifier_b)
        result = {}

        if self.check_astr and ('A-STR' in sample_a) and ('A-STR' in sample_b):
            astr_result = self.run_check_between(self.relationship_type,\
                    'A-STR', shared_allele=True, partial=self.astr_partial)
            astr_result = astr_result.loc[
                    astr_result[self.identifier_a].notnull() 
                    | astr_result[self.identifier_b].notnull()]

            is_kinship = True if self.relationship_type == 'kinship' else False
            is_paternity = True if self.relationship_type == 'paternity'\
                    else False
            astr_result = pd.concat([astr_result, self.total_row(astr_result,\
                    is_kinship, is_paternity)])
            astr_result.index = [decode_marker(marker, 'A-STR') for\
                    marker in astr_result.index]
            result['astr_result'] = astr_result
            if self.relationship_type == 'kinship':
                astr_result_summary = astr_result.ix[
                        ['total','KI','probability'], 
                        ['parent-child', 'full-sib', 'half-sib',\
                                'first cousin', '5 chon', '6 chon',\
                                'unrelated',]]
                astr_result_summary =\
                        astr_result_summary.T.sort(['KI'], ascending=False).T
                columns_temp = astr_result_summary.columns
                columns_temp_final = []
                for column in columns_temp:
                    if column == 'parent-child':
                        column = 'Parent child'
                    elif column == 'half-sib':
                        column = 'Half sibling'
                    elif column == 'full-sib':
                        column = 'Full sibling'
                    elif column == 'first cousin':
                        column = 'First cousin'
                    elif column == 'unrelated':
                        column = 'Unrelated'
                    columns_temp_final.append(column)
                astr_result_summary.columns = columns_temp_final
                result['astr_result_summary'] = astr_result_summary
                ordered_relationship = list(\
                        result['astr_result'].ix[:,2:].T.sort('probability',\
                        ascending=False).index)
                ordered_relationship.remove('7 chon')
                ordered_relationship.remove('8 chon')
                result['ordered_relationship'] = ordered_relationship
                result['ordered_astr_result'] = result['astr_result'][list(\
                        result['astr_result'].columns[:2])+ordered_relationship]

        if self.check_ystr and ('Y-STR' in sample_a) and ('Y-STR' in sample_b):
            ystr_result = self.run_check_between('identity','Y-STR',\
                    False, self.ystr_partial)
            ystr_result = ystr_result.loc[
                    ystr_result[self.identifier_a].notnull() 
                    | ystr_result[self.identifier_b].notnull()]
            ystr_result = pd.concat([ystr_result, 
                    self.total_row(ystr_result)])
            ystr_result.index = [decode_marker(marker, 'Y-STR') for\
                    marker in ystr_result.index] 
            result['ystr_result'] = ystr_result

        if self.check_mtdna and ('mtDNA' in sample_a) and ('mtDNA' in sample_b):
            mtdna_result = self.run_check_between('identity', 'mtDNA',\
                    False, self.mtdna_partial)
            mtdna_result = mtdna_result.loc[
                    mtdna_result[self.identifier_a].notnull() 
                    | mtdna_result[self.identifier_b].notnull()]
            mtdna_index = []
            for index in mtdna_result.index:
                decoded_index = decode_marker(index, 'mtDNA')
                mtdna_index.append((float(decoded_index),index))
            mtdna_index.sort()
            mtdna_index = [index[1] for index in mtdna_index]
            mtdna_result = mtdna_result.loc[mtdna_index]
            mtdna_result = pd.concat([mtdna_result, 
                self.total_row(mtdna_result)])
            mtdna_result.index = [decode_marker(marker, 'mtDNA') for\
                    marker in mtdna_result.index] 
            result['mtdna_result'] = mtdna_result
        
        del(sample_a['_id'])
        del(sample_b['_id'])
        result['sample_a'] = sample_a
        result['sample_b'] = sample_b
        result['report'] = self.make_report(result)
        return DataContainer(status='Success',
                            message='Success checking between search',
                            items=result)

[문서]    def get_astr_report(self, df):
        result = {}
        result['astr_option'] = {
            'permit_mismatches': self.astr_permit_mismatches,
            'partial': self.astr_partial,
            'min_kinship_index': self.min_kinship_index,
            'af_table': self.af_table,
        }
        markers = int(df.loc['total', self.identifier_a])
        if self.relationship_type == 'kinship':
            matches = None
            ci_series = df.loc['KI', '5 chon':'unrelated']
            max_column_index = ci_series.argmax()
            index = ci_series.max()
            try:
                relation = ci_series.index[max_column_index]
            except ValueError:
                relation = max_column_index
            decision = index >= self.min_kinship_index
            result['astr_report'] = {
                'markers': markers,
                'relation': ci_series[ci_series == index].index[0],
                'index': index,
                'decision': decision,
            }
        else:
            matches = int(df.loc['total', 'status'])
            index = df.loc['total', 'index']
            if result['astr_option']['partial']:
                if self.relationship_type == 'identity':
                    decision = df.loc[df['status']=='unmatched','status'].\
                            count() <=  self.astr_permit_mismatches
                else:
                    decision = df.loc[df['status']=='unshared','status'].\
                            count() <=  self.astr_permit_mismatches
            else:
                decision = markers - matches <= self.astr_permit_mismatches
            result['astr_report'] = {
                'markers': markers,
                'matches': matches,
                'decision': decision,
            }
        return result

[문서]    def get_ystr_report(self, df):
        result = {}
        result['ystr_option'] = {
            'permit_mismatches': self.ystr_permit_mismatches,
            'partial': self.ystr_partial,
        }
        markers = int(df.loc['total', self.identifier_a])
        matches = int(df.loc['total', 'status'])
        if result['ystr_option']['partial']:
            decision = df.loc[df['status']=='unmatched','status'].count() <=\
                    self.ystr_permit_mismatches
        else:
            decision = markers - matches <= self.ystr_permit_mismatches
        result['ystr_report'] = {
            'markers': markers,
            'matches': matches,
            'decision': decision,
        }
        return result

[문서]    def get_mtdna_report(self, df):
        result = {}
        result['mtdna_option'] = {
            'permit_mismatches': self.mtdna_permit_mismatches,
            'partial': self.mtdna_partial,
            'exclude_cstretchs': self.mtdna_exclude_cstretches,
        }
        markers = int(df.loc['total', self.identifier_a])
        matches = int(df.loc['total', 'status'])
        if result['mtdna_option']['partial']:
            decision = df.loc[df['status']=='unmatched','status'].count() <=\
                    self.mtdna_permit_mismatches
        else:
            decision = markers - matches <= self.mtdna_permit_mismatches
        result['mtdna_report'] = {
            'markers': markers,
            'matches': matches,
            'decision': decision,
        }
        return result

[문서]    def make_report(self, data):
        result = {
            'total_option': {
                'type': self.relationship_type,
                'partial': self.partial,
            },
        }
        if 'astr_result' in data:
            result.update(self.get_astr_report(data['astr_result']))
        if 'ystr_result' in data:
            result.update(self.get_ystr_report(data['ystr_result']))
        if 'mtdna_result' in data:
            result.update(self.get_mtdna_report(data['mtdna_result']))

        decisions = []
        for report in ('astr_report', 'ystr_report', 'mtdna_report'):
            if report in result:
                decisions.append(result[report]['decision'])
        if self.partial and True in decisions:
            result['total_decision'] = True
        else:
            result['total_decision'] = all(decisions)
        return result

[문서]    def run_check_between(self, relationship_type, type_, shared_allele=False,\
            partial=False, format=None):
        s_identifier_a = pd.Series(\
                self.gm.get_genotype(self.identifier_a)[type_])
        s_identifier_b = pd.Series(\
                self.gm.get_genotype(self.identifier_b)[type_])
        s_identifier_a.name = self.identifier_a
        s_identifier_b.name = self.identifier_b
        query_a = s_identifier_a
        query_b = s_identifier_b
        ##

        try:
            af_table = self.afm.get(self.af_table, type_) if self.af_table\
                    else None
        except:
            af_table = None

        cb = CheckerBetween(query_a, query_b, type_, af_table)
        if relationship_type == 'identity':
            result = cb.check_identities()
            result = cb.arrange_result(result, 'matched')
        elif relationship_type == 'paternity':
            result = cb.check_paternities(add_shared_allele=shared_allele)
            result = cb.arrange_result(result, 'shared')
        elif relationship_type == 'kinship':
            result = cb.check_kinships()
            result = cb.arrange_result(result, 'shared')
        return result


"""
    def run_search_multiple(self, relationship_type, group_a, group_b, 
            name, partial=False, 
            permit_mismatches_astr=float('inf'), partial_astr=False, 
            permit_mismatches_ystr=float('inf'), partial_ystr=False, 
            permit_mismatches_mtdna=float('inf'), partial_mtdna=False, 
            exclude_cstretch_mtdna=False):
        group_a = group_a.split(',')
        group_b = group_b.split(',')
        af_astrs = self.afm.get(name,'A-STR')
        af_ystrs = None

        identifiers_a = pd.Index([])
        try:
            astrs_a = self.gm.get_genotypes_by_group('A-STR', group_a)
            identifiers_a = identifiers_a + astrs_a.index
        except:
            astrs_a = None
        try:
            ystrs_a = self.gm.get_genotypes_by_group('Y-STR', group_a)
            identifiers_a = identifiers_a + ystrs_a.index
        except:
            ystrs_a = None
        try:
            mtdnas_a = self.gm.get_genotypes_by_group('mtDNA', group_a)
            identifiers_a = identifiers_a + mtdnas_a.index
        except:
            mtdnas_a = None
        identifiers_b = pd.Index([])
        try:
            astrs_b = self.gm.get_genotypes_by_group('A-STR', group_b)
            identifiers_b = identifiers_b + astrs_b.index
        except:
            astrs_b = None
        try:
            ystrs_b = self.gm.get_genotypes_by_group('Y-STR', group_b)
            identifiers_b = identifiers_b + ystrs_b.index
        except:
            ystrs_b = None
        try:
            mtdnas_b = self.gm.get_genotypes_by_group('mtDNA', group_b)
            identifiers_b = identifiers_b + mtdnas_b.index
        except:
            mtdnas_b = None

        total_result = pd.DataFrame()
        for identifier in identifiers_a:
            query = self.gm.get_genotype(identifier)
            if relationship_type == 'kinship':
                astr_kwargs={'af_table': af_astrs} 
            else:
                astr_kwargs={'permit_mismatches': permit_mismatches_astr,
                        'af_table': af_astrs, 'partial': partial_astr}
            si = SearchIntegrator(query)
            result = si.search_relationship(relationship_type, 
                    astrs=astrs_b, ystrs=ystrs_b, mtdnas=mtdnas_b,
                    astr_kwargs=astr_kwargs,
                    ystr_kwargs={'permit_mismatches':\
                        permit_mismatches_ystr,
                        'af_table': af_ystrs, 
                        'partial': partial_ystr},
                    mtdna_kwargs={'permit_mismatches': 
                            permit_mismatches_mtdna,
                        'partial': partial_mtdna,
                        'exclude_cstretches': exclude_cstretch_mtdna,},
                    partial=partial
                    )
            index_length = len(result.index)
            index = pd.MultiIndex(levels=[[identifier], result.index], 
                    labels=[[0]*index_length, range(index_length)], 
                    names=['group_A','group_B'])
            result.index = index
            total_result = total_result.append(result)
        return DataContainer(status='Success', message='Search',
                                items=total_result)
"""


def _load_marker_series(gm, identifier, marker_type):
    """Return one sample's ``marker_type`` genotype as a Series, or None.

    The genotype record is fetched with ``gm.get_genotype(identifier)`` and
    the ``marker_type`` entry ('A-STR', 'Y-STR' or 'mtDNA') is wrapped in a
    ``pd.Series`` whose ``name`` is the identifier.

    Returns None when the lookup raises TypeError or KeyError (for example
    when the record is not subscriptable or has no entry for the marker
    type).
    """
    try:
        series = pd.Series(gm.get_genotype(identifier)[marker_type])
        series.name = identifier
    except (TypeError, KeyError):
        return None
    return series


def run_check_hml(customer_identifier, goldstar_identifier):
    """Compare two samples and collect the report rows for each marker type.

    Four comparisons are run between the two samples: A-STR paternity,
    Y-STR identity, mtDNA identity and A-STR kinship.  A comparison is
    skipped (empty rows, default row count) when the genotype of either
    sample cannot be loaded for that marker type.

    Args:
        customer_identifier: identifier of the customer sample.
        goldstar_identifier: identifier of the goldstar sample.

    Returns:
        dict with the keys 'customer_goldstar_name', 'goldstar_identifier',
        'customer_relationship', 'customer_name', 'customer_identifier' and,
        for each of 'kinship', 'astr', 'ystr' and 'mtdna', a '<x>_rows' /
        '<x>_row_count' pair.  The name and relationship entries are
        hard-coded placeholder strings.

    NOTE(review): the customer sample is passed to CheckerBetween as the
    first query and the goldstar sample as the second, following the
    parameter order -- confirm this matches what the row builders expect.
    """
    gm = db.GenotypeManager()
    afm = db.AlleleFrequenciesManager()

    ## astr
    query_a = _load_marker_series(gm, customer_identifier, 'A-STR')
    query_b = _load_marker_series(gm, goldstar_identifier, 'A-STR')
    af_table = afm.get('Korean', 'A-STR')
    if query_a is not None and query_b is not None:
        cb = CheckerBetween(query_a, query_b, 'A-STR', af_table)
        astr_result = cb.check_paternities(add_shared_allele=True)
        astr_result = cb.arrange_result(astr_result, 'shared')
        astr_result = astr_result.drop('total')
        astr_rows, astr_row_count = create_astr_rows(
            astr_result, goldstar_identifier, customer_identifier,
            start_row_num=1)
    else:
        astr_rows, astr_row_count = '', 1

    ## ystr (identity check, no allele-frequency table)
    query_a = _load_marker_series(gm, customer_identifier, 'Y-STR')
    query_b = _load_marker_series(gm, goldstar_identifier, 'Y-STR')
    if query_a is not None and query_b is not None:
        cb = CheckerBetween(query_a, query_b, 'Y-STR', None)
        ystr_result = cb.check_identities()
        ystr_result = cb.arrange_result(ystr_result, 'matched')
        ystr_result = ystr_result.drop('total')
        ystr_rows, ystr_row_count = create_ystr_rows(
            ystr_result, goldstar_identifier, customer_identifier,
            start_row_num=1)
    else:
        ystr_rows, ystr_row_count = '', 1

    ## mtdna (identity check, no allele-frequency table)
    query_a = _load_marker_series(gm, customer_identifier, 'mtDNA')
    query_b = _load_marker_series(gm, goldstar_identifier, 'mtDNA')
    if query_a is not None and query_b is not None:
        cb = CheckerBetween(query_a, query_b, 'mtDNA', None)
        mtdna_result = cb.check_identities()
        mtdna_result = cb.arrange_result(mtdna_result, 'matched')
        mtdna_result = mtdna_result.drop('total')
        mtdna_rows, mtdna_row_count = create_mtdna_rows(
            mtdna_result, goldstar_identifier, customer_identifier,
            start_row_num=1)
    else:
        mtdna_rows, mtdna_row_count = '', 1

    ## kinship
    # The A-STR genotypes and frequency table are fetched again, as in the
    # original code; CheckerBetween is not visible here, so sharing objects
    # with the paternity checker above is not assumed to be safe.
    query_a = _load_marker_series(gm, customer_identifier, 'A-STR')
    query_b = _load_marker_series(gm, goldstar_identifier, 'A-STR')
    af_table = afm.get('Korean', 'A-STR')
    if query_a is not None and query_b is not None:
        # Build the checker only once both genotypes are known to exist.
        cb = CheckerBetween(query_a, query_b, 'A-STR', af_table)
        kinship_result = cb.check_kinships()
        kinship_result = cb.arrange_result(kinship_result, 'shared')
        ci = kinship_result.loc['total']
        # Drop the two per-sample entries so only the per-relationship
        # totals remain.
        ci = ci.drop([customer_identifier, goldstar_identifier])
        ci = ci.astype(float)
        # Express every relationship total relative to 'unrelated'.
        ci = ci / ci['unrelated']
        # Series.sort() (in-place) no longer exists in pandas; sort_values
        # returns the sorted copy.
        ci = ci.sort_values(ascending=False)
        kinship_df = pd.DataFrame(index=ci.index)
        kinship_df['ratio'] = ci
        kinship_df['probability'] = ci / (ci + 1)

        kinship_rows, kinship_row_count = create_astr_kinship_rows(
            kinship_df, goldstar_identifier, customer_identifier,
            start_row_num=2)
    else:
        kinship_rows, kinship_row_count = '', 2

    ##
    key_value = {
            'customer_goldstar_name' : 'aaa',
            'goldstar_identifier' : goldstar_identifier,
            'customer_relationship' : 'bbb',
            'customer_name' : 'cc',
            'customer_identifier' : customer_identifier,
            'kinship_row_count' : kinship_row_count,
            'kinship_rows' : kinship_rows,
            'astr_row_count' : astr_row_count,
            'astr_rows' : astr_rows,
            'ystr_row_count' : ystr_row_count,
            'ystr_rows' : ystr_rows,
            'mtdna_row_count' : mtdna_row_count,
            'mtdna_rows' : mtdna_rows,
        }
    return key_value