# Source code for kinmatch.report

# -*- coding:UTF-8 -*-
import pandas as pd
import numpy as np
import re

from kinmatch import db, search, config

# Markup for one <ROW> of the A-STR kinship table, filled in with
# %-formatting by create_astr_kinship_rows() via create_row().
# Placeholders (mapping keys):
#   row_num      - written into the RowAddr attribute of every cell (%s)
#   index        - first cell, formatted as an integer (%d)
#   relationship - second cell (%s)
#   ratio        - third cell, formatted as a float (%f)
#   possibility  - third cell, in parentheses after the ratio (%f)
template_astr_kinship_row = """
<ROW>
<CELL BorderFill="1" ColAddr="0" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2697" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="5506">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="7">
<CHAR>%(index)d</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="1" ColAddr="1" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2697" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="10616">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="6">
<CHAR>%(relationship)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="8" ColAddr="2" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2697" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="29982">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="6">
<CHAR>%(ratio)f (%(possibility)f)</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
</ROW>
"""

# Markup for one <ROW> of the A-STR marker table (four cells), filled in
# with %-formatting by create_astr_rows() via create_row().
# Placeholders (mapping keys, all formatted with %s):
#   row_num       - written into the RowAddr attribute of every cell
#   marker        - column 0
#   goldstar_gene - column 1
#   customer_gene - column 2
#   result        - column 3
template_astr_row = """
<ROW>
<CELL BorderFill="7" ColAddr="0" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2198" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11861">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="17" Style="0">
<TEXT CharShape="6">
<CHAR>%(marker)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="1" ColAddr="1" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2198" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="5" Style="0">
<TEXT CharShape="6">
<CHAR>%(goldstar_gene)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="1" ColAddr="2" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2198" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11320">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="5" Style="0">
<TEXT CharShape="6">
<CHAR>%(customer_gene)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="1" ColAddr="3" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2198" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11320">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="5" Style="0">
<TEXT CharShape="6">
<CHAR>%(result)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
</ROW>
"""

# Markup for one <ROW> of the Y-STR marker table (four cells), filled in
# with %-formatting by create_ystr_rows() via create_row().
# Placeholders (mapping keys, all formatted with %s):
#   row_num       - written into the RowAddr attribute of every cell
#   marker        - column 0
#   goldstar_gene - column 1
#   customer_gene - column 2
#   result        - column 3
template_ystr_row = """
<ROW>
<CELL BorderFill="9" ColAddr="0" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11295">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="20" Style="0">
<TEXT CharShape="6">
<CHAR>%(marker)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="1" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="6">
<CHAR>%(goldstar_gene)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="2" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="6">
<CHAR>%(customer_gene)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="3" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="12169">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="5" Style="0">
<TEXT CharShape="6">
<CHAR>%(result)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
</ROW>
"""

# Markup for one <ROW> of the mtDNA table (four cells), filled in with
# %-formatting by create_mtdna_rows() via create_row(). Each cell carries
# an extra empty <TEXT CharShape="9"/> element after its text.
# Placeholders (mapping keys, all formatted with %s):
#   row_num       - written into the RowAddr attribute of every cell
#   hv, position  - column 0, rendered as "<hv> (<position>)"
#   goldstar_gene - column 1
#   customer_gene - column 2
#   result        - column 3
template_mtdna_row = """
<ROW>
<CELL BorderFill="9" ColAddr="0" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11295">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="9">
<CHAR>%(hv)s (%(position)s)</CHAR>
</TEXT>
<TEXT CharShape="9"/>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="1" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="9">
<CHAR>%(goldstar_gene)s</CHAR>
</TEXT>
<TEXT CharShape="9"/>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="2" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="9">
<CHAR>%(customer_gene)s</CHAR>
</TEXT>
<TEXT CharShape="9"/>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="3" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="12169">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="9">
<CHAR>%(result)s</CHAR>
</TEXT>
<TEXT CharShape="9"/>
</P>
</PARALIST>
</CELL>
</ROW>
"""
def relative(value):
    """Translate *value* through the ``RELATIVES_TYPES`` pair table.

    Returns the second element of the first entry whose first element
    equals *value*; when no entry matches, *value* is returned unchanged.

    NOTE(review): ``RELATIVES_TYPES`` is neither defined nor imported in
    the visible part of this module -- confirm where it is supposed to
    come from, otherwise calling this raises ``NameError``.
    """
    return next(
        (entry[1] for entry in RELATIVES_TYPES if entry[0] == value),
        value,
    )

def create_row(result_row, args):
    """Fill the %-style template *result_row* with *args* and return it.

    *args* is whatever the ``%`` operator accepts for the template; the
    row templates in this module use named placeholders, so callers here
    pass a mapping.
    """
    rendered = result_row % args
    return rendered

def create_astr_kinship_rows(kinship_result, goldstar_identifier,
        customer_identifier, start_row_num=2):
    """Render one kinship table row per index label of *kinship_result*.

    For every label in ``kinship_result.index`` the ``'ratio'`` and
    ``'probability'`` cells are read with ``.loc`` and formatted into
    ``template_astr_kinship_row``.  The 1-based position is written as
    ``index`` and ``start_row_num + position`` (0-based) as ``row_num``.

    *goldstar_identifier* and *customer_identifier* are accepted but not
    used by this function.

    Returns a tuple ``(rows, row_count)`` where *rows* is the concatenated
    markup and *row_count* is the number of rows in *kinship_result*
    plus 2 (presumably two header rows of the table -- confirm against
    the template).
    """
    fragments = []
    for offset, label in enumerate(kinship_result.index):
        values = {
            'relationship': label,
            'ratio': kinship_result.loc[label, 'ratio'],
            'possibility': kinship_result.loc[label, 'probability'],
            'index': offset + 1,
            'row_num': start_row_num + offset,
        }
        fragments.append(create_row(template_astr_kinship_row, values))
    total_rows = kinship_result.shape[0] + 2
    return ''.join(fragments), total_rows


def create_astr_rows(astr_result, goldstar_identifier,
        customer_identifier, start_row_num=1):
    """Render one A-STR table row per marker of *astr_result*.

    For each index label (marker) the cells in the columns named by
    *goldstar_identifier*, *customer_identifier* and ``'shared_allele'``
    are read with ``.loc``; null cells are rendered as empty strings.
    The marker ``'XY'`` is skipped and does not consume a row number.

    Returns a tuple ``(rows, row_count)`` where *rows* is the concatenated
    markup built from ``template_astr_row`` and *row_count* is the number
    of emitted rows plus 1.
    """
    def blank_if_null(cell):
        # Null cells become an empty string in the rendered table.
        return '' if pd.isnull(cell) else cell

    pieces = []
    for marker in astr_result.index:
        goldstar_gene = blank_if_null(
            astr_result.loc[marker, goldstar_identifier])
        customer_gene = blank_if_null(
            astr_result.loc[marker, customer_identifier])
        shared_allele = blank_if_null(
            astr_result.loc[marker, 'shared_allele'])
        if marker == 'XY':
            continue
        values = {
            'marker': marker,
            'goldstar_gene': goldstar_gene,
            'customer_gene': customer_gene,
            # len(pieces) is the number of rows emitted so far.
            'row_num': start_row_num + len(pieces),
            'result': shared_allele,
        }
        pieces.append(create_row(template_astr_row, values))
    return ''.join(pieces), len(pieces) + 1

def create_ystr_rows(ystr_result, goldstar_identifier,
        customer_identifier, start_row_num=1):
    """Render one Y-STR table row per marker of *ystr_result*.

    For each index label (marker) the cells in the columns named by
    *goldstar_identifier* and *customer_identifier* are read with
    ``.loc``; null cells are rendered as empty strings.  The result cell
    is ``'not excluded'`` when the row's ``'status'`` equals
    ``'matched'`` and ``'excluded'`` otherwise.

    Returns a tuple ``(rows, row_count)`` where *rows* is the concatenated
    markup built from ``template_ystr_row`` and *row_count* is the number
    of rows plus 1.
    """
    def blank_if_null(cell):
        # Null cells become an empty string in the rendered table.
        return '' if pd.isnull(cell) else cell

    pieces = []
    for marker in ystr_result.index:
        goldstar_gene = blank_if_null(
            ystr_result.loc[marker, goldstar_identifier])
        customer_gene = blank_if_null(
            ystr_result.loc[marker, customer_identifier])
        matched = ystr_result.loc[marker, 'status'] == 'matched'
        verdict = 'not excluded' if matched else 'excluded'
        values = {
            'marker': marker,
            'goldstar_gene': goldstar_gene,
            'customer_gene': customer_gene,
            'row_num': start_row_num + len(pieces),
            'result': verdict,
        }
        pieces.append(create_row(template_ystr_row, values))
    return ''.join(pieces), len(pieces) + 1

def create_mtdna_rows(mtdna_result, goldstar_identifier,
        customer_identifier, start_row_num=1):
    """Render one mtDNA table row per position of *mtdna_result*.

    For each index label (position) the cells in the columns named by
    *goldstar_identifier* and *customer_identifier* are read with
    ``.loc``; null cells are rendered as empty strings.  The first cell
    is always labelled ``HV`` followed by the position.  The result cell
    holds a Korean verdict: "not excluded" when the row's ``'status'``
    equals ``'matched'``, "excluded" otherwise.

    Returns a tuple ``(rows, row_count)`` where *rows* is the concatenated
    markup built from ``template_mtdna_row`` and *row_count* is the number
    of rows plus 1.
    """
    def blank_if_null(cell):
        # Null cells become an empty string in the rendered table.
        return '' if pd.isnull(cell) else cell

    pieces = []
    for position in mtdna_result.index:
        goldstar_gene = blank_if_null(
            mtdna_result.loc[position, goldstar_identifier])
        customer_gene = blank_if_null(
            mtdna_result.loc[position, customer_identifier])
        matched = mtdna_result.loc[position, 'status'] == 'matched'
        # Korean report text: "not excluded" / "excluded".
        verdict = '배제안됨' if matched else '배제됨'
        values = {
            'hv': 'HV',
            'position': position,
            'goldstar_gene': goldstar_gene,
            'customer_gene': customer_gene,
            'row_num': start_row_num + len(pieces),
            'result': verdict,
        }
        pieces.append(create_row(template_mtdna_row, values))
    return ''.join(pieces), len(pieces) + 1


def render_django_style(template_file, key_value, outfile):
    """Render a minimal Django-like template and write it to *outfile*.

    Only two constructs are recognised:

    * ``{% if name %} ... {% endif %}`` -- tags are consumed in pairs
      (1st/2nd, 3rd/4th, ...).  The last whitespace-separated word of the
      opening tag is the condition name; the enclosed text is kept only
      when that name is present in *key_value* with a truthy value.
      Nesting is not supported.
    * ``{{ name }}`` -- replaced by ``str(key_value[name])``.

    Parameters:
        template_file: path of the template; it is read as UTF-8.
        key_value: mapping of placeholder/condition names to values,
            e.g. ``{'key': 'value'}``.
        outfile: text-mode file-like object; the rendered text is passed
            to its ``write`` method.

    Raises:
        KeyError: a ``{{ name }}`` placeholder in kept text is missing
            from *key_value*.
        ValueError: the template holds an odd number of ``{% %}`` tags.
    """
    # Local import so this function does not depend on edits to the
    # module's import block.
    import io

    syntax_condition = re.compile(r'{%[\s*\w]+%}')
    syntax_value = re.compile(r'{{[\s*\w]+}}')

    # Explicit encoding: the default depends on the platform locale.
    with io.open(template_file, encoding='utf8') as f:
        reads = f.read()

    tags = list(syntax_condition.finditer(reads))
    if len(tags) % 2:
        raise ValueError(
            'unbalanced {%% %%} tags in template %r: found %d'
            % (template_file, len(tags)))

    # Pass 1: resolve the conditional blocks, keeping all text outside
    # them (including templates that contain no tags at all).
    kept = []
    cursor = 0
    for opening, closing in zip(tags[0::2], tags[1::2]):
        kept.append(reads[cursor:opening.start()])
        condition = opening.group()[2:-2].split()[-1]
        if condition in key_value and key_value[condition]:
            kept.append(reads[opening.end():closing.start()])
        cursor = closing.end()
    kept.append(reads[cursor:])

    # Pass 2: substitute every placeholder exactly once.  Substituted
    # values are never rescanned, so data containing "{{ }}" is safe.
    def lookup(match):
        name = match.group()[2:-2].strip()
        return str(key_value[name])

    result_hml = syntax_value.sub(lookup, ''.join(kept))
    outfile.write(result_hml)


def _query_pair(genotype_a, genotype_b, kind, identifier_a, identifier_b):
    """Return the two named query Series for *kind*, or None if either
    genotype is empty or lacks that kind."""
    if not (genotype_a and genotype_b
            and kind in genotype_a and kind in genotype_b):
        return None
    query_a = pd.Series(genotype_a[kind])
    query_b = pd.Series(genotype_b[kind])
    query_a.name = identifier_a
    query_b.name = identifier_b
    return query_a, query_b


def _comparable_rows(result, identifier_a, identifier_b):
    """Drop the 'total' row and every row that is null for both samples."""
    result = result.drop('total')
    has_value = (result[identifier_a].notnull()
                 | result[identifier_b].notnull())
    return result.loc[has_value]


def temp_main(identifier_a, identifier_b,
        customer_goldstar_name='', customer_name='', customer_relationship='',
        af_table_name='Korean'):
    """Collect every value the report template needs for two samples.

    The genotypes of *identifier_a* and *identifier_b* are fetched through
    ``db.GenotypeManager``.  For each of ``'A-STR'``, ``'Y-STR'`` and
    ``'mtDNA'`` that both genotypes contain, the pair is compared with
    ``search.CheckerBetween`` and rendered into table rows; A-STR data is
    additionally used for the kinship table.  A section whose data is
    missing yields an empty rows string and its default row count
    (1, or 2 for kinship).

    In the rendered tables *identifier_b* is passed as the "goldstar"
    sample and *identifier_a* as the "customer" sample.

    Parameters:
        identifier_a, identifier_b: sample identifiers to compare.
        customer_goldstar_name, customer_name, customer_relationship:
            copied unchanged into the returned mapping.
        af_table_name: name of the allele-frequency table requested from
            ``db.AlleleFrequenciesManager`` for the A-STR and kinship
            sections.

    Returns:
        dict mapping template keys (names, identifiers, ``*_rows`` and
        ``*_row_count`` for kinship/astr/ystr/mtdna) to their values,
        suitable for ``render_django_style``.
    """
    gm = db.GenotypeManager()
    afm = db.AlleleFrequenciesManager()
    a = gm.get_genotype(identifier_a)
    b = gm.get_genotype(identifier_b)

    # --- A-STR -----------------------------------------------------------
    astr_rows, astr_row_count = '', 1
    pair = _query_pair(a, b, 'A-STR', identifier_a, identifier_b)
    if pair is not None:
        query_a, query_b = pair
        af_table = afm.get(af_table_name, 'A-STR')
        cb = search.CheckerBetween(query_a, query_b, 'A-STR', af_table)
        astr_result = cb.check_paternities(add_shared_allele=True)
        astr_result = cb.arrange_result(astr_result, 'shared')
        astr_result = _comparable_rows(
            astr_result, identifier_a, identifier_b)
        astr_rows, astr_row_count = create_astr_rows(
            astr_result, identifier_b, identifier_a, start_row_num=1)

    # --- Y-STR (no allele-frequency table is used) -----------------------
    ystr_rows, ystr_row_count = '', 1
    pair = _query_pair(a, b, 'Y-STR', identifier_a, identifier_b)
    if pair is not None:
        query_a, query_b = pair
        cb = search.CheckerBetween(query_a, query_b, 'Y-STR', None)
        ystr_result = cb.check_identities()
        ystr_result = cb.arrange_result(ystr_result, 'matched')
        ystr_result = _comparable_rows(
            ystr_result, identifier_a, identifier_b)
        ystr_rows, ystr_row_count = create_ystr_rows(
            ystr_result, identifier_b, identifier_a, start_row_num=1)

    # --- mtDNA (no allele-frequency table is used) -----------------------
    mtdna_rows, mtdna_row_count = '', 1
    pair = _query_pair(a, b, 'mtDNA', identifier_a, identifier_b)
    if pair is not None:
        query_a, query_b = pair
        cb = search.CheckerBetween(query_a, query_b, 'mtDNA', None)
        mtdna_result = cb.check_identities()
        mtdna_result = cb.arrange_result(mtdna_result, 'matched')
        mtdna_result = _comparable_rows(
            mtdna_result, identifier_a, identifier_b)
        mtdna_rows, mtdna_row_count = create_mtdna_rows(
            mtdna_result, identifier_b, identifier_a, start_row_num=1)

    # --- Kinship (computed from the A-STR data) --------------------------
    kinship_rows, kinship_row_count = '', 2
    pair = _query_pair(a, b, 'A-STR', identifier_a, identifier_b)
    if pair is not None:
        query_a, query_b = pair
        # Use the caller's table (previously hard-coded to 'Korean', which
        # is still the default) so both A-STR sections agree.
        af_table = afm.get(af_table_name, 'A-STR')
        cb = search.CheckerBetween(query_a, query_b, 'A-STR', af_table)
        kinship_result = cb.check_kinships()
        kinship_result = cb.arrange_result(kinship_result, 'shared')
        ci = kinship_result.loc['total']
        ci = ci.drop(identifier_a)
        ci = ci.drop(identifier_b)
        ci = ci.astype(float)
        # Express every total relative to the 'unrelated' entry.
        ci = ci / ci['unrelated']
        # Series.sort() no longer exists in pandas; sort_values() returns
        # the sorted copy instead of sorting in place.
        ci = ci.sort_values(ascending=False)
        kinship_df = pd.DataFrame(index=ci.index)
        kinship_df['ratio'] = ci
        kinship_df['probability'] = ci / (ci + 1) * 100

        kinship_rows, kinship_row_count = create_astr_kinship_rows(
            kinship_df, identifier_b, identifier_a, start_row_num=2)

    key_value = {
        'customer_goldstar_name': customer_goldstar_name,
        'goldstar_identifier': identifier_b,
        'customer_relationship': customer_relationship,
        'customer_name': customer_name,
        'customer_identifier': identifier_a,
        'kinship_row_count': kinship_row_count,
        'kinship_rows': kinship_rows,
        'astr_row_count': astr_row_count,
        'astr_rows': astr_rows,
        'ystr_row_count': ystr_row_count,
        'ystr_rows': ystr_rows,
        'mtdna_row_count': mtdna_row_count,
        'mtdna_rows': mtdna_rows,
    }
    return key_value


def make_hml(query_a, query_b, outfile, customer_goldstar_name,
        customer_name, customer_relationship, hml_template=config.HML_TEMPLATE):
    """Build the report for two sample identifiers and write it out.

    *query_a* and *query_b* are handed to ``temp_main`` as the two
    identifiers, together with the three customer fields; the resulting
    mapping is rendered into *hml_template* by ``render_django_style``,
    which writes the result to *outfile*.  The allele-frequency table
    name is left at ``temp_main``'s default.
    """
    report_values = temp_main(
        query_a,
        query_b,
        customer_goldstar_name=customer_goldstar_name,
        customer_name=customer_name,
        customer_relationship=customer_relationship,
    )
    render_django_style(hml_template, report_values, outfile)