# Source code for kinmatch.report

# -*- coding:UTF-8 -*-
import pandas as pd
import numpy as np
import re

from kinmatch import db, search, config

# XML table-row fragment (<ROW> with three <CELL>s) for one kinship result
# line; create_astr_kinship_rows() fills it through %-formatting.
# Expected mapping keys: row_num (used in every RowAddr attribute),
# index (formatted with %d), relationship (%s), ratio and possibility
# (both formatted with %f, rendered as "ratio (possibility)").
template_astr_kinship_row = """
<ROW>
<CELL BorderFill="1" ColAddr="0" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2697" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="5506">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="7">
<CHAR>%(index)d</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="1" ColAddr="1" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2697" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="10616">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="6">
<CHAR>%(relationship)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="8" ColAddr="2" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2697" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="29982">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="6">
<CHAR>%(ratio)f (%(possibility)f)</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
</ROW>
"""

# XML table-row fragment (<ROW> with four <CELL>s) for one A-STR marker line;
# create_astr_rows() fills it through %-formatting.
# Expected mapping keys: row_num (used in every RowAddr attribute), marker,
# goldstar_gene, customer_gene and result (all formatted with %s).
template_astr_row = """
<ROW>
<CELL BorderFill="7" ColAddr="0" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2198" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11861">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="17" Style="0">
<TEXT CharShape="6">
<CHAR>%(marker)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="1" ColAddr="1" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2198" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="5" Style="0">
<TEXT CharShape="6">
<CHAR>%(goldstar_gene)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="1" ColAddr="2" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2198" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11320">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="5" Style="0">
<TEXT CharShape="6">
<CHAR>%(customer_gene)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="1" ColAddr="3" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2198" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11320">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="5" Style="0">
<TEXT CharShape="6">
<CHAR>%(result)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
</ROW>
"""

# XML table-row fragment (<ROW> with four <CELL>s) for one Y-STR marker line;
# create_ystr_rows() fills it through %-formatting.
# Expected mapping keys: row_num (used in every RowAddr attribute), marker,
# goldstar_gene, customer_gene and result (all formatted with %s).
template_ystr_row = """
<ROW>
<CELL BorderFill="9" ColAddr="0" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11295">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="20" Style="0">
<TEXT CharShape="6">
<CHAR>%(marker)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="1" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="6">
<CHAR>%(goldstar_gene)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="2" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="6">
<CHAR>%(customer_gene)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="3" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="12169">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="5" Style="0">
<TEXT CharShape="6">
<CHAR>%(result)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
</ROW>
"""

# XML table-row fragment (<ROW> with four <CELL>s) for one mtDNA position
# line; create_mtdna_rows() fills it through %-formatting.
# Expected mapping keys: row_num (used in every RowAddr attribute), hv and
# position (rendered together as "hv (position)"), goldstar_gene,
# customer_gene and result (all formatted with %s).
template_mtdna_row = """
<ROW>
<CELL BorderFill="9" ColAddr="0" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11295">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="9">
<CHAR>%(hv)s (%(position)s)</CHAR>
</TEXT>
<TEXT CharShape="9"/>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="1" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="9">
<CHAR>%(goldstar_gene)s</CHAR>
</TEXT>
<TEXT CharShape="9"/>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="2" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="9">
<CHAR>%(customer_gene)s</CHAR>
</TEXT>
<TEXT CharShape="9"/>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="3" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="12169">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="9">
<CHAR>%(result)s</CHAR>
</TEXT>
<TEXT CharShape="9"/>
</P>
</PARALIST>
</CELL>
</ROW>
"""
def relative(value):
    """Translate *value* through the ``RELATIVES_TYPES`` lookup table.

    Returns element 1 of the first ``RELATIVES_TYPES`` entry whose element 0
    equals *value*; when no entry matches, *value* itself is returned.
    """
    # NOTE(review): RELATIVES_TYPES is neither defined nor imported in the
    # visible part of this module -- confirm where it is meant to come from.
    labels = (entry[1] for entry in RELATIVES_TYPES if entry[0] == value)
    return next(labels, value)
def create_row(result_row, args):
    """Fill a %-style row template.

    :param result_row: template string containing ``%(key)s``-style fields.
    :param args: values applied to the template with the ``%`` operator.
    :returns: the formatted row.
    """
    filled_row = result_row % args
    return filled_row
def create_astr_kinship_rows(kinship_result, goldstar_identifier,
                             customer_identifier, start_row_num=2):
    """Render one kinship table row per entry of *kinship_result*.

    :param kinship_result: DataFrame indexed by relationship name with
        ``'ratio'`` and ``'probability'`` columns.
    :param goldstar_identifier: accepted for signature parity with the other
        row builders; not used here.
    :param customer_identifier: accepted for signature parity; not used here.
    :param start_row_num: ``row_num`` given to the first rendered row.
    :returns: ``(rows, row_count)`` -- the concatenated row markup and the
        number of entries in *kinship_result* plus 2.
    """
    fragments = []
    for offset, relationship in enumerate(kinship_result.index):
        values = {
            'relationship': relationship,
            'ratio': kinship_result.loc[relationship, 'ratio'],
            'possibility': kinship_result.loc[relationship, 'probability'],
            'index': offset + 1,  # 1-based ordinal shown in the first cell
            'row_num': start_row_num + offset,
        }
        fragments.append(create_row(template_astr_kinship_row, values))
    return ''.join(fragments), kinship_result.shape[0] + 2
def create_astr_rows(astr_result, goldstar_identifier, customer_identifier,
                     start_row_num=1):
    """Render one A-STR table row per marker of *astr_result*.

    The marker labelled ``'XY'`` is skipped and does not consume a row
    number.  Null cells are rendered as empty strings.

    :param astr_result: DataFrame indexed by marker, holding the columns
        named by *goldstar_identifier* and *customer_identifier* plus a
        ``'shared_allele'`` column.
    :param goldstar_identifier: column shown in the second cell.
    :param customer_identifier: column shown in the third cell.
    :param start_row_num: ``row_num`` given to the first rendered row.
    :returns: ``(rows, row_count)`` -- the concatenated row markup and the
        number of rendered rows plus 1.
    """
    def blank_if_null(cell):
        return '' if pd.isnull(cell) else cell

    fragments = []
    for marker in astr_result.index:
        # All three lookups happen before the 'XY' check, as in the original.
        goldstar_gene = blank_if_null(
            astr_result.loc[marker, goldstar_identifier])
        customer_gene = blank_if_null(
            astr_result.loc[marker, customer_identifier])
        shared_allele = blank_if_null(
            astr_result.loc[marker, 'shared_allele'])
        if marker == 'XY':
            continue
        values = {
            'marker': marker,
            'goldstar_gene': goldstar_gene,
            'customer_gene': customer_gene,
            # rows rendered so far == offset of this row
            'row_num': start_row_num + len(fragments),
            'result': shared_allele,
        }
        fragments.append(create_row(template_astr_row, values))
    return ''.join(fragments), len(fragments) + 1
def create_ystr_rows(ystr_result, goldstar_identifier, customer_identifier,
                     start_row_num=1):
    """Render one Y-STR table row per marker of *ystr_result*.

    The result cell reads ``'not excluded'`` when the marker's ``'status'``
    equals ``'matched'`` and ``'excluded'`` otherwise.  Null gene cells are
    rendered as empty strings.

    :param ystr_result: DataFrame indexed by marker, holding the columns
        named by *goldstar_identifier* and *customer_identifier* plus a
        ``'status'`` column.
    :param goldstar_identifier: column shown in the second cell.
    :param customer_identifier: column shown in the third cell.
    :param start_row_num: ``row_num`` given to the first rendered row.
    :returns: ``(rows, row_count)`` -- the concatenated row markup and the
        number of rendered rows plus 1.
    """
    def blank_if_null(cell):
        return '' if pd.isnull(cell) else cell

    fragments = []
    for offset, marker in enumerate(ystr_result.index):
        goldstar_gene = blank_if_null(
            ystr_result.loc[marker, goldstar_identifier])
        customer_gene = blank_if_null(
            ystr_result.loc[marker, customer_identifier])
        matched = ystr_result.loc[marker, 'status'] == 'matched'
        values = {
            'marker': marker,
            'goldstar_gene': goldstar_gene,
            'customer_gene': customer_gene,
            'row_num': start_row_num + offset,
            'result': 'not excluded' if matched else 'excluded',
        }
        fragments.append(create_row(template_ystr_row, values))
    return ''.join(fragments), len(fragments) + 1
def create_mtdna_rows(mtdna_result, goldstar_identifier, customer_identifier,
                      start_row_num=1):
    """Render one mtDNA table row per position of *mtdna_result*.

    The first cell shows ``HV (<position>)``.  The result cell carries the
    Korean label ``'배제안됨'`` when the position's ``'status'`` equals
    ``'matched'`` and ``'배제됨'`` otherwise.  Null gene cells are rendered
    as empty strings.

    :param mtdna_result: DataFrame indexed by position, holding the columns
        named by *goldstar_identifier* and *customer_identifier* plus a
        ``'status'`` column.
    :param goldstar_identifier: column shown in the second cell.
    :param customer_identifier: column shown in the third cell.
    :param start_row_num: ``row_num`` given to the first rendered row.
    :returns: ``(rows, row_count)`` -- the concatenated row markup and the
        number of rendered rows plus 1.
    """
    def blank_if_null(cell):
        return '' if pd.isnull(cell) else cell

    region = 'HV'
    fragments = []
    for offset, position in enumerate(mtdna_result.index):
        goldstar_gene = blank_if_null(
            mtdna_result.loc[position, goldstar_identifier])
        customer_gene = blank_if_null(
            mtdna_result.loc[position, customer_identifier])
        matched = mtdna_result.loc[position, 'status'] == 'matched'
        values = {
            'hv': region.upper(),
            'position': position,
            'goldstar_gene': goldstar_gene,
            'customer_gene': customer_gene,
            'row_num': start_row_num + offset,
            'result': '배제안됨' if matched else '배제됨',
        }
        fragments.append(create_row(template_mtdna_row, values))
    return ''.join(fragments), len(fragments) + 1
def render_django_style(template_file, key_value, outfile):
    """Render a minimal Django-like template and write it to *outfile*.

    Only two constructs are understood:

    * ``{% if name %} ... {% endif %}`` -- ``{% %}`` tags are paired purely by
      order of appearance (1st with 2nd, 3rd with 4th, ...), so blocks cannot
      be nested.  The last word of the opening tag is the condition name; the
      enclosed text is kept only when that name is in *key_value* with a
      truthy value.
    * ``{{ name }}`` -- replaced by ``key_value[name]``.  Non-string values
      are converted with ``str()``.

    Placeholders inside a dropped block are never looked up.  Substituted
    values are inserted verbatim and are not scanned again for placeholders.

    :param template_file: path of the template to read.
    :param key_value: mapping such as ``{'key': 'value'}``.
    :param outfile: object with a ``write(str)`` method receiving the result.
    :raises KeyError: a placeholder in kept text has no entry in *key_value*.
    :raises ValueError: the template holds an odd number of ``{% %}`` tags.
    """
    # Raw strings keep "\s"/"\w" as regex escapes rather than (invalid)
    # Python string escapes; the patterns themselves are unchanged.
    syntax_condition = re.compile(r'{%[\s*\w]+%}')
    syntax_value = re.compile(r'{{[\s*\w]+}}')

    # NOTE(review): the file is read with the platform default encoding, as
    # before -- confirm whether the template should be opened as UTF-8.
    with open(template_file) as f:
        reads = f.read()

    tags = list(syntax_condition.finditer(reads))
    if len(tags) % 2:
        raise ValueError(
            'unbalanced {%% %%} tags in template %r' % (template_file,))

    # Pass 1: resolve the conditional blocks, always keeping the text that
    # lies between them and the tail after the last one (also when the
    # template contains no conditional block at all).
    pieces = []
    cursor = 0
    for opening, closing in zip(tags[0::2], tags[1::2]):
        pieces.append(reads[cursor:opening.start()])
        words = opening.group()[2:-2].split()
        condition = words[-1] if words else ''
        if condition in key_value and key_value[condition]:
            pieces.append(reads[opening.end():closing.start()])
        cursor = closing.end()
    pieces.append(reads[cursor:])
    resolved = ''.join(pieces)

    # Pass 2: substitute every remaining placeholder exactly once.
    def substitute(match):
        value = key_value[match.group()[2:-2].strip()]
        return value if isinstance(value, str) else str(value)

    outfile.write(syntax_value.sub(substitute, resolved))
def temp_main(identifier_a, identifier_b, customer_goldstar_name='',
              customer_name='', customer_relationship='',
              af_table_name='Korean'):
    """Build the key/value mapping consumed by the HML report template.

    Both genotypes are fetched through ``db.GenotypeManager``.  For each of
    the sections A-STR, Y-STR, mtDNA and kinship the two samples are compared
    with ``search.CheckerBetween`` and the outcome is rendered with the
    matching ``create_*_rows`` helper.  A section is rendered only when both
    genotypes are truthy and both contain the required data type; otherwise
    its rows are ``''`` and its row count is the bare default (1, or 2 for
    kinship).

    Throughout, *identifier_b* is passed as the "goldstar" identifier and
    *identifier_a* as the "customer" identifier.

    :param identifier_a: identifier of the first genotype.
    :param identifier_b: identifier of the second genotype.
    :param customer_goldstar_name: copied into the result unchanged.
    :param customer_name: copied into the result unchanged.
    :param customer_relationship: copied into the result unchanged.
    :param af_table_name: name of the allele-frequency table requested from
        ``db.AlleleFrequenciesManager`` for the A-STR and kinship sections.
    :returns: dict with the names, identifiers and the ``*_rows`` /
        ``*_row_count`` entry of every section.
    """
    gm = db.GenotypeManager()
    afm = db.AlleleFrequenciesManager()
    a = gm.get_genotype(identifier_a)
    b = gm.get_genotype(identifier_b)

    def both_have(kind):
        # True only when both genotypes exist and both carry `kind` data.
        return bool(a and b and kind in a and kind in b)

    def checker_for(kind, af_table):
        # Wrap both genotypes as named Series and hand them to the checker.
        query_a = pd.Series(a[kind])
        query_b = pd.Series(b[kind])
        query_a.name = identifier_a
        query_b.name = identifier_b
        return search.CheckerBetween(query_a, query_b, kind, af_table)

    def typed_rows(result):
        # Drop the 'total' summary row and keep only the rows for which at
        # least one of the two samples has a value.
        result = result.drop('total')
        return result.loc[
            result[identifier_a].notnull() | result[identifier_b].notnull()
        ]

    ## astr
    if both_have('A-STR'):
        cb = checker_for('A-STR', afm.get(af_table_name, 'A-STR'))
        astr_result = cb.check_paternities(add_shared_allele=True)
        astr_result = typed_rows(cb.arrange_result(astr_result, 'shared'))
        astr_rows, astr_row_count = create_astr_rows(
            astr_result, identifier_b, identifier_a, start_row_num=1)
    else:
        astr_rows, astr_row_count = '', 1

    ## ystr
    if both_have('Y-STR'):
        cb = checker_for('Y-STR', None)
        ystr_result = cb.check_identities()
        ystr_result = typed_rows(cb.arrange_result(ystr_result, 'matched'))
        ystr_rows, ystr_row_count = create_ystr_rows(
            ystr_result, identifier_b, identifier_a, start_row_num=1)
    else:
        ystr_rows, ystr_row_count = '', 1

    ## mtdna
    if both_have('mtDNA'):
        cb = checker_for('mtDNA', None)
        mtdna_result = cb.check_identities()
        mtdna_result = typed_rows(cb.arrange_result(mtdna_result, 'matched'))
        mtdna_rows, mtdna_row_count = create_mtdna_rows(
            mtdna_result, identifier_b, identifier_a, start_row_num=1)
    else:
        mtdna_rows, mtdna_row_count = '', 1

    ## kinship
    if both_have('A-STR'):
        # Honour af_table_name here as well; this section used to hard-code
        # 'Korean' (the parameter's default), unlike the A-STR section above.
        cb = checker_for('A-STR', afm.get(af_table_name, 'A-STR'))
        kinship_result = cb.check_kinships()
        kinship_result = cb.arrange_result(kinship_result, 'shared')
        ci = kinship_result.loc['total']
        ci = ci.drop(identifier_a)
        ci = ci.drop(identifier_b)
        ci = ci.astype(float)
        # Express every relationship relative to the 'unrelated' entry.
        ci = ci / ci['unrelated']
        # Series.sort() no longer exists in current pandas; sort_values()
        # returns the sorted copy instead of sorting in place.
        ci = ci.sort_values(ascending=False)
        kinship_df = pd.DataFrame(index=ci.index)
        kinship_df['ratio'] = ci
        kinship_df['probability'] = ci / (ci + 1) * 100
        kinship_rows, kinship_row_count = create_astr_kinship_rows(
            kinship_df, identifier_b, identifier_a, start_row_num=2)
    else:
        kinship_rows, kinship_row_count = '', 2

    key_value = {
        'customer_goldstar_name': customer_goldstar_name,
        'goldstar_identifier': identifier_b,
        'customer_relationship': customer_relationship,
        'customer_name': customer_name,
        'customer_identifier': identifier_a,
        'kinship_row_count': kinship_row_count,
        'kinship_rows': kinship_rows,
        'astr_row_count': astr_row_count,
        'astr_rows': astr_rows,
        'ystr_row_count': ystr_row_count,
        'ystr_rows': ystr_rows,
        'mtdna_row_count': mtdna_row_count,
        'mtdna_rows': mtdna_rows,
    }
    return key_value
def make_hml(query_a, query_b, outfile, customer_goldstar_name,
             customer_name, customer_relationship,
             hml_template=config.HML_TEMPLATE):
    """Compare two genotypes and write the rendered HML report.

    The template values are gathered with ``temp_main`` and then rendered
    into *outfile* with ``render_django_style``.

    :param query_a: identifier of the first genotype.
    :param query_b: identifier of the second genotype.
    :param outfile: writable object that receives the rendered report.
    :param customer_goldstar_name: forwarded to ``temp_main``.
    :param customer_name: forwarded to ``temp_main``.
    :param customer_relationship: forwarded to ``temp_main``.
    :param hml_template: path of the template file; defaults to
        ``config.HML_TEMPLATE``.
    """
    report_values = temp_main(
        identifier_a=query_a,
        identifier_b=query_b,
        customer_goldstar_name=customer_goldstar_name,
        customer_name=customer_name,
        customer_relationship=customer_relationship,
    )
    render_django_style(hml_template, report_values, outfile)