# -*- coding:UTF-8 -*-
import pandas as pd
import numpy as np
import re
from kinmatch import db, search, config
# XML fragment for one three-cell <ROW> of the A-STR kinship table.
# Filled with %-style named formatting by create_astr_kinship_rows().
# Placeholders: row_num (%s, used as RowAddr of every cell), index (%d),
# relationship (%s), ratio (%f) and possibility (%f, shown in parentheses).
# NOTE(review): the markup looks like Hancom HML table XML -- confirm.
template_astr_kinship_row = """
<ROW>
<CELL BorderFill="1" ColAddr="0" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2697" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="5506">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="7">
<CHAR>%(index)d</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="1" ColAddr="1" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2697" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="10616">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="6">
<CHAR>%(relationship)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="8" ColAddr="2" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2697" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="29982">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="6">
<CHAR>%(ratio)f (%(possibility)f)</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
</ROW>
"""
# XML fragment for one four-cell <ROW> of the A-STR comparison table.
# Filled with %-style named formatting by create_astr_rows().
# Placeholders (all %s): row_num (RowAddr of every cell), marker,
# goldstar_gene, customer_gene and result.
template_astr_row = """
<ROW>
<CELL BorderFill="7" ColAddr="0" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2198" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11861">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="17" Style="0">
<TEXT CharShape="6">
<CHAR>%(marker)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="1" ColAddr="1" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2198" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="5" Style="0">
<TEXT CharShape="6">
<CHAR>%(goldstar_gene)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="1" ColAddr="2" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2198" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11320">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="5" Style="0">
<TEXT CharShape="6">
<CHAR>%(customer_gene)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="1" ColAddr="3" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="2198" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11320">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="5" Style="0">
<TEXT CharShape="6">
<CHAR>%(result)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
</ROW>
"""
# XML fragment for one four-cell <ROW> of the Y-STR comparison table.
# Filled with %-style named formatting by create_ystr_rows().
# Placeholders (all %s): row_num (RowAddr of every cell), marker,
# goldstar_gene, customer_gene and result.
template_ystr_row = """
<ROW>
<CELL BorderFill="9" ColAddr="0" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11295">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="20" Style="0">
<TEXT CharShape="6">
<CHAR>%(marker)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="1" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="6">
<CHAR>%(goldstar_gene)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="2" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="6">
<CHAR>%(customer_gene)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="3" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="12169">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="5" Style="0">
<TEXT CharShape="6">
<CHAR>%(result)s</CHAR>
</TEXT>
</P>
</PARALIST>
</CELL>
</ROW>
"""
# XML fragment for one four-cell <ROW> of the mtDNA comparison table.
# Filled with %-style named formatting by create_mtdna_rows().
# Placeholders (all %s): row_num (RowAddr of every cell), hv and position
# (rendered together as "hv (position)" in the first cell), goldstar_gene,
# customer_gene and result.
template_mtdna_row = """
<ROW>
<CELL BorderFill="9" ColAddr="0" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11295">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="9">
<CHAR>%(hv)s (%(position)s)</CHAR>
</TEXT>
<TEXT CharShape="9"/>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="1" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="9">
<CHAR>%(goldstar_gene)s</CHAR>
</TEXT>
<TEXT CharShape="9"/>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="2" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="11603">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="9">
<CHAR>%(customer_gene)s</CHAR>
</TEXT>
<TEXT CharShape="9"/>
</P>
</PARALIST>
</CELL>
<CELL BorderFill="7" ColAddr="3" ColSpan="1" Dirty="false" Editable="false" HasMargin="false" Header="false" Height="1765" Protect="false" RowAddr="%(row_num)s" RowSpan="1" Width="12169">
<PARALIST LineWrap="Break" LinkListID="0" LinkListIDNext="0" TextDirection="0" VertAlign="Center">
<P ParaShape="11" Style="0">
<TEXT CharShape="9">
<CHAR>%(result)s</CHAR>
</TEXT>
<TEXT CharShape="9"/>
</P>
</PARALIST>
</CELL>
</ROW>
"""
def relative(value):
    """Map *value* to its counterpart listed in ``RELATIVES_TYPES``.

    ``RELATIVES_TYPES`` is scanned in order; the first entry whose element 0
    equals *value* wins and its element 1 is returned.  When no entry
    matches, *value* itself is returned unchanged.
    """
    # NOTE(review): RELATIVES_TYPES is neither defined nor imported in the
    # visible part of this module -- confirm where it is supposed to come from.
    candidates = (entry[1] for entry in RELATIVES_TYPES if entry[0] == value)
    return next(candidates, value)
def create_row(result_row, args):
    """Fill the ``%``-style template *result_row* with *args*.

    *args* is whatever the right-hand side of the ``%`` operator accepts for
    the given template (a mapping for ``%(name)s`` placeholders, a tuple or
    single value for positional ones).  The formatted result is returned.
    """
    filled = result_row % args
    return filled
def create_astr_kinship_rows(kinship_result, goldstar_identifier,
                             customer_identifier, start_row_num=2):
    """Render every row of *kinship_result* with ``template_astr_kinship_row``.

    Args:
        kinship_result: table indexed by relationship label; its ``'ratio'``
            and ``'probability'`` columns are read for each label.
        goldstar_identifier: accepted for signature parity; not used here.
        customer_identifier: accepted for signature parity; not used here.
        start_row_num: ``row_num`` given to the first rendered row; each
            following row gets the next integer.

    Returns:
        ``(rows, row_count)`` -- the concatenated row markup and the number
        of rows in *kinship_result* plus 2.
    """
    labels = kinship_result.index
    fragments = []
    for position in range(kinship_result.shape[0]):
        relationship = labels[position]
        fields = {
            'relationship': relationship,
            'ratio': kinship_result.loc[relationship, 'ratio'],
            'possibility': kinship_result.loc[relationship, 'probability'],
            'index': position + 1,  # 1-based ordinal shown in the first cell
            'row_num': start_row_num + position,
        }
        fragments.append(create_row(template_astr_kinship_row, fields))
    row_count = kinship_result.shape[0] + 2
    return ''.join(fragments), row_count
def create_astr_rows(astr_result, goldstar_identifier,
                     customer_identifier, start_row_num=1):
    """Render the A-STR comparison rows with ``template_astr_row``.

    Args:
        astr_result: table indexed by marker; the *goldstar_identifier*,
            *customer_identifier* and ``'shared_allele'`` columns are read.
        goldstar_identifier: column label of the first genotype column.
        customer_identifier: column label of the second genotype column.
        start_row_num: ``row_num`` given to the first rendered row.

    Returns:
        ``(rows, row_count)`` -- the concatenated row markup and the number
        of rendered rows plus 1.  The marker ``'XY'`` is never rendered and
        does not consume a row number.
    """
    def blank_if_null(cell):
        # Null cells are shown as an empty string in the report.
        return '' if pd.isnull(cell) else cell

    markers = astr_result.index
    fragments = []
    for position in range(astr_result.shape[0]):
        marker = markers[position]
        # The cells are read before the XY check, exactly as before, so a
        # missing column still fails on an XY row.
        goldstar_gene = blank_if_null(
            astr_result.loc[marker, goldstar_identifier])
        customer_gene = blank_if_null(
            astr_result.loc[marker, customer_identifier])
        shared_allele = blank_if_null(
            astr_result.loc[marker, 'shared_allele'])
        if marker == 'XY':
            continue
        fields = {
            'marker': marker,
            'goldstar_gene': goldstar_gene,
            'customer_gene': customer_gene,
            # len(fragments) == number of rows rendered so far
            'row_num': start_row_num + len(fragments),
            'result': shared_allele,
        }
        fragments.append(create_row(template_astr_row, fields))
    return ''.join(fragments), len(fragments) + 1
def create_ystr_rows(ystr_result, goldstar_identifier,
                     customer_identifier, start_row_num=1):
    """Render the Y-STR comparison rows with ``template_ystr_row``.

    Args:
        ystr_result: table indexed by marker; the *goldstar_identifier*,
            *customer_identifier* and ``'status'`` columns are read.
        goldstar_identifier: column label of the first genotype column.
        customer_identifier: column label of the second genotype column.
        start_row_num: ``row_num`` given to the first rendered row.

    Returns:
        ``(rows, row_count)`` -- the concatenated row markup and the number
        of rendered rows plus 1.  The result cell reads ``'not excluded'``
        when the row's status equals ``'matched'`` and ``'excluded'``
        otherwise.
    """
    def blank_if_null(cell):
        # Null cells are shown as an empty string in the report.
        return '' if pd.isnull(cell) else cell

    markers = ystr_result.index
    fragments = []
    for position in range(ystr_result.shape[0]):
        marker = markers[position]
        goldstar_gene = blank_if_null(
            ystr_result.loc[marker, goldstar_identifier])
        customer_gene = blank_if_null(
            ystr_result.loc[marker, customer_identifier])
        matched = ystr_result.loc[marker, 'status'] == 'matched'
        verdict = 'not excluded' if matched else 'excluded'
        fields = {
            'marker': marker,
            'goldstar_gene': goldstar_gene,
            'customer_gene': customer_gene,
            # len(fragments) == number of rows rendered so far
            'row_num': start_row_num + len(fragments),
            'result': verdict,
        }
        fragments.append(create_row(template_ystr_row, fields))
    return ''.join(fragments), len(fragments) + 1
def create_mtdna_rows(mtdna_result, goldstar_identifier,
                      customer_identifier, start_row_num=1):
    """Render the mtDNA comparison rows with ``template_mtdna_row``.

    Args:
        mtdna_result: table indexed by position; the *goldstar_identifier*,
            *customer_identifier* and ``'status'`` columns are read.
        goldstar_identifier: column label of the first genotype column.
        customer_identifier: column label of the second genotype column.
        start_row_num: ``row_num`` given to the first rendered row.

    Returns:
        ``(rows, row_count)`` -- the concatenated row markup and the number
        of rendered rows plus 1.  The first cell always carries the fixed
        label ``HV`` followed by the position; the result cell holds one of
        two fixed Korean strings chosen by whether the row's status equals
        ``'matched'``.
    """
    def blank_if_null(cell):
        # Null cells are shown as an empty string in the report.
        return '' if pd.isnull(cell) else cell

    positions = mtdna_result.index
    fragments = []
    for offset in range(mtdna_result.shape[0]):
        position = positions[offset]
        goldstar_gene = blank_if_null(
            mtdna_result.loc[position, goldstar_identifier])
        customer_gene = blank_if_null(
            mtdna_result.loc[position, customer_identifier])
        matched = mtdna_result.loc[position, 'status'] == 'matched'
        verdict = '배제안됨' if matched else '배제됨'
        fields = {
            'hv': 'HV',
            'position': position,
            'goldstar_gene': goldstar_gene,
            'customer_gene': customer_gene,
            # len(fragments) == number of rows rendered so far
            'row_num': start_row_num + len(fragments),
            'result': verdict,
        }
        fragments.append(create_row(template_mtdna_row, fields))
    return ''.join(fragments), len(fragments) + 1
def render_django_style(template_file, key_value, outfile):
    """Render a minimal Django-like template and write the result to *outfile*.

    Only two constructs are understood:

    * ``{% if name %} ... {% endif %}`` -- block tags are paired strictly in
      order of appearance (1st with 2nd, 3rd with 4th, ...).  The body is
      kept when the last word of the opening tag is a key of *key_value*
      with a truthy value and dropped otherwise.  Nesting and ``else`` are
      not supported; the text of the closing tag is not inspected.
    * ``{{ name }}`` -- replaced by ``key_value[name]``.  Values that are not
      strings are converted with ``str()``.

    Text outside these constructs is copied verbatim, including templates
    that contain no tags or placeholders at all.

    Args:
        template_file: path of the template, read as UTF-8 text.
        key_value: mapping like ``{'key': 'value'}`` supplying both the
            condition flags and the placeholder values.
        outfile: object with a ``write(str)`` method receiving the output.

    Raises:
        KeyError: a placeholder that survives the conditionals names a key
            missing from *key_value*.
        ValueError: the block tags are unbalanced or an opening tag is empty.
    """
    # Same patterns as before, now as raw strings so that \s and \w are not
    # treated as (invalid) string escapes.
    block_tag = re.compile(r'{%[\s*\w]+%}')
    placeholder = re.compile(r'{{[\s*\w]+}}')

    # The earlier Python 2 code decoded the template as UTF-8; make that
    # explicit instead of depending on the platform's default encoding.
    with open(template_file, encoding='utf-8') as f:
        template = f.read()

    tags = list(block_tag.finditer(template))
    if len(tags) % 2:
        raise ValueError(
            'unbalanced {%% %%} tags in template: found %d' % len(tags))

    # Pass 1: resolve the conditional blocks, keeping or dropping each body.
    pieces = []
    cursor = 0
    for opening, closing in zip(tags[0::2], tags[1::2]):
        pieces.append(template[cursor:opening.start()])
        words = opening.group()[2:-2].split()
        if not words:
            raise ValueError('empty {% %} tag in template')
        name = words[-1]  # '{% if name %}' -> 'name'
        if name in key_value and key_value[name]:
            pieces.append(template[opening.end():closing.start()])
        cursor = closing.end()
    pieces.append(template[cursor:])  # text after the last tag (or all of it)
    resolved = ''.join(pieces)

    # Pass 2: substitute the remaining placeholders.  A replacement function
    # is used so that backslashes in the values are inserted literally.
    def lookup(match):
        value = key_value[match.group()[2:-2].strip()]
        return value if isinstance(value, str) else str(value)

    outfile.write(placeholder.sub(lookup, resolved))
def temp_main(identifier_a, identifier_b,
              customer_goldstar_name='', customer_name='',
              customer_relationship='', af_table_name='Korean'):
    """Compare two stored genotypes and build the report template context.

    The genotypes of *identifier_a* and *identifier_b* are fetched through
    ``db.GenotypeManager``.  For each of the ``'A-STR'``, ``'Y-STR'`` and
    ``'mtDNA'`` sections that both genotypes contain, a
    ``search.CheckerBetween`` comparison is run and rendered into table-row
    markup; A-STR data additionally yields the kinship table.  A section that
    is missing from either genotype produces empty markup and the bare
    header row count (1, or 2 for kinship).

    Args:
        identifier_a: identifier reported as the customer sample.
        identifier_b: identifier reported as the "goldstar" sample.
        customer_goldstar_name: passed through to the context unchanged.
        customer_name: passed through to the context unchanged.
        customer_relationship: passed through to the context unchanged.
        af_table_name: name of the allele-frequency table used for both the
            A-STR comparison and the kinship calculation.

    Returns:
        dict with the keys ``customer_goldstar_name``,
        ``goldstar_identifier``, ``customer_relationship``,
        ``customer_name``, ``customer_identifier`` and, for each of
        ``kinship``, ``astr``, ``ystr`` and ``mtdna``, ``<name>_rows`` and
        ``<name>_row_count``.
    """
    gm = db.GenotypeManager()
    afm = db.AlleleFrequenciesManager()
    a = gm.get_genotype(identifier_a)
    b = gm.get_genotype(identifier_b)

    def both_have(kind):
        # True when both genotypes exist and both carry the given section.
        return bool(a and b and kind in a and kind in b)

    def make_checker(kind, af_table):
        # Build the pairwise checker for one section of the two genotypes.
        query_a = pd.Series(a[kind])
        query_b = pd.Series(b[kind])
        query_a.name = identifier_a
        query_b.name = identifier_b
        return search.CheckerBetween(query_a, query_b, kind, af_table)

    def typed_rows(result):
        # Drop the 'total' summary row and rows where neither sample has data.
        result = result.drop('total')
        return result.loc[
            result[identifier_a].notnull() | result[identifier_b].notnull()]

    # A-STR
    if both_have('A-STR'):
        cb = make_checker('A-STR', afm.get(af_table_name, 'A-STR'))
        astr_result = cb.check_paternities(add_shared_allele=True)
        astr_result = typed_rows(cb.arrange_result(astr_result, 'shared'))
        astr_rows, astr_row_count = create_astr_rows(
            astr_result, identifier_b, identifier_a, start_row_num=1)
    else:
        astr_rows, astr_row_count = '', 1

    # Y-STR (no allele-frequency table is used)
    if both_have('Y-STR'):
        cb = make_checker('Y-STR', None)
        ystr_result = cb.check_identities()
        ystr_result = typed_rows(cb.arrange_result(ystr_result, 'matched'))
        ystr_rows, ystr_row_count = create_ystr_rows(
            ystr_result, identifier_b, identifier_a, start_row_num=1)
    else:
        ystr_rows, ystr_row_count = '', 1

    # mtDNA (no allele-frequency table is used)
    if both_have('mtDNA'):
        cb = make_checker('mtDNA', None)
        mtdna_result = cb.check_identities()
        mtdna_result = typed_rows(cb.arrange_result(mtdna_result, 'matched'))
        mtdna_rows, mtdna_row_count = create_mtdna_rows(
            mtdna_result, identifier_b, identifier_a, start_row_num=1)
    else:
        mtdna_rows, mtdna_row_count = '', 1

    # Kinship, derived from the A-STR data
    if both_have('A-STR'):
        # Use the requested frequency table here as well; this used to be
        # hard-coded to 'Korean', silently ignoring af_table_name.
        cb = make_checker('A-STR', afm.get(af_table_name, 'A-STR'))
        kinship_result = cb.check_kinships()
        kinship_result = cb.arrange_result(kinship_result, 'shared')
        ci = kinship_result.loc['total']
        ci = ci.drop(identifier_a)
        ci = ci.drop(identifier_b)
        ci = ci.astype(float)
        ci = ci / ci['unrelated']  # ratio of each entry to 'unrelated'
        # Series.sort() was removed from pandas; sort_values() returns the
        # sorted copy instead of sorting in place.
        ci = ci.sort_values(ascending=False)
        kinship_df = pd.DataFrame(index=ci.index)
        kinship_df['ratio'] = ci
        kinship_df['probability'] = ci / (ci + 1) * 100
        kinship_rows, kinship_row_count = create_astr_kinship_rows(
            kinship_df, identifier_b, identifier_a, start_row_num=2)
    else:
        kinship_rows, kinship_row_count = '', 2

    key_value = {
        'customer_goldstar_name': customer_goldstar_name,
        'goldstar_identifier': identifier_b,
        'customer_relationship': customer_relationship,
        'customer_name': customer_name,
        'customer_identifier': identifier_a,
        'kinship_row_count': kinship_row_count,
        'kinship_rows': kinship_rows,
        'astr_row_count': astr_row_count,
        'astr_rows': astr_rows,
        'ystr_row_count': ystr_row_count,
        'ystr_rows': ystr_rows,
        'mtdna_row_count': mtdna_row_count,
        'mtdna_rows': mtdna_rows,
    }
    return key_value
def make_hml(query_a, query_b, outfile, customer_goldstar_name,
             customer_name, customer_relationship,
             hml_template=config.HML_TEMPLATE):
    """Build the comparison report for two identifiers and write it out.

    The template context is produced by ``temp_main`` and then rendered by
    ``render_django_style`` from *hml_template* into *outfile*.

    Args:
        query_a: first identifier, forwarded to ``temp_main``.
        query_b: second identifier, forwarded to ``temp_main``.
        outfile: writable object that receives the rendered document.
        customer_goldstar_name: forwarded to ``temp_main``.
        customer_name: forwarded to ``temp_main``.
        customer_relationship: forwarded to ``temp_main``.
        hml_template: path of the template file; defaults to
            ``config.HML_TEMPLATE``.
    """
    context = temp_main(
        query_a,
        query_b,
        customer_goldstar_name=customer_goldstar_name,
        customer_name=customer_name,
        customer_relationship=customer_relationship,
    )
    render_django_style(hml_template, context, outfile)