From cf030962766af4e2df7e83f20152015e95163011 Mon Sep 17 00:00:00 2001 From: yhlhhhhh <67506574+yhlhhhhh@users.noreply.github.com> Date: Sat, 24 Apr 2021 16:10:56 +0800 Subject: [PATCH 1/6] wegene_utils.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新定义了Get_MT(),Get_Simple_MT(length),Get_Y(),Get_Simple_Y(length)四种函数,方便了对于输入单倍群的处理 --- examples/python3/scaffold-app/wegene_utils.py | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/examples/python3/scaffold-app/wegene_utils.py b/examples/python3/scaffold-app/wegene_utils.py index 6e4ee46..5e58a78 100644 --- a/examples/python3/scaffold-app/wegene_utils.py +++ b/examples/python3/scaffold-app/wegene_utils.py @@ -5,6 +5,7 @@ import sys import gzip import base64 +import re from io import BytesIO @@ -65,3 +66,61 @@ def is_genotype_exist(input, rsid): def is_wegene_format(format_str): return 'wegene_' in format_str + +#这是一个获取用户mt单倍群的函数 +def Get_MT(): + body = sys.stdin.read() + inputs = json.loads(body)['inputs'] + user_mt = inputs['haplogroup']['mt']['haplogroup'] + return user_mt + +#这是一个可以获取用户Simple mt的函数,其中length值表示想要去的单倍群长度,例如length为2,用户的单倍群是A8a1,则Simple mt为A8 +def Get_Simple_MT(length): + Get_MT() + if len(user_mt) == 1: + Simple_mt = user_mt + elif "'" in user_mt: + Simple_mt = user_mt + else: + letter_list = re.split('\d',user_mt) + number_list = re.split('\D',user_mt) + counter = 0 + Simple_mt = '' + while length - 1 == counter: + if isinstance(counter,int) == True: + Simple_mt += letter_list[counter] + counter += 1 + else: + Simple_mt += number_list[counter] + return Simple_mt + +#这是一个获取用户y单倍群的函数 +def Get_Y(): + body = sys.stdin.read() + inputs = json.loads(body)['inputs'] + user_gender = inputs['sex'] + if user_gender == 1: + user_y = inputs['haplogroup']['y']['haplogroup'] + return user_y + elif user_gender == 2: + sys.stderr.write('女性没有Y染色体哦~') + else: + sys.stderr.write('性别数据缺失') + +#这是一个可以获取用户Simple y的函数,其中length值表示想要去的单倍群长度,例如length为3,用户的单倍群是O2a2b1a1b,则Simple y为O2a +def Get_Simple_Y(length): + Get_Y() + if len(user_y) == 1: + Simple_y = user_y + else: + letter_list = re.split('\d',user_y) + number_list = re.split('\D',user_y) + counter = 0 + Simple_y = '' + while length - 1 == counter: + if isinstance(counter,int) == True: + Simple_y += letter_list[counter] + counter += 1 + else: + Simple_y += number_list[counter] + return Simple_y From 341100732b7ae461c40e24be7920205d0841644d Mon Sep 17 00:00:00 2001 From: yhlhhhhh <67506574+yhlhhhhh@users.noreply.github.com> Date: Sat, 24 Apr 2021 17:29:30 +0800 Subject: [PATCH 2/6] Update wegene_utils.py --- examples/python3/scaffold-app/wegene_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/python3/scaffold-app/wegene_utils.py b/examples/python3/scaffold-app/wegene_utils.py index 5e58a78..4ea50f1 100644 --- a/examples/python3/scaffold-app/wegene_utils.py +++ b/examples/python3/scaffold-app/wegene_utils.py @@ -87,7 +87,7 @@ def Get_Simple_MT(length): counter = 0 Simple_mt = '' while length - 1 == counter: - if isinstance(counter,int) == True: + if isinstance(counter/2,int) == True: Simple_mt += letter_list[counter] counter += 1 else: @@ -118,7 +118,7 @@ def Get_Simple_Y(length): counter = 0 Simple_y = '' while length - 1 == counter: - if isinstance(counter,int) == True: + if isinstance(counter/2,int) == True: Simple_y += letter_list[counter] counter += 1 else: From 5871669016ff6bc315698eb731fe707282b461d2 Mon Sep 17 00:00:00 2001 From: yhlhhhhh <67506574+yhlhhhhh@users.noreply.github.com> Date: Sat, 24 Apr 2021 22:16:32 +0800 Subject: [PATCH 3/6] Update wegene_utils.py --- examples/python3/scaffold-app/wegene_utils.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/examples/python3/scaffold-app/wegene_utils.py b/examples/python3/scaffold-app/wegene_utils.py index 4ea50f1..648a204 100644 --- a/examples/python3/scaffold-app/wegene_utils.py +++ b/examples/python3/scaffold-app/wegene_utils.py @@ -71,7 +71,11 @@ def is_wegene_format(format_str): def Get_MT(): body = sys.stdin.read() inputs = json.loads(body)['inputs'] - user_mt = inputs['haplogroup']['mt']['haplogroup'] + if 'haplogroup' in inputs: + user_mt = inputs['haplogroup']['mt']['haplogroup'] + else: + sys.stderr.write('无法获取您的MT数据,请联系作者解决') + exit(2) return user_mt #这是一个可以获取用户Simple mt的函数,其中length值表示想要去的单倍群长度,例如length为2,用户的单倍群是A8a1,则Simple mt为A8 @@ -98,7 +102,11 @@ def Get_Simple_MT(length): def Get_Y(): body = sys.stdin.read() inputs = json.loads(body)['inputs'] - user_gender = inputs['sex'] + if 'haplogroup' in inputs: + user_gender = inputs['sex'] + else: + sys.stderr.write('无法获取您的Y数据,请联系作者解决') + exit(2) if user_gender == 1: user_y = inputs['haplogroup']['y']['haplogroup'] return user_y From 07bd183c83324e46c604ad6ddd6319906e4b4418 Mon Sep 17 00:00:00 2001 From: yhlhhhhh <67506574+yhlhhhhh@users.noreply.github.com> Date: Tue, 27 Apr 2021 11:04:05 +0800 Subject: [PATCH 4/6] wegene_utils.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 目前先把get_mt( ),get_y( )和to_markdown_table( )这三个函数加上,先用着吧吧,get_simple_mt( )和get_simple_y( )这两个函数可能还要修改一段时间。 --- examples/python3/scaffold-app/wegene_utils.py | 118 ++++++++---------- 1 file changed, 55 insertions(+), 63 deletions(-) diff --git a/examples/python3/scaffold-app/wegene_utils.py b/examples/python3/scaffold-app/wegene_utils.py index 648a204..78b5729 100644 --- a/examples/python3/scaffold-app/wegene_utils.py +++ b/examples/python3/scaffold-app/wegene_utils.py @@ -5,7 +5,6 @@ import sys import gzip import base64 -import re from io import BytesIO @@ -67,68 +66,61 @@ def is_genotype_exist(input, rsid): def is_wegene_format(format_str): return 'wegene_' in format_str + #这是一个获取用户mt单倍群的函数 -def Get_MT(): - body = sys.stdin.read() - inputs = json.loads(body)['inputs'] - if 'haplogroup' in inputs: - user_mt = inputs['haplogroup']['mt']['haplogroup'] - else: - sys.stderr.write('无法获取您的MT数据,请联系作者解决') - exit(2) - return user_mt - -#这是一个可以获取用户Simple mt的函数,其中length值表示想要去的单倍群长度,例如length为2,用户的单倍群是A8a1,则Simple mt为A8 -def Get_Simple_MT(length): - Get_MT() - if len(user_mt) == 1: - Simple_mt = user_mt - elif "'" in user_mt: - Simple_mt = user_mt - else: - letter_list = re.split('\d',user_mt) - number_list = re.split('\D',user_mt) - counter = 0 - Simple_mt = '' - while length - 1 == counter: - if isinstance(counter/2,int) == True: - Simple_mt += letter_list[counter] - counter += 1 - else: - Simple_mt += number_list[counter] - return Simple_mt +def get_mt(inputs): + return inputs['haplogroup']['mt']['haplogroup'] + #这是一个获取用户y单倍群的函数 -def Get_Y(): - body = sys.stdin.read() - inputs = json.loads(body)['inputs'] - if 'haplogroup' in inputs: - user_gender = inputs['sex'] - else: - sys.stderr.write('无法获取您的Y数据,请联系作者解决') - exit(2) - if user_gender == 1: - user_y = inputs['haplogroup']['y']['haplogroup'] - return user_y - elif user_gender == 2: - sys.stderr.write('女性没有Y染色体哦~') - else: - sys.stderr.write('性别数据缺失') - -#这是一个可以获取用户Simple y的函数,其中length值表示想要去的单倍群长度,例如length为3,用户的单倍群是O2a2b1a1b,则Simple y为O2a -def Get_Simple_Y(length): - Get_Y() - if len(user_y) == 1: - Simple_y = user_y - else: - letter_list = re.split('\d',user_y) - number_list = re.split('\D',user_y) - counter = 0 - Simple_y = '' - while length - 1 == counter: - if isinstance(counter/2,int) == True: - Simple_y += letter_list[counter] - counter += 1 - else: - Simple_y += number_list[counter] - return Simple_y +def get_y(inputs): + return inputs['haplogroup']['y']['haplogroup'] + + +#这是一个将嵌套list转化为markdown表格形式的函数 +def to_markdown_table(input_head,input_body,output_style): + result_list = [] + column_num = len(input_head) + for row in input_body: + element_list = [] + for element in row: + element_list.append(str(element)) + result_list.append('|'+'|'.join(element_list)) + body_md = '|\n'.join(result_list)+'|' + if output_style == 'left': + style = [':---'] + if output_style == 'right': + style = ['---:'] + if output_style == 'center': + style = [':---:'] + element_list = [] + for element in input_head: + element_list.append(str(element)) + head_md = '|'+'|'.join(element_list)+'|\n' + style_md = '|'+'|'.join(style * column_num)+'|\n' + result = head_md + style_md + body_md + return result +''' +在 to_markdown_table(input_head,input_body,output_style)函数中 +input_head,input_body,input_type,output_style分别指的是 表格的表头 表格内容 以及 文字风格 +文字风格 有三种选择,可以是 left right 或 center 分别代表 文字左对齐 文字右对齐 和 文字居中 +如果输出结果中包含 整型 或 浮点型 无需对结果进行 str() 处理,函数中会自动处理 + +调用本函数的示例代码如下: + +head = [1,2,3,4,5] +body = [ + [7,8,9,0,1], + [4,7,9,10,4] +] +result = to_markdown_table(head,body,'lists','center') +print(result) + +本段代码输出结果如下 + +|1|2|3|4|5| +|:---:|:---:|:---:|:---:|:---:| +|7|8|9|0|1| +|4|7|9|10|4| + +''' From df94edaef0dd19d711f5d78a1e19c90cd6d5df6a Mon Sep 17 00:00:00 2001 From: yhlhhhhh <67506574+yhlhhhhh@users.noreply.github.com> Date: Tue, 27 Apr 2021 11:12:45 +0800 Subject: [PATCH 5/6] wegene_utils.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 目前先加这三个函数先用着:get_mt( ), get_y( ), to_markdown_table( ) get_simple_mt( ), get_simple_y( )稍后再加 --- examples/python3/scaffold-app/wegene_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python3/scaffold-app/wegene_utils.py b/examples/python3/scaffold-app/wegene_utils.py index 78b5729..d3126ab 100644 --- a/examples/python3/scaffold-app/wegene_utils.py +++ b/examples/python3/scaffold-app/wegene_utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -__all__ = ['process_raw_genome_data', 'is_genotype_exist', 'is_wegene_format'] +__all__ = ['process_raw_genome_data', 'is_genotype_exist', 'is_wegene_format', 'get_mt', 'get_y', 'to_markdown_table'] import sys import gzip From 775e61b8fc10ef64e7fe5a0b044d4a651fdb545e Mon Sep 17 00:00:00 2001 From: yhlhhhhh <67506574+yhlhhhhh@users.noreply.github.com> Date: Sat, 1 May 2021 11:52:56 +0800 Subject: [PATCH 6/6] ADD 5 functions Add 5 functions: get_mt( ), get_y( ), get_simple_mt( ), get_simple_y( ), to_markdown_table( ) --- examples/python3/scaffold-app/wegene_utils.py | 57 ++++++++++++++++++- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/examples/python3/scaffold-app/wegene_utils.py b/examples/python3/scaffold-app/wegene_utils.py index d3126ab..e36bba1 100644 --- a/examples/python3/scaffold-app/wegene_utils.py +++ b/examples/python3/scaffold-app/wegene_utils.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -__all__ = ['process_raw_genome_data', 'is_genotype_exist', 'is_wegene_format', 'get_mt', 'get_y', 'to_markdown_table'] +__all__ = ['process_raw_genome_data', 'is_genotype_exist', 'is_wegene_format', 'get_mt', 'get_y', 'get_simple_mt', 'get_simple_y', 'to_markdown_table'] +import re import sys import gzip import base64 @@ -77,6 +78,56 @@ def get_y(inputs): return inputs['haplogroup']['y']['haplogroup'] +#这是一个获取用户simple mt单倍群的函数 +def get_simple_mt(inputs,length): + inputs = get_mt(inputs) + letter = re.split(r'\d+',inputs) + number = re.split(r'\D+',inputs) + while '' in letter: + letter.remove('') + while '' in number: + number.remove('') + #处理当指定切割长度比mt实际长度长的情况 + if len(letter + number) <= length: + output = inputs + #处理特殊mt + elif len(inputs) == 1 or "'" in inputs: + output = inputs + else: + min_num = min(len(letter),len(number)) + output_list = [] + for element in range(min_num): + output_list.append(letter[element]) + output_list.append(number[element]) + output = ''.join(output_list[0:length]) + return output + + +#这是一个获取用户simple y单倍群的函数 +def get_simple_y(inputs,length): + inputs = get_y(inputs) + letter = re.split(r'\d+',inputs) + number = re.split(r'\D+',inputs) + while '' in letter: + letter.remove('') + while '' in number: + number.remove('') + #处理当指定切割长度比y实际长度长的情况 + if len(letter + number) <= length: + output = inputs + #处理边缘y + elif len(inputs) == 1 or "~" in inputs or inputs.isalpha() is True: + output = inputs + else: + min_num = min(len(letter),len(number)) + output_list = [] + for element in range(min_num): + output_list.append(letter[element]) + output_list.append(number[element]) + output = ''.join(output_list[0:length]) + return output + + #这是一个将嵌套list转化为markdown表格形式的函数 def to_markdown_table(input_head,input_body,output_style): result_list = [] @@ -102,7 +153,7 @@ def to_markdown_table(input_head,input_body,output_style): return result ''' 在 to_markdown_table(input_head,input_body,output_style)函数中 -input_head,input_body,input_type,output_style分别指的是 表格的表头 表格内容 以及 文字风格 +input_head input_body output_style分别指的是 表格的表头 表格内容 以及 文字风格 文字风格 有三种选择,可以是 left right 或 center 分别代表 文字左对齐 文字右对齐 和 文字居中 如果输出结果中包含 整型 或 浮点型 无需对结果进行 str() 处理,函数中会自动处理 @@ -113,7 +164,7 @@ def to_markdown_table(input_head,input_body,output_style): [7,8,9,0,1], [4,7,9,10,4] ] -result = to_markdown_table(head,body,'lists','center') +result = to_markdown_table(head,body,'center') print(result) 本段代码输出结果如下