diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..16694b5 Binary files /dev/null and b/.DS_Store differ diff --git a/README.md b/README.md index 4fd92f5..8b58b6b 100644 --- a/README.md +++ b/README.md @@ -25,5 +25,7 @@ $ python generate_barcodes Once the potential barcode strings have been generated, you can run `validate_barcodes.py` to get your barcodes by specifying the file of all possible codes, the name where your barcodes should be written, and how many barcodes you need: ``` -$ python validate_barcodes +$ python validate_barcodes + ``` +e.g. python3.8 validate_barcodes.py NIST_Barcode_raw.txt NIST_validate.txt 1000 18 0.4 0.6 3 \ No newline at end of file diff --git a/src/.DS_Store b/src/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/src/.DS_Store differ diff --git a/src/.ipynb_checkpoints/A1_Add_interval_nucleic_acid_into_bc-checkpoint.ipynb b/src/.ipynb_checkpoints/A1_Add_interval_nucleic_acid_into_bc-checkpoint.ipynb new file mode 100644 index 0000000..363fcab --- /dev/null +++ b/src/.ipynb_checkpoints/A1_Add_interval_nucleic_acid_into_bc-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/A1_Add_interval_nucleic_acid_into_bc.ipynb b/src/A1_Add_interval_nucleic_acid_into_bc.ipynb new file mode 100644 index 0000000..6188fed --- /dev/null +++ b/src/A1_Add_interval_nucleic_acid_into_bc.ipynb @@ -0,0 +1,589 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5429a44c", + "metadata": {}, + "source": [ + "This py file is basically to add interval A/T nucleic acid into the barcode library generated by LZamparo bc design tool" + ] + }, + { + "cell_type": "markdown", + "id": "efc8a613", + "metadata": {}, + "source": [ + "# Import barcode library file" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8e1c388f", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "import re\n", + "import sys\n", + "import common as common" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2ea66e9d", + "metadata": {}, + "outputs": [], + "source": [ + "dir = common.THIS_MODULE_PATH" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f0209e82", + "metadata": {}, + "outputs": [], + "source": [ + "BC_dir = os.path.join(dir,'BC.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "87c65126", + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(BC_dir,header = None)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a562fec9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
0AAAAAAAAAAAA
1AAAAAAAAAAAC
2AAAAAAAAAAAG
3AAAAAAAAAAAT
4AAAAAAAAAACA
\n", + "
" + ], + "text/plain": [ + " 0\n", + "0 AAAAAAAAAAAA\n", + "1 AAAAAAAAAAAC\n", + "2 AAAAAAAAAAAG\n", + "3 AAAAAAAAAAAT\n", + "4 AAAAAAAAAACA" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(data)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7b5f9588", + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(\"Barcode_length12.csv\", header = [\"Barcode\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "51f6a79c", + "metadata": {}, + "outputs": [], + "source": [ + "data_bc = pd.read_csv('Barcode_length12.csv',index_col = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cc3f14c7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0Barcode
00AAAAAAAAAAAA
11AAAAAAAAAAAC
22AAAAAAAAAAAG
33AAAAAAAAAAAT
44AAAAAAAAAACA
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 Barcode\n", + "0 0 AAAAAAAAAAAA\n", + "1 1 AAAAAAAAAAAC\n", + "2 2 AAAAAAAAAAAG\n", + "3 3 AAAAAAAAAAAT\n", + "4 4 AAAAAAAAAACA" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "BC_df = pd.DataFrame(data_bc)\n", + "BC_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c5fd3d10", + "metadata": {}, + "outputs": [], + "source": [ + "BC_df = BC_df.iloc[: , 1:]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ffe03f06", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Barcode
0AAAAAAAAAAAA
1AAAAAAAAAAAC
2AAAAAAAAAAAG
3AAAAAAAAAAAT
4AAAAAAAAAACA
\n", + "
" + ], + "text/plain": [ + " Barcode\n", + "0 AAAAAAAAAAAA\n", + "1 AAAAAAAAAAAC\n", + "2 AAAAAAAAAAAG\n", + "3 AAAAAAAAAAAT\n", + "4 AAAAAAAAAACA" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "BC_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9a0a4a71", + "metadata": {}, + "outputs": [], + "source": [ + "test_df = BC_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "e72743fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "testout_df = pd.DataFrame()\n", + "testout_df" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f6f0fc40", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TAAAAAAAAAAAAA\n", + "TAAAAAAAAAAACA\n", + "TAAAAAAAAAAAGA\n", + "TAAAAAAAAAAATA\n", + "TAAAAAAAAAACAA\n" + ] + } + ], + "source": [ + "# break the barcode string\n", + "BC_list = []\n", + "for _, row in test_df.iterrows():\n", + " Barcode = row['Barcode']\n", + " BC = 'T'+ Barcode + 'A'\n", + " sp1 = BC[0:3]\n", + " sp2 = BC[3:6]\n", + " sp3 = BC[6:8]\n", + " sp4 = BC[8:11]\n", + " sp5 = BC[11:14]\n", + " NewBC = sp1+'T'+ sp2 + 'A' +sp3 + 'T'+sp4+'A'+sp5\n", + " BC_list.append(NewBC)\n", + " print(BC)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "871f890b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "18" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(BC_list[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "2daefb5e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Barcode
0TAATAAAAAATAAAAAAA
1TAATAAAAAATAAAAACA
2TAATAAAAAATAAAAAGA
3TAATAAAAAATAAAAATA
4TAATAAAAAATAAAACAA
\n", + "
" + ], + "text/plain": [ + " Barcode\n", + "0 TAATAAAAAATAAAAAAA\n", + "1 TAATAAAAAATAAAAACA\n", + "2 TAATAAAAAATAAAAAGA\n", + "3 TAATAAAAAATAAAAATA\n", + "4 TAATAAAAAATAAAACAA" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "testout_df['Barcode'] = BC_list\n", + "testout_df" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "f5c9c7a7", + "metadata": {}, + "outputs": [], + "source": [ + "testout_df.to_csv(\"test.csv\",header = Barcode)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "7930ede1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([['TAATAAAAAATAAAAAAA'],\n", + " ['TAATAAAAAATAAAAACA'],\n", + " ['TAATAAAAAATAAAAAGA'],\n", + " ['TAATAAAAAATAAAAATA'],\n", + " ['TAATAAAAAATAAAACAA']], dtype=object)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "testout_df.values" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "cf178ce5", + "metadata": {}, + "outputs": [], + "source": [ + "testout_df.to_csv(r'pandas.txt', header=None, index=None, sep='\\n', mode='a')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10bd9e71", + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(\"Barcode_length12.csv\", header = [\"Barcode\"]) \n", + "#BC = re.compile('T[ACTG]{2}T[ACTG]{3}A[ACTG]{2}T[ACTG]{3}A[ACTG]{2}A')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/A1_Add_interval_nucleic_acid_into_bc.py b/src/A1_Add_interval_nucleic_acid_into_bc.py new file mode 100644 index 0000000..3fd67af --- /dev/null +++ b/src/A1_Add_interval_nucleic_acid_into_bc.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# coding: utf-8 + +# This py file is basically to add interval A/T nucleic acid into the barcode library generated by LZamparo bc design tool + +# # Import barcode library file + +# In[1]: + + +import os +import numpy as np +import pandas as pd +import re +import sys +import common as common + + +dir = common.THIS_MODULE_PATH + +BC_dir = os.path.join(dir,'Barcode_length12.csv') + + +data = pd.read_csv(BC_dir,index_col = False) + +df = pd.DataFrame(data) + + +BC_df = df.iloc[: , 1:] + + + +# break the barcode string and add intervel nucleic acid +def Add_BC_intervel(inputdf, outputdf): + BC_list = [] + count = 0 + outputdf = pd.DataFrame() + for _, row in inputdf.iterrows(): + Barcode = row['Barcode'] + BC = 'T'+ Barcode + 'A' + sp1 = BC[0:3] + sp2 = BC[3:6] + sp3 = BC[6:8] + sp4 = BC[8:11] + sp5 = BC[11:14] + NewBC = sp1+'T'+ sp2 + 'A' +sp3 + 'T'+sp4+'A'+sp5 + BC_list.append(NewBC) + count += 1 + print(count) + outputdf['Barcode'] = BC_list + outputdf.to_csv(r'NIST_Barcode_raw.txt', header=None, index=None, sep='\n', mode='a') + return count + +# Run the function +outputdf = pd.DataFrame() +Add_BC_intervel(BC_df, outputdf) + diff --git a/src/NIST_validate.txt b/src/NIST_validate.txt new file mode 100644 index 0000000..d623c2f --- /dev/null +++ b/src/NIST_validate.txt @@ -0,0 +1,1000 @@ +TAATAACAGCTCGCACGA +TAATAACAGGTCCGACGA +TAATAACAGGTGGCAGGA +TAATAACACCTCCGAGGA +TAATAACACGTCGCAGCA +TAATAACACGTGCCACCA +TAATAACACCTCGGACCA +TAATAACAGCTGCGACCA +TAATAAGACCTGGCACGA +TAATAAGACGTCCGACCA +TAATAAGACCTGCCAGCA +TAATAAGACGTGCGAGGA +TAATAAGAGCTCGGAGGA +TAATAAGAGGTCGCACCA +TAATAAGAGGTGCCACGA +TAATACAACGTCGCAGGA +TAATACAACGTGGCACCA +TAATACAAGCTCCGACCA +TAATACAAGGTGCCAGGA +TAATACAAGGTCGGACGA +TAATACAACCTGCGAGCA +TAATACAAGCTCGCAGCA +TAATACAAGCTGGCACGA +TAATACAACGTGCGACGA +TAATACCAAGTGCGAGCA +TAATACCACGTGGTAGGA +TAATACCACGTGTCACGA +TAATACCACTTGCGAGGA +TAATACCAACTCCGACGA +TAATACCACATGCGACCA +TAATACCACGTCCGATCA +TAATACCACCTCACACGA +TAATACCAACTGCCAGGA +TAATACCACCTAGGAGGA +TAATACCACGTCCAAGGA +TAATACCACGTCGGAAGA +TAATACCACCTACCACCA +TAATACCACCTGGAAGCA +TAATACCACGTCGTACCA +TAATACCACCTGGCACAA +TAATACCACCTCGCAACA +TAATACCACGTGACAGCA +TAATACCACCTCTGAGCA +TAATACCAAGTCGCACGA +TAATACCACCTGCTACGA +TAATACCACGTTGGAGCA +TAATACCAGGTCTGACCA +TAATACCAGTTCGCACCA +TAATACCAGGTGCCACAA +TAATACCAGTTGCCAGCA +TAATACCAGCTCCTAGGA +TAATACCAGGTCTCAGGA +TAATACCAGGTCGAAGCA +TAATACCAGATCGGAGGA +TAATACCAGGTGCGAAGA +TAATACCAGCTGAGACGA +TAATACCAGCTGGCAGTA +TAATACCAGCTCGGACAA +TAATACGAACTCGCACCA +TAATACCAGGTCCGAGAA +TAATACCAGCTTCGAGCA +TAATACCAGCTTCCACGA +TAATACCAGCTGGTACCA +TAATACCAGGTGGAACGA +TAATACGAACTGCGACCA +TAATACCAGGTGGCAACA +TAATACCATCTCGCAGGA +TAATACGAGCTGCAACGA +TAATACGACTTCGGAGGA +TAATACGAGCTGAGAGCA +TAATACGACCTCCGAACA +TAATACGAGGTACGAGCA +TAATACGACGTGGCAGAA +TAATACGAGCTAGGACGA +TAATACGAGATGGCAGGA +TAATACGACGTCCGAGTA +TAATACGACGTTGGACGA +TAATACGAGCTCCAAGCA +TAATACGAGGTGTGAGGA +TAATACGACCTCGTAGCA +TAATACGACGTTCCAGGA +TAATACGAGCTGCCATCA +TAATACGAGGTCCTACGA +TAATACGAGGTTCCACCA +TAATACGACGTGCTACCA +TAATACGAGGTCACAGCA +TAATACGAGGTCGGAACA +TAATACGAGCTCGCATGA +TAATACGACATCCGACGA +TAATACGACCTCTCAGGA +TAATACGACCTCGGACTA +TAATACGAGGTGGCACTA +TAATACGAGGTGGTAGCA +TAATACGAGCTACCAGGA +TAATACGACCTGTGACGA +TAATACGACTTGCCACGA +TAATACGACGTCTCACCA +TAATACGACCTGACACCA +TAATACGATCTCCGAGGA +TAATACGATCTGGCAGCA +TAATACTACCTGGCAGGA +TAATACTAGGTGCGACCA +TAATACTAGCTGCGAGGA +TAATAGAACGTCGGAGCA +TAATAGAACCTCGCACCA +TAATAGAACCTGCCAGGA +TAATAGAACCTCCGACGA +TAATAGCAAGTCCGACCA +TAATAGAAGGTGGCAGCA +TAATAGCAACTGCCACCA +TAATAGCAACTCGGAGCA +TAATAGAAGGTCCGAGGA +TAATAGCACATGCCACGA +TAATAGCAGTTCGGACGA +TAATAGCACCTGCTAGCA +TAATAGCACCTGGCATCA +TAATAGCAGGTCGGATCA +TAATAGCAGGTGCTACCA +TAATAGCACCTGTGAGGA +TAATAGCACGTCAGAGGA +TAATAGCAGCTCGAAGGA +TAATAGCAGGTGACACGA +TAATAGCACCTCCAACCA +TAATAGCACGTTGCAGGA +TAATAGCAGCTCCGACTA +TAATAGCACGTGGCACTA +TAATAGCAGGTAGCACCA +TAATAGCAGCTTGGACCA +TAATAGCACCTAGCACGA +TAATAGCACCTCGCAGTA +TAATAGCACTTCCGAGCA +TAATAGCAGCTGCCAAGA +TAATAGCAGCTGTCAGCA +TAATAGCAGATGCGAGCA +TAATAGCACGTCGGACAA +TAATAGCACGTGCGAGAA +TAATAGCACGTTCGACGA +TAATAGCAGCTCACACCA +TAATAGCACGTGAGACCA +TAATAGCAGGTTCCAGCA +TAATAGCAGATCGCAGCA +TAATAGCAGGTCGCAAGA +TAATAGCAGGTAGGAGGA +TAATAGCAGGTGCAAGGA +TAATAGCACCTCGGATGA +TAATAGGACCTGGTAGGA +TAATAGGACCTGGAACCA +TAATAGGACCTCGCAAGA +TAATAGGACCTCAGACCA +TAATAGGACCTAGCAGCA +TAATAGGAAGTCGGACGA +TAATAGGAAGTGGCACCA +TAATAGGACCTCCAAGGA +TAATAGGAACTGCGAGGA +TAATAGCATCTGCGACGA +TAATAGGACCTGCGATCA +TAATAGGACCTTCCACCA +TAATAGGACCTCGGAGAA +TAATAGGAGGTCCGACAA +TAATAGGAGCTTCGACGA +TAATAGGAGGTGTGACCA +TAATAGGACGTCCGAAGA +TAATAGGAGGTGGTACGA +TAATAGGATGTGCCAGCA +TAATAGGAGTTCGGAGCA +TAATAGGAGGTGCGATGA +TAATAGGAGCTGCCAGAA +TAATAGGAGGTCGCAGAA +TAATAGGACGTGCGACTA +TAATAGGACGTGTCAGGA +TAATAGGACGTCACACGA +TAATAGGAGTTGCCACCA +TAATAGGAGCTCGTACCA +TAATAGGACGTTCGAGCA +TAATAGGACGTCGCATCA +TAATAGGAGGTCCTAGCA +TAATAGGAGCTCACAGGA +TAATAGGACGTAGGACCA +TAATAGGAGCTGGCAACA +TAATAGGAGATCGCACGA +TAATAGGAGCTGTCACGA +TAATAGTAGCTCCGAGCA +TAATATCACGTGCCAGGA +TAATCACAACTCGCAGCA +TAATCACAACTCCGACCA +TAATCACACGTAGCAGGA +TAATCACAGATGCCACGA +TAATCACACGTGTCAGCA +TAATCACACGTCGGACTA +TAATCACAGATCGGAGCA +TAATCACAAGTGCCAGGA +TAATCACAGGTGGTACCA +TAATCACAGCTAGCACCA +TAATCACAGGTTGCACGA +TAATCACAGCTGCGAGGA +TAATCACACCTGAGACGA +TAATCACACCTGGCAGAA +TAATCACAACTGGCACGA +TAATCACACGTGCGAAGA +TAATCACAGGTGCCAACA +TAATCACACCTCGGAAGA +TAATCACACGTCTCACGA +TAATCACACCTCAGAGCA +TAATCACACCTGCGATCA +TAATCACAGGTACGACCA +TAATCACAGCTCCTAGCA +TAATCACAGGTCGCAGTA +TAATCACACATCCGACGA +TAATCACAGGTCGGATGA +TAATCACAGGTGAGAGCA +TAATCACACCTTCCACCA +TAATCACAGCTGGAAGCA +TAATCACAGCTCTGACGA +TAATCACACGTCCTACCA +TAATCACACATCGCACCA +TAATCACACGTCCGAGAA +TAATCACAGCTCACAGGA +TAATCACAGGTCACACCA +TAATCACAGGTCCAAGGA +TAATCACACGTGGAACGA +TAATCAGACATGCGACCA +TAATCAGAAGTGCGACGA +TAATCAGAAGTCGCAGGA +TAATCAGAACTGCCACCA +TAATCAGAAGTCCGAGCA +TAATCAGACATGCCAGGA +TAATCAGACATCGGAGGA +TAATCACATCTGCCAGCA +TAATCAGAACTCGGACGA +TAATCAGAGGTCAGAGGA +TAATCAGACGTCGGAACA +TAATCAGACGTCGCACAA +TAATCAGAGTTCCGACGA +TAATCAGAGCTGCGACAA +TAATCAGAGGTGGCAAGA +TAATCAGAGCTGGTAGGA +TAATCAGACCTGTGAGGA +TAATCAGAGGTGCGATCA +TAATCAGAGGTAGCAGCA +TAATCAGACTTGGCACCA +TAATCAGACGTGACACGA +TAATCAGACCTGCTACGA +TAATCAGAGCTTGGACCA +TAATCAGACGTGGCAGTA +TAATCAGAGCTCTGAGCA +TAATCAGAGCTGGCATCA +TAATCAGAGCTCCAACCA +TAATCAGAGGTGCCAGAA +TAATCAGAGGTCGTACGA +TAATCAGACCTACGAGCA +TAATCAGACGTCCAACGA +TAATCAGACGTTCCAGCA +TAATCAGACCTCACACCA +TAATCAGACCTCCGATGA +TAATCAGAGCTCGCAGAA +TAATCAGAGCTTCCACGA +TAATCAGACCTCGAAGCA +TAATCAGAGGTGTCACCA +TAATCAGACCTTGCAGGA +TAATCAGACGTAGGACGA +TAATCATACCTCGCACGA +TAATCATACGTGCGAGCA +TAATCATAGGTCGGACCA +TAATCCAACCTCCGACTA +TAATCCAACATGCCAGCA +TAATCCAACCTGGAACGA +TAATCCAACGTGCGAGTA +TAATCCAACGTAGCACGA +TAATCCAAGGTTGCAGGA +TAATCCAAGCTCGGATGA +TAATCCAAGCTGCTAGCA +TAATCCAACCTCGCATCA +TAATCCAAGGTCTCACGA +TAATCCAAGATCCGACGA +TAATCCAACCTGACAGGA +TAATCCAACGTCAGAGCA +TAATCCAACGTTCGACCA +TAATCCAAGTTGGCAGCA +TAATCCAACCTGTGACCA +TAATCCAACCTCCAAGCA +TAATCCAAGGTGCTACGA +TAATCCAAGGTCCAACCA +TAATCCAAGGTGTGAGCA +TAATCCAAGGTCGCAACA +TAATCCAAGGTCGGAGTA +TAATCCAACGTCCGAAGA +TAATCCAAGGTGCCATCA +TAATCCAACCTTCCACGA +TAATCCAACGTCGGACAA +TAATCCAACCTTGGAGGA +TAATCCAAGGTACGAGGA +TAATCCAAGGTGGCACAA +TAATCCAAGCTCGCACTA +TAATCCAAGTTCGGACCA +TAATCCAACGTGGAAGCA +TAATCCAAGCTAGGAGCA +TAATCCAAGCTCCGAGAA +TAATCCAACCTGCGATGA +TAATCCAAGCTACCACCA +TAATCCAAGCTGCCAAGA +TAATCCAATCTGGCACCA +TAATCCGAACTGGCAGTA +TAATCCGAGATGGCACCA +TAATCCGACGTGCCATTA +TAATCCGACATCGCACTA +TAATCCGACTTCCTACGA +TAATCCGACGTGAGATCA +TAATCCGAGCTAGTACCA +TAATCCGAACTCCAAGGA +TAATCCGACGTGATAGGA +TAATCCGACCTACGAAGA +TAATCCGACCTGTCATCA +TAATCCGACCTGCTAACA +TAATCCGAACTGCCATGA +TAATCCGACATTGCAGCA +TAATCCGACGTAGGAGTA +TAATCCGAACTCGGAGCA +TAATCCGACTTACCAGCA +TAATCCGACCTCCTAGAA +TAATCCGAGCTCAGACTA +TAATCCGAGATGCCAGTA +TAATCCGAGATCGAAGCA +TAATCCGAACTCCGACAA +TAATCCGACTTCCGATCA +TAATCCGAAGTGCCACAA +TAATCCGAGATGCGAAGA +TAATCCGAAGTGTCAGCA +TAATCCGACGTCGAAGGA +TAATCCGAAGTCGGATGA +TAATCCGACGTGGAACAA +TAATCCGACATGGCATGA +TAATCCGAGCTATCAGCA +TAATCCGAAGTTGCACCA +TAATCCGACGTCACAGTA +TAATCCGAGCTCACATCA +TAATCCGACGTCTGAGAA +TAATCCGACTTGAGACGA +TAATCCGACCTTGTACGA +TAATCCGACCTAGGACAA +TAATCCGAGATTCGACCA +TAATCCGAGATCGGACAA +TAATCCGACGTAGCAACA +TAATCCGAGATACCACGA +TAATCCGAAGTGGTACGA +TAATCCGACCTACCACTA +TAATCCGACCTTCGAGTA +TAATCCGAAGTCTGACCA +TAATCCGACGTTCGATGA +TAATCCGAAGTACCAGGA +TAATCCGAGATAGGAGGA +TAATCCGACGTGTGACTA +TAATCCGAACTCTCACGA +TAATCCGACCTTAGACCA +TAATCCGACCTCTAACCA +TAATCCGAATTGCGAGCA +TAATCCGACATGTGAGCA +TAATCCGACGTGTCAAGA +TAATCCGAGCTACGATCA +TAATCCGAACTGAGAGGA +TAATCCGACTTGCAAGGA +TAATCCGAGATCTCAGGA +TAATCCGACGTCGTATCA +TAATCCGAGCTCATAGGA +TAATCCGAGCTAGCAAGA +TAATCCGACTTGGTAGCA +TAATCCGACGTACAACCA +TAATCCGACTTCGCAAGA +TAATCCGAACTGGAACCA +TAATCCGAACTTCCAGCA +TAATCCGAGCTCTGAAGA +TAATCCGAGGTCGCATTA +TAATCCGAGGTAAGACGA +TAATCCGAGTTGTCACGA +TAATCCGAGGTCACAAGA +TAATCCGAGCTTGCACAA +TAATCCGAGGTTGGACTA +TAATCCGAGCTGCAAGAA +TAATCCGAGGTCCGATAA +TAATCCGATGTGCCAACA +TAATCCGATGTGGCAGGA +TAATCCTAAGTCCGACGA +TAATCCGATGTCACACCA +TAATCCGAGTTGCGACTA +TAATCCGATGTCCTAGCA +TAATCCGAGGTGATACCA +TAATCCGATCTGCTAGGA +TAATCCGATGTCCGACTA +TAATCCGAGGTGGAATGA +TAATCCGAGCTGTGAGTA +TAATCCGATCTCGAACGA +TAATCCGAGCTTGAAGGA +TAATCCGAGGTCGTAGAA +TAATCCGATGTTGGAGCA +TAATCCGATGTGCGAGAA +TAATCCGAGGTTCAAGCA +TAATCCGAGTTCAGAGCA +TAATCCTAAGTCGCAGCA +TAATCCGATGTGCAACGA +TAATCCGATCTTCGACGA +TAATCCGATCTGACACGA +TAATCCGATCTCGCAACA +TAATCCTACTTGCGACCA +TAATCCTAGGTAGCACCA +TAATCCTACCTCACAGCA +TAATCCTACGTGTGAGGA +TAATCCTACTTGGCACGA +TAATCCTAGCTGGCATGA +TAATCCTACATCGGACCA +TAATCCTAGCTCCAACGA +TAATCCTACCTGGCAACA +TAATCCTAGGTCCGAACA +TAATCCTAGGTGGCAGTA +TAATCCTACCTCCGAGGA +TAATCCTAGATCGCACGA +TAATCCTAGTTGCCAGGA +TAATCCTACCTGCCAGTA +TAATCCTAGGTCCTAGGA +TAATCCTACCTCCTACCA +TAATCCTACGTGTCACCA +TAATCCTAGGTGACACGA +TAATCCTAGCTTGGACGA +TAATCCTAGCTCTCACCA +TAATCCTAGCTGAGACCA +TAATCCTACGTCGTACGA +TAATCCTAGTTCGGAGGA +TAATCCTAGCTTGCAGCA +TAATCCTAGCTGCCACAA +TAATCCTAGGTGCGATGA +TAATCCTAGATGCGAGCA +TAATCGAACCTGCCACCA +TAATCGAACGTCTGACGA +TAATCGAACCTTGGACCA +TAATCGAACATGCGACGA +TAATCGAACGTCTCAGCA +TAATCGAACGTGCGATCA +TAATCGAACCTCGCAGAA +TAATCGAACGTCGTACCA +TAATCGAACCTGCGAGAA +TAATCGAACGTCGCATGA +TAATCGAACGTCCTAGGA +TAATCGAACGTGAGAGGA +TAATCGAACCTCCGAACA +TAATCGAACATGGCAGGA +TAATCGAACCTGGTAGCA +TAATCGAACGTGCCAAGA +TAATCGAACCTCACACGA +TAATCGAAGGTGGTAGGA +TAATCGAAGGTACCAGCA +TAATCGAAGCTCAGAGGA +TAATCGAAGGTTCCACGA +TAATCGAAGCTCGAACGA +TAATCGAAGGTAGGACGA +TAATCGAAGATCGCACCA +TAATCGAAGTTCCGAGCA +TAATCGAAGCTGCCAGTA +TAATCGAAGCTGTGACGA +TAATCGAAGGTCAGACCA +TAATCGAAGTTCGCAGGA +TAATCGAAGCTCCTACCA +TAATCGAAGGTTGGAGCA +TAATCGAAGGTGGAACCA +TAATCGAATCTCGGAGCA +TAATCGAAGGTGCGACAA +TAATCGAATGTGGCACGA +TAATCGCAGATTGGACGA +TAATCGCAGGTACGAGTA +TAATCGCACGTACCACGA +TAATCGCAAGTGCAACGA +TAATCGCACGTAGAAGCA +TAATCGCACTTCCAAGGA +TAATCGCACTTGCGACAA +TAATCGCACCTTCAAGCA +TAATCGCAACTGACAGGA +TAATCGCACGTCACAAGA +TAATCGCACATCGTAGCA +TAATCGCACGTCTGAGTA +TAATCGCACGTTGCACAA +TAATCGCACCTGCGAATA +TAATCGCACGTTGGATCA +TAATCGCACCTCTTACCA +TAATCGCACGTCCGATGA +TAATCGCAGCTGACATCA +TAATCGCAGCTCTGAGAA +TAATCGCAACTCGGACTA +TAATCGCACTTGGAACCA +TAATCGCAGCTAGGATCA +TAATCGCAGCTTCCAGAA +TAATCGCAGCTCAGAACA +TAATCGCAACTTGCACCA +TAATCGCACCTAGCAACA +TAATCGCAGGTAGCATGA +TAATCGCAGCTGGTACGA +TAATCGCAGATCACACGA +TAATCGCAGATGCTAGGA +TAATCGCAGATACCACCA +TAATCGCACCTCAGACAA +TAATCGCACTTCGCACTA +TAATCGCAAGTGGCAACA +TAATCGCACATACGAGCA +TAATCGCACCTGCCATAA +TAATCGCACATGCTACCA +TAATCGCAACTCCTAGGA +TAATCGCACCTAGGAGAA +TAATCGCAGCTGCAACCA +TAATCGCAGCTACAAGGA +TAATCGCAACTGCGATGA +TAATCGCAAGTGCTAGCA +TAATCGCACCTGTCAAGA +TAATCGCACATCGAACGA +TAATCGCAAGTCTCACCA +TAATCGCACTTGCCAGTA +TAATCGCACGTGGTAGAA +TAATCGCAGCTAGCAGTA +TAATCGCACTTCACAGCA +TAATCGCAGCTTGTAGCA +TAATCGCACTTGGCATGA +TAATCGCAAGTTGGAGGA +TAATCGCACCTATGACGA +TAATCGCAACTCGCATGA +TAATCGCACGTCCAACTA +TAATCGCAGATGGCAGAA +TAATCGCAGATGCGACTA +TAATCGCACCTGGAAGTA +TAATCGCACATCTCAGGA +TAATCGCAGCTGTCACTA +TAATCGCAGCTTCGAAGA +TAATCGCAGATCCGATCA +TAATCGCAAGTCGCAGAA +TAATCGCACGTGTGAACA +TAATCGCACGTGTTACGA +TAATCGCACGTAGGAAGA +TAATCGCAACTGTGACCA +TAATCGCACCTTCTACGA +TAATCGCACGTTCCAACA +TAATCGCAAGTAGGACCA +TAATCGCACTTCGGAACA +TAATCGCAGCTACGACAA +TAATCGCAAGTCAGAGCA +TAATCGCAACTACCAGCA +TAATCGCACCTCGAATCA +TAATCGCAGCTGAGAGTA +TAATCGCAGCTCGCACAA +TAATCGCACCTTAGAGGA +TAATCGCAAGTCGTACGA +TAATCGCAATTCCGACGA +TAATCGCATCTCCAACGA +TAATCGGAACTAGCACGA +TAATCGCATATGGCACCA +TAATCGCATGTCGCATCA +TAATCGGAAGTCCTACCA +TAATCGCAGTTGTGAGCA +TAATCGCATGTGACAGCA +TAATCGCATGTGCCACAA +TAATCGGAACTACGACCA +TAATCGCAGGTCCTAACA +TAATCGCATATCGGAGGA +TAATCGCAGTTCGGAGTA +TAATCGGAAGTGAGACCA +TAATCGCATCTTGCAGGA +TAATCGCAGGTCATAGGA +TAATCGCAGGTGAGATGA +TAATCGGAATTCGCAGCA +TAATCGGAATTGCCAGGA +TAATCGCATCTTCGACCA +TAATCGGAACTCCGAGTA +TAATCGCAGGTGCCATTA +TAATCGGAAGTCTGAGGA +TAATCGGAACTGGAAGGA +TAATCGCATGTGGAAGGA +TAATCGGAAGTGCCATCA +TAATCGCAGGTCTAAGCA +TAATCGCATCTCTCAGCA +TAATCGCATGTCAGACGA +TAATCGCAGTTCGTACCA +TAATCGCAGGTCTGAAGA +TAATCGCAGGTGTCAGGA +TAATCGGAAGTCGCACTA +TAATCGGAAGTGTCACGA +TAATCGGACTTCGTAGGA +TAATCGGAGATCCGAGAA +TAATCGGAGTTCACACCA +TAATCGGACGTCTGATCA +TAATCGGACCTCGAACTA +TAATCGGACCTGAGAACA +TAATCGGATCTCGGAAGA +TAATCGGACCTGTAAGCA +TAATCGGAGATCCTACGA +TAATCGGACCTGGCACAA +TAATCGGATCTGGCATGA +TAATCGGACCTGACAGTA +TAATCGGAGGTCGAAGTA +TAATCGGAGGTCTGACTA +TAATCGGACGTTACACCA +TAATCGGAGCTGATAGCA +TAATCGGAGGTTGTACCA +TAATCGGAGATTCCAGGA +TAATCGGACGTGGTAACA +TAATCGGAGGTACTAGGA +TAATCGGACGTGCAATGA +TAATCGGACCTACCAGGA +TAATCGGAGCTTGGAGTA +TAATCGGAGGTAAGAGCA +TAATCGGAGCTCGCAATA +TAATCGGACATCAGAGCA +TAATCGGAGTTGCGAACA +TAATCGGACATCCAACCA +TAATCGGAGCTCTCATGA +TAATCGGAGATCGGATGA +TAATCGGAGATGAGACGA +TAATCGGACGTTGCAAGA +TAATCGGACATGCGAGTA +TAATCGGACTTCCGACTA +TAATCGGACGTACGACAA +TAATCGGAGTTGCAACGA +TAATCGGACATGGTACGA +TAATCGGAGCTAGAAGCA +TAATCGGAGGTGCTACTA +TAATCGGAGGTAGCACAA +TAATCGGAGGTGACAACA +TAATCGGACATGCCAACA +TAATCGGAGATGCCACAA +TAATCGGAGCTACCAACA +TAATCGGAGCTGCTATGA +TAATCGGACGTCCAAGAA +TAATCGGAGGTGTGAGAA +TAATCGGACATAGCACCA +TAATCGGATCTGCGAGCA +TAATCGGACCTCCTATCA +TAATCGGACTTCTCACGA +TAATCGGACCTAGGATGA +TAATCGGATCTGGTACCA +TAATCGGAGGTCCAATCA +TAATCGGAGCTGTGATCA +TAATCGGACTTGTGACCA +TAATCGGAGTTGGCAGTA +TAATCGGACTTAGGAGCA +TAATCGGACCTCTCAACA +TAATCGGACCTTGCATCA +TAATCGGAGATGCAAGCA +TAATCGGATCTGCCACTA +TAATCGGATCTCTGACCA +TAATCGGAGCTATGAGGA +TAATCGGACGTTGGAGAA +TAATCGTACGTTGGACGA +TAATCGTACGTGCAACCA +TAATCGTAACTGGCAGCA +TAATCGTAACTCGGAGGA +TAATCGTACGTACGAGGA +TAATCGTACGTGCCAGAA +TAATCGGATGTAGGAGGA +TAATCGTACGTGGCATCA +TAATCGGATGTCGAACCA +TAATCGTACCTGCAAGGA +TAATCGGATGTGCGAAGA +TAATCGGATGTACCACCA +TAATCGGATGTCACAGGA +TAATCGTAACTGCCACGA +TAATCGTACCTCTGAGCA +TAATCGTACGTCGCAGTA +TAATCGTAGCTCCGATGA +TAATCGTAGCTTCCACCA +TAATCGTAGTTGGCACCA +TAATCGTATCTCGCACCA +TAATCGTAGGTCACAGCA +TAATCGTAGGTCGGAGAA +TAATCGTATGTCCGAGCA +TAATCTAACCTCGGACGA +TAATCTAAGCTGGCAGGA +TAATCTAAGCTGCGACCA +TAATCTCAACTGCGAGCA +TAATCTCACATGGCACGA +TAATCTCACATGCGAGGA +TAATCTCAAGTGCCACCA +TAATCTCAAGTCCGAGGA +TAATCTCACGTACCAGCA +TAATCTCACGTGCGACTA +TAATCTCACCTTGGAGCA +TAATCTCAGCTCGGAGGA +TAATCTCAGGTGGCATCA +TAATCTCATCTCGGACCA +TAATCTCACGTCGCAACA +TAATCTCACGTCAGACCA +TAATCTCAGTTCGCAGCA +TAATCTCAGGTCCGACAA +TAATCTCAGGTGTGACGA +TAATCTCAGGTAGGAGCA +TAATCTCACCTTCCAGGA +TAATCTCAGGTCGAACGA +TAATCTCAGCTACCACGA +TAATCTCACCTGACACCA +TAATCTCACCTCCGAGTA +TAATCTCAGATGCCAGCA +TAATCTCACCTGCAACGA +TAATCTCACCTGGTAGGA +TAATCTCACCTACGACCA +TAATCTCATGTCGCAGGA +TAATCTGACCTCGGATCA +TAATCTGACTTGGCAGGA +TAATCTGACGTCTCAGGA +TAATCTGACGTGACAGCA +TAATCTGACGTTCCACGA +TAATCTGACGTTGGACCA +TAATCTGACCTCAGAGGA +TAATCTGACCTGTCACGA +TAATCTGACGTGCGAACA +TAATCTGACCTCGCAGTA +TAATCTGACCTGCCAGAA +TAATCTGAGCTCGTAGCA +TAATCTGAGGTTGGAGGA +TAATCTGATGTCGGACGA +TAATCTGAGCTGGAACGA +TAATCTGAGTTGCGAGGA +TAATCTGAGGTGGAAGCA +TAATCTGATGTGGCACCA +TAATCTGAGGTGCCATGA +TAATCTGAGCTCACACGA +TAATCTGAGGTCCGAAGA +TAATCTGAGCTTCGAGCA +TAATCTTACGTCGGAGCA +TAATGACACCTGCGACTA +TAATGACACTTCGCACGA +TAATGACACCTCCGAACA +TAATGACACTTGCGAGCA +TAATGACACCTGGCACCA +TAATGACAAGTCGGACCA +TAATGACACGTGGCATGA +TAATGACACGTGGTAGCA +TAATGACACCTCCTACGA +TAATGACACCTCTCAGGA +TAATGACACCTACCAGCA +TAATGACACGTCGGAGGA +TAATGACACGTGTGACGA +TAATGACACGTGCAAGGA +TAATGACAGATCGGACGA +TAATGACACCTTGGACGA +TAATGACACGTTCCACGA +TAATGACAGATGGCAGCA +TAATGACAGCTACGACGA +TAATGACACCTGCCAAGA +TAATGACATCTCGGAGCA +TAATGACAGGTGCGACAA +TAATGAGAACTGCGAGCA +TAATGACAGGTCTCAGCA +TAATGACAGCTGACACGA +TAATGACAGCTCGCAACA +TAATGACAGCTTGCAGGA +TAATGACAGGTTCGAGCA +TAATGACAGCTCCGAGAA +TAATGAGAAGTGGCAGCA +TAATGACAGGTACCAGGA +TAATGACAGCTGCCAGTA +TAATGACAGGTGGCACTA +TAATGACAGCTGTGAGCA +TAATGACAGCTCAGACCA +TAATGACAGGTCCAACCA +TAATGACAGTTGCCACCA +TAATGAGAGGTCCGAGTA +TAATGAGAGGTGCGAAGA +TAATGAGAGGTCGTAGCA +TAATGAGACCTTCGACCA +TAATGAGAGCTCTCACCA +TAATGAGACGTGCCATCA +TAATGAGACGTGTGAGCA +TAATGAGACGTCACAGCA +TAATGAGAGCTGGTACCA +TAATGAGACGTCGAACCA +TAATGAGACGTCAGACGA +TAATGAGAGCTTCGAGGA +TAATGAGAGGTCGCATGA +TAATGAGAGCTCGGATCA +TAATGAGACCTGACAGGA +TAATGAGAGCTAGCACGA +TAATGAGACCTAGGAGGA +TAATGAGAGGTGCAAGCA +TAATGAGAGATCCGACCA +TAATGAGAGCTGCCAACA +TAATGAGAGGTTGGACGA +TAATGAGAGGTACCACCA +TAATGAGACCTGCCACAA +TAATGAGAGGTGTCAGGA +TAATGAGACTTCGGAGCA +TAATGAGACCTCGGACAA +TAATGAGACTTGCGACGA +TAATGAGACGTCCTAGGA +TAATGAGAGCTGTGACGA +TAATGAGAGGTGAGACCA +TAATGAGACGTGGTACGA +TAATGAGATCTCGCAGGA +TAATGATACCTGCGAGGA +TAATGATACCTCGCAGCA +TAATGAGATCTCCGACGA +TAATGCAACCTAGCACCA +TAATGCAACCTCCAACGA +TAATGCAAGCTCACAGGA +TAATGCAACCTTCCAGCA +TAATGCAACCTCAGACCA +TAATGCAACGTGCCACTA +TAATGCAACGTCGTAGCA +TAATGCAACGTGCGAACA +TAATGCAACATGGCACGA +TAATGCAACGTAGGAGGA +TAATGCAAGATGCGACCA +TAATGCAACTTGCCAGGA +TAATGCAACCTGGTAGGA +TAATGCAAGCTCGAACCA +TAATGCAACCTGCGACAA +TAATGCAACATCCGAGGA +TAATGCAACCTCGGAAGA +TAATGCAAGCTGGCATCA +TAATGCAAGCTGTGAGGA +TAATGCAACGTCACACGA +TAATGCAAGCTGCCAGAA +TAATGCAACCTCGCAGTA +TAATGCAAGCTTCGACGA +TAATGCAACGTGTCAGCA +TAATGCAAGATCGGAGCA +TAATGCCAAGTCGAAGGA +TAATGCCAAGTCGCATCA +TAATGCCAAGTCCTACCA +TAATGCCAATTCGGACGA +TAATGCCAAGTTCCAGCA +TAATGCCAACTCTCAGCA +TAATGCAATGTCGCACCA +TAATGCCAAGTGGCAAGA +TAATGCAAGGTTGGACCA +TAATGCAATGTGCGAGGA +TAATGCCAACTCAGAGGA +TAATGCCAACTGCGAAGA +TAATGCCAACTAGCACGA +TAATGCCAAGTACGAGGA +TAATGCCAACTGGTAGCA +TAATGCAAGGTCCGATCA +TAATGCCAACTTCGACCA +TAATGCAAGGTGGAAGGA +TAATGCCAATTGGCACCA +TAATGCCAAGTGCGACTA +TAATGCCAATTCCGAGCA +TAATGCCAACTCGGAACA +TAATGCAAGTTCGCACGA +TAATGCAAGGTGACACCA +TAATGCCAACTGCCATCA +TAATGCAATCTCCGAGCA +TAATGCAATCTGCCACGA +TAATGCAAGGTCGCAGAA +TAATGCAAGGTACCACGA +TAATGCCACCTGCTAGTA +TAATGCCAGGTCGTACGA +TAATGCCACCTTGGATCA +TAATGCCACGTCAGATGA +TAATGCCAGCTGCAACTA +TAATGCCACCTTGCACTA +TAATGCCACCTTCCATGA +TAATGCCAGTTGCGAGTA +TAATGCCAGCTTGAAGCA +TAATGCCAGATCGCACAA +TAATGCCAGATGAGAGCA +TAATGCCACATCTGACGA +TAATGCCACGTATGACCA +TAATGCCAGGTGCCATGA +TAATGCCACGTGCTATCA +TAATGCCAGGTCCAAGTA +TAATGCCATCTCCGATGA +TAATGCCACGTGGCAGTA +TAATGCCAGATAGCAGGA +TAATGCCACGTTGGACAA +TAATGCCAGGTTCCACTA +TAATGCCACATGACAGGA +TAATGCCACTTCGAAGCA +TAATGCCATATCGCAGCA +TAATGCCAGGTCGGATAA +TAATGCCAGTTGTGACGA +TAATGCCACCTACGAGAA +TAATGCCACGTACCAACA +TAATGCCAGCTGTCAAGA +TAATGCCACCTAAGACGA +TAATGCCAGCTACGAACA +TAATGCCACCTGTAACCA +TAATGCCACCTCGAACAA +TAATGCCAGGTGTGAGAA +TAATGCCAGCTAACAGCA +TAATGCCACGTTACACCA +TAATGCCAGGTGTTACCA +TAATGCCACTTGCAACGA +TAATGCCAGCTAGGATGA +TAATGCCACCTCCAATCA +TAATGCCAGATGCAAGGA +TAATGCCACGTCACAGAA +TAATGCCACATCGTAGGA +TAATGCCAGATAGGACCA +TAATGCCAGCTGATAGGA +TAATGCCAGCTCACACTA +TAATGCCACATACCACGA +TAATGCCATCTCGTACCA +TAATGCCAGCTCCTACAA +TAATGCCAGCTCTGATCA +TAATGCCACTTCCGACAA +TAATGCCAGGTTGGAGGA +TAATGCCACTTCTCACCA +TAATGCCAGTTCCGAAGA +TAATGCCACCTGTGATGA +TAATGCCATATCCGACCA +TAATGCCAGCTCTAAGGA +TAATGCCAGTTCGCAGTA +TAATGCCATCTCTCACGA +TAATGCCAGGTCAGAACA +TAATGCCACGTTCGAGTA +TAATGCCAGATCCTAGCA +TAATGCCACATGGTACCA +TAATGCCAGGTGGAATCA +TAATGCCACGTAGAACGA +TAATGCCACCTCATAGCA +TAATGCCACCTGTCAGAA +TAATGCCACCTGACAACA +TAATGCCAGCTGCGATAA +TAATGCCAGATGCCAACA +TAATGCCACGTATCAGGA +TAATGCCATCTACCAGGA +TAATGCCACTTGAGACCA +TAATGCCACGTCCTAAGA +TAATGCCACGTGATACGA +TAATGCCAGGTAGTAGCA +TAATGCCATCTGGAACGA +TAATGCGAAGTCTCAGGA +TAATGCGACCTGGCAAGA +TAATGCGAAGTCCAAGCA +TAATGCGACCTGCTATGA +TAATGCGACTTAGCACGA +TAATGCGAAGTGCGATCA +TAATGCGACCTCTGAGTA +TAATGCGAAGTAGGACGA +TAATGCGACGTGTAAGGA +TAATGCGACCTTGGAGAA +TAATGCGACGTACTACGA +TAATGCGACCTAAGAGCA +TAATGCGAACTCCTACGA +TAATGCGACCTCGCATAA +TAATGCGACGTCGTACTA +TAATGCGAAGTGCTAGGA +TAATGCCATGTGCCAGAA +TAATGCGAACTAGCAGCA +TAATGCCATGTCGGACTA +TAATGCGACCTGGAATCA +TAATGCGACGTGAGAAGA +TAATGCGACGTGACACAA +TAATGCGACGTTGAAGCA +TAATGCGACCTTACACGA +TAATGCGACATACGACCA +TAATGCGACGTAGGATCA +TAATGCGAAGTTCCACGA +TAATGCGACCTGAGACTA +TAATGCGACTTACGAGGA +TAATGCGACATGCCAGAA +TAATGCGACGTCTGAACA +TAATGCGACCTGGTACAA +TAATGCCATGTCCAACGA +TAATGCGACATCGAACGA +TAATGCGACATGTCACCA +TAATGCGAAGTCCGAAGA +TAATGCCATGTCTGAGGA +TAATGCGACGTCCAACAA +TAATGCGAAGTCGGAGTA +TAATGCGACGTTGCATGA +TAATGCGAAGTCGCACAA +TAATGCGAACTGCCACTA +TAATGCGACGTACCAGTA +TAATGCGACATCGCAACA +TAATGCGAATTGGCAGGA +TAATGCCATCTGCAAGCA +TAATGCGACCTTCAAGGA +TAATGCCATGTTGCACGA +TAATGCGACCTGTTAGCA +TAATGCGACCTACCATCA +TAATGCGAGTTCCTAGGA +TAATGCGAGCTGACATGA +TAATGCGATCTCACAGCA +TAATGCGAGCTACGAGTA +TAATGCGATGTCGTAGGA +TAATGCGACTTGACAGCA +TAATGCGAGGTCAGAGAA +TAATGCGAGGTCTCATCA +TAATGCGATGTGGAACCA +TAATGCGAGCTTCTAGCA +TAATGCGAGATTGCACGA +TAATGCGAGCTCCAATGA +TAATGCGATCTCGCACTA +TAATGCGATATGCGACGA +TAATGCGAGGTGGCATAA +TAATGCGAGCTCGTAACA +TAATGCGAGTTCGGACTA +TAATGCGATCTCCAACCA +TAATGCGAGTTGTGAGCA +TAATGCGAGGTGGTAAGA +TAATGCGAGGTTCGACAA +TAATGCGAGGTTACAGGA +TAATGCGATCTGTCAGGA +TAATGCGAGATACCAGCA +TAATGCGATGTGAGAGCA +TAATGCGAGCTCTGACAA +TAATGCGAGTTGCCAAGA +TAATGCGAGATCACACCA diff --git a/src/__pycache__/common.cpython-38.pyc b/src/__pycache__/common.cpython-38.pyc new file mode 100644 index 0000000..3149c70 Binary files /dev/null and b/src/__pycache__/common.cpython-38.pyc differ diff --git a/src/common.py b/src/common.py new file mode 100644 index 0000000..0842360 --- /dev/null +++ b/src/common.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sat Jan 8 16:49:58 2022 + +@author: weiqiyao +""" + +#!/usr/bin/env python +# coding: utf-8 + +# In[21]: + +from collections import defaultdict + +import pandas as pd +import matplotlib.pyplot as plt + +import os +import re +import Bio.SeqIO as SeqIO + + +THIS_MODULE_PATH = os.path.dirname(os.path.abspath(__file__)) +#print(THIS_MODULE_PATH) + +############################################################################### +# Paths +############################################################################### + +BC = re.compile( + 'T[ACTG]{2}T[ACTG]{3}A[ACTG]{2}T[ACTG]{3}A[ACTG]{2}A') diff --git a/src/filter_generated_codes.sh b/src/filter_generated_codes.sh index 0f79719..95b6641 100644 --- a/src/filter_generated_codes.sh +++ b/src/filter_generated_codes.sh @@ -1,4 +1,6 @@ #! /bin/bash +### Qiyao modified a little bit based on modular cloning system in Thyer lab. +# Gonna filter cutting site for BsaI,BsmBI,AarI ### Shuffle the generated inputs, and apply simple rules to prune unwanted barcodes inputs=$1