2 changes: 1 addition & 1 deletion README.md
@@ -70,7 +70,7 @@
- [@Korusuke](https://github.com/Korusuke)
- [@Rusherrg](https://github.com/RusherRG)
- [@hetzz](https://github.com/hetzz)
- [@aditya1999](https://github.com/aditya1999)
- [@Syn3rman](https://github.com/Syn3rman)
- [@akshay-99](https://github.com/akshay-99)
- [@aviiiij](https://github.com/aviiij)

362 changes: 362 additions & 0 deletions dataset/priority.ipynb
@@ -0,0 +1,362 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [],
"source": [
"from gensim.models.doc2vec import Doc2Vec, TaggedDocument\n",
"from nltk.tokenize import word_tokenize\n",
"import csv\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
"stopwords = ['MON','TUE','WED','THU','FRI','SAT','SUN',\n",
" 'MONDAY','TUESDAY','WEDNESDAY','THURSDAY','FRIDAY','SATURDAY','SUNDAY',\n",
" 'JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC',\n",
" 'JANUARY','FEBRUARY','MARCH','APRIL','MAY','JUNE','JULY','AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER']\n",
"def data():\n",
" #CSV data read\n",
" notams = []\n",
" with open('foreign.csv') as csv_file:\n",
" csv_reader = csv.reader(csv_file, delimiter=',')\n",
" for rows in csv_reader:\n",
" notams.append(rows)\n",
" \n",
" labels = set()\n",
" #data clean\n",
" for i in range(1,len(notams)):\n",
" notams[i][0] = re.sub(r'[^\\w\\s]','',notams[i][0])\n",
" notams[i][0] = notams[i][0].split('\\n')\n",
" lines = \"\"\n",
" for line in notams[i][0]:\n",
" if \"EAIP\" not in line:\n",
" lines += line\n",
" notams[i][0] = re.sub(\" +\",\" \",lines)\n",
" for sw in stopwords[:14:-1]:\n",
" notams[i][0] = re.sub(sw,\" DAYS \",notams[i][0])\n",
" for sw in stopwords[14::-1]:\n",
" notams[i][0] = re.sub(sw,\" MONTHS \",notams[i][0])\n",
" notams[i][1] = int(eval(notams[i][1]))\n",
" if notams[i][1]>2:\n",
" notams[i][1] = 2\n",
" elif notams[i][1]==2:\n",
" notams[i][1] = 1\n",
" else:\n",
" notams[i][1] = 0\n",
" notams[i][0] = re.sub(' +',' ',notams[i][0])\n",
" labels.add(notams[i][1])\n",
" \n",
" print(\"AL: LABELS:\", labels)\n",
" return notams"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AL: LABELS: {0, 1, 2}\n"
]
},
{
"data": {
"text/plain": [
"['UTTI TWR OPR HR MONTHS MONTHS THS 05301300 MONTHS THS 05301130 MONTHS THS MONTHS THS CLSD',\n",
" 2]"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"notams = data()[1:]\n",
"notams[0]"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[TaggedDocument(words=['caution', 'twr', 'lkpr', 'is', 'channel', 'with', 'days', 'arationmake', 'sure', 'about', 'the', 'correct', 'setting', 'on', 'radio', 'panel'], tags=['1']),\n",
" TaggedDocument(words=['due', 'to', 'unreliable', 'aftn', 'feed', 'from', 'pakistankabul', 'area', 'control', 'center', 'will', 'not', 'accept', 'anycoordination', 'information', 'via', 'the', 'aftn', 'system', 'fromthe', 'following', 'firs', 'lahore', 'control', 'karachi', 'controland', 'tehran', 'control', 'only', 'valid', 'method', 'of', 'coordinationwill', 'be', 'via', 'telephone', 'or', 'cellphone', 'communication', 'ufn'], tags=['1']),\n",
" TaggedDocument(words=['in', 'chart', 'standard', 'intrument', 'departure', 'page', 'ad', 'of', 'date', 'days', 'barcelona', 'gral', 'jose', 'antonio', 'anzoategui', 'intlrnav', 'gnss', 'arvex', 'rwy', 'sid', 'osamo', 'transition', 'is', 'suspended'], tags=['0']),\n",
" TaggedDocument(words=['wip', 'wi', 'rectangular', 'areapsn'], tags=['1']),\n",
" TaggedDocument(words=['rnav', 'rte', 'changedfm', 'trs', 'vordme', 'coord', 'tna', 'vordme', 'coord', 'hdg', 'and', 'dist', 'mntn', 'ref', 'aip', 'enr', 'enr', 'enr'], tags=['0'])]"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import re\n",
"def cleaners(tokens):\n",
" normalized = []\n",
" for token in tokens :\n",
" # remove digits + in range throw x>20 x<2\n",
" if 2<= len(token) <= 20 and not re.search(r'\\d', token):\n",
" normalized.append(token)\n",
" return normalized\n",
"\n",
"tagged_notams = [TaggedDocument(words=cleaners(word_tokenize(notam[0].lower())), tags=[str(notam[1])]) for notam in notams]\n",
"tagged_notams[-5:]"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"iteration 50\n",
"Doc2vec embeddings Model Saved\n"
]
}
],
"source": [
"import sys, random\n",
"import multiprocessing\n",
"\n",
"n_notams = len(tagged_notams)\n",
"\n",
"# PARAMS\n",
"max_epochs = 50\n",
"vec_size = 250\n",
"min_count = 2\n",
"dm = 1 #0 CBOW / 1 DM\n",
"negative = 10\n",
"sample = 0 # drop freq threshold\n",
"alpha = 0.025\n",
"\n",
"model = Doc2Vec(size=vec_size,\n",
" min_count=min_count,\n",
" dm = dm,\n",
" negative = negative,\n",
" sample = sample,\n",
" workers=multiprocessing.cpu_count())\n",
"\n",
"update_train_epochs = model.iter\n",
"model.build_vocab(tagged_notams)\n",
"\n",
"for epoch in range(max_epochs):\n",
" sys.stdout.write('\\r' + 'iteration {0}'.format(epoch+1))\n",
" \n",
" random.shuffle(tagged_notams)\n",
" model.train(tagged_notams,\n",
" total_examples=n_notams,\n",
" epochs=update_train_epochs)\n",
" \n",
" # decrease the learning rate\n",
" model.alpha -= 0.002\n",
" # fix the learning rate, no decay\n",
" model.min_alpha = model.alpha\n",
"\n",
"d2v_filename = \"{epochs}Eps_{up_eps}UpEps_{dim}D_{dm}dm_{mc}mincount_{neg}neg_{sp}sample.d2vmodel\".format(epochs=max_epochs,\n",
" dim=vec_size,mc=min_count,\n",
" up_eps=update_train_epochs,\n",
" dm=dm,neg=negative,sp=sample,)\n",
"model.save(d2v_filename)\n",
"print(\"\\nDoc2vec embeddings Model Saved\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Manual Cosine Test Acc: 31.644049477602977\n"
]
}
],
"source": [
"model= Doc2Vec.load(d2v_filename)\n",
"\n",
"\n",
"# Manual COSINE SIMILARITY tester ahead \n",
"from scipy import spatial\n",
"\n",
"'''\n",
"to find the vector of a document which is not in training data\n",
"test = 'PAPI RWY 32 IS RELOCATED AND REVISED GLIDE ANGLE/MEHT AS PER THE MENTIONED DETAILS PAPI RWY 32 LEFT/3.26DEG MEHT/66.71FT PAPI RWY 32 AT DIST OF 387M FM THR OF RWY 32'\n",
"test_data = cleaners(word_tokenize(test.lower()))\n",
"test_vector= model.infer_vector(test_data)\n",
"'''\n",
"\n",
"corr = 0\n",
"\n",
"for notam in notams:\n",
" test_vector = model.infer_vector(cleaners(word_tokenize(notam[0].lower())),steps=20)\n",
" preds = [max(0,1 - spatial.distance.cosine(test_vector, model.docvecs[str(i)])) for i in range(3)]\n",
"\n",
" if preds.index(max(preds)) == notam[1]:\n",
" corr += 1\n",
"\n",
"print(\"Manual Cosine Test Acc: \", corr*100/len(notams))"
]
},
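{
"cell_type": "markdown",
"metadata": {},
"source": [
"An alternative check (a sketch, not part of the original notebook): `docvecs.most_similar` ranks the trained class tags by cosine similarity to an inferred vector, which should broadly agree with the manual loop above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# rank the class tag vectors ('0','1','2') by similarity to one inferred NOTAM vector\n",
"vec = model.infer_vector(cleaners(word_tokenize(notams[0][0].lower())),steps=20)\n",
"print(model.docvecs.most_similar([vec]))"
]
},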
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"model = Doc2Vec.load(d2v_filename)\n",
"\n",
"x_train = []\n",
"y_train = []\n",
"random.shuffle(notams)\n",
"\n",
"for notam in notams:\n",
" x_train.append(model.infer_vector(cleaners(word_tokenize(notam[0].lower())),steps=100))\n",
" \n",
" labels = \"PRIORITY_\"+str(notam[1])\n",
" y_train.append(labels)\n",
"\n",
"x_train = np.array(x_train)\n",
"y_train = np.array(y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score, f1_score\n",
"import pickle\n",
"\n",
"print(\"N SAMPLES: \", len(x_train),len(y_train))\n",
"\n",
"SPLIT_RATIO = 0.9\n",
"n_tr = int(n_notams*SPLIT_RATIO)\n",
"n_te = n_notams - n_tr\n",
"\n",
"print(\"SPLITS: Train:{tr} | Test{te}\".format(tr=n_tr,te=n_te))\n",
"\n",
"X_train = x_train[:n_tr]*10\n",
"Y_train = y_train[:n_tr]\n",
"X_test = x_train[-n_tr:]*10\n",
"Y_test = y_train[-n_tr:]\n",
"\n",
"logreg_model = LogisticRegression(multi_class='multinomial', solver = 'lbfgs', max_iter = 200)\n",
"logreg_model.fit(X_train, Y_train)\n",
"\n",
"\n",
"train_acc = logreg_model.score(X_train, Y_train)*100\n",
"test_acc = logreg_model.score(X_test, Y_test)*100\n",
"\n",
"print(\"TRAIN ACC: \",train_acc)\n",
"print(\"TEST ACC: \",test_acc)\n",
"\n",
"# Example test item\n",
"for i in range(3):\n",
" pick = random.randint(0,n_te)\n",
" print(pick)\n",
" print(logreg_model.predict([X_test[pick]]))\n",
" print(notams[n_te+pick])\n",
"\n",
"y_preds = logreg_model.predict(X_test)\n",
"print('Testing accuracy %s' % accuracy_score(Y_test, y_preds))\n",
"print('Testing F1 score: {}'.format(f1_score(Y_test, y_preds, average='weighted')))\n",
"\n",
"model_labels = {}\n",
"for i in range(len(y_preds)):\n",
" diff = abs(int(y_preds[i][-1]) - int(Y_test[i][-1]))\n",
" try:\n",
" model_labels[diff]\n",
" except:\n",
" model_labels[diff] = 1\n",
" else:\n",
" model_labels[diff] += 1\n",
"print(model_labels)\n",
"\n",
"# SAVER\n",
"filename = \"{train_ac}TRA_{test_ac}TEA.model\".format(train_ac=int(train_acc),test_ac=int(test_acc))\n",
"\n",
"pickle.dump(logreg_model, open(filename, 'wb'))\n",
"print(\"Model Saved!\")"
]
},
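{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick per-class sanity check (a sketch, not part of the original pipeline): confusion matrix on the held-out split, assuming `Y_test` and `y_preds` from the cell above are in scope."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"\n",
"# rows are true classes, columns are predicted classes, in the order listed\n",
"class_names = [\"PRIORITY_0\", \"PRIORITY_1\", \"PRIORITY_2\"]\n",
"print(confusion_matrix(Y_test, y_preds, labels=class_names))"
]
},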
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test = '''PAPI RWY 32 IS RELOCATED AND REVISED GLIDE ANGLE/MEHT AS PER THE MENTIONED DETAILS PAPI RWY 32 LEFT/3.26DEG MEHT/66.71FT PAPI RWY 32 AT DIST OF 387M FM THR OF RWY 32'''\n",
"test = re.sub(r'[^\\w\\s]','',test)\n",
"test = \" \".join(test.split('\\n'))\n",
"lines = \"\"\n",
"for line in test:\n",
" if \"EAIP\" not in line:\n",
" lines += line\n",
"test = re.sub(\" +\",\" \",lines)\n",
"for sw in stopwords[::-1]:\n",
" test = re.sub(sw,\" \",test)\n",
"test = re.sub(' +',' ',test)\n",
"print(cleaners(word_tokenize(test.lower())))\n",
"print(logreg_model.predict([model.infer_vector(cleaners(word_tokenize(test.lower())),steps=1000)]))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
15 changes: 15 additions & 0 deletions webapp/Dockerfile
@@ -0,0 +1,15 @@
FROM python:3.6-alpine

ENV FLASK_SECRET_KEY=trewqyuio
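# NOTE: placeholder secret for local use only; override it at deploy time,
# e.g. docker run -e FLASK_SECRET_KEY=... <image>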

RUN mkdir /app
WORKDIR /app

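# copy requirements first so the pip install layer is cached across code changes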
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

COPY . /app

EXPOSE 5000

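# gunicorn serves the `app` object exported by run.py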
CMD ["gunicorn", "--chdir", "/app", "run:app", "-b", "0.0.0.0:5000"]
Binary file removed webapp/app/static/js/imports/pass.jpeg