2 changes: 1 addition & 1 deletion README.md
@@ -70,7 +70,7 @@
- [@Korusuke](https://github.com/Korusuke)
- [@Rusherrg](https://github.com/RusherRG)
- [@hetzz](https://github.com/hetzz)
- [@aditya1999](https://github.com/aditya1999)
- [@Syn3rman](https://github.com/Syn3rman)
- [@akshay-99](https://github.com/akshay-99)
- [@aviiiij](https://github.com/aviiij)

362 changes: 362 additions & 0 deletions dataset/priority.ipynb
@@ -0,0 +1,362 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [],
"source": [
"from gensim.models.doc2vec import Doc2Vec, TaggedDocument\n",
"from nltk.tokenize import word_tokenize\n",
"import csv\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
"stopwords = ['MON','TUE','WED','THU','FRI','SAT','SUN',\n",
" 'MONDAY','TUESDAY','WEDNESDAY','THURSDAY','FRIDAY','SATURDAY','SUNDAY',\n",
" 'JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC',\n",
" 'JANUARY','FEBRUARY','MARCH','APRIL','MAY','JUNE','JULY','AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER']\n",
"def data():\n",
" #CSV data read\n",
" notams = []\n",
" with open('foreign.csv') as csv_file:\n",
" csv_reader = csv.reader(csv_file, delimiter=',')\n",
" for rows in csv_reader:\n",
" notams.append(rows)\n",
" \n",
" labels = set()\n",
" #data clean\n",
" for i in range(1,len(notams)):\n",
" notams[i][0] = re.sub(r'[^\\w\\s]','',notams[i][0])\n",
" notams[i][0] = notams[i][0].split('\\n')\n",
" lines = \"\"\n",
" for line in notams[i][0]:\n",
" if \"EAIP\" not in line:\n",
" lines += line\n",
" notams[i][0] = re.sub(\" +\",\" \",lines)\n",
" for sw in stopwords[:14:-1]:\n",
" notams[i][0] = re.sub(sw,\" DAYS \",notams[i][0])\n",
" for sw in stopwords[14::-1]:\n",
" notams[i][0] = re.sub(sw,\" MONTHS \",notams[i][0])\n",
" notams[i][1] = int(eval(notams[i][1]))\n",
" if notams[i][1]>2:\n",
" notams[i][1] = 2\n",
" elif notams[i][1]==2:\n",
" notams[i][1] = 1\n",
" else:\n",
" notams[i][1] = 0\n",
" notams[i][0] = re.sub(' +',' ',notams[i][0])\n",
" labels.add(notams[i][1])\n",
" \n",
" print(\"AL: LABELS:\", labels)\n",
" return notams"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AL: LABELS: {0, 1, 2}\n"
]
},
{
"data": {
"text/plain": [
"['UTTI TWR OPR HR MONTHS MONTHS THS 05301300 MONTHS THS 05301130 MONTHS THS MONTHS THS CLSD',\n",
" 2]"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"notams = data()[1:]\n",
"notams[0]"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[TaggedDocument(words=['caution', 'twr', 'lkpr', 'is', 'channel', 'with', 'days', 'arationmake', 'sure', 'about', 'the', 'correct', 'setting', 'on', 'radio', 'panel'], tags=['1']),\n",
" TaggedDocument(words=['due', 'to', 'unreliable', 'aftn', 'feed', 'from', 'pakistankabul', 'area', 'control', 'center', 'will', 'not', 'accept', 'anycoordination', 'information', 'via', 'the', 'aftn', 'system', 'fromthe', 'following', 'firs', 'lahore', 'control', 'karachi', 'controland', 'tehran', 'control', 'only', 'valid', 'method', 'of', 'coordinationwill', 'be', 'via', 'telephone', 'or', 'cellphone', 'communication', 'ufn'], tags=['1']),\n",
" TaggedDocument(words=['in', 'chart', 'standard', 'intrument', 'departure', 'page', 'ad', 'of', 'date', 'days', 'barcelona', 'gral', 'jose', 'antonio', 'anzoategui', 'intlrnav', 'gnss', 'arvex', 'rwy', 'sid', 'osamo', 'transition', 'is', 'suspended'], tags=['0']),\n",
" TaggedDocument(words=['wip', 'wi', 'rectangular', 'areapsn'], tags=['1']),\n",
" TaggedDocument(words=['rnav', 'rte', 'changedfm', 'trs', 'vordme', 'coord', 'tna', 'vordme', 'coord', 'hdg', 'and', 'dist', 'mntn', 'ref', 'aip', 'enr', 'enr', 'enr'], tags=['0'])]"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import re\n",
"def cleaners(tokens):\n",
" normalized = []\n",
" for token in tokens :\n",
" # remove digits + in range throw x>20 x<2\n",
" if 2<= len(token) <= 20 and not re.search(r'\\d', token):\n",
" normalized.append(token)\n",
" return normalized\n",
"\n",
"tagged_notams = [TaggedDocument(words=cleaners(word_tokenize(notam[0].lower())), tags=[str(notam[1])]) for notam in notams]\n",
"tagged_notams[-5:]"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"iteration 50\n",
"Doc2vec embeddings Model Saved\n"
]
}
],
"source": [
"import sys, random\n",
"import multiprocessing\n",
"\n",
"n_notams = len(tagged_notams)\n",
"\n",
"# PARAMS\n",
"max_epochs = 50\n",
"vec_size = 250\n",
"min_count = 2\n",
"dm = 1 #0 CBOW / 1 DM\n",
"negative = 10\n",
"sample = 0 # drop freq threshold\n",
"alpha = 0.025\n",
"\n",
"model = Doc2Vec(size=vec_size,\n",
" min_count=min_count,\n",
" dm = dm,\n",
" negative = negative,\n",
" sample = sample,\n",
" workers=multiprocessing.cpu_count())\n",
"\n",
"update_train_epochs = model.iter\n",
"model.build_vocab(tagged_notams)\n",
"\n",
"for epoch in range(max_epochs):\n",
" sys.stdout.write('\\r' + 'iteration {0}'.format(epoch+1))\n",
" \n",
" random.shuffle(tagged_notams)\n",
" model.train(tagged_notams,\n",
" total_examples=n_notams,\n",
" epochs=update_train_epochs)\n",
" \n",
" # decrease the learning rate\n",
" model.alpha -= 0.002\n",
" # fix the learning rate, no decay\n",
" model.min_alpha = model.alpha\n",
"\n",
"d2v_filename = \"{epochs}Eps_{up_eps}UpEps_{dim}D_{dm}dm_{mc}mincount_{neg}neg_{sp}sample.d2vmodel\".format(epochs=max_epochs,\n",
" dim=vec_size,mc=min_count,\n",
" up_eps=update_train_epochs,\n",
" dm=dm,neg=negative,sp=sample,)\n",
"model.save(d2v_filename)\n",
"print(\"\\nDoc2vec embeddings Model Saved\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Manual Cosine Test Acc: 31.644049477602977\n"
]
}
],
"source": [
"model= Doc2Vec.load(d2v_filename)\n",
"\n",
"\n",
"# Manual COSINE SIMILARITY tester ahead \n",
"from scipy import spatial\n",
"\n",
"'''\n",
"to find the vector of a document which is not in training data\n",
"test = 'PAPI RWY 32 IS RELOCATED AND REVISED GLIDE ANGLE/MEHT AS PER THE MENTIONED DETAILS PAPI RWY 32 LEFT/3.26DEG MEHT/66.71FT PAPI RWY 32 AT DIST OF 387M FM THR OF RWY 32'\n",
"test_data = cleaners(word_tokenize(test.lower()))\n",
"test_vector= model.infer_vector(test_data)\n",
"'''\n",
"\n",
"corr = 0\n",
"\n",
"for notam in notams:\n",
" test_vector = model.infer_vector(cleaners(word_tokenize(notam[0].lower())),steps=20)\n",
" preds = [max(0,1 - spatial.distance.cosine(test_vector, model.docvecs[str(i)])) for i in range(3)]\n",
"\n",
" if preds.index(max(preds)) == notam[1]:\n",
" corr += 1\n",
"\n",
"print(\"Manual Cosine Test Acc: \", corr*100/len(notams))"
]
},
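{
"cell_type": "markdown",
"metadata": {},
"source": [
"An alternative check (a sketch, not part of the original notebook): `docvecs.most_similar` ranks the trained class tags by cosine similarity to an inferred vector, which should broadly agree with the manual loop above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# rank the class tag vectors ('0','1','2') by similarity to one inferred NOTAM vector\n",
"vec = model.infer_vector(cleaners(word_tokenize(notams[0][0].lower())),steps=20)\n",
"print(model.docvecs.most_similar([vec]))"
]
},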
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"model = Doc2Vec.load(d2v_filename)\n",
"\n",
"x_train = []\n",
"y_train = []\n",
"random.shuffle(notams)\n",
"\n",
"for notam in notams:\n",
" x_train.append(model.infer_vector(cleaners(word_tokenize(notam[0].lower())),steps=100))\n",
" \n",
" labels = \"PRIORITY_\"+str(notam[1])\n",
" y_train.append(labels)\n",
"\n",
"x_train = np.array(x_train)\n",
"y_train = np.array(y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score, f1_score\n",
"import pickle\n",
"\n",
"print(\"N SAMPLES: \", len(x_train),len(y_train))\n",
"\n",
"SPLIT_RATIO = 0.9\n",
"n_tr = int(n_notams*SPLIT_RATIO)\n",
"n_te = n_notams - n_tr\n",
"\n",
"print(\"SPLITS: Train:{tr} | Test{te}\".format(tr=n_tr,te=n_te))\n",
"\n",
"X_train = x_train[:n_tr]*10\n",
"Y_train = y_train[:n_tr]\n",
"X_test = x_train[-n_tr:]*10\n",
"Y_test = y_train[-n_tr:]\n",
"\n",
"logreg_model = LogisticRegression(multi_class='multinomial', solver = 'lbfgs', max_iter = 200)\n",
"logreg_model.fit(X_train, Y_train)\n",
"\n",
"\n",
"train_acc = logreg_model.score(X_train, Y_train)*100\n",
"test_acc = logreg_model.score(X_test, Y_test)*100\n",
"\n",
"print(\"TRAIN ACC: \",train_acc)\n",
"print(\"TEST ACC: \",test_acc)\n",
"\n",
"# Example test item\n",
"for i in range(3):\n",
" pick = random.randint(0,n_te)\n",
" print(pick)\n",
" print(logreg_model.predict([X_test[pick]]))\n",
" print(notams[n_te+pick])\n",
"\n",
"y_preds = logreg_model.predict(X_test)\n",
"print('Testing accuracy %s' % accuracy_score(Y_test, y_preds))\n",
"print('Testing F1 score: {}'.format(f1_score(Y_test, y_preds, average='weighted')))\n",
"\n",
"model_labels = {}\n",
"for i in range(len(y_preds)):\n",
" diff = abs(int(y_preds[i][-1]) - int(Y_test[i][-1]))\n",
" try:\n",
" model_labels[diff]\n",
" except:\n",
" model_labels[diff] = 1\n",
" else:\n",
" model_labels[diff] += 1\n",
"print(model_labels)\n",
"\n",
"# SAVER\n",
"filename = \"{train_ac}TRA_{test_ac}TEA.model\".format(train_ac=int(train_acc),test_ac=int(test_acc))\n",
"\n",
"pickle.dump(logreg_model, open(filename, 'wb'))\n",
"print(\"Model Saved!\")"
]
},
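{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick per-class sanity check (a sketch, not part of the original pipeline): confusion matrix on the held-out split, assuming `Y_test` and `y_preds` from the cell above are in scope."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"\n",
"# rows are true classes, columns are predicted classes, in the order listed\n",
"class_names = [\"PRIORITY_0\", \"PRIORITY_1\", \"PRIORITY_2\"]\n",
"print(confusion_matrix(Y_test, y_preds, labels=class_names))"
]
},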
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test = '''PAPI RWY 32 IS RELOCATED AND REVISED GLIDE ANGLE/MEHT AS PER THE MENTIONED DETAILS PAPI RWY 32 LEFT/3.26DEG MEHT/66.71FT PAPI RWY 32 AT DIST OF 387M FM THR OF RWY 32'''\n",
"test = re.sub(r'[^\\w\\s]','',test)\n",
"test = \" \".join(test.split('\\n'))\n",
"lines = \"\"\n",
"for line in test:\n",
" if \"EAIP\" not in line:\n",
" lines += line\n",
"test = re.sub(\" +\",\" \",lines)\n",
"for sw in stopwords[::-1]:\n",
" test = re.sub(sw,\" \",test)\n",
"test = re.sub(' +',' ',test)\n",
"print(cleaners(word_tokenize(test.lower())))\n",
"print(logreg_model.predict([model.infer_vector(cleaners(word_tokenize(test.lower())),steps=1000)]))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
15 changes: 15 additions & 0 deletions webapp/Dockerfile
@@ -0,0 +1,15 @@
FROM python:3.6-alpine

ENV FLASK_SECRET_KEY=trewqyuio
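# NOTE: placeholder secret for local use only; override it at deploy time,
# e.g. docker run -e FLASK_SECRET_KEY=... <image>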

RUN mkdir /app
WORKDIR /app

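# copy requirements first so the pip install layer is cached across code changes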
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

COPY . /app

EXPOSE 5000

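# gunicorn serves the `app` object exported by run.py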
CMD ["gunicorn", "--chdir", "/app", "run:app", "-b", "0.0.0.0:5000"]
Binary file removed webapp/app/static/js/imports/pass.jpeg