From def2aa050f8884fb3671e8c904508088f36ab03b Mon Sep 17 00:00:00 2001
From: arrowboy47
Date: Sat, 8 Jul 2023 15:37:31 -0700
Subject: [PATCH 1/6] custom vader(start) + new search crit(continued)

---
 projectEnv/__pycache__/app.cpython-311.pyc    | Bin 1384 -> 3365 bytes
 projectEnv/__pycache__/search.cpython-311.pyc | Bin 727 -> 719 bytes
 .../__pycache__/searchCode.cpython-311.pyc    | Bin 2565 -> 4617 bytes
 projectEnv/app.py                             |   6 -
 projectEnv/search.py                          |  15 +-
 projectEnv/searchCode.py                      |   2 +-
 .../vaderSentiment/vaderSentiment.py          |   2 +-
 .../vaderSentiment/vaderSentimentAiden.py     | 687 ++++++++++++++++++
 8 files changed, 703 insertions(+), 9 deletions(-)
 create mode 100644 projectEnv/venv/lib/python3.8/site-packages/vaderSentiment/vaderSentimentAiden.py

diff --git a/projectEnv/__pycache__/app.cpython-311.pyc b/projectEnv/__pycache__/app.cpython-311.pyc
index 444f35ba914716aa4f35234ddb72dac200ed9b56..4f6f922cdec19f7675d71cd8bbaa2ec2a852935b 100644
GIT binary patch
[base85 binary delta omitted]

diff --git a/projectEnv/__pycache__/search.cpython-311.pyc b/projectEnv/__pycache__/search.cpython-311.pyc
index e55bb601854dcadd4baa35ca6a4567249655f22a..bccd2c01f9c1b12531fd104d85b1447b9071b348 100644
GIT binary patch
[base85 binary delta omitted]
diff --git a/projectEnv/__pycache__/searchCode.cpython-311.pyc b/projectEnv/__pycache__/searchCode.cpython-311.pyc
GIT binary patch
[base85 binary delta omitted; index header garbled in source]

diff --git a/projectEnv/app.py b/projectEnv/app.py
index 53a14f6b9..53b59b4d4 100644
--- a/projectEnv/app.py
+++ b/projectEnv/app.py
@@ -68,9 +68,3 @@ def loading():
     session['form_data'] = request.form
 
     return render_template("loading.html", gifList=gifList)
-
-
-
-
-
-
diff --git a/projectEnv/search.py b/projectEnv/search.py
index 839277947..816322b5e 100644
--- a/projectEnv/search.py
+++ b/projectEnv/search.py
@@ -1,4 +1,7 @@
 import re
+import praw
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 
 def sentiment_scores(comment_body):
@@ -15,6 +18,11 @@
     else:
         return 0 #neutral
 
-
+def sexbot(comment_body):
+    # regex scorer: "good"-pattern matches minus "bad"-pattern matches
+    good_patt = re.compile(r'gl|good|\.*\sW\.*\s+|fire', re.IGNORECASE)
+    bad_patt = re.compile(r'rl|\sbad|\soff|return', re.IGNORECASE)
+    good_score = len(good_patt.findall(comment_body))
+    bad_score = len(bad_patt.findall(comment_body))
+    score = good_score - bad_score
+    return score
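As a quick check on the regex scorer added above, here is a minimal usage sketch (assumes the patched projectEnv/search.py is importable; the sample comments are made up):

    # counts "good"-pattern hits minus "bad"-pattern hits
    from search import sexbot

    print(sexbot("gl with the order, QC looks good"))  # 'gl' + 'good'   ->  2
    print(sexbot("looks bad, I'd return it"))          # ' bad' + 'return' -> -2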
diff --git a/projectEnv/searchCode.py b/projectEnv/searchCode.py
index 2726cbe63..421e9491b 100644
--- a/projectEnv/searchCode.py
+++ b/projectEnv/searchCode.py
@@ -98,4 +98,4 @@ def loadMore():
 
     post_list.sort(key=sortRating, reverse = True)#sort the post obj in order of rating
 
-    return post_list
\ No newline at end of file
+    return post_list
diff --git a/projectEnv/venv/lib/python3.8/site-packages/vaderSentiment/vaderSentiment.py b/projectEnv/venv/lib/python3.8/site-packages/vaderSentiment/vaderSentiment.py
index e831d9fee..35d9747d5 100644
--- a/projectEnv/venv/lib/python3.8/site-packages/vaderSentiment/vaderSentiment.py
+++ b/projectEnv/venv/lib/python3.8/site-packages/vaderSentiment/vaderSentiment.py
@@ -684,4 +684,4 @@ def score_valence(self, sentiments, text):
                                                              str(vs['compound']), translator_name))
     print("----------------------------------------------------")
 
-    print("\n\n Demo Done!")
\ No newline at end of file
+    print("\n\n Demo Done!")
diff --git a/projectEnv/venv/lib/python3.8/site-packages/vaderSentiment/vaderSentimentAiden.py b/projectEnv/venv/lib/python3.8/site-packages/vaderSentiment/vaderSentimentAiden.py
new file mode 100644
index 000000000..35d9747d5
--- /dev/null
+++ b/projectEnv/venv/lib/python3.8/site-packages/vaderSentiment/vaderSentimentAiden.py
@@ -0,0 +1,687 @@
[687 added lines omitted: vaderSentimentAiden.py is a byte-for-byte copy of the
vendored vaderSentiment.py above, including its demo __main__ block -- both
index lines point at the same blob, 35d9747d5]
From 69ed496772e3696f47b47d2aea48512d4789f332 Mon Sep 17 00:00:00 2001
From: arrowboy47
Date: Sat, 15 Jul 2023 21:15:22 -0700
Subject: [PATCH 2/6] more complex rating system started

---
 projectEnv/stattime.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 projectEnv/stattime.py

diff --git a/projectEnv/stattime.py b/projectEnv/stattime.py
new file mode 100644
index 000000000..41a5bf961
--- /dev/null
+++ b/projectEnv/stattime.py
@@ -0,0 +1,36 @@
+# attempting to explore the relationship between the average comment-rating score and the number of upvotes
+from search import sexbot
+from searchCode import *
+import praw
+
+
+r = praw.Reddit(
+    client_id='jknOULmDh_Xkmi5xLSpl_A',
+    client_secret='5jOGVzfdgGJgRrxS7oPZAzaBZnndEA',
+    user_agent="smol man",
+)
+
+plist = []
+listyy = []
+prompt = "QC"
+
+for i in r.subreddit("FashionReps").search(query=prompt,
+                                           sort="relevance",
+                                           limit=100,
+                                           time_filter="year"):
+    plist.append((i.score, i.num_comments))
+    listyy.append(getRating(i.comments))
+
+
+print(len(plist))
+average_upvotes = sum(upvotes for upvotes, _ in plist) / len(plist)
+average_comments = sum(comments for _, comments in plist) / len(plist)
+average_score = sum(listyy) / len(listyy)
+
+print(f"average score is {average_score}")
+print(f"Average number of upvotes for posts with '{len(plist)}' posts: {average_upvotes}")
+print(f"Average number of comments for posts with '{len(plist)}' posts: {average_comments}")
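The (upvotes, comments) pairs collected above feed the regression introduced in the next two patches. For reference, a minimal sketch of the ordinary-least-squares fit that would turn those pairs into a slope and intercept (the helper name is illustrative, not from the repo; it mirrors the lm(upvotes ~ comments) call in the searching.R file added later):

    from statistics import mean

    def fit_line(pairs):
        # pairs = [(upvotes, comments), ...] as built in stattime.py's plist
        xs = [c for _, c in pairs]   # predictor: comment counts
        ys = [u for u, _ in pairs]   # response: upvotes
        x_bar, y_bar = mean(xs), mean(ys)
        slope = (sum((x - x_bar) * (y - y_bar) for x, y in zip(xs, ys))
                 / sum((x - x_bar) ** 2 for x in xs))
        intercept = y_bar - slope * x_bar
        return slope, intercept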
From 0009778828ed8c1907c1adcc5fca0f4d691c46ea Mon Sep 17 00:00:00 2001
From: arrowboy47
Date: Fri, 4 Aug 2023 16:29:56 -0700
Subject: [PATCH 3/6] more lin added

---
 .vscode/launch.json                           | 25 +++++++++++
 projectEnv/__pycache__/app.cpython-311.pyc    | Bin 3365 -> 3365 bytes
 projectEnv/__pycache__/search.cpython-310.pyc | Bin 850 -> 983 bytes
 projectEnv/__pycache__/search.cpython-311.pyc | Bin 719 -> 1559 bytes
 .../__pycache__/searchCode.cpython-311.pyc    | Bin 4617 -> 4617 bytes
 .../__pycache__/dataModels.cpython-311.pyc    | Bin 0 -> 980 bytes
 projectEnv/instance/posts.db                  | Bin 20480 -> 24576 bytes
 projectEnv/search.py                          |  8 ++++
 projectEnv/searchCode.py                      |  2 -
 projectEnv/searching.R                        | 40 ++++++++++++++++++
 projectEnv/stattime.py                        |  3 ++
 11 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 .vscode/launch.json
 create mode 100644 projectEnv/databases/__pycache__/dataModels.cpython-311.pyc
 create mode 100644 projectEnv/searching.R
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 000000000..0a5775df8
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,25 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+
+        {
+            "type": "firefox",
+            "request": "launch",
+            "reAttach": true,
+            "name": "Launch index.html",
+            "file": "${workspaceFolder}/index.html"
+        },
+        {
+            "type": "node",
+            "request": "launch",
+            "name": "Launch Program",
+            "skipFiles": [
+                "<node_internals>/**"
+            ],
+            "program": "${file}"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/projectEnv/__pycache__/app.cpython-311.pyc b/projectEnv/__pycache__/app.cpython-311.pyc
index 4f6f922cdec19f7675d71cd8bbaa2ec2a852935b..fe70f9f1286b80c1da9e985d2d1318031d7344e3 100644
GIT binary patch
[base85 binary delta omitted]

diff --git a/projectEnv/__pycache__/search.cpython-310.pyc b/projectEnv/__pycache__/search.cpython-310.pyc
index 2b84d6a8a47c639d99b0f315e2ae85a06d16a62d..b4cd999c6e16aeff9923f0e4fdbeaa56572a056d 100644
GIT binary patch
[base85 binary delta omitted]

diff --git a/projectEnv/__pycache__/search.cpython-311.pyc b/projectEnv/__pycache__/search.cpython-311.pyc
index bccd2c01f9c1b12531fd104d85b1447b9071b348..0fa2e7745bdfdd4db3ce3d386fc3266fdf52ffd0 100644
GIT binary patch
[base85 binary delta omitted]
diff --git a/projectEnv/__pycache__/searchCode.cpython-311.pyc b/projectEnv/__pycache__/searchCode.cpython-311.pyc
index 9a03821a87e06c3b6c346eaa7871cc85444b3f6a..1536e0acd797e9b5666f96c1261db929bf549154 100644
GIT binary patch
[base85 binary delta omitted]

[binary diffs for projectEnv/databases/__pycache__/dataModels.cpython-311.pyc
and projectEnv/instance/posts.db omitted; their headers are garbled in the source]

diff --git a/projectEnv/search.py b/projectEnv/search.py
index 816322b5e..07b3d9249 100644
--- a/projectEnv/search.py
+++ b/projectEnv/search.py
@@ -30,4 +30,12 @@ def sexbot(comment_body):
     score = good_score - bad_score
     return score
 
+def linreg():
+    # creates a score for a post based on the linear relation between
+    # the comments and the average number of upvotes for a QC post
+    sd = 28.6
+    m = 2.59
+    int = 5.85
+
+
diff --git a/projectEnv/searchCode.py b/projectEnv/searchCode.py
index 421e9491b..23cdf6be9 100644
--- a/projectEnv/searchCode.py
+++ b/projectEnv/searchCode.py
@@ -67,8 +67,6 @@ def addToPosts(p_data):
 
 
 def searchItem(db,prompt):
-
-
     #purge old processed_ids and posts
     post_list.clear()
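The linreg() stub above only pins down the fit constants; patch 5 later reveals its finished body (visible in the deleted search.py there): the base rate is the fitted linear prediction divided by the spread, i.e. a standardized predicted upvote count. A cleaned-up sketch, with the stub's `int` renamed so it does not shadow the Python builtin:

    def linreg(submission):
        # standardized prediction from the lm() fit in searching.R below
        sd = 28.6          # spread of the fit (empirical)
        m = 2.59           # fitted slope
        intercept = 5.85   # fitted intercept
        c = m * submission.score + intercept
        return c / sd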
diff --git a/projectEnv/searching.R b/projectEnv/searching.R
new file mode 100644
index 000000000..7eafc58e0
--- /dev/null
+++ b/projectEnv/searching.R
@@ -0,0 +1,40 @@
+library(car)
+library(RedditExtractoR)
+library(tidyverse)
+
+
+sexbot <- function(comment_body) {
+  good_patt <- "gl|good|\\.*\\sW\\.*\\s+|fire"
+  bad_patt <- "rl|\\sbad|\\soff|return"
+
+  good <- str_count(comment_body, regex(good_patt, ignore_case = TRUE))
+  bad <- str_count(comment_body, regex(bad_patt, ignore_case = TRUE))
+
+  score <- good - bad
+  return(score)
+}
+
+prompt <- "QC"
+subreddit <- "FashionReps"
+time_filter <- "year" # For RedditExtractoR, this will be either "all" or "day"
+
+# Fetch posts
+posts <- find_thread_urls(subreddit, keywords = prompt, sort_by = "relevance", period = time_filter)
+post_content <- get_thread_content(posts$url)
+threads <- post_content$threads
+comments <- post_content$comments
+
+# Compute averages
+average_upvotes <- mean(threads$score)
+average_comments <- mean(posts$num_comments)
+
+# linear reg
+comxup <- lm(threads$upvotes ~ threads$comments)
+summary(comxup)
+
+# Print results
+print(paste("Average number of upvotes for posts: ", average_upvotes))
+print(paste("Average number of comments for posts: ", average_comments))
diff --git a/projectEnv/stattime.py b/projectEnv/stattime.py
index 41a5bf961..36b1fed83 100644
--- a/projectEnv/stattime.py
+++ b/projectEnv/stattime.py
@@ -23,10 +23,13 @@
 
 print(len(plist))
+# all the averaged values for the 100 posts
 average_upvotes = sum(upvotes for upvotes, _ in plist) / len(plist)
 average_comments = sum(comments for _, comments in plist) / len(plist)
 average_score = sum(listyy) / len(listyy)
 
+
+
 print(f"average score is {average_score}")
 print(f"Average number of upvotes for posts with '{len(plist)}' posts: {average_upvotes}")
 print(f"Average number of comments for posts with '{len(plist)}' posts: {average_comments}")

From f2d7d6ede4fc4c062066c93b205813d8f576c1c9 Mon Sep 17 00:00:00 2001
From: arrowboy47
Date: Fri, 1 Sep 2023 15:22:05 -0700
Subject: [PATCH 4/6] more reg stuff added

---
 projectEnv/__pycache__/search.cpython-311.pyc | Bin 1559 -> 1815 bytes
 projectEnv/search.py                          |  9 ++++-
 projectEnv/searchCode.py                      | 15 ++++---
 projectEnv/testing.py                         | 38 ++++++++++++++++++
 4 files changed, 55 insertions(+), 7 deletions(-)
 create mode 100644 projectEnv/testing.py

diff --git a/projectEnv/__pycache__/search.cpython-311.pyc b/projectEnv/__pycache__/search.cpython-311.pyc
index 0fa2e7745bdfdd4db3ce3d386fc3266fdf52ffd0..d613726f0bbce59256ebee30e6f22839b2ef6fc0 100644
GIT binary patch
[base85 binary delta omitted]

[the text hunks for projectEnv/search.py, projectEnv/searchCode.py, and the new
projectEnv/testing.py are garbled into the binary data in the source and are not
recoverable]

From [commit hash garbled in source] Mon Sep 17 00:00:00 2001
From: arrowboy47
Date: Thu, 7 Sep 2023 20:35:04 -0700
Subject: [PATCH 5/6] good searching now

---
 projectEnv/__pycache__/app.cpython-311.pyc    | Bin 3365 -> 3365 bytes
 projectEnv/__pycache__/search.cpython-311.pyc | Bin 1815 -> 2258 bytes
 .../__pycache__/searchCode.cpython-311.pyc    | Bin 4617 -> 4930 bytes
 projectEnv/instance/posts.db                  | Bin 24576 -> 36864 bytes
 projectEnv/search.py                          | 46 ------------------
 projectEnv/searchCode.py                      | 18 +++++--
 projectEnv/searching.R                        | 40 ---------------
 7 files changed, 15 insertions(+), 89 deletions(-)
 delete mode 100644 projectEnv/search.py
 delete mode 100644 projectEnv/searching.R
diff --git a/projectEnv/__pycache__/app.cpython-311.pyc b/projectEnv/__pycache__/app.cpython-311.pyc
index fe70f9f1286b80c1da9e985d2d1318031d7344e3..00c7783af922e81acf607ddcb84b0dee80661043 100644
GIT binary patch
[base85 binary delta omitted]

diff --git a/projectEnv/__pycache__/search.cpython-311.pyc b/projectEnv/__pycache__/search.cpython-311.pyc
index d613726f0bbce59256ebee30e6f22839b2ef6fc0..5fc0a4edb7c6aee1dc1c41dcfcd0a34efc216004 100644
GIT binary patch
[base85 binary delta omitted]

[binary diff for projectEnv/__pycache__/searchCode.cpython-311.pyc omitted;
its header is garbled in the source]

diff --git a/projectEnv/instance/posts.db b/projectEnv/instance/posts.db
index 3c93b2d47f2bf2e63ecec9fd8f835d73d55839c3..ae7a3349c685b59f3dca2f385fabad654bd4be2c 100644
GIT binary patch
[base85 binary literal omitted]
zL@iP7*#K?xqYYIq*8K|Z{w~3H1hl{yA;zVem4sagJc-o--sv^oE9}-y&|mv;{QsEu zyTAFDJa^fkju!CN@Xk9}KdU2G2yK-V8V`v<*@vG_xEL$E{g>bR-S72EEyQlTQ@rr3 zyH~t8k!81G;=a2>X^IcYQ{g#vc*alUwo0w0ct`;EV0HwZ(@_sbQg8&9j)jgo8eDZ` z1KFr7ebO^A?)pU(Xd6vMPChIqjnKPcp;Z=`LcbnR@{HL2l;`$rxK~){2#;-_a$H{_ zp*h4?X!x4!a(^A}Gze~zHOyF9G=x}XwoM|gT!p!dm`S(<_g;OFK5g8@K7F%PyQ zW(|E8O>j~8c1Lufq?`D5*SEeAc27d1`-%Z zU?72k1O^iL+av*J;aP&fO9UM9A9=X-jrUaofBe!R&8qF11eU_UJKLTnhKmN2~= z^X>q|Yy1H7TLk6yhn%=W)03|$`NRWasp(7ahQ6_Vm8**GQ_`u2R&jz`L_409sD}8D zQr!B>&wupx=RX?np`D=RR|2#X^lavwg$6^Q*Fup1cfXYLz+B1d0_r=|mN5jNKC}u_ z%L~B8{2rc7d6&q>lcdpgL4QAE@!Z_QhZ}c$$(9wjS0daKbh>;bs@Nrt0ztneCpq$u SrQ7X{sg;j3)SGw!<$nV&&u0Kar?8N1Ll^tM0kl`kIh%s>jB`N4)I z4zOgHO(t072zSmoL5*9c@Z%mwNET4U5II!P!3<-(SVtcX-tgvwU0k`QgPqkoMQxdX S%Ag8O-~-L-g= 0.05: #positive - return 1 - - elif scores['compound'] <= - 0.05: #negative - return -1 - - else: - return 0 #neutral - -def regbot(comment_body): - score = 0 - good_patt = re.compile(r'gl|good|\.*\sW\.*\s+|fire', re.IGNORECASE) - bad_patt = re.compile(r'rl|\sbad|\soff|return', re.IGNORECASE) - good = good_patt.findall(comment_body) - bad = bad_patt.findall(comment_body) - good_score = len(good) - bad_score = len(bad) - score = good_score + bad_score - score = good_score - bad_score - return(score) - -def linreg(submission): - # creates score for a post based on the linear relation - # between the comments and the average number of upvotes for a QC post - sd = 28.6 - m = 2.59 - int = 5.85 - c = m * submission.score + int - return(c/sd) - - - - - - diff --git a/projectEnv/searchCode.py b/projectEnv/searchCode.py index 4645a6660..80eab98d9 100644 --- a/projectEnv/searchCode.py +++ b/projectEnv/searchCode.py @@ -42,12 +42,24 @@ def sortRating(s): def getRating(prpost): rating = 0 + sentimentrating = 0 + sentind = 0 prpost.comments.replace_more(limit=None) #replace all unloaded comment obj with loaded comments # rating from the comment lin reg baserate = linreg(prpost) + for comment in prpost.comments: - rating += sentiment_scores(comment.body) - return rating + sentimentrating += sentiment_scores(comment.body) + sentind = sentind + 1 + + rating = baserate * ((sentimentrating / sentind) + 1) + + + # temp checks + print("base", baserate) + print("sentimentrating", sentimentrating/sentind) + + return rating @@ -57,7 +69,7 @@ def addToPosts(p_data): rating = get_data_if_exist(p_data.id) if rating == False: print("getting ratings", p_data.id) - rating = getRating(p_data.comments) + rating = getRating(p_data) cached = False print("not cached") #add to processed post diff --git a/projectEnv/searching.R b/projectEnv/searching.R deleted file mode 100644 index 7eafc58e0..000000000 --- a/projectEnv/searching.R +++ /dev/null @@ -1,40 +0,0 @@ -library(car) -library(RedditExtractoR) -library(tidyverse) - - -sexbot <- function(comment_body) { - good_patt <- "gl|good|\\.*\\sW\\.*\\s+|fire" - bad_patt <- "rl|\\sbad|\\soff|return" - - good <- str_count(comment_body, regex(good_patt, ignore_case = TRUE)) - bad <- str_count(comment_body, regex(bad_patt, ignore_case = TRUE)) - - score <- good - bad - return(score) -} - -prompt <- "QC" -subreddit <- "FashionReps" -time_filter <- "year" # For RedditExtractoR, this will be either "all" or "day" - -# Fetch posts -posts <- find_thread_urls(subreddit, keywords = prompt, sort_by = "relevance", period = time_filter) -post_content <- get_thread_content(posts$url) -threads <- post_content$threads -comments <- post_content$comments - -# Compute averages -average_upvotes <- mean(threads$score) -average_comments <- 
diff --git a/projectEnv/searching.R b/projectEnv/searching.R
deleted file mode 100644
index 7eafc58e0..000000000
--- a/projectEnv/searching.R
+++ /dev/null
@@ -1,40 +0,0 @@
-library(car)
-library(RedditExtractoR)
-library(tidyverse)
-
-
-sexbot <- function(comment_body) {
-  good_patt <- "gl|good|\\.*\\sW\\.*\\s+|fire"
-  bad_patt <- "rl|\\sbad|\\soff|return"
-
-  good <- str_count(comment_body, regex(good_patt, ignore_case = TRUE))
-  bad <- str_count(comment_body, regex(bad_patt, ignore_case = TRUE))
-
-  score <- good - bad
-  return(score)
-}
-
-prompt <- "QC"
-subreddit <- "FashionReps"
-time_filter <- "year" # For RedditExtractoR, this will be either "all" or "day"
-
-# Fetch posts
-posts <- find_thread_urls(subreddit, keywords = prompt, sort_by = "relevance", period = time_filter)
-post_content <- get_thread_content(posts$url)
-threads <- post_content$threads
-comments <- post_content$comments
-
-# Compute averages
-average_upvotes <- mean(threads$score)
-average_comments <- mean(posts$num_comments)
-
-# linear reg
-comxup <- lm(threads$upvotes ~ threads$comments)
-summary(comxup)
-
-# Print results
-print(paste("Average number of upvotes for posts: ", average_upvotes))
-print(paste("Average number of comments for posts: ", average_comments))

From 6b5dcfa0baee14452763f7c3a9c266f095380016 Mon Sep 17 00:00:00 2001
From: arrowboy47
Date: Thu, 7 Sep 2023 20:36:54 -0700
Subject: [PATCH 6/6] good searchin

---
 projectEnv/searchCode.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/projectEnv/searchCode.py b/projectEnv/searchCode.py
index 80eab98d9..139188d5d 100644
--- a/projectEnv/searchCode.py
+++ b/projectEnv/searchCode.py
@@ -58,7 +58,6 @@ def getRating(prpost):
     # temp checks
     print("base", baserate)
     print("sentimentrating", sentimentrating/sentind)
-
     return rating
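Taken together, the series leaves getRating() as the scoring entry point. A minimal end-to-end sketch of how it would be driven (hypothetical credentials; assumes linreg and sentiment_scores remained importable by searchCode.py after search.py was deleted, which the lost patch 4 hunks presumably arranged):

    import praw
    from searchCode import getRating

    reddit = praw.Reddit(client_id="...", client_secret="...", user_agent="example")
    for post in reddit.subreddit("FashionReps").search("QC", sort="relevance", limit=5):
        print(post.title, round(getRating(post), 2))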