From e52f8f0c6bd258898de7bfad08b341f82d2a1e1e Mon Sep 17 00:00:00 2001 From: JJ D'Aurora Date: Mon, 9 Mar 2020 21:16:16 -0700 Subject: [PATCH 1/5] parellel dots --- news_api/__pycache__/items.cpython-38.pyc | Bin 470 -> 491 bytes news_api/__pycache__/pipelines.cpython-38.pyc | Bin 2026 -> 2026 bytes news_api/items.py | 3 ++- news_api/pipelines.py | 2 +- news_api/settings.cfg | 4 ++++ .../__pycache__/news_agg.cpython-38.pyc | Bin 1972 -> 2148 bytes news_api/spiders/news_agg.py | 18 ++++++++++++++++++ 7 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 news_api/settings.cfg diff --git a/news_api/__pycache__/items.cpython-38.pyc b/news_api/__pycache__/items.cpython-38.pyc index 1f2c692f849f675755735f2ec63b897a9d4079bb..d68d6cc39456fffd2759f114aca147de8bbd03fa 100644 GIT binary patch delta 120 zcmcb{{F<3Bl$V!_0SGjO)8qJXPa^GY&vfy7FNA_1TgMFNxU7+pAofJ_b+4#vqHjA{Z9aj+za K5S+Y^Q33!qf*gwg delta 119 zcmaFOe2tkel$V!_0SLbQiHdvAIFT=b(Pd)3vsemaFoP!3EpETm@?yt=OwW?kTt7{o zi5GG?S27gw1JxJtPYz^sVG{r{IansoWmMCEh=Qd+1W4u5LZ7{_Ol$z+nvuB|QN%KCyH>b3~B)`Qp{szogY>2~Q0Ik=>mVVgCZ#F_D> zWtSf6QG_L+*Y@IF5d02a^^~(8!4Ke*Am(BI^ZQT!&&(v>F8rQq?9}Tng6H#0|HXGU zX|!-QS=gNCh~wDt9pRk0oStKho+55@_8xI2(7-%3{Tjzz)OIExSMK-=>-YR{s~g-u z9`^S-@p5;&KkNjHgTuAm2ao&F;?~lmozB{m{b%c&8()9432x?p$is#W2U{>g80t&D zYF$40Fg8afH=1ZnxH-18c}7NLWStvhtgTZtrtp?D(TH+u1p(70+^h2Tf6pq+jh89Dab2>A36SC?W2*RG>84MHb z5o&mN3d;Le%*;R0ReUA?MYoq`%MK5u%(k!#Ip#AR)n1n|f{G3u3hm{FmgBA_S3ziTsJNGW z0l$NO0l&uK=;Y|+=z9^&kGwn&c@y$}*50;)n`+fVuzo(GQJZaoeLUJ6o;S{ra0Gio zg2iZNukE2L@HGng=4J5e;*j9F?$hId!c77@f~}tQk~SWbm7Uvbv?5DeSWUDf!X6-C z(nNw9r~mF4cZ@T`%rH014ZDV2!=7OenA{?a?sf!%PgO34d@S>6l*DSNPo4f{Q4z6< zvz`L^JD Date: Sun, 22 Mar 2020 01:28:44 -0700 Subject: [PATCH 2/5] keyword matching --- .DS_Store | Bin 6148 -> 8196 bytes news_api/.DS_Store | Bin 8196 -> 8196 bytes news_api/__pycache__/items.cpython-38.pyc | Bin 491 -> 511 bytes news_api/__pycache__/pipelines.cpython-38.pyc | Bin 2026 -> 2026 bytes news_api/__pycache__/settings.cpython-38.pyc | Bin 373 -> 373 bytes news_api/items.py | 1 + news_api/pipelines.py | 7 +- .../__pycache__/news_agg.cpython-38.pyc | Bin 2148 -> 2709 bytes news_api/spiders/news_agg.py | 69 ++++++++++++------ settings.cfg | 4 + 10 files changed, 55 insertions(+), 26 deletions(-) create mode 100644 settings.cfg diff --git a/.DS_Store b/.DS_Store index 3c657618c90d77d19ae8cc252ca6c5dacd4de52f..607b25d26ca39324fe911aa99bb61142c60c1f88 100644 GIT binary patch literal 8196 zcmeHMTWl0n82-Pdz|3&z6hXGwg_Y7!VbQkGmYce5xz$S`w50_q%k0ho6Q?uF&TLDm z)q0CQ5R8|^L}FryFIs#d(FgTWBT-+BB%%+#81=yyjlTGV|Cuw}l(syW5G7$wa?XFw ze>vyOcfK=c&nyw4vurjKC5cGkETq1Oi#d|8i##hOBF#dU;CP}O8B`>@+wlssykkX- zK#V|)K#V|)K#aicfB>J_A}KdH_oX#1V+3LZ?nng0`ytI)$ZRAhqzqpjT!dEumQ}0{ z6W9435XPh>+_QI|meZFF^V97w2Daz=hpdv%GN)Jjj7WFEa}Sg(Vec<^PI<`nb9zm| za0+F|2&@jr>9NmR{y<=z2}JFP@0EhC&~d=#SfR`5dfB%QS{z^xTe~>S*P%`>Ua@LT zedDH<_MIJ9mn^N;wPo7!{$bm<^N!UwVw=IK!-ik5T+?<>9$`S;z&>GHCAB7TS?zU= zA?vEXw0g{R?)90$Hdr=8Gf+8Xt_MRc#6)tWb&}kcbKMSDvDB{ZIsI% zK52cnNw!p8;##_&PoRA?$UCG+qjZkW(<^j=-la=)nLed2=n8#BU(+}A1N}rl(=YTZ z{Z4<-pY#_14U4cCN!*LISce8|z-DYg8``lGyRirR(2e~Vz+oK4AWRIw#c7lc4Q-9<*p`SanN4h&$3&L4<`ylb&u7lcxVCmp)2o=& z(z;_OTNf~Wr8V2ZRubDw{rCL4NR#w2bMITaMn5v|eq+x4P1i9W)mVvI)FFk{SdaU# z5gBY^#%;wmY)4nfy1h7ngXl#c4j~5v1!kVb%queUd;}QBGk6xy;dz|HdAy3(@H*bW z+qj6y8SLw>@J>oxnu~qIJtf!i+>>k*tkw9ODNrf=QasALt3W5{r1)Yy?ZkPH{idHd z+>FyLM_``pF)8lt-GSZ^q4eVlx>x zHi$DXX6N7#WCkh&0s(Fy;R@2RvG6AE0nTm1k+PgTp{r_`LoA2*za5ng0 z&h49yj7OPi)z+@Sm6o27nI(ucTkh`ZwQsp++y%v@l{NKEZ5_)SH|0pwP4ny%o4tvn zOw$dEon(4k%ylh@Aw!MnsTSEI`vj>}7?_N(iAiQnJA;0?NR%E4!q>c-O$f?^tM_8|TVJrDhGApWU zced=dSM$Bue{r?f@~acevd8NyD5Of58Jd1Je1>V(=|R;T(G86mr~0Ru$s$o*%1TlZ zQr*Q{6h%tna2~}nh4R3}lvi2CJ=bq1P{W-xo{i(x)l)^v;3+tBjZKtFrRh8<$8K%o zb|z26v4b7lCdpsMel0iai{vd?;XFT*&z$HF@)H@zfd@YLp`ZvA*oJBZP{VodLNi*> znPS?5LpY2O!Z?ECP%*-}>YVF1=W4>jc_eTVmv9-^aNQpD+?>Bq^f1f&J>B)6-9NA^ F`~_>|`I7(u delta 1130 zcmeH_$xjqP6vpct2Iwvq3K*6qv<{1-!{$UpSp^Y;0uwHSEK$r1-9X2oS-M#yCKDs5 z@nq5mV}iv00He_;G4bMp(U=&CF-AS82aiT0i6^X{9^@bJXdSA$>U*#2z28@JO><4R z<};*gk+G;|n1R4MxRTt-DXD@;w@Am^+O5Tm*_d4DD-BfDHnyy8+n&ZK&Gd9nX@(Yw zs=Z-jK-CpZAL^Rc6mz&wQ&ip51~qj;6w_U|Lwa~bUCqwPWm2A$zmes{ZsrQCNa&f2 zXycQbdDgi@@)n2!yL(azn`(M+RIz`l#3f1c?y|0CdSa5lgolJfhBm7v3ai*Fip&U| z(ey+tt=un&OzDb-BWl!1r`0w5sJ(kioiMafJ#pA@ND$Le)-y0VGQvAB5?3`J5d^l< zZ&%R~Hq6d3Rp({I`$_Ujt(MQz61ztpl4oRvJSQ*7Yx16aCf~>p@{{}`Yh)b)L{Myn zghCXd7y;Cv7IkPqGuqLCAUYAkDfHts!WhCB#xVgC=P-|pxP&XPuz(v_#7!*WJ|18h zkMRVr@CF|UANhp1V1Mutbe4@7AFXxIa>}0aeFy6gTaU7Gt*=@BSe)r!q`Vg?kLJr= zoS8ba>51dfhGQ(%rX$#$OFQKPnb}yJZL#c=nTvbnZ2fX6Wl7u@F&QnN9!+lFWkuT&s`OU$KNJ9?C zngup54qpbX_WfNGAo4)r*SRvbec#~i{5bfXu27=VgF3`2`Up2Rd}a2^*h$5CIl Vf;kI!QghebUWed+uK$ed^*>=#3)TPt diff --git a/news_api/__pycache__/items.cpython-38.pyc b/news_api/__pycache__/items.cpython-38.pyc index d68d6cc39456fffd2759f114aca147de8bbd03fa..c91e64a4a5d7213af7d0527008f23779f048b484 100644 GIT binary patch delta 138 zcmaFO{GXXOl$V!_0SNB@&5P5S$eYL*GBMvb>|EYB}WDPGA?BnUL1NN{o>qYH-!kjcTq!8my~qlN}V94rYU agn+~?4x8Nkl+v73I}RYPm{yWLS(2LTr^!3< zMg`|eh9Uu=@*;uB$&4;+LO>=5%jETp8X6E$uoQ?81QNG6Y;yBcN^?@}IDoujCLqDV F3;>F98BYKJ diff --git a/news_api/__pycache__/pipelines.cpython-38.pyc b/news_api/__pycache__/pipelines.cpython-38.pyc index cd21fcc67edb8bc6c3703b90cb625c6dd3df76b6..6aaf694bd30fcd0a00f4f3d8232ffd81f281d924 100644 GIT binary patch delta 59 zcmaFG|B9bCl$V!_0SG28EsS&C$g9K5C^gxN`6i?MW)T)oMn>()ajZRzrjuW@?qRf? Oyn<~3BhzG0_Jsh`C=hG_ delta 59 zcmaFG|B9bCl$V!_0SML#rpMWBHvF#-TIcLZMm delta 20 acmey$^p%M_l$V!_0SH1hV>WVUF#-TJ3j}lk diff --git a/news_api/items.py b/news_api/items.py index 9e8cd11..6a6313d 100644 --- a/news_api/items.py +++ b/news_api/items.py @@ -20,5 +20,6 @@ class NewsApiItem(scrapy.Item): publishedat = scrapy.Field() content = scrapy.Field() sentiment = scrapy.Field() + keywords = scrapy.Field() pass \ No newline at end of file diff --git a/news_api/pipelines.py b/news_api/pipelines.py index 041b1e0..a7a165a 100644 --- a/news_api/pipelines.py +++ b/news_api/pipelines.py @@ -19,9 +19,8 @@ def process_item(self, item, spider): return item class MongoDBPipeline(object): - # pdb.set_trace() - today = 'newsAgg_' + datetime.strftime(datetime.now(), "%Y/%m/%d") + today = 'newsAgg_' + datetime.strftime((datetime.now() - timedelta(days=1)), "%Y/%m/%d") def __init__(self, mongo_uri, mongo_db, mongo_collection): @@ -52,12 +51,12 @@ def open_spider(self, spider): def close_spider(self, spider): - # ## clean up when spider is closed + ## clean up when spider is closed self.client.close() def process_item(self, item, spider): ## how to handle each post - + # pdb.set_trace() self.collection.insert(dict(item)) diff --git a/news_api/spiders/__pycache__/news_agg.cpython-38.pyc b/news_api/spiders/__pycache__/news_agg.cpython-38.pyc index 47b8ab6d30b0f6c61dd7121346c099d2f3822563..5250d9fabf4c08b1aafdb81ca565ed2a2d983b78 100644 GIT binary patch delta 1410 zcmZWpTW=dh6rR~#dvCs_O_MlD8@EjYfzlFzgg^~9wSraxRcedGN^KbLnb;ezcb%DW zQd_qCkh~%_vOtI@=aFZ`zu*_F;Efml1!p!*RSB==JKveJ=bJM#XZ=s@rz@4G<+4Zc zx3+Th{Fmx!WtFPacOSgdbpw|YAV6a;@VGbgp?F3q`HFxI&ItjBlYR5p3`(cJezag@ z4lu=4)a9A05qPZF>0h6EHI)7whdi0C)6X7q52{f6kuDLKJ2L_wF2QA(hbyoEi?9TB zScVl?g{yE4)}R5`TeYABH{j+83Cgez8)z%A`Iv0I_4|b0GA930)ZU_*1#BRP&c^q5 zHoA8iDw=X-xxfV;JAyW~~gl(3z_|TUs4LgmVh(dIloQX!4 zi4IpmA#6tp3*0bG*kL64Sap~lM)2PD$J0N|pRe8Dl&Mf(4t4~O`5{Zf!Dft-eb(YT zJefYXV5O?}Y-X0^@m^;2MJz=br+y=1VfwfA)W53F+d&}abf>UA@sSmAggE3no|qyt zI->i=866uV18AMxBQNPQXKap4FzRG%p*7JKMg<@pM|`5pnRP}e8Ckji@U>6&3-`#g zKgY#Uaa5QU&q${z6Xe!8e@Vss1gx>G?3r^eIs=0X9Ba4_nP9AvKYkIC4> zXt7R49@uvYa;Hwf&3*T^@8!Oi`+m;-oMYLTXXd<=^GPXpa9o?;_ zwmE#E-f)=8igNlxacQD2D03wW;MajFPKOVUQUP*UNmA?$6}B6DX+MFja%SyIlrF?6 z1DRRP6b6~ugQiYbyGpQ-XSIv;C+TBsuCT3NKjI=Q^_XCB%wtHE6l;3bg6zeUNCg#a z^sv2qktwV^h%K6^Enm!Q&k;=*RGkWz(WHTA`j*G)l;7 zwDju1xanBa_w7Ymoqla^6tAIzs7|H5Ink{x^#6>|;$lgIUh0kY&x_D=HiGtK1;L>f zU8Npfrt5~|=CQ4^nA7iF*RZa^(6Ft+Lc zr&>54UtV3nh$D^@FY<(U>~m*=F~B#^PDOQ}T2QDxh ztP7?-0%&xMhj@=n@Q4fv$4&GAJ$W;-h88DHG@{%Z+1fg$Lpro4WQ4VSh(-=%X%h_{ zZZ9L?P7`)e#qQsjRWYk#uj1Y^T0fD6LQ*CCD&bcgW`*|wa11^K@2n$?p3iUuYpFeq zSS)p~n+R0~T&S+><+_)pB@t=y-MxNbzPoh0S*k^AV$O1h`uqMc+hS0&xf z_@E5)SjI^r5}s)#TTm-0K*yESMu$nE@+?(CUNE8kB2M;2Nws*TBQJ^5ovm)hMZ{9^ zQmtyymyMf7qG4dhKpkKnOw9ox9FmYQOmRpZ65=|X@XujZ@yI)aYsIeDcFw|G85V`N zuu(H~)_{>ZQLg0w^>83znkn-a81)wRL792dWICao0w>P{lvd8SB{AV*G3Q@eya+T3 al+aso-rIU6235&_U5dUx@4)1i@BRiLMXuif diff --git a/news_api/spiders/news_agg.py b/news_api/spiders/news_agg.py index 0f88490..eb61bc5 100644 --- a/news_api/spiders/news_agg.py +++ b/news_api/spiders/news_agg.py @@ -5,7 +5,7 @@ from scrapy import Spider from newsapi import NewsApiClient from news_api.items import NewsApiItem -from datetime import datetime +from datetime import datetime, timedelta import paralleldots @@ -17,41 +17,66 @@ #API Key: 2f48e626e6bb43afa1d50e6a9cce7728 + + class NewsApiSpider(scrapy.Spider): name = "newsagg" headers = {'Connection': 'keep-alive','Cache-Control': 'max-age=0','DNT': '1','Upgrade-Insecure-Requests': '1','User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36','Sec-Fetch-User': '?1','Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3','Sec-Fetch-Site': 'same-origin','Sec-Fetch-Mode': 'navigate','Accept-Encoding': 'gzip, deflate, br','Accept-Language': 'en-US,en;q=0.9',} newsapi = NewsApiClient(api_key='2f48e626e6bb43afa1d50e6a9cce7728') - today = datetime.strftime(datetime.now(), "%Y/%m/%d") + today = datetime.strftime((datetime.now() - timedelta(days=1)), "%Y/%m/%d") + keywordsToSearch = ["model s","model 3","model x","model y", "solar", "battery", "gigafactory", "autonomous driving","cybertruck", "elon musk", "ganization change", "stock price","bear market","china","conavirus","covid-19"] + def start_requests(self): + # pdb.set_trace() urls = [ - 'http://newsapi.org/v2/everything?q=tesla&from=' + self.today + '&to=' + self.today + '&sortBy=popularity&apiKey=2f48e626e6bb43afa1d50e6a9cce7728' + 'http://newsapi.org/v2/everything?q=tesla&from=' + self.today + '&to=' + self.today + '&sortBy=relevancy&language=en&apiKey=2f48e626e6bb43afa1d50e6a9cce7728' ] for url in urls: yield scrapy.Request(url=url, callback=self.parse, headers=self.headers) - def parse(self, response): body = json.loads(response.body) - + for value in body['articles']: - text= value['description'] - lang_code="en" - response=paralleldots.sentiment(text,lang_code) - - # pdb.set_trace() - - newsItem = NewsApiItem() - - newsItem['publishedat'] = value['publishedAt'] - newsItem['id'] = value['source']['id'] - newsItem['name'] = value['source']['name'] - newsItem['author'] = value['author'] - newsItem['description'] = value['description'] - newsItem['url'] = value['url'] - newsItem['content'] = value['content'] - newsItem['sentiment'] = response.keys() + content = value['content'] + description = value['description'] + if description is None: + pdb.set_trace() + if content is None: + pdb.set_trace() + + for keyword in self.keywordsToSearch: + # pdb.set_trace() + if (keyword in description.lower()) or (keyword in content.lower()) : + # pdb.set_trace() + # if "model s" in content.lower() or "model 3" in content.lower() or "model x" in content.lower() or "model y" in content.lower() or "solar" in content.lower() or "battery" in content.lower() or "gigafactory" in content.lower() or "autonomous driving" in content.lower() or "cybertruck" in content.lower() or "elon musk" in content.lower() or "organization change" in content.lower() or "stock price" in content.lower() or "bear market" in content.lower() or "china" in content.lower() or "coronavirus" in content.lower() or "covid-19" in content.lower(): + lang_code="en" + response=paralleldots.sentiment(value['description'], lang_code) + splitText=value['content'].split(". ") + keywords=paralleldots.batch_keywords(splitText) + + # pdb.set_trace() + + newsItem = NewsApiItem() + + newsItem['publishedat'] = value['publishedAt'] + newsItem['id'] = value['source']['id'] + newsItem['name'] = value['source']['name'] + newsItem['author'] = value['author'] + newsItem['description'] = value['description'] + newsItem['url'] = value['url'] + newsItem['content'] = value['content'] + newsItem['sentiment'] = response + newsItem['keywords'] = keywords + + + yield newsItem + + else: + # pdb.set_trace() + print("no match found") + - yield newsItem diff --git a/settings.cfg b/settings.cfg new file mode 100644 index 0000000..99b25bc --- /dev/null +++ b/settings.cfg @@ -0,0 +1,4 @@ +[PD] +exist = true +api_key = D3ZcPSa5zmgWQl4SRgmQa1jhAV9Cgi1P2BUQAFXHDKI + From cb0f3f02036b6267882932dab1f0b92f437240c5 Mon Sep 17 00:00:00 2001 From: JJ D'Aurora Date: Sun, 7 Jun 2020 21:46:16 -0700 Subject: [PATCH 3/5] fix timedelta imports --- news_api/__pycache__/items.cpython-38.pyc | Bin 511 -> 511 bytes news_api/__pycache__/pipelines.cpython-38.pyc | Bin 2026 -> 2074 bytes news_api/pipelines.py | 2 +- .../__pycache__/news_agg.cpython-38.pyc | Bin 2709 -> 2725 bytes news_api/spiders/news_agg.py | 1 + 5 files changed, 2 insertions(+), 1 deletion(-) diff --git a/news_api/__pycache__/items.cpython-38.pyc b/news_api/__pycache__/items.cpython-38.pyc index c91e64a4a5d7213af7d0527008f23779f048b484..d19e7a5c271e14ef8a91b095aa6d8f3554b70139 100644 GIT binary patch delta 20 acmey*{GXXSl$V!_0SNrK?rh}#!UzC4R0VDT delta 20 acmey*{GXXSl$V!_0SNB@&D+TRg%JQi*9L?D diff --git a/news_api/__pycache__/pipelines.cpython-38.pyc b/news_api/__pycache__/pipelines.cpython-38.pyc index 6aaf694bd30fcd0a00f4f3d8232ffd81f281d924..7a2ab95fbc9f401782241b27ba00948250351572 100644 GIT binary patch delta 689 zcmZ{hOKTKC5XZZFUcECrv!k#{6wQXj7*+*|2QMCcAb4>>5IxKxvYG0vE4w?PXCQbn zhuwo04@=(!#ejYS!Mh-S0B;WHT|b3YolCM{L;b4$UHz!8{@Z$+#!uonWH`Qj{k4M6A^!U5O#D=F4lk^d1&`N=j-0B>|QzJli2d#XHP%PuXG67sUA6zu=h$$ZgIB)n0 z1NTTQvn)8SJHtAu;)gRfLLb}NTX;y`Yvm2><=VvzTq8m9#>NRY!V) zhe;$u>1s-Lo4R9mf(}1#UIiDnui%s|#@=50#qJB0SN7}e@)IqzZHvwKV9iZM>iz?e Cz=5Iw delta 599 zcmZ{hzfS@&6vwZBP_r2Hl_4P6Hm3B{E*HYw&1~>Kn$St9Iw6a}A z%7MakM*0dPKJBTVDYe0<)HK#s^ssKDA!Uuw4_h5S`s@)y9>WcBhMo`I*c^ugQkR&L zGr$rQ#kwiYu>ncDat23Y59hZSe!+RgBTQvlU-1l|CZ+X|XC_5?7BiV8V>YvyBUgvH zCrT|Po^V0D;~hkVrmmGuc_yC$WC1xqQZ!hOI93N}TKuSs_K5dwS=&Upc&k0RA<$d% zL^^Ajm*g_ko9%Ak^SQ~qUtOSz5Tu%;6CseU_#hj1E>D3-12OUH;) zmmx3<0lV351VP6SFT0Evz+D9_0RHVhbRJ$Ez*Q7yRjOlT+0>$%$st&?c~RUOWt11M iMsamXx{^9b_x#FlhVg&JnInyho8^daqoO%Dqx}ISu4q;O diff --git a/news_api/pipelines.py b/news_api/pipelines.py index a7a165a..d30c6e7 100644 --- a/news_api/pipelines.py +++ b/news_api/pipelines.py @@ -12,7 +12,7 @@ # import ipdb # import pdb import logging -from datetime import datetime +from datetime import datetime, timedelta class NewsApiPipeline(object): def process_item(self, item, spider): diff --git a/news_api/spiders/__pycache__/news_agg.cpython-38.pyc b/news_api/spiders/__pycache__/news_agg.cpython-38.pyc index 5250d9fabf4c08b1aafdb81ca565ed2a2d983b78..d5e75a70d7acfef166c9cca4e67bafa1e64af75f 100644 GIT binary patch delta 149 zcmbO#x>S@ml$V!_0SNRC-i=!_kyn!}C1Vap6h{g}3TqB$E@u>HE>{#6BLk4n zn8O{#oyxs{CxvYxBO`-5LkfEeM+-v=M=C=Wb2D=k@5XuaSQ+Iu+p=9_VpN|j!g-BR veDZ5f8AiUz99+h1w^&Ltb5kcAGLNOnZ;9z87lwsszlwwq2;@~V|2MTHO eP8Q%Yp6toxA|?b@!NJJE#KQ=J0!$ptT$}(qw-z@5 diff --git a/news_api/spiders/news_agg.py b/news_api/spiders/news_agg.py index eb61bc5..44821f0 100644 --- a/news_api/spiders/news_agg.py +++ b/news_api/spiders/news_agg.py @@ -5,6 +5,7 @@ from scrapy import Spider from newsapi import NewsApiClient from news_api.items import NewsApiItem +import time from datetime import datetime, timedelta import paralleldots From fbba350629b725da6ca24b30e9abda18128e147e Mon Sep 17 00:00:00 2001 From: JJ D'Aurora Date: Tue, 9 Jun 2020 23:34:56 -0700 Subject: [PATCH 4/5] type checking --- news_api/__pycache__/settings.cpython-38.pyc | Bin 373 -> 373 bytes news_api/settings.cfg | 1 + news_api/settings.py | 2 ++ .../__pycache__/news_agg.cpython-38.pyc | Bin 2725 -> 2687 bytes news_api/spiders/news_agg.py | 13 +++++++------ 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/news_api/__pycache__/settings.cpython-38.pyc b/news_api/__pycache__/settings.cpython-38.pyc index 0abfc57f32fed5d8e2214df242f0ff23329d729b..69ff8248cdc40a222ab11607eee88ded79c96a14 100644 GIT binary patch delta 22 ccmey$^p%M>l$V!_0SE-n-i`C#$eY0k07b9`{{R30 delta 22 ccmey$^p%M>l$V!_0SLTA(&Icg@@6ms06wk-2mk;8 diff --git a/news_api/settings.cfg b/news_api/settings.cfg index 99b25bc..1ee6354 100644 --- a/news_api/settings.cfg +++ b/news_api/settings.cfg @@ -1,4 +1,5 @@ [PD] exist = true api_key = D3ZcPSa5zmgWQl4SRgmQa1jhAV9Cgi1P2BUQAFXHDKI +GOOGLE_APPLICATION_CREDENTIALS="/home/user/Downloads/service-account-file.json" diff --git a/news_api/settings.py b/news_api/settings.py index e355d84..79eae25 100644 --- a/news_api/settings.py +++ b/news_api/settings.py @@ -70,6 +70,8 @@ } + + # Enable and configure the AutoThrottle extension (disabled by default) # See https://docs.scrapy.org/en/latest/topics/autothrottle.html #AUTOTHROTTLE_ENABLED = True diff --git a/news_api/spiders/__pycache__/news_agg.cpython-38.pyc b/news_api/spiders/__pycache__/news_agg.cpython-38.pyc index d5e75a70d7acfef166c9cca4e67bafa1e64af75f..d95c5ad1247602b26ae8f29789e0b380c44750d3 100644 GIT binary patch delta 254 zcmZ1~`d@@Ml$V!_0SGSFJ&3!rk=LA!QGT*3n+K!V`q)3U%r(p@Op*+>EI^tW zNVC>3WHGxi#6GBHt6@!H$Y!3vSj1Mt0%rk57qBd3$Yv`}r~!&Y*dR42EVb+@lgEeBARqlN>>;w<58W~k+=nQX*f#I0AtmBQc5$jDHrH+en# z1V--34jdwrQ#iJZX#m|_tOIgABM%b~69*#)I~SAQWJ}ITj4G4Qa2hZ&P5#R%%X5pl XASJ1Y38<_{f3g~v9;4diP_6&~g(f~8 delta 316 zcmew_vQ(5el$V!_0SNRC-i=$bk=LA!QEsv;n+K!H`U#A&9JS0f%qdKg47Dsk zni)v5*0829WOEc*l`t=0S;)Z1P{RV|d%^h(S*$J$v1e-8YS?O67qBg4$Yv|f1F~5l z>?Q0AI3Q-$urFk)WrWzXfHQ@4A!98EP?nRch6BjtEa9Fk&tA?QP{NbK z-^|F!P#7?IA^QXs-dikL#rb)+Sab3dQ;Kh~B;}`6-eN9DNxH>ZoLUlJQk0mSI@y(D zhnzalgT*EwZ!+>Q@iB2Qa)2-hGI23UO%~>y#H|LDERp~b!jm1j#3vu; Date: Wed, 10 Jun 2020 07:08:14 -0500 Subject: [PATCH 5/5] change filename date format --- news_api/.DS_Store | Bin 8196 -> 8196 bytes news_api/__pycache__/__init__.cpython-37.pyc | Bin 146 -> 146 bytes news_api/__pycache__/items.cpython-37.pyc | Bin 465 -> 506 bytes news_api/__pycache__/pipelines.cpython-37.pyc | Bin 453 -> 2062 bytes news_api/__pycache__/settings.cpython-37.pyc | Bin 323 -> 370 bytes news_api/pipelines.py | 2 +- .../__pycache__/__init__.cpython-37.pyc | Bin 154 -> 154 bytes .../__pycache__/news_agg.cpython-37.pyc | Bin 1295 -> 2688 bytes news_api/spiders/settings.cfg | 4 ++++ 9 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 news_api/spiders/settings.cfg diff --git a/news_api/.DS_Store b/news_api/.DS_Store index 712f5722b55d7734ab064bba1f83680f1b585f64..86f798c9a58a19033071eae913effe57e4f4029e 100644 GIT binary patch literal 8196 zcmeHMOK=oL82=b;?8U~ zAw=T=EiJB4D0;C4^)LWmz7)h@xKf;=xy0rB!;=zq@Bsut~fjRx!QR{rB|$ zfBpF8o9Xlf05DZBTLE+cP$+ZDYN%KsaX-)VQbpLpaY%Ro8v@ACuLZ{EMMn)00}%re z0}%re0}%uN2L@=*=1Ey(-LZj+&%l~%kus1Qd0vQ^Y964mJ( zATaUJCPO43FezV^plnjMNt(s!`4nuZ{69}9B*siKGPhZZf$E%G{<+eCuU|;rLLj1qv!b8 z)T!yyGta$7yivmNl?=7=dak@kwK7&5f1*pSvtqKe&aX@BRJ9uYvB$NZK|ANUY5SCw zVYNOjvxhEw^VS3{X&~o0g)!I9u$4K($rT(UusR)w2m6D8H5u^Quo!dvn2;-D`@+VZyEJ_;=Z%fgO8k}B95#3C)%9b11rxXn_hJ$|F@^i^DNJJ@ z_Tw=;fum^SGw9=Kd>&`<1$+fx#f$hlzJYJ!yZ9bn!T0ehUc-;%3@b{id_rEAO8l5= z^Hmu>eHBL+Qe})(*?31)`OPvZ#d}t-*;wDy+`4^ta_(N1-Mw1`cC)60kfc(Cf=UVz z_wqU^c5Q5(wxOiwa-H{+=B!DD3x`$9=y+_iLRw*pxYQbAjVcM6q4YMy9#j-oA$={e zgi0D_mD0CWYgb6EY>D*l)E-tz&#X#%JG4g>k}X>*y(z6*r3Z^ax%?It-zB&L*GPEZ z!gufk`~<(lUkIq+3e-tzn{YGMV}p=eE4JY_>nVv&TD&}8dK2lq>v--6c}jx46?7Y~ zDrMg~{~O@Z#lQdG#u-FiM+`&^+-U}|syp4CqFF5V2I6P!IOXG%dE#-i(h3L_JcPH4 k<2c#5e;86d9{Q4@ot2h6l>YM%0nz?%Inn-q*YE%T0HwvHSpWb4 literal 8196 zcmeHMTWl0n82=tKX4#o- zDYaTJL>~yoOQMmODDlOBFC_Y)K58WDi;+as=!*|N_@coVpYT6(W-Dx46CV&0=WNdT z&z%4N&gGkL=JZSf0DB5X8$dk(Fv`Me6;v#egkR(ZDH5~@91HW}DCX?-a5DNhd=KruLCpoEh@F2czs13M?Jgab-AU@$TUClvHYCtTbQ z2TV#EmLUTn0}B}tdw+$0W;emU>HI$3Z#kK?<)kS>JXE_Vw_NuV)rzPbOhX2A$`imv zKP9MlOHj-ogF_Zv|0_YsC|Fk#zltJFFz9Dd|5G$lp6ue-(U zEnmTy%G8zWqXQGp)PSdZ1-D1fkBO^;x;K#y>VuAB&DZtxVUtg#wAxWSng!6TxVpuxX!D^bt-B_bnm|XYD%k_ zr0E~Xd8T8#hk4#5o6@RWT5^BJu@C0C@DFDktB|$blvbV5txUnvJ>G3u1Li62j(Gf} zCu&DrC-3$8fg>hiMO#YK3NAmy31Ck0K7zR#Di}zdwoua+Z#tjfB-1Xlqhjob zb=NzPSJtFc)AUP8>}(eBJZoe(#B+~yi#b}ZlDJ_3HxE3gfCtPXW%Tn2ybCvA6K=#!xD~fyCwAd( z+=F{@KlbAR9Klf>!*Mh)i#DFXJkH>Acp9I_7w|QF3*W}`coE;j5AY-WL{2SIl;wl+ zx>(|;R9h&^;PHzbT}+m-J$v`vCRu)4DYRH!b4Ts!b&buNTRV0>Fn=3M?%Yk{x=~TS zh@?t<^%T`1RU56VZ&-7u8XF>A@sp)WFFpa<3ZWo&MMSTU$6}h0j=WTwVk&7{@qH~# zj*V(12S~*EtdUx$6?{X5a^d`H>3*zBY{<-?j!v*+| zl=n4UCB6LwzmVeofa|yntFR6ma4p7g9X8|LxET|;h4i)^ci>L!^)Ws@XYmZ4#h379d<9>{H%WsRO0?HsRGqYVaiR7m2lBS% z*vH9JRP4>5TXt0@d#)FM1MFG)`~NK)KkPVUAY|b7Gl1oN1ARR-kEPx};;bE~e2lV4 tB5qDvA3}u)5$*nQob3ER45=Oue96GhNlP9||N4i3aQ`>L{Xg9Q{{-OIrx*YL diff --git a/news_api/__pycache__/__init__.cpython-37.pyc b/news_api/__pycache__/__init__.cpython-37.pyc index 1456772003203d6aff9c5d0e9fe0fb0f5bda6dbc..7e4c28aa011fb54126e0d1dd6243d3831d273982 100644 GIT binary patch delta 20 acmbQlIEj(RiI=6c3c{yWLS(2LTr^z?*fGy*#iT@-`ZgCfsCgo%nXQZYimfT`b&d)1J%`3UZSqvmIb3sgw z?9|Hg{GycNl?+9KK>LaWCz~<4bBO?%94s7+9E_8j8Pzl(GGJ*CAp|6DaoFVMrcMmRx%Xv0?jJoogB^R&L#k4aFbK5d}+u2tFWji^C>2KczG$)s6$mD`o-`9LxZIxFf&- diff --git a/news_api/__pycache__/pipelines.cpython-37.pyc b/news_api/__pycache__/pipelines.cpython-37.pyc index 4a2a1db0b3953f177ffa534eb5520e72ec72ac2a..83342042d712856cf8b35177c6024f1790126bb2 100644 GIT binary patch literal 2062 zcmaJ?PjeeJ6qmHS_S)VgE~K>3P{J_j;OStWIJDD3nuO_Orgb_%Vb}vZo0VeL**~r% zH#p;4dhLlX&>ZqS!oh@e*5FkpW`|q zf8k*MxS(u9Q->fp;WQ&5_AH}03mGMXosrOi87FgdFZ5_-RSRn{=VtZ19@cX|^eK5v zxW{X+2(O98nG-f)T<1QFeLHS(`W@*s-oiPg-=SuGAapd6lmH&F;_nB&=SoP`8^)?E zj*C(#*t?gMT=WL1?iFxi6pzwNWi&fnbw?8uK-h^;D#G30!_zSiLQglYxY}Jb>_aSm z6cuqUqR2F(C@=XqLp_M17vngqR%9^u^3q?~!nqoVYtZazKDFZ|G%>zyH249;C7Qx{ z<3jV!iKubLo!9gxr2d@UAYr{qc?c=4l6^+QCU5X2;NRjc9zY-P6}}4n3SR?w+h*-) zSq#d(oq0Y&AK`j57({O|8$bb<#}jp0+xlrM-{J{o1|RGZC!4MhK$+4hiO7_Jq*DhZ zk zWgg8IKLHmNLP9)7oA91)T#a$jd?`)`zOr$C4vL<~2w9nuAsb?TT+pz29|8^$6cC7% z^-qs%TKM5zC12j}^?0h1@}-c1XQ{rwG}_M6V{uewY5B$RJ^iBS&c5sxLie;#`fDk) zoct)^VyTB(j>Vmmhj~0KVPev$oBC*CzX)44s@*IVg*LUs_F;X-L&Jb8o-Tofrnx{C zR*?mZ=~arf%8@9ds!ZI4^@=5KfC%2QXI=8WyI{Kn;03u?VF!Q-AUuZxH77NbAH%%y zlB`ss^FPi6)?(((m*|b9Bl~wQ7Y&!nBHlHd%i9;OXf~y1z*CKpP!8dieISaVs$xZm&)Y1ZqrG{&rCfd}qaxj2eZ#*sz#{(d+y@Cy}l<$Dnma}`X zSV8Kw*}F)&TqjrM3hst5$~6=i4qOW@kL=IHDasS+;R5nqVl6SPB#V{Gh5n`F@)k~i zisCaA|5;Jnu!B;F;7^}@PteKoc65dW37Y}@@8NN6h-7J1`8sD2Ay;gmuMd=@~l3LKxd HS=;>waHH)5 delta 170 zcmeAZILgfD#LLUY00h%ryyFxZC-O;1i2!--3@HpLj4eQ%%FxUl#hAhr%%I6U@q9HK z6HvB@X>tIgKO@)V1&jeH>|g;@f(^uF1QMJ;Tny5}!3gI2X)@hn%TLNmO)dfRiy+2< Zl!I9yjkh>#a`RJ4b5iX#vDy6*C^5Xc!f=s=F%!1UM t%)HcMJ>UGi^n4em0I0yk2|;WeKpjOK6JPlAftq^JRRk#j delta 20 acmbQmIE#_TiIq^M{{*c7 diff --git a/news_api/spiders/__pycache__/news_agg.cpython-37.pyc b/news_api/spiders/__pycache__/news_agg.cpython-37.pyc index 2353b3aac6fe06accb1a10444118162ea685d2b9..fb79302fe7549c8a527cad118e2d746d8849d2af 100644 GIT binary patch literal 2688 zcmai0-E!MD76wR4A|+XIl5QP)vn`t@-l(-C|HXDT^|p@PpG+E0Q#W+@pm>w8m!71d}2#R6J zDdiXkrenf*A+SQrvBI)bp2#asg;mb0w0MCr`Wn#^HD4iWGIXTB)SW4c_fXyX1vEgr zb(|IV5}z^-Xlj?8h^@rm4ty3#5L70DCz6GAEi);RO!^_qELBq$NaB3Dv*Nhl?U4s* z=p8%{R`fbAl!3uPW)?Q<$$V%rX6zc-9GQCEpU*lVdUcbQyRzLX9mJC&`j41)y%>-sK{ejK$IxWri9T3lYvdv9 zu)e;$QO8b+B!0WchQDB-EyRdH$xx)`y}!2Zg{^xuErqaGV2ewzatTf^!O$s)I3V09 zb%>OV51on!Ry-uGjQKFVLHaU|;xO(Dn{s~unmT27*kN4qzS~PJ7Q~Sq_C*h@P9i_e zJ#UYl1pbk++p`nyyDY7A7~ytEc#lb^;CB6pIF=hLukk)8?ZyM2HkKZ`>MZKfp%kI0 zKoyUnqIiUk&;+L( z*iD;VDU(07T6s7nelzA?Yp~p6130_{t-Q~VHzgARnLh;XZ>Gk)j5pIS=S9rr4LAR<%k)g|^B^-_5(FLM_Oeo!0m-*WJ-ryh6nLdIXJB!W?Xmw}{r~g*o18@F(RvZ;5 z>gjJVuZ@f=1k!Wk0`-cbJ1UNhF;;V^aaNLr^Wu1=QRzZE!{gP@OSAyHn=opQOhA^j zUZS%yj236ns7y hwf~nwnfck+1bl`LGElRj-MsHB&EJ{{`>4@d5w< literal 1295 zcmZuxPjBQj6t^8G@lL1Z&#qP!I3RHv#CFQCw2M@sgcao(Tbd(Kze+amF@mKKN&)Qg~j!t z03L$aZ$U{Uu||BDF~_hb8m$N?IP66_T2W4WOm)0Uc!JT_NX9aGj%1?Hl3YYQmG}vo zq_4mpnoqD#o;0OY2GI6g{bIjt%163Xm4l6~tBr4*TIJ#V^@spG1hF51QYb{iiNrio zR1z6oVjfE>yi!R+|{19dPvPzQ^}{ZGZQ_(XoT zuMWiLd9Dr*XAdTrr=lsdQ?<@9cnyPqq#z+S`!T2vEzuH73_(B`A%x3#gB@8$9fo&Q zCYR&~6bK3w%z5mT)#7|K+n?><+n?Q=9eScFK5FWw)xwl+J&8R&^P^)^ud+rMvGT;# zo@r4Pttb>vZEak(SWk$LTBAM7h1Ls^pTdVja!tv13gt5OOqsPiDXYRKHaDVK`}8|? z)++0K45qCD8`LU&%*S9O6ULRfR`w5M-iFlvel-2wDr2WhVe9I&s+Db&p60bw)1q|K zKx764-2mIy)n08B#U6Ovu@3BJX7Fp@2*iy+A;uzt!^5KU_pi!6)M)SZPx3%|7zs$| z&Yt|=066plcX&x&;tlz=>!2TY(TnIMdW@dxpU5WKP)9G&ChnpxmgER^R7Sf9aMWYk zW7^}m$8nF79w$9cdz|)|^_U$Y7kBXm#$YLw_Uzpshw|Q0F71hu!g*3k zPb;xfJQ1xssg2)~3QAHouBbwe0X*=MEXUz$u@Xn Jn2|nX`VWTtLY)8r diff --git a/news_api/spiders/settings.cfg b/news_api/spiders/settings.cfg new file mode 100644 index 0000000..99b25bc --- /dev/null +++ b/news_api/spiders/settings.cfg @@ -0,0 +1,4 @@ +[PD] +exist = true +api_key = D3ZcPSa5zmgWQl4SRgmQa1jhAV9Cgi1P2BUQAFXHDKI +