Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from scraping.parse import Scraper
from scraping.quartile import quartile_of
from scraping.research_gate import members_of_Inno

if __name__ == '__main__':
Expand Down
21 changes: 21 additions & 0 deletions scraping/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def __init__(self):
key = file.readline()
self.client = ElsClient(key)
self.client.__min_req_interval = 1 # Set request interval. Reduce for minor speed up at the cost of stability.
dataframe = pd.read_excel('quartiles.xlsx', sheet_name=None)

def parse(self, preload=True):
print('Scraper', id(self), 'began parsing')
Expand Down Expand Up @@ -135,3 +136,23 @@ def _get_papers(self):

papers.to_csv('data/papers.csv', index=False)
return papers
def quartile_of(self, a: int, year: str):
    """Return the best (numerically lowest) quartile recorded for source id ``a``.

    The sheet named ``"CiteScore " + year`` is looked up in the workbook
    mapping, the rows whose first column equals ``a`` are located with a
    binary search, and the minimum of their ``"Quartile"`` values is returned.

    Args:
        a: Identifier to look for in the first column of the sheet.
        year: Year suffix of the sheet name, e.g. ``"2020"``.

    Returns:
        The minimum ``"Quartile"`` value among the matching rows, or ``-1``
        when no row matches (including when the sheet is empty).

    Raises:
        KeyError: If there is no sheet called ``"CiteScore " + year`` or the
            sheet has no ``"Quartile"`` column.

    Note:
        Binary search is only valid if the first column is sorted in
        ascending order -- assumed here, as in the previous implementation;
        TODO confirm against the spreadsheet.
    """
    # NOTE(review): the workbook appears to be bound to a plain local name
    # in __init__ rather than to an attribute. Prefer ``self.dataframe`` when
    # it exists and otherwise fall back to a module-level ``dataframe``.
    sheets = getattr(self, "dataframe", None)
    if sheets is None:
        sheets = dataframe
    df = sheets["CiteScore " + year]

    ids = df.iloc[:, 0]
    # [first, last) is the exact positional run of rows equal to ``a``.
    # The old hand-rolled expansion compared cells against the index ``mid``
    # instead of ``a`` and let neighbouring rows leak into the slice.
    first = int(ids.searchsorted(a, side="left"))
    last = int(ids.searchsorted(a, side="right"))
    if first == last:
        return -1
    # Positional slicing keeps the result independent of the index labels.
    return df["Quartile"].iloc[first:last].min()
23 changes: 0 additions & 23 deletions scraping/quartile.py

This file was deleted.

Binary file added scraping/quartiles.xlsx
Binary file not shown.
Binary file removed scraping/quartiles2020.xlsx
Binary file not shown.