-
Notifications
You must be signed in to change notification settings - Fork 177
Open
Labels
Description
Error in
Parse Outgoing XML citations from website
For a lot of the PMIDs, this error is shown:
import csv
import multiprocessing
import pubmed_parser as pp
def write_to_file(f, pmid, result):
    """Append the outcome for one PMID to the Markdown report.

    Args:
        f: Writable text file object; only its ``write`` method is used.
        pmid: Identifier the result belongs to; used in the heading and in
            error messages.
        result: Outcome of the lookup. Three shapes are handled:
            * a dict containing a ``"pmid_cited"`` key -> written as a
              ``## PMID`` heading followed by the cited PMIDs;
            * a string -> treated as an already-formatted error message
              (``process_pmid`` returns one when the lookup raises) and
              written verbatim, so the real cause is not lost;
            * anything else -> reported as an invalid result format.

    Any exception raised while formatting or writing is caught and reported
    in the file as a best-effort error line instead of propagating.
    """
    try:
        if isinstance(result, dict) and "pmid_cited" in result:
            f.write(f'## PMID : {pmid}\n')
            f.write(f'PMID CITED : {result["pmid_cited"]}\n')
            # You can add more information from `result` here if needed
        elif isinstance(result, str):
            # A string result is a ready-made error message; pass it through
            # rather than masking it with the generic "Invalid result format".
            f.write(f'{result}\n')
        else:
            f.write(f'Error processing PMID {pmid}: Invalid result format\n')
    except Exception as e:
        # Best effort: record the failure in the report rather than raising.
        f.write(f'Error processing PMID {pmid}: {str(e)}\n')
def process_pmid(pmid):
    """Look up the outgoing citations for a single PMID.

    Calls ``pp.parse_outgoing_citation_web`` with ``id_type='PMID'`` and
    returns its result unchanged. If that call raises any ``Exception``, the
    exception is not propagated; an error message string naming the PMID and
    the exception text is returned instead.
    """
    try:
        citations = pp.parse_outgoing_citation_web(pmid, id_type='PMID')
    except Exception as exc:
        # Failures are reported as a string so the caller always gets a value.
        return f'Error processing PMID {pmid}: {exc!s}'
    return citations
if __name__ == '__main__':
    # Script entry point: read PMIDs from 'pmidfinal.csv', look each one up in
    # a worker pool via process_pmid, and write every outcome to 'out1.md'
    # through write_to_file.

    # Output Markdown file
    output_file = 'out1.md'

    # The output file must stay open until the pool has been joined: the
    # result callbacks write to `f` as worker results arrive.
    with open(output_file, 'w') as f:
        # Write Markdown headers or other content here if needed
        f.write("# Outgoing Citations\n")

        # Open and read the CSV file with PMID values. newline='' is the
        # mode the csv module documentation recommends for reader input.
        with open('pmidfinal.csv', 'r', newline='') as csvfile:
            csvreader = csv.reader(csvfile)

            # Skip the first 16021 rows
            for _ in range(16021):
                next(csvreader, None)

            # Create a multiprocessing pool
            with multiprocessing.Pool() as pool:
                for row in csvreader:
                    if row:
                        # Assuming the 'PMID' column is the first (index 0) column
                        pmid = str(row[0])
                        # Bind the current pmid as a default argument. A plain
                        # closure would read `pmid` only when the callback
                        # fires, by which time the loop has usually moved on,
                        # so results would be labelled with the wrong PMID.
                        pool.apply_async(
                            process_pmid,
                            args=(pmid,),
                            callback=lambda result, pmid=pmid: write_to_file(f, pmid, result),
                        )
                # Wait for every submitted task (and its callback) to finish
                # before the pool is torn down and the files are closed.
                pool.close()
                pool.join()
print("Process Complete")
This is my code for the parser (I skipped the first 16021 rows because I had already retrieved the information for those).
I have a CSV file containing only PMIDs.
This is how it looks; all PMIDs were taken from PubMed's OA subset.

