Skip to content

Bug report - XML citations from website #127

@chungimungi

Description

@chungimungi

Error In
Parse Outgoing XML citations from website

For many of the PMIDs, the following error is shown:

image

import csv
import multiprocessing
import pubmed_parser as pp

def write_to_file(f, pmid, result):
    """Write the outcome for one PMID to the open report file ``f``.

    Args:
        f: Writable text file object (anything with a ``write`` method).
        pmid: The PMID the result belongs to.
        result: Either a dict containing a ``"pmid_cited"`` key, or a string
            holding an already-formatted error message, or anything else
            (reported as an invalid result).

    Any exception raised while formatting or writing is itself reported to
    ``f`` as an error line rather than propagated.
    """
    try:
        if isinstance(result, dict) and "pmid_cited" in result:
            f.write(f'## PMID : {pmid}\n')
            f.write(f'PMID CITED : {result["pmid_cited"]}\n')
            # You can add more information from `result` here if needed
        elif isinstance(result, str):
            # A string result is a pre-formatted error message; write it
            # through so the underlying failure reason is not replaced by
            # the generic "Invalid result format" text.
            f.write(f'{result}\n')
        else:
            f.write(f'Error processing PMID {pmid}: Invalid result format\n')
    except Exception as e:
        f.write(f'Error processing PMID {pmid}: {str(e)}\n')

def process_pmid(pmid):
    """Look up the outgoing citations for ``pmid`` via ``pubmed_parser``.

    Returns whatever ``pp.parse_outgoing_citation_web`` returns when called
    with ``id_type='PMID'``. If that call raises, the exception is not
    propagated; an ``'Error processing PMID ...'`` message string is
    returned instead.
    """
    try:
        citations = pp.parse_outgoing_citation_web(pmid, id_type='PMID')
    except Exception as err:
        return f'Error processing PMID {pmid}: {err!s}'
    return citations

if __name__ == '__main__':
    # Script entry point: read PMIDs from 'pmidfinal.csv', fetch the outgoing
    # citations for each one in a worker pool, and write the results to a
    # Markdown report.

    # Output Markdown file
    output_file = 'out1.md'

    # Open the output file for writing
    with open(output_file, 'w') as f:
        # Write Markdown headers or other content here if needed
        f.write("# Outgoing Citations\n")

        # Open and read the CSV file with PMID values.
        # newline='' is the mode the csv module recommends for its input files.
        with open('pmidfinal.csv', 'r', newline='') as csvfile:
            csvreader = csv.reader(csvfile)

            # Skip the first 16021 rows
            for _ in range(16021):
                next(csvreader, None)

            # Create a multiprocessing pool
            pool = multiprocessing.Pool()

            for row in csvreader:
                if row:
                    pmid = str(row[0])  # Assuming the 'PMID' column is the first (index 0) column
                    # Bind the current pmid as a default argument. A plain
                    # closure would look `pmid` up only when the callback
                    # runs, by which time the loop has usually moved on, so
                    # results would be labelled with the wrong PMID.
                    pool.apply_async(
                        process_pmid,
                        args=(pmid,),
                        callback=lambda result, pmid=pmid: write_to_file(f, pmid, result),
                    )

            # Stop accepting work and wait for every task (and its callback)
            # to finish before the output file is closed.
            pool.close()
            pool.join()

    print("Process Complete")

This is my code for the parser. (It skips the first 16021 rows, as I had already retrieved the information for those.)

I have a CSV file containing only PMIDs.

image

This is how it looks. All PMIDs were taken from PubMed's OA subset.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions