This repository contains a Python script for parsing citations from text files and formatting them into valid BibTeX entries. The script automates the process of converting citation references in text into properly structured academic bibliography entries.
To use this script, you need to install the bibtexparser library:
pip install bibtexparser
Requirements:
- Python 3.6 or newer (the script uses f-strings)
- bibtexparser
This function extracts citation references from a given text string in the format \cite{X}, where X is any string. It uses regular expressions to identify and collect these citations.
def find_citations(text):
    r"""Return the citation keys used by ``\cite{...}`` commands in *text*.

    One command may list several comma-separated keys (``\cite{a, b}``);
    each key is returned on its own, stripped of surrounding whitespace, in
    order of appearance. Duplicates are kept.

    Args:
        text: The text to scan.

    Returns:
        A list of bare citation keys, e.g. ``['branco2023systematic']``.
        Empty keys (from ``\cite{}`` or a stray comma) are skipped.
    """
    keys = []
    for group in re.findall(r'\\cite\{([^}]*)\}', text):
        # Collect the bare key rather than re-wrapping it as "\cite{key}",
        # so the result matches the documented output.
        stripped = (key.strip() for key in group.split(','))
        keys.extend(key for key in stripped if key)
    return keys

# Example:
A systematic review of IAPS \cite{branco2023systematic}.
Output:
['branco2023systematic']
This function reads and parses a local .bib file, returning the contents as a structured dictionary.
def read_bib_file(bib_file_path):
    """Read a ``.bib`` file and return its entries keyed by citation key.

    Args:
        bib_file_path: Path to the BibTeX file; it is read as UTF-8.

    Returns:
        The ``entries_dict`` of the parsed database: a dict mapping each
        entry's citation key to that entry's fields.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(bib_file_path, encoding='utf-8') as bib_file:
        # bibtexparser has no ``bib.BIBTeX`` class; ``load`` is the
        # documented way to parse an open file (bibtexparser 1.x API).
        database = bibtexparser.load(bib_file)
    return database.entries_dict

# Example:
path/to/my_bibfile.bib
Output:
{'author': {'John Doe'},
'title': {'A Sample Title'},
...
}
This function formats citation entries into valid LaTeX/BibTeX syntax.
def print_bibtex_entry(authors, title, year):
    """Return a BibTeX ``@article`` entry as a string.

    Despite the name, nothing is printed: the formatted entry is returned
    and the caller decides where it goes. The year doubles as the entry's
    citation key.

    Args:
        authors: Author string, e.g. ``"John Doe"``. When empty or ``None``
            the ``author`` field is omitted.
        title: Title of the work. Double quotes already present are dropped
            before the title is wrapped in a single pair of quotes.
        year: Publication year; rendered with normal string formatting.

    Returns:
        The entry with one field per line, e.g.::

            @article{2020,
            author = {John Doe},
            title = {"A Sample Title"},
            year = {2020}
            }
    """
    fields = []
    if authors:
        # Keep the spaces: "John Doe" must not collapse into "JohnDoe".
        fields.append(f'author = {{{authors}}}')
    unquoted_title = title.replace('"', '')
    fields.append(f'title = {{"{unquoted_title}"}}')
    fields.append(f'year = {{{year}}}')
    # In an f-string "{{" / "}}" are literal braces; the closing brace of the
    # entry is added separately after the last field.
    return f'@article{{{year},\n' + ',\n'.join(fields) + '\n}'

# Example:
authors="John Doe",
title="A Sample Title",
year=2020
Output:
@article{2020,
author = {John Doe},
title = {"A Sample Title"},
year = {2020}
}
Here's a complete example of how to use the script:
import re

import bibtexparser
def find_citations(text):
    r"""Return the citation keys used by ``\cite{...}`` commands in *text*.

    Comma-separated keys inside one command (``\cite{a, b}``) are returned
    individually, whitespace-stripped, in order of appearance; duplicates
    are kept and empty keys are skipped.

    Args:
        text: The text to scan.

    Returns:
        A list of bare citation keys, e.g. ``['branco2023systematic']``.
    """
    keys = []
    for group in re.findall(r'\\cite\{([^}]*)\}', text):
        # Collect the bare key rather than re-wrapping it as "\cite{key}",
        # so the result matches the documented output.
        stripped = (key.strip() for key in group.split(','))
        keys.extend(key for key in stripped if key)
    return keys
def read_bib_file(bib_file_path):
    """Read a ``.bib`` file and return its entries keyed by citation key.

    Args:
        bib_file_path: Path to the BibTeX file; it is read as UTF-8.

    Returns:
        The ``entries_dict`` of the parsed database: a dict mapping each
        entry's citation key to that entry's fields.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(bib_file_path, encoding='utf-8') as bib_file:
        # bibtexparser exports no ``BIBTeX`` name; ``load`` is the
        # documented way to parse an open file (bibtexparser 1.x API).
        database = bibtexparser.load(bib_file)
    return database.entries_dict
def print_bibtex_entry(authors, title, year):
    """Return a BibTeX ``@article`` entry as a string.

    Nothing is printed; the formatted entry is returned. The year doubles
    as the entry's citation key.

    Args:
        authors: Author string; the ``author`` field is omitted when this is
            empty or ``None``.
        title: Title of the work. Double quotes already present are dropped
            before the title is wrapped in a single pair of quotes.
        year: Publication year; rendered with normal string formatting.

    Returns:
        The entry with one field per line, closed by ``}`` on its own line.
    """
    fields = []
    if authors:
        # Keep the spaces: "John Doe" must not collapse into "JohnDoe".
        fields.append(f'author = {{{authors}}}')
    unquoted_title = title.replace('"', '')
    fields.append(f'title = {{"{unquoted_title}"}}')
    fields.append(f'year = {{{year}}}')
    # In an f-string "{{" / "}}" are literal braces; the closing brace of the
    # entry is added separately after the last field.
    return f'@article{{{year},\n' + ',\n'.join(fields) + '\n}'
# Example usage
# Raw string: "\c" is not a valid escape sequence in a normal string literal.
text = r"""A systematic review of IAPS \cite{branco2023systematic}.
Recent advances in data processing \cite{smith2021progress}."""
authors = "John Doe"
titles = ["A Sample Title", "Another Sample Title"]
year = 2020

# Extract citations
citations = find_citations(text)
print("Extracted Citations:", citations)

# Print one formatted entry per citation, pairing each citation with its own
# title: print_bibtex_entry takes a single title string, not a list.
for _citation, title in zip(citations, titles):
    print(print_bibtex_entry(authors, title, year))

# Output:
Extracted Citations: ['branco2023systematic', 'smith2021progress']
@article{2020,
author = {John Doe},
title = {"A Sample Title"},
year = {2020}
}
@article{2020,
author = {John Doe},
title = {"Another Sample Title"},
year = {2020}
}
This script automates the conversion of citations from text format into properly structured BibTeX entries, making it easier to reference works in academic papers and documents. By following these steps, you can efficiently manage your bibliography formatting!