ace/export.py: 91 changes (76 additions, 15 deletions)
@@ -1,5 +1,5 @@
 from .database import Article
-from sqlalchemy import func, or_
+from sqlalchemy import or_
 import logging
 import csv
 from pathlib import Path
@@ -14,43 +14,90 @@ def export_database(db, foldername, skip_empty=True, table_html=False):
     foldername = Path(foldername)
     foldername.mkdir(parents=True, exist_ok=True)
 
-    article_columns = ['pmid', 'doi', 'authors', 'title', 'journal', 'publication_year', 'coordinate_space']
+    article_columns = [
+        'pmid', 'doi', 'authors', 'title', 'journal',
+        'publication_year', 'coordinate_space'
+    ]
     art_results = []
 
-    coordinate_columns = ['pmid', 'table_id', 'table_label', 'table_caption', 'table_number',
-                          'x', 'y', 'z', 'p_value', 'region', 'size', 'statistic', 'groups']
+    coordinate_columns = [
+        'pmid', 'table_id', 'table_label', 'x', 'y', 'z',
+        'p_value', 'region', 'size', 'statistic', 'groups'
+    ]
     coordinates = []
 
-    text_columns = ['pmid', 'title' ,'abstract', 'body']
+    # New table.csv columns
+    table_columns = [
+        'pmcid', 'table_id', 'table_label', 'table_caption',
+        'table_foot', 'n_header_rows', 'table_raw_file'
+    ]
+    tables_data = []
+
+    text_columns = ['pmid', 'title', 'abstract', 'body']
     texts = []
 
-    nv_colls_col = ['pmid','collection_id']
+    nv_colls_col = ['pmid', 'collection_id']
     nv_colls = []
 
-    nv_images_col = ['pmid','image_id']
+    nv_images_col = ['pmid', 'image_id']
     nv_images = []
 
     print("Exporting database to %s" % foldername)
 
     articles = db.session.query(Article)
     if skip_empty:
-        articles = articles.filter(or_(Article.tables.any(), Article.neurovault_links.any()))
+        articles = articles.filter(
+            or_(Article.tables.any(), Article.neurovault_links.any())
+        )
 
     for art in tqdm(articles):
-        art_results.append([art.id, art.doi, art.authors, art.title, art.journal, art.year, art.space])
+        art_results.append([
+            art.id, art.doi, art.authors, art.title,
+            art.journal, art.year, art.space
+        ])
         texts.append([art.id, art.title, art.abstract, art.text])
 
         for t in art.tables:
+            # Prepare table data row
+            table_foot = t.footnotes if hasattr(t, 'footnotes') else ''
+            n_header_rows = t.header_rows if hasattr(t, 'header_rows') else 1
+            table_raw_file = (
+                f"tables/{art.id}/{t.id}.html"
+                if table_html
+                else ''
+            )
+
+            tables_data.append([
+                art.id,  # Using PMID as pmcid for now
+                t.id,
+                t.label,
+                t.caption,
+                table_foot,
+                n_header_rows,
+                table_raw_file
+            ])
+
             for p in t.activations:
-                if t.number is None: t.number = ''
                 if isinstance(p.groups, str):
                     p.groups = [p.groups]
                 elif p.groups is None:
                     p.groups = []
                 groups = '///'.join(p.groups)
 
-                coordinates.append([art.id, t.id, t.label, t.caption, t.number,
-                                    p.x, p.y, p.z, p.p_value, p.region, p.size, p.statistic, groups])
+                # Only include specified fields for coordinates
+                coordinates.append([
+                    art.id,
+                    t.id,
+                    t.label,
+                    p.x,
+                    p.y,
+                    p.z,
+                    p.p_value,
+                    p.region,
+                    p.size,
+                    p.statistic,
+                    groups
+                ])
 
         for nv in art.neurovault_links:
             if nv.type == 'collection':
@@ -64,11 +111,23 @@ def export_database(db, foldername, skip_empty=True, table_html=False):
         writer.writerow(article_columns)
         writer.writerows(art_results)
 
+    # Save articles as tab separated file
+    with (foldername / 'metadata.csv').open('w', newline='') as f:
+        writer = csv.writer(f)
+        writer.writerow(article_columns)
+        writer.writerows(art_results)
+
     # Save coordinates as tab separated file
     with (foldername / 'coordinates.csv').open('w', newline='') as f:
         writer = csv.writer(f)
         writer.writerow(coordinate_columns)
         writer.writerows(coordinates)
 
+    # Save table data as CSV
+    with (foldername / 'tables.csv').open('w', newline='') as f:
+        writer = csv.writer(f)
+        writer.writerow(table_columns)
+        writer.writerows(tables_data)
+
     # Save texts as tab separated file
     with (foldername / 'text.csv').open('w', newline='') as f:
@@ -77,7 +136,9 @@ def export_database(db, foldername, skip_empty=True, table_html=False):
         writer.writerows(texts)
 
     # Save NV links
-    with (foldername / 'neurovault_collections.csv').open('w', newline='') as f:
+    with (foldername / 'neurovault_collections.csv').open(
+        'w', newline=''
+    ) as f:
         writer = csv.writer(f)
         writer.writerow(nv_colls_col)
         writer.writerows(nv_colls)
@@ -92,9 +153,9 @@ def export_database(db, foldername, skip_empty=True, table_html=False):
         "exported": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
         "n_articles": len(art_results),
         "n_activations": len(coordinates),
+        "n_tables": len(tables_data),
         "n_nv_collections": len(nv_colls),
         "n_nv_images": len(nv_images)
-
     }
 
     with (foldername / 'export.json').open('w') as f:
@@ -107,10 +168,10 @@ def export_database(db, foldername, skip_empty=True, table_html=False):
 
         for art in articles:
             art_dir = tables_dir / str(art.id)
-            art_dir.mkdir(parents=True, exist_ok=True)
 
             for t in art.tables:
                 if t.input_html:
+                    art_dir.mkdir(parents=True, exist_ok=True)
                     table_file = art_dir / f"{t.id}.html"
                     with table_file.open('w', encoding='utf-8') as f:
                         f.write(t.input_html)
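
For reference, a minimal sketch of how the files written by export_database could be read back and joined, assuming an export folder produced with these changes. The folder name ace_export, the csv.DictReader-based loading, and the per-table activation count are illustrative only and not part of this PR; note that the pmcid column of tables.csv currently holds the article PMID (see the inline comment in the diff), so it joins against the pmid column of coordinates.csv.

import csv
from pathlib import Path

export_dir = Path('ace_export')  # hypothetical folder previously passed to export_database

# Load the per-table metadata introduced by this PR (tables.csv)
with (export_dir / 'tables.csv').open(newline='') as f:
    tables = list(csv.DictReader(f))

# Group exported coordinates by (pmid, table_id)
coords_by_table = {}
with (export_dir / 'coordinates.csv').open(newline='') as f:
    for row in csv.DictReader(f):
        coords_by_table.setdefault((row['pmid'], row['table_id']), []).append(row)

# Count activations per exported table; 'pmcid' currently stores the PMID
for t in tables:
    n = len(coords_by_table.get((t['pmcid'], t['table_id']), []))
    print(f"{t['table_id']} ({t['table_label']}): {n} coordinates")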