ace/export.py: 91 changes (76 additions, 15 deletions)
@@ -1,5 +1,5 @@
 from .database import Article
-from sqlalchemy import func, or_
+from sqlalchemy import or_
 import logging
 import csv
 from pathlib import Path
@@ -14,43 +14,90 @@ def export_database(db, foldername, skip_empty=True, table_html=False):
     foldername = Path(foldername)
     foldername.mkdir(parents=True, exist_ok=True)
 
-    article_columns = ['pmid', 'doi', 'authors', 'title', 'journal', 'publication_year', 'coordinate_space']
+    article_columns = [
+        'pmid', 'doi', 'authors', 'title', 'journal',
+        'publication_year', 'coordinate_space'
+    ]
     art_results = []
 
-    coordinate_columns = ['pmid', 'table_id', 'table_label', 'table_caption', 'table_number',
-                          'x', 'y', 'z', 'p_value', 'region', 'size', 'statistic', 'groups']
+    coordinate_columns = [
+        'pmid', 'table_id', 'table_label', 'x', 'y', 'z',
+        'p_value', 'region', 'size', 'statistic', 'groups'
+    ]
     coordinates = []
 
-    text_columns = ['pmid', 'title' ,'abstract', 'body']
+    # New table.csv columns
+    table_columns = [
+        'pmcid', 'table_id', 'table_label', 'table_caption',
+        'table_foot', 'n_header_rows', 'table_raw_file'
+    ]
+    tables_data = []
+
+    text_columns = ['pmid', 'title', 'abstract', 'body']
     texts = []
 
-    nv_colls_col = ['pmid','collection_id']
+    nv_colls_col = ['pmid', 'collection_id']
     nv_colls = []
 
-    nv_images_col = ['pmid','image_id']
+    nv_images_col = ['pmid', 'image_id']
     nv_images = []
 
     print("Exporting database to %s" % foldername)
 
     articles = db.session.query(Article)
     if skip_empty:
-        articles = articles.filter(or_(Article.tables.any(), Article.neurovault_links.any()))
+        articles = articles.filter(
+            or_(Article.tables.any(), Article.neurovault_links.any())
+        )
 
     for art in tqdm(articles):
-        art_results.append([art.id, art.doi, art.authors, art.title, art.journal, art.year, art.space])
+        art_results.append([
+            art.id, art.doi, art.authors, art.title,
+            art.journal, art.year, art.space
+        ])
         texts.append([art.id, art.title, art.abstract, art.text])
 
         for t in art.tables:
+            # Prepare table data row
+            table_foot = t.footnotes if hasattr(t, 'footnotes') else ''
+            n_header_rows = t.header_rows if hasattr(t, 'header_rows') else 1
+            table_raw_file = (
+                f"tables/{art.id}/{t.id}.html"
+                if table_html
+                else ''
+            )
+
+            tables_data.append([
+                art.id,  # Using PMID as pmcid for now
+                t.id,
+                t.label,
+                t.caption,
+                table_foot,
+                n_header_rows,
+                table_raw_file
+            ])
+
             for p in t.activations:
-                if t.number is None: t.number = ''
                 if isinstance(p.groups, str):
                     p.groups = [p.groups]
                 elif p.groups is None:
                     p.groups = []
                 groups = '///'.join(p.groups)
 
-                coordinates.append([art.id, t.id, t.label, t.caption, t.number,
-                                    p.x, p.y, p.z, p.p_value, p.region, p.size, p.statistic, groups])
+                # Only include specified fields for coordinates
+                coordinates.append([
+                    art.id,
+                    t.id,
+                    t.label,
+                    p.x,
+                    p.y,
+                    p.z,
+                    p.p_value,
+                    p.region,
+                    p.size,
+                    p.statistic,
+                    groups
+                ])
 
         for nv in art.neurovault_links:
             if nv.type == 'collection':
@@ -64,11 +111,23 @@ def export_database(db, foldername, skip_empty=True, table_html=False):
         writer.writerow(article_columns)
         writer.writerows(art_results)
 
+    # Save articles as tab separated file
+    with (foldername / 'metadata.csv').open('w', newline='') as f:
+        writer = csv.writer(f)
+        writer.writerow(article_columns)
+        writer.writerows(art_results)
+
     # Save coordinates as tab separated file
     with (foldername / 'coordinates.csv').open('w', newline='') as f:
         writer = csv.writer(f)
         writer.writerow(coordinate_columns)
         writer.writerows(coordinates)
 
+    # Save table data as CSV
+    with (foldername / 'tables.csv').open('w', newline='') as f:
+        writer = csv.writer(f)
+        writer.writerow(table_columns)
+        writer.writerows(tables_data)
+
     # Save texts as tab separated file
     with (foldername / 'text.csv').open('w', newline='') as f:
@@ -77,7 +136,9 @@ def export_database(db, foldername, skip_empty=True, table_html=False):
         writer.writerows(texts)
 
     # Save NV links
-    with (foldername / 'neurovault_collections.csv').open('w', newline='') as f:
+    with (foldername / 'neurovault_collections.csv').open(
+        'w', newline=''
+    ) as f:
         writer = csv.writer(f)
         writer.writerow(nv_colls_col)
         writer.writerows(nv_colls)
@@ -92,9 +153,9 @@ def export_database(db, foldername, skip_empty=True, table_html=False):
         "exported": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
         "n_articles": len(art_results),
         "n_activations": len(coordinates),
+        "n_tables": len(tables_data),
         "n_nv_collections": len(nv_colls),
         "n_nv_images": len(nv_images)
-
     }
 
     with (foldername / 'export.json').open('w') as f:
@@ -107,10 +168,10 @@ def export_database(db, foldername, skip_empty=True, table_html=False):
 
         for art in articles:
             art_dir = tables_dir / str(art.id)
-            art_dir.mkdir(parents=True, exist_ok=True)
 
             for t in art.tables:
                 if t.input_html:
+                    art_dir.mkdir(parents=True, exist_ok=True)
                     table_file = art_dir / f"{t.id}.html"
                     with table_file.open('w', encoding='utf-8') as f:
                         f.write(t.input_html)
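
For reference, a minimal sketch of how the files written by export_database could be read back and joined, assuming an export folder produced with these changes. The folder name ace_export, the csv.DictReader-based loading, and the per-table activation count are illustrative only and not part of this PR; note that the pmcid column of tables.csv currently holds the article PMID (see the inline comment in the diff), so it joins against the pmid column of coordinates.csv.

import csv
from pathlib import Path

export_dir = Path('ace_export')  # hypothetical folder previously passed to export_database

# Load the per-table metadata introduced by this PR (tables.csv)
with (export_dir / 'tables.csv').open(newline='') as f:
    tables = list(csv.DictReader(f))

# Group exported coordinates by (pmid, table_id)
coords_by_table = {}
with (export_dir / 'coordinates.csv').open(newline='') as f:
    for row in csv.DictReader(f):
        coords_by_table.setdefault((row['pmid'], row['table_id']), []).append(row)

# Count activations per exported table; 'pmcid' currently stores the PMID
for t in tables:
    n = len(coords_by_table.get((t['pmcid'], t['table_id']), []))
    print(f"{t['table_id']} ({t['table_label']}): {n} coordinates")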