From a6f312b23fda8b271aef3e08cb8392b627eab063 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre=20M=C3=A9tras?= <p.metras@nfb.ca>
Date: Tue, 26 Aug 2025 08:41:41 -0400
Subject: [PATCH 1/4] Add globbed filenames to list of filenames

---
 src/parquet_impl.cpp | 64 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 61 insertions(+), 3 deletions(-)
diff --git a/src/parquet_impl.cpp b/src/parquet_impl.cpp
index 20a5695..388cb0e 100644
--- a/src/parquet_impl.cpp
+++ b/src/parquet_impl.cpp
@@ -11,6 +11,7 @@
 #include <math.h>
 #include <list>
 #include <set>
+#include <glob.h>
 
 #include "parquet/arrow/reader.h"
 #include "parquet/arrow/schema.h"
@@ -90,6 +91,7 @@ bool enable_multifile_merge;
 
 static void find_cmp_func(FmgrInfo *finfo, Oid type1, Oid type2);
 static void destroy_parquet_state(void *arg);
+static List * lappend_glob(List *filenames, const char *filename);
 
 
 /*
@@ -468,6 +470,7 @@ parse_filenames_list(const char *str)
     char       *f = cur;
     ParserState state = PS_START;
     List       *filenames = NIL;
+    bool       wildcard_seen = false;
 
     while (*cur)
     {
@@ -483,6 +486,11 @@ parse_filenames_list(const char *str)
                         f = cur + 1;
                         state = PS_QUOTE;
                         break;
+                    case '*':
+                    case '?':
+                    case '[':
+                        wildcard_seen = true;
+                        break;
                     default:
                         /* XXX we should check that *cur is a valid path symbol
                          * but let's skip it for now */
@@ -496,8 +504,12 @@ parse_filenames_list(const char *str)
                 {
                     case ' ':
                         *cur = '\0';
-                        filenames = lappend(filenames, makeString(f));
+                        if (wildcard_seen)
+                            filenames = lappend_glob(filenames, f);
+                        else
+                            filenames = lappend(filenames, makeString(f));
                         state = PS_START;
+                        wildcard_seen = false;
                         break;
                     default:
                         break;
@@ -508,8 +520,12 @@ parse_filenames_list(const char *str)
                 {
                     case '"':
                         *cur = '\0';
-                        filenames = lappend(filenames, makeString(f));
+                        if (wildcard_seen)
+                            filenames = lappend_glob(filenames, f);
+                        else
+                            filenames = lappend(filenames, makeString(f));
                         state = PS_START;
+                        wildcard_seen = false;
                         break;
                     default:
                         break;
@@ -520,11 +536,53 @@ parse_filenames_list(const char *str)
         }
         cur++;
     }
-    filenames = lappend(filenames, makeString(f));
+    if (wildcard_seen)
+        filenames = lappend_glob(filenames, f);
+    else
+        filenames = lappend(filenames, makeString(f));
 
     return filenames;
 }
 
+
+/*
+ * lappend_glob
+ *      The filename is a globbing pathname matching potentially multiple files.
+ *      All the matched file names are added to the list.
+ */
+static List *
+lappend_glob(List *filenames,
+             const char *filename)
+{
+    glob_t  globbuf;
+
+    int error = glob(filename, GLOB_NOCHECK | GLOB_ERR, NULL, &globbuf);
+    switch (error) {
+        case 0:
+            for (size_t i = 0; i < globbuf.gl_pathc; i++)
+            {
+                filenames = lappend(filenames, makeString(globbuf.gl_pathv[i]));
+            }
+            break;
+        case GLOB_NOSPACE:
+            ereport(ERROR,
+                    (errcode(ERRCODE_OUT_OF_MEMORY),
+                    errmsg("running out of memory while globbing Parquet filename \"%s\"",
+                           filename)));
+            break;
+        case GLOB_ABORTED:
+            ereport(ERROR,
+                    (errcode(ERRCODE_IO_ERROR),
+                    errmsg("read error while globbing Parquet filename \"%s\". Check file permissions.",
+                           filename)));
+            break;
+    }
+    globfree(&globbuf);
+
+    return filenames;
+}
+
+
 /*
  * extract_rowgroups_list
  *      Analyze query predicates and using min/max statistics determine which

From a387d543a366fb0a2fd5c96d0653d74049afa808 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre=20M=C3=A9tras?= <p.metras@nfb.ca>
Date: Wed, 27 Aug 2025 15:07:02 -0400
Subject: [PATCH 2/4] Upgrade to PostgreSQL 17 and fix memory bug

---
 src/common.cpp       |  24 ++++++++-
 src/exec_state.cpp   |   1 +
 src/parquet_impl.cpp | 118 +++++++++++++++++++++++--------------------
 3 files changed, 85 insertions(+), 58 deletions(-)

diff --git a/src/common.cpp b/src/common.cpp
index 58fe2fc..ffea9fb 100644
--- a/src/common.cpp
+++ b/src/common.cpp
@@ -10,12 +10,32 @@ extern "C"
 #include "utils/memutils.h"
 #include "utils/memdebug.h"
 #include "utils/timestamp.h"
+#include "utils/palloc.h"
 }
 
+#ifndef PG_VERSION_NUM
+#error "PG_VERSION_NUM is not defined"
+#endif
+
 #if PG_VERSION_NUM < 130000
 #define MAXINT8LEN 25
 #endif
 
+/* --- PG version guards for MemoryContext alloc APIs (PG14+ uses Extended with flags) --- */
+#ifndef PARQUET_FDW_MCXT_GUARD
+#define PARQUET_FDW_MCXT_GUARD
+
+#if PG_VERSION_NUM >= 140000
+  #define PF_MCTX_ALLOC(ctx, sz)        MemoryContextAllocExtended((ctx), (sz), 0)
+  #define PF_MCTX_REALLOC(ctx, p, sz)   MemoryContextReallocExtended((ctx), (p), (sz), 0)
+#else
+  #define PF_MCTX_ALLOC(ctx, sz)        MemoryContextAlloc((ctx), (sz))
+  /* repalloc does not require context in pre-14 */
+  #define PF_MCTX_REALLOC(ctx, p, sz)   repalloc((p), (sz))
+#endif
+
+#endif /* PARQUET_FDW_MCXT_GUARD */
+
 /*
  * exc_palloc
  *      C++ specific memory allocator that utilizes postgres allocation sets.
@@ -34,7 +54,7 @@ exc_palloc(std::size_t size)
 
 	context->isReset = false;
 
-	ret = context->methods->alloc(context, size);
+	ret = PF_MCTX_ALLOC(context, size);
 	if (unlikely(ret == NULL))
 		throw std::bad_alloc();
 
@@ -178,7 +198,7 @@ void datum_to_jsonb(Datum value, Oid typoid, bool isnull, FmgrInfo *outfunc,
                 jb.val.string.val = strval;
             }
             else {
-                Datum numeric;
+                Datum numeric = (Datum) 0;
 
                 switch (typoid)
                 {
diff --git a/src/exec_state.cpp b/src/exec_state.cpp
index dbb2948..a92354a 100644
--- a/src/exec_state.cpp
+++ b/src/exec_state.cpp
@@ -45,6 +45,7 @@ class TrivialExecutionState : public ParquetFdwExecutionState
     Size estimate_coord_size() 
     {
         Assert(false && "estimate_coord_size is not supported for TrivialExecutionState");
+        return 0;
     }
     void init_coord()
     {
diff --git a/src/parquet_impl.cpp b/src/parquet_impl.cpp
index 388cb0e..592c9db 100644
--- a/src/parquet_impl.cpp
+++ b/src/parquet_impl.cpp
@@ -91,7 +91,7 @@ bool enable_multifile_merge;
 
 static void find_cmp_func(FmgrInfo *finfo, Oid type1, Oid type2);
 static void destroy_parquet_state(void *arg);
-static List * lappend_glob(List *filenames, const char *filename);
+static List * lappend_globbed_filenames(List *filenames, const char *filename);
 
 
 /*
@@ -125,7 +125,7 @@ struct ParquetFdwPlanState
 static int
 get_strategy(Oid type, Oid opno, Oid am)
 {
-        Oid opclass;
+    Oid opclass;
     Oid opfamily;
 
     opclass = GetDefaultOpClass(type, am);
@@ -470,7 +470,6 @@ parse_filenames_list(const char *str)
     char       *f = cur;
     ParserState state = PS_START;
     List       *filenames = NIL;
-    bool       wildcard_seen = false;
 
     while (*cur)
     {
@@ -486,11 +485,6 @@ parse_filenames_list(const char *str)
                         f = cur + 1;
                         state = PS_QUOTE;
                         break;
-                    case '*':
-                    case '?':
-                    case '[':
-                        wildcard_seen = true;
-                        break;
                     default:
                         /* XXX we should check that *cur is a valid path symbol
                          * but let's skip it for now */
@@ -504,12 +498,8 @@ parse_filenames_list(const char *str)
                 {
                     case ' ':
                         *cur = '\0';
-                        if (wildcard_seen)
-                            filenames = lappend_glob(filenames, f);
-                        else
-                            filenames = lappend(filenames, makeString(f));
+                        filenames = lappend_globbed_filenames(filenames, f);
                         state = PS_START;
-                        wildcard_seen = false;
                         break;
                     default:
                         break;
@@ -520,12 +510,8 @@ parse_filenames_list(const char *str)
                 {
                     case '"':
                         *cur = '\0';
-                        if (wildcard_seen)
-                            filenames = lappend_glob(filenames, f);
-                        else
-                            filenames = lappend(filenames, makeString(f));
+                        filenames = lappend_globbed_filenames(filenames, f);
                         state = PS_START;
-                        wildcard_seen = false;
                         break;
                     default:
                         break;
@@ -536,46 +522,57 @@ parse_filenames_list(const char *str)
         }
         cur++;
     }
-    if (wildcard_seen)
-        filenames = lappend_glob(filenames, f);
-    else
-        filenames = lappend(filenames, makeString(f));
+    filenames = lappend_globbed_filenames(filenames, f);
 
     return filenames;
 }
 
 
 /*
- * lappend_glob
- *      The filename is a globbing pathname matching potentially multiple files.
+ * lappend_globbed_filenames
+ *      The filename can be a globbing pathname matching potentially multiple files.
  *      All the matched file names are added to the list.
  */
 static List *
-lappend_glob(List *filenames,
-             const char *filename)
+lappend_globbed_filenames(List *filenames,
+                          const char *filename)
 {
     glob_t  globbuf;
 
-    int error = glob(filename, GLOB_NOCHECK | GLOB_ERR, NULL, &globbuf);
+    globbuf.gl_offs = 0;
+    int error = glob(filename, GLOB_ERR | GLOB_NOCHECK, NULL, &globbuf);
     switch (error) {
         case 0:
-            for (size_t i = 0; i < globbuf.gl_pathc; i++)
+            for (size_t i = globbuf.gl_offs; i < globbuf.gl_pathc; i++)
             {
-                filenames = lappend(filenames, makeString(globbuf.gl_pathv[i]));
+                elog(DEBUG1, "parquet_fdw: adding globbed filename %s to list of files", globbuf.gl_pathv[i]);
+                filenames = lappend(filenames, makeString(pstrdup(globbuf.gl_pathv[i])));
             }
             break;
         case GLOB_NOSPACE:
             ereport(ERROR,
                     (errcode(ERRCODE_OUT_OF_MEMORY),
-                    errmsg("running out of memory while globbing Parquet filename \"%s\"",
+                    errmsg("parquet_fdw: running out of memory while globbing Parquet filename \"%s\"",
                            filename)));
             break;
         case GLOB_ABORTED:
             ereport(ERROR,
                     (errcode(ERRCODE_IO_ERROR),
-                    errmsg("read error while globbing Parquet filename \"%s\". Check file permissions.",
+                    errmsg("parquet_fdw: read error while globbing Parquet filename \"%s\". Check file permissions.",
+                           filename)));
+            break;
+        // Should not come here as we use GLOB_NOCHECK flag
+        case GLOB_NOMATCH:
+            ereport(ERROR,
+                    (errcode(ERRCODE_UNDEFINED_FILE),
+                    errmsg("parquet_fdw: no Parquet filename matches \"%s\". Check path.",
                            filename)));
             break;
+        default:
+            ereport(ERROR,
+                    (errcode(ERRCODE_SYSTEM_ERROR),
+                    errmsg("parquet_fdw: unknown error; no Parquet filename matches \"%s\". Check path and permissions.",
+                           filename)));
     }
     globfree(&globbuf);
 
@@ -1376,13 +1373,14 @@ parquetGetForeignPaths(PlannerInfo *root,
     }
 
     foreign_path = (Path *) create_foreignscan_path(root, baserel,
-                                                    NULL,	/* default pathtarget */
+                                                    NULL,      /* pathtarget (default) */
                                                     baserel->rows,
                                                     startup_cost,
                                                     total_cost,
-                                                    NULL,   /* no pathkeys */
-                                                    NULL,	/* no outer rel either */
-                                                    NULL,	/* no extra plan */
+                                                    pathkeys,
+                                                    NULL,      /* required_outer */
+                                                    NULL,      /* fdw_outerpath */
+                                                    NULL,      /* fdw_restrictinfo */
                                                     (List *) fdw_private);
     if (!enable_multifile && is_multi)
         foreign_path->total_cost += disable_cost;
@@ -1402,13 +1400,14 @@ parquetGetForeignPaths(PlannerInfo *root,
         memcpy(private_sort, fdw_private, sizeof(ParquetFdwPlanState));
 
         path = (Path *) create_foreignscan_path(root, baserel,
-                                                NULL,	/* default pathtarget */
+                                                NULL,      /* pathtarget (default) */
                                                 baserel->rows,
                                                 startup_cost,
                                                 total_cost,
                                                 pathkeys,
-                                                NULL,	/* no outer rel either */
-                                                NULL,	/* no extra plan */
+                                                NULL,      /* required_outer */
+                                                NULL,      /* fdw_outerpath */
+                                                NULL,      /* fdw_restrictinfo */
                                                 (List *) private_sort);
 
         /* For multifile case calculate the cost of merging files */
@@ -1443,14 +1442,15 @@ parquetGetForeignPaths(PlannerInfo *root,
 
         Path *path = (Path *)
                  create_foreignscan_path(root, baserel,
-                                         NULL,	/* default pathtarget */
-                                         rows_per_worker,
-                                         startup_cost,
-                                         startup_cost + run_cost / (num_workers + 1),
-                                         use_pathkeys ? pathkeys : NULL,
-                                         NULL,	/* no outer rel either */
-                                         NULL,	/* no extra plan */
-                                         (List *) private_parallel);
+                                        NULL,      /* pathtarget (default) */
+                                        rows_per_worker,
+                                        startup_cost,
+                                        startup_cost + run_cost / (num_workers + 1),
+                                        use_pathkeys ? pathkeys : NULL,
+                                        NULL,      /* required_outer */
+                                        NULL,      /* fdw_outerpath */
+                                        NULL,      /* fdw_restrictinfo */
+                                        (List *) private_parallel);
 
         path->parallel_workers = num_workers;
         path->parallel_aware   = true;
@@ -1474,14 +1474,15 @@ parquetGetForeignPaths(PlannerInfo *root,
 
             path = (Path *)
                      create_foreignscan_path(root, baserel,
-                                             NULL,	/* default pathtarget */
-                                             rows_per_worker,
-                                             startup_cost,
-                                             total_cost,
-                                             pathkeys,
-                                             NULL,	/* no outer rel either */
-                                             NULL,	/* no extra plan */
-                                             (List *) private_parallel_merge);
+                                            NULL,      /* pathtarget (default) */
+                                            rows_per_worker,
+                                            startup_cost,
+                                            total_cost,
+                                            pathkeys,
+                                            NULL,      /* required_outer */
+                                            NULL,      /* fdw_outerpath */
+                                            NULL,      /* fdw_restrictinfo */
+                                            (List *) private_parallel_merge);
 
             cost_merge(path, list_length(private_parallel_merge->filenames),
                        startup_cost, total_cost, path->rows);
@@ -1509,6 +1510,10 @@ parquetGetForeignPlan(PlannerInfo * /* root */,
                       Plan *outer_plan)
 {
     ParquetFdwPlanState *fdw_private = (ParquetFdwPlanState *) best_path->fdw_private;
+    if (fdw_private == NULL) {
+        fdw_private = (ParquetFdwPlanState *) palloc0(sizeof(ParquetFdwPlanState));
+    }
+
     Index		scan_relid = baserel->relid;
     List       *attrs_used = NIL;
     List       *attrs_sorted = NIL;
@@ -1532,8 +1537,9 @@ parquetGetForeignPlan(PlannerInfo * /* root */,
      * Nodes. So we need to convert everything in nodes and store it in a List.
      */
     attr = -1;
-    while ((attr = bms_next_member(fdw_private->attrs_used, attr)) >= 0)
-        attrs_used = lappend_int(attrs_used, attr);
+    if (fdw_private->attrs_used) 
+        while ((attr = bms_next_member(fdw_private->attrs_used, attr)) >= 0)
+            attrs_used = lappend_int(attrs_used, attr);
 
     foreach (lc, fdw_private->attrs_sorted)
         attrs_sorted = lappend_int(attrs_sorted, lfirst_int(lc));

From e094be44f08dd8fac40ecb3a91ed545fc652f900 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre=20M=C3=A9tras?= <p.metras@nfb.ca>
Date: Wed, 27 Aug 2025 15:43:07 -0400
Subject: [PATCH 3/4] Update documentation

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index a573f9f..27c3c17 100644
--- a/README.md
+++ b/README.md
@@ -44,6 +44,8 @@ options (
 );
 ```
 
+You can use [globbing](https://en.wikipedia.org/wiki/Glob_(programming)) to list all the Parquet files, like `options (filename '/mnt/userdata*.parquet')` and it will import all matching files. This can be usefull when you have a Hive directory structure, for instance organized by `year/month/day` and you can consider all Parquet files with `/mnt/userdata/**/*.parquet`.
+
 ## Advanced
 
 Currently `parquet_fdw` supports the following column [types](https://github.com/apache/arrow/blob/master/cpp/src/arrow/type.h):

From a9a5d5b00a3978aef4a3bcbf563779494c68be49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre=20M=C3=A9tras?= <p.metras@nfb.ca>
Date: Tue, 2 Sep 2025 09:38:28 -0400
Subject: [PATCH 4/4] Support for filenames enumeration using braces

---
 README.md            | 2 +-
 src/parquet_impl.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 27c3c17..ef5aa32 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ options (
 );
 ```
 
-You can use [globbing](https://en.wikipedia.org/wiki/Glob_(programming)) to list all the Parquet files, like `options (filename '/mnt/userdata*.parquet')` and it will import all matching files. This can be usefull when you have a Hive directory structure, for instance organized by `year/month/day` and you can consider all Parquet files with `/mnt/userdata/**/*.parquet`.
+You can use [globbing](https://en.wikipedia.org/wiki/Glob_(programming)) to list all the Parquet files, like `options (filename '/mnt/userdata*.parquet')` and it will import all matching files. This can be usefull when you have a Hive directory structure, for instance organized by `year/month/day` and you can consider all Parquet files with `/mnt/userdata/**/*.parquet`. You can also use name enumerations using braces like `/mnt/userdata/data_{1,3}.parquet` that will consider only files `data_1.parquet` and `data_3.parquet`.
 
 ## Advanced
 
diff --git a/src/parquet_impl.cpp b/src/parquet_impl.cpp
index 592c9db..7184594 100644
--- a/src/parquet_impl.cpp
+++ b/src/parquet_impl.cpp
@@ -540,7 +540,7 @@ lappend_globbed_filenames(List *filenames,
     glob_t  globbuf;
 
     globbuf.gl_offs = 0;
-    int error = glob(filename, GLOB_ERR | GLOB_NOCHECK, NULL, &globbuf);
+    int error = glob(filename, GLOB_ERR | GLOB_NOCHECK | GLOB_BRACE, NULL, &globbuf);
     switch (error) {
         case 0:
             for (size_t i = globbuf.gl_offs; i < globbuf.gl_pathc; i++)