From 99585942e8ef8e53c5be4be718f53420615d697d Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 17:58:31 +0100 Subject: [PATCH 1/5] Add GReplaceSubstringOptions --- c_glib/arrow-glib/compute.cpp | 230 ++++++++++++++++++ c_glib/arrow-glib/compute.h | 17 ++ c_glib/arrow-glib/compute.hpp | 6 + c_glib/test/test-replace-substring-options.rb | 67 +++++ 4 files changed, 320 insertions(+) create mode 100644 c_glib/test/test-replace-substring-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 7687d2e6a1e..35e41c4119c 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -309,6 +309,9 @@ G_BEGIN_DECLS * #GArrowRankQuantileOptions is a class to customize the `rank_quantile` and * `rank_normal` functions. * + * #GArrowReplaceSubstringOptions is a class to customize the + * `replace_substring` and `replace_substring_regex` functions. + * * There are many functions to compute data on an array. */ @@ -8914,6 +8917,205 @@ garrow_rank_quantile_options_add_sort_key(GArrowRankQuantileOptions *options, garrow_raw_sort_keys_add(arrow_options->sort_keys, sort_key); } +enum { + PROP_REPLACE_SUBSTRING_OPTIONS_PATTERN = 1, + PROP_REPLACE_SUBSTRING_OPTIONS_REPLACEMENT, + PROP_REPLACE_SUBSTRING_OPTIONS_MAX_REPLACEMENTS, +}; + +typedef struct _GArrowReplaceSubstringOptionsPrivate GArrowReplaceSubstringOptionsPrivate; +struct _GArrowReplaceSubstringOptionsPrivate +{ + gchar *pattern; + gchar *replacement; +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowReplaceSubstringOptions, + garrow_replace_substring_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +#define GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object) \ + static_cast( \ + garrow_replace_substring_options_get_instance_private( \ + GARROW_REPLACE_SUBSTRING_OPTIONS(object))) + +static void +garrow_replace_substring_options_dispose(GObject *object) +{ + auto priv = GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object); + if (priv->pattern) { + g_free(priv->pattern); + 
priv->pattern = nullptr; + } + if (priv->replacement) { + g_free(priv->replacement); + priv->replacement = nullptr; + } + G_OBJECT_CLASS(garrow_replace_substring_options_parent_class)->dispose(object); +} + +static void +garrow_replace_substring_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_replace_substring_options_get_raw(GARROW_REPLACE_SUBSTRING_OPTIONS(object)); + auto priv = GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_REPLACE_SUBSTRING_OPTIONS_PATTERN: + { + const gchar *pattern = g_value_get_string(value); + if (priv->pattern) { + g_free(priv->pattern); + } + priv->pattern = g_strdup(pattern); + options->pattern = pattern ? pattern : ""; + } + break; + case PROP_REPLACE_SUBSTRING_OPTIONS_REPLACEMENT: + { + const gchar *replacement = g_value_get_string(value); + if (priv->replacement) { + g_free(priv->replacement); + } + priv->replacement = g_strdup(replacement); + options->replacement = replacement ? replacement : ""; + } + break; + case PROP_REPLACE_SUBSTRING_OPTIONS_MAX_REPLACEMENTS: + options->max_replacements = g_value_get_int64(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_replace_substring_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_replace_substring_options_get_raw(GARROW_REPLACE_SUBSTRING_OPTIONS(object)); + auto priv = GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_REPLACE_SUBSTRING_OPTIONS_PATTERN: + g_value_set_string(value, priv->pattern ? priv->pattern : options->pattern.c_str()); + break; + case PROP_REPLACE_SUBSTRING_OPTIONS_REPLACEMENT: + g_value_set_string(value, + priv->replacement ? 
priv->replacement + : options->replacement.c_str()); + break; + case PROP_REPLACE_SUBSTRING_OPTIONS_MAX_REPLACEMENTS: + g_value_set_int64(value, options->max_replacements); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_replace_substring_options_init(GArrowReplaceSubstringOptions *object) +{ + auto priv = GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object); + priv->pattern = nullptr; + priv->replacement = nullptr; + auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + arrow_priv->options = static_cast( + new arrow::compute::ReplaceSubstringOptions()); + // Sync the private strings with the C++ options + auto arrow_options = + garrow_replace_substring_options_get_raw(GARROW_REPLACE_SUBSTRING_OPTIONS(object)); + priv->pattern = g_strdup(arrow_options->pattern.c_str()); + priv->replacement = g_strdup(arrow_options->replacement.c_str()); +} + +static void +garrow_replace_substring_options_class_init(GArrowReplaceSubstringOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_replace_substring_options_dispose; + gobject_class->set_property = garrow_replace_substring_options_set_property; + gobject_class->get_property = garrow_replace_substring_options_get_property; + + arrow::compute::ReplaceSubstringOptions options; + + GParamSpec *spec; + /** + * GArrowReplaceSubstringOptions:pattern: + * + * Pattern to match, literal, or regular expression depending on which kernel is used. + * + * Since: 23.0.0 + */ + spec = g_param_spec_string( + "pattern", + "Pattern", + "Pattern to match, literal, or regular expression depending on which kernel is used", + options.pattern.c_str(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_REPLACE_SUBSTRING_OPTIONS_PATTERN, + spec); + + /** + * GArrowReplaceSubstringOptions:replacement: + * + * String to replace the pattern with. 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_string("replacement", + "Replacement", + "String to replace the pattern with", + options.replacement.c_str(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_REPLACE_SUBSTRING_OPTIONS_REPLACEMENT, + spec); + + /** + * GArrowReplaceSubstringOptions:max_replacements: + * + * Max number of substrings to replace (-1 means unbounded). + * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("max_replacements", + "Max Replacements", + "Max number of substrings to replace (-1 means unbounded)", + G_MININT64, + G_MAXINT64, + options.max_replacements, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_REPLACE_SUBSTRING_OPTIONS_MAX_REPLACEMENTS, + spec); +} + +/** + * garrow_replace_substring_options_new: + * + * Returns: A newly created #GArrowReplaceSubstringOptions. + * + * Since: 23.0.0 + */ +GArrowReplaceSubstringOptions * +garrow_replace_substring_options_new(void) +{ + return GARROW_REPLACE_SUBSTRING_OPTIONS( + g_object_new(GARROW_TYPE_REPLACE_SUBSTRING_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -9138,6 +9340,12 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_rank_quantile_options_new_raw(arrow_rank_quantile_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "ReplaceSubstringOptions") { + const auto arrow_replace_substring_options = + static_cast(arrow_options); + auto options = + garrow_replace_substring_options_new_raw(arrow_replace_substring_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -10048,3 +10256,25 @@ garrow_rank_quantile_options_get_raw(GArrowRankQuantileOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + 
+GArrowReplaceSubstringOptions * +garrow_replace_substring_options_new_raw( + const arrow::compute::ReplaceSubstringOptions *arrow_options) +{ + return GARROW_REPLACE_SUBSTRING_OPTIONS( + g_object_new(GARROW_TYPE_REPLACE_SUBSTRING_OPTIONS, + "pattern", + arrow_options->pattern.c_str(), + "replacement", + arrow_options->replacement.c_str(), + "max_replacements", + arrow_options->max_replacements, + NULL)); +} + +arrow::compute::ReplaceSubstringOptions * +garrow_replace_substring_options_get_raw(GArrowReplaceSubstringOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 92ec9e86fb6..80bfa506616 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1566,4 +1566,21 @@ void garrow_rank_quantile_options_add_sort_key(GArrowRankQuantileOptions *options, GArrowSortKey *sort_key); +#define GARROW_TYPE_REPLACE_SUBSTRING_OPTIONS \ + (garrow_replace_substring_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowReplaceSubstringOptions, + garrow_replace_substring_options, + GARROW, + REPLACE_SUBSTRING_OPTIONS, + GArrowFunctionOptions) +struct _GArrowReplaceSubstringOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowReplaceSubstringOptions * +garrow_replace_substring_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 726a1d2e75f..bdd27c0a358 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -280,3 +280,9 @@ garrow_rank_quantile_options_new_raw( const arrow::compute::RankQuantileOptions *arrow_options); arrow::compute::RankQuantileOptions * garrow_rank_quantile_options_get_raw(GArrowRankQuantileOptions *options); + +GArrowReplaceSubstringOptions * +garrow_replace_substring_options_new_raw( + const arrow::compute::ReplaceSubstringOptions *arrow_options); 
+arrow::compute::ReplaceSubstringOptions * +garrow_replace_substring_options_get_raw(GArrowReplaceSubstringOptions *options); diff --git a/c_glib/test/test-replace-substring-options.rb b/c_glib/test/test-replace-substring-options.rb new file mode 100644 index 00000000000..14e011d8352 --- /dev/null +++ b/c_glib/test/test-replace-substring-options.rb @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestReplaceSubstringOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::ReplaceSubstringOptions.new + end + + def test_pattern_property + assert_equal("", @options.pattern) + @options.pattern = "foo" + assert_equal("foo", @options.pattern) + end + + def test_replacement_property + assert_equal("", @options.replacement) + @options.replacement = "bar" + assert_equal("bar", @options.replacement) + end + + def test_max_replacements_property + assert_equal(-1, @options.max_replacements) + @options.max_replacements = 1 + assert_equal(1, @options.max_replacements) + end + + def test_replace_substring_function + args = [ + Arrow::ArrayDatum.new(build_string_array(["foo", "this foo that foo", "bar"])), + ] + @options.pattern = "foo" + @options.replacement = "baz" + replace_substring_function = Arrow::Function.find("replace_substring") + result = replace_substring_function.execute(args, @options).value + expected = build_string_array(["baz", "this baz that baz", "bar"]) + assert_equal(expected, result) + end + + def test_replace_substring_with_max_replacements + args = [ + Arrow::ArrayDatum.new(build_string_array(["this foo that foo"])), + ] + @options.pattern = "foo" + @options.replacement = "baz" + @options.max_replacements = 1 + replace_substring_function = Arrow::Function.find("replace_substring") + result = replace_substring_function.execute(args, @options).value + expected = build_string_array(["this baz that foo"]) + assert_equal(expected, result) + end +end From 6733bf9915be9aa06cee1aa9aea906f197409e87 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Wed, 31 Dec 2025 10:19:14 +0100 Subject: [PATCH 2/5] Remove private structure --- c_glib/arrow-glib/compute.cpp | 68 ++++------------------------------- 1 file changed, 7 insertions(+), 61 deletions(-) diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 35e41c4119c..f151e5ed075 100644 --- a/c_glib/arrow-glib/compute.cpp +++
b/c_glib/arrow-glib/compute.cpp @@ -8923,36 +8923,9 @@ enum { PROP_REPLACE_SUBSTRING_OPTIONS_MAX_REPLACEMENTS, }; -typedef struct _GArrowReplaceSubstringOptionsPrivate GArrowReplaceSubstringOptionsPrivate; -struct _GArrowReplaceSubstringOptionsPrivate -{ - gchar *pattern; - gchar *replacement; -}; - -G_DEFINE_TYPE_WITH_PRIVATE(GArrowReplaceSubstringOptions, - garrow_replace_substring_options, - GARROW_TYPE_FUNCTION_OPTIONS) - -#define GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object) \ - static_cast( \ - garrow_replace_substring_options_get_instance_private( \ - GARROW_REPLACE_SUBSTRING_OPTIONS(object))) - -static void -garrow_replace_substring_options_dispose(GObject *object) -{ - auto priv = GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object); - if (priv->pattern) { - g_free(priv->pattern); - priv->pattern = nullptr; - } - if (priv->replacement) { - g_free(priv->replacement); - priv->replacement = nullptr; - } - G_OBJECT_CLASS(garrow_replace_substring_options_parent_class)->dispose(object); -} +G_DEFINE_TYPE(GArrowReplaceSubstringOptions, + garrow_replace_substring_options, + GARROW_TYPE_FUNCTION_OPTIONS) static void garrow_replace_substring_options_set_property(GObject *object, @@ -8962,28 +8935,13 @@ garrow_replace_substring_options_set_property(GObject *object, { auto options = garrow_replace_substring_options_get_raw(GARROW_REPLACE_SUBSTRING_OPTIONS(object)); - auto priv = GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object); switch (prop_id) { case PROP_REPLACE_SUBSTRING_OPTIONS_PATTERN: - { - const gchar *pattern = g_value_get_string(value); - if (priv->pattern) { - g_free(priv->pattern); - } - priv->pattern = g_strdup(pattern); - options->pattern = pattern ? 
pattern : ""; - } + options->pattern = g_value_get_string(value); break; case PROP_REPLACE_SUBSTRING_OPTIONS_REPLACEMENT: - { - const gchar *replacement = g_value_get_string(value); - if (priv->replacement) { - g_free(priv->replacement); - } - priv->replacement = g_strdup(replacement); - options->replacement = replacement ? replacement : ""; - } + options->replacement = g_value_get_string(value); break; case PROP_REPLACE_SUBSTRING_OPTIONS_MAX_REPLACEMENTS: options->max_replacements = g_value_get_int64(value); @@ -9002,16 +8960,13 @@ garrow_replace_substring_options_get_property(GObject *object, { auto options = garrow_replace_substring_options_get_raw(GARROW_REPLACE_SUBSTRING_OPTIONS(object)); - auto priv = GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object); switch (prop_id) { case PROP_REPLACE_SUBSTRING_OPTIONS_PATTERN: - g_value_set_string(value, priv->pattern ? priv->pattern : options->pattern.c_str()); + g_value_set_string(value, options->pattern.c_str()); break; case PROP_REPLACE_SUBSTRING_OPTIONS_REPLACEMENT: - g_value_set_string(value, - priv->replacement ? 
priv->replacement - : options->replacement.c_str()); + g_value_set_string(value, options->replacement.c_str()); break; case PROP_REPLACE_SUBSTRING_OPTIONS_MAX_REPLACEMENTS: g_value_set_int64(value, options->max_replacements); @@ -9025,17 +8980,9 @@ garrow_replace_substring_options_get_property(GObject *object, static void garrow_replace_substring_options_init(GArrowReplaceSubstringOptions *object) { - auto priv = GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object); - priv->pattern = nullptr; - priv->replacement = nullptr; auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); arrow_priv->options = static_cast( new arrow::compute::ReplaceSubstringOptions()); - // Sync the private strings with the C++ options - auto arrow_options = - garrow_replace_substring_options_get_raw(GARROW_REPLACE_SUBSTRING_OPTIONS(object)); - priv->pattern = g_strdup(arrow_options->pattern.c_str()); - priv->replacement = g_strdup(arrow_options->replacement.c_str()); } static void @@ -9043,7 +8990,6 @@ garrow_replace_substring_options_class_init(GArrowReplaceSubstringOptionsClass * { auto gobject_class = G_OBJECT_CLASS(klass); - gobject_class->dispose = garrow_replace_substring_options_dispose; gobject_class->set_property = garrow_replace_substring_options_set_property; gobject_class->get_property = garrow_replace_substring_options_get_property; From e36625097b68132943674204bbc17a7f37c6a6c1 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Wed, 31 Dec 2025 10:20:50 +0100 Subject: [PATCH 3/5] Use nullptr --- c_glib/arrow-glib/compute.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index f151e5ed075..77fd4e563a2 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -9059,7 +9059,7 @@ GArrowReplaceSubstringOptions * garrow_replace_substring_options_new(void) { return GARROW_REPLACE_SUBSTRING_OPTIONS( - g_object_new(GARROW_TYPE_REPLACE_SUBSTRING_OPTIONS, NULL)); + 
g_object_new(GARROW_TYPE_REPLACE_SUBSTRING_OPTIONS, nullptr)); } G_END_DECLS @@ -10215,7 +10215,7 @@ garrow_replace_substring_options_new_raw( arrow_options->replacement.c_str(), "max_replacements", arrow_options->max_replacements, - NULL)); + nullptr)); } arrow::compute::ReplaceSubstringOptions * From 784eabfc0f71ade9d28c7e6074110e35a923509f Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Wed, 31 Dec 2025 10:21:58 +0100 Subject: [PATCH 4/5] -1 is the minimum value for max-replacements --- c_glib/arrow-glib/compute.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 77fd4e563a2..70c0fc40df4 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -9039,7 +9039,7 @@ garrow_replace_substring_options_class_init(GArrowReplaceSubstringOptionsClass * spec = g_param_spec_int64("max_replacements", "Max Replacements", "Max number of substrings to replace (-1 means unbounded)", - G_MININT64, + -1, G_MAXINT64, options.max_replacements, static_cast(G_PARAM_READWRITE)); From 8095ee18c7a9fdeaca8848ecf24ad7aff6f5ec0a Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 31 Dec 2025 18:51:35 +0900 Subject: [PATCH 5/5] Use - for property name separator --- c_glib/arrow-glib/compute.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 70c0fc40df4..da390781b07 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -9030,13 +9030,13 @@ garrow_replace_substring_options_class_init(GArrowReplaceSubstringOptionsClass * spec); /** - * GArrowReplaceSubstringOptions:max_replacements: + * GArrowReplaceSubstringOptions:max-replacements: * * Max number of substrings to replace (-1 means unbounded). 
* * Since: 23.0.0 */ - spec = g_param_spec_int64("max_replacements", + spec = g_param_spec_int64("max-replacements", "Max Replacements", "Max number of substrings to replace (-1 means unbounded)", -1, @@ -10213,7 +10213,7 @@ garrow_replace_substring_options_new_raw( arrow_options->pattern.c_str(), "replacement", arrow_options->replacement.c_str(), - "max_replacements", + "max-replacements", arrow_options->max_replacements, nullptr)); }