Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
9c5d868
Support large pipeline options in Python SDK #37370
Mathdee Jan 21, 2026
0707d08
reformatted code to match the project's standards to pass checks
Mathdee Jan 21, 2026
258ddf8
Fixed PythonFormatterPreCommit script error
Mathdee Jan 21, 2026
6a9cd2f
Fix import order: move google.protobuf above apache_beam
Mathdee Jan 21, 2026
937fd54
Removed blank line between import and google import
Mathdee Jan 21, 2026
f1aa586
Addressed the review comments: cleaned up comments, align error messa…
Mathdee Jan 28, 2026
171cfad
Fixed Formatting Error
Mathdee Jan 28, 2026
4666684
Fixed line length that causes lint error
Mathdee Jan 28, 2026
e64900c
rerun tests
Mathdee Jan 28, 2026
86cfb0c
Rerun tests
Mathdee Jan 28, 2026
d5a2e12
Fix comment formatting in sdk_worker_main.py
shunping Feb 18, 2026
11cbba4
Fix: Update exception handling after bot review
Mathdee Feb 19, 2026
7e38551
Merge branch 'python-file-options-fix' of https://github.com/mathdee/…
Mathdee Feb 19, 2026
f0ce1c0
Merge remote-tracking branch 'upstream/master' into python-file-optio…
Mathdee Feb 19, 2026
2732ff4
Set bootstrap log level to INFO in create_harness
shunping Feb 19, 2026
66026ce
Fix formatting in sdk_worker_main.py
shunping Feb 19, 2026
93b657e
Documents: Updated CHANGES.md for file-based pipeline options
Mathdee Feb 19, 2026
23d75ce
Fix CHANGES.md formatting issues
Mathdee Feb 19, 2026
fcb4454
Fix CHANGES.md, correct issue link
Mathdee Feb 19, 2026
5230a49
Fix CHANGES.md, correct issue link
Mathdee Feb 19, 2026
f83b59a
Updated issue number in brackets to [#37370] in CHANGES.md
Mathdee Feb 19, 2026
c5c8af3
Updated issue number in brackets to [#37370] in CHANGES.md
Mathdee Feb 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,11 @@

## New Features / Improvements

* X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)).
* Added support for large pipeline options via a file (Python) ([#37370](https://github.com/apache/beam/issues/37370)).

## Breaking Changes

* X behavior was changed ([#X](https://github.com/apache/beam/issues/X)).
* The Python SDK container's `boot.go` now passes pipeline options through a file instead of the `PIPELINE_OPTIONS` environment variable. If a user pairs a new Python SDK container with an older SDK version (which does not support the file-based approach), the pipeline options will not be recognized and the pipeline will fail. Users must ensure their SDK and container versions are synchronized ([#37370](https://github.com/apache/beam/issues/37370)).

## Deprecations

Expand Down
27 changes: 25 additions & 2 deletions sdks/python/apache_beam/runners/worker/sdk_worker_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ def _import_beam_plugins(plugins):
def create_harness(environment, dry_run=False):
"""Creates SDK Fn Harness."""

# Bootstrap log level to capture startup events until pipeline options are
# parsed and the actual log level is set.
logging.getLogger().setLevel(logging.INFO)

deferred_exception = None
if 'LOGGING_API_SERVICE_DESCRIPTOR' in environment:
try:
Expand All @@ -93,8 +97,24 @@ def create_harness(environment, dry_run=False):
else:
fn_log_handler = None

pipeline_options_dict = _load_pipeline_options(
environment.get('PIPELINE_OPTIONS'))
options_json = environment.get('PIPELINE_OPTIONS')

# If PIPELINE_OPTIONS_FILE is set, the file's contents replace PIPELINE_OPTIONS.
if 'PIPELINE_OPTIONS_FILE' in environment:
options_file = environment['PIPELINE_OPTIONS_FILE']
try:
with open(options_file, 'r') as f:
options_json = f.read()
_LOGGER.info('Load pipeline options from file: %s', options_file)
except Exception:
_LOGGER.error(
'Failed to load pipeline options from file: %s',
options_file,
exc_info=True)
raise

pipeline_options_dict = _load_pipeline_options(options_json)

default_log_level = _get_log_level_from_options_dict(pipeline_options_dict)
logging.getLogger().setLevel(default_log_level)
_set_log_level_overrides(pipeline_options_dict)
Expand Down Expand Up @@ -239,6 +259,7 @@ def terminate_sdk_harness():


def _load_pipeline_options(options_json):
"""Deserialize the pipeline options from a JSON string into a dictionary."""
if options_json is None:
return {}
options = json.loads(options_json)
Expand All @@ -256,6 +277,8 @@ def _load_pipeline_options(options_json):


def _parse_pipeline_options(options_json):
  """Builds a PipelineOptions object from JSON-encoded pipeline options.

  Args:
    options_json: JSON string of pipeline options, or None. It is handed
      unchanged to _load_pipeline_options for deserialization.

  Returns:
    The PipelineOptions produced by PipelineOptions.from_dictionary from the
    deserialized options dictionary.
  """
  options_dict = _load_pipeline_options(options_json)
  return PipelineOptions.from_dictionary(options_dict)


Expand Down
6 changes: 5 additions & 1 deletion sdks/python/container/boot.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,11 @@ func launchSDKProcess() error {

// (3) Invoke python

os.Setenv("PIPELINE_OPTIONS", options)
// Write the JSON string of pipeline options into a file to prevent "argument list too long" error.
if err := tools.MakePipelineOptionsFileAndEnvVar(options); err != nil {
logger.Fatalf(ctx, "Failed to load pipeline options to worker: %v", err)
}

os.Setenv("SEMI_PERSISTENT_DIRECTORY", *semiPersistDir)
os.Setenv("LOGGING_API_SERVICE_DESCRIPTOR", (&pipepb.ApiServiceDescriptor{Url: *loggingEndpoint}).String())
os.Setenv("CONTROL_API_SERVICE_DESCRIPTOR", (&pipepb.ApiServiceDescriptor{Url: *controlEndpoint}).String())
Expand Down
Loading