diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e0b3ca39899f..275826daeece 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -943,6 +943,7 @@ class IColumn; M(Bool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \ M(Bool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, -WithNames, -WithNamesAndTypes formats).", IMPORTANT) \ M(Bool, input_format_csv_empty_as_default, true, "Treat empty fields in CSV input as default values.", 0) \ + M(Bool, input_format_csv_default_empty_for_nullable_string, false, "Treat empty as default value for nullable string field.", 0) \ M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices.", 0) \ M(Bool, input_format_null_as_default, true, "Initialize null fields with default values if the data type of this field is not nullable and it is supported by the input format", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 62cbadec4f47..f7bf60731b7c 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -66,6 +66,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.empty_as_default = settings.input_format_csv_empty_as_default; format_settings.csv.enum_as_number = settings.input_format_csv_enum_as_number; format_settings.csv.null_representation = settings.format_csv_null_representation; + format_settings.csv.default_empty_for_nullable_string = settings.input_format_csv_default_empty_for_nullable_string; format_settings.csv.arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv; format_settings.csv.use_best_effort_in_schema_inference = settings.input_format_csv_use_best_effort_in_schema_inference; format_settings.csv.skip_first_lines = settings.input_format_csv_skip_first_lines; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index f936ad2f9195..0e1138458582 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -152,6 +152,7 @@ struct FormatSettings char delimiter = ','; bool allow_single_quotes = true; bool allow_double_quotes = true; + bool default_empty_for_nullable_string = false; bool empty_as_default = false; bool crlf_end_of_line = false; bool allow_cr_end_of_line = false; diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 2c0a25243574..149f6dc13a52 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -367,10 +367,11 @@ bool CSVFormatReader::readField( const bool at_delimiter = !buf->eof() && *buf->position() == format_settings.csv.delimiter; const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || *buf->position() == '\r'); + const bool nullable_string_field_as_empty = format_settings.csv.default_empty_for_nullable_string && type->isNullable() && isString(removeNullable(type)); /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default /// only one empty or NULL column will be expected - if (format_settings.csv.empty_as_default && (at_delimiter || at_last_column_line_end)) + if (format_settings.csv.empty_as_default && (at_delimiter || at_last_column_line_end) && !nullable_string_field_as_empty) { /// Treat empty unquoted column value as default value, if /// specified in the settings. Tuple columns might seem