fileStatuses = new ArrayList<>();
+
+ String dirId = dirPath.getName();
+ if (dirId == null) {
+ throw new IOException("Directory not found: " + dirPath);
+ }
+
+ // Query Google Drive for files in the directory
+ StringBuilder query = new StringBuilder("'" + dirId + "' in parents and trashed = false");
+ query.append(" and ").append(getGoogleNativeExclusionQuery());
+ if (!Strings.isNullOrEmpty(filter)) {
+ query.append(" and ").append(filter);
+ }
+
+ FileList result = driveService.files().list()
+ .setQ(query.toString())
+ .setFields("files(id, name, mimeType, size, modifiedTime)")
+ .setSupportsAllDrives(true)
+ .setSupportsTeamDrives(true)
+ .setIncludeItemsFromAllDrives(true)
+ .execute();
+
+ for (File file : result.getFiles()) {
+ boolean isDirectory = GOOGLE_DRIVE_FOLDER_MIME_TYPE.equals(file.getMimeType());
+
+ Path filePathWithFilePrefix;
+ if (isDirectory) {
+ filePathWithFilePrefix = new Path(String.format("%s://%s%s/%s",
+ GoogleDriveSourceConfig.GOOGLE_DRIVE_SCHEMA,
+ GoogleDriveSourceConfig.GOOGLE_DRIVE_AUTHORITY,
+ GoogleDriveSourceConfig.GOOGLE_DRIVE_FOLDER_PATH_PREFIX,
+ file.getId()));
+ } else {
+ filePathWithFilePrefix = new Path(String.format("%s://%s%s/%s/%s",
+ GoogleDriveSourceConfig.GOOGLE_DRIVE_SCHEMA,
+ GoogleDriveSourceConfig.GOOGLE_DRIVE_AUTHORITY,
+ GoogleDriveSourceConfig.GOOGLE_DRIVE_FILE_PATH_PREFIX,
+ file.getId(), file.getName()));
+ }
+ FileStatus fileStatus = new FileStatus(
+ isDirectory ? 0 : file.getSize(),
+ isDirectory,
+ 1,
+ file.getSize() == null ? 0 : file.getSize(),
+ file.getModifiedTime().getValue(),
+ 0,
+ FsPermission.getDefault(),
+ "owner",
+ "group",
+ filePathWithFilePrefix
+ );
+
+ fileStatuses.add(fileStatus);
+ }
+
+ return fileStatuses.toArray(new FileStatus[0]);
+ }
+
+ /**
+ * Retrieves the status of a specific file or directory in Google Drive.
+ * @param driveService The authenticated {@link Drive} service instance used to interact with the Drive API.
+ * @param path The Hadoop {@link Path} representing the target file or directory.
+ * @param isDir Indicates whether the path is a directory.
+ * @param objectId The Google Drive file ID corresponding to the path.
+ */
+ public static FileStatus listObjectStatus(Drive driveService, Path path, boolean isDir, String objectId)
+ throws IOException {
+ File fileSummary = driveService.files()
+ .get(objectId)
+ .setFields(FIELDS_TO_RETURN)
+ .setSupportsAllDrives(true)
+ .execute();
+
+ Path pathWithFileName = path;
+ // if file name not equal to default file name, then we need to change the path with correct file name
+ if (!isDir && !fileSummary.getName().equals(GoogleDriveSourceConfig.GOOGLE_DRIVE_DEFAULT_FILENAME) &&
+ path.getName().equals(GoogleDriveSourceConfig.GOOGLE_DRIVE_DEFAULT_FILENAME)) {
+ pathWithFileName = new Path(path.getParent(), fileSummary.getName());
+ }
+ return new FileStatus(
+ fileSummary.getSize() == null ? 0 : fileSummary.getSize(),
+ isDir,
+ 1,
+ fileSummary.getSize() == null ? 0 : fileSummary.getSize(),
+ fileSummary.getModifiedTime().getValue(),
+ 0,
+ FsPermission.getDefault(),
+ "owner",
+ "group",
+ pathWithFileName
+ );
+ }
+
+ /**
+ * Builds the Google Drive query clause that excludes every MIME type listed in
+ * {@link #GOOGLE_NATIVE_MIME_TYPES}, emitting one {@code mimeType != '...'} term per entry.
+ *
+ * The result is combined with other conditions in the Drive {@code files().list().setQ(...)} query.
+ * Example output (wrapped here for readability; the returned string contains no line breaks):
+ *
+ * mimeType != 'application/vnd.google-apps.document' and
+ * mimeType != 'application/vnd.google-apps.spreadsheet' and ...
+ *
+ * @return the exclusion terms joined by " and ", or an empty string when no MIME types are listed
+ */
+ private static String getGoogleNativeExclusionQuery() {
+ StringBuilder exclusions = new StringBuilder();
+ for (Object mimeType : GOOGLE_NATIVE_MIME_TYPES) {
+ if (exclusions.length() > 0) {
+ exclusions.append(" and ");
+ }
+ exclusions.append("mimeType != '").append(mimeType).append("'");
+ }
+ return exclusions.toString();
+ }
+}
diff --git a/src/main/java/io/cdap/plugin/google/sheets/sink/GoogleSheetsSinkConfig.java b/src/main/java/io/cdap/plugin/google/sheets/sink/GoogleSheetsSinkConfig.java
index 1128d538..5b101d1b 100644
--- a/src/main/java/io/cdap/plugin/google/sheets/sink/GoogleSheetsSinkConfig.java
+++ b/src/main/java/io/cdap/plugin/google/sheets/sink/GoogleSheetsSinkConfig.java
@@ -167,7 +167,8 @@ public class GoogleSheetsSinkConfig extends GoogleInputSchemaFieldsUsageConfig {
* @param schema the schema to check compatibility
*/
public void validate(FailureCollector collector, Schema schema) {
- super.validate(collector);
+ super.getValidationResult(collector);
+ collector.getOrThrowException();
// validate spreadsheet name field is in schema and has valid format
validateSchemaField(collector, schema, SCHEMA_SPREAD_SHEET_NAME_FIELD_NAME, schemaSpreadsheetNameFieldName,
diff --git a/src/main/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSource.java b/src/main/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSource.java
index 3752f07c..54b8fdd6 100644
--- a/src/main/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSource.java
+++ b/src/main/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSource.java
@@ -54,7 +54,7 @@ public GoogleSheetsSource(GoogleSheetsSourceConfig config) {
public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
StageConfigurer stageConfigurer = pipelineConfigurer.getStageConfigurer();
FailureCollector failureCollector = pipelineConfigurer.getStageConfigurer().getFailureCollector();
- config.validate(failureCollector);
+ config.getValidationResult(failureCollector);
failureCollector.getOrThrowException();
Schema configuredSchema = config.getSchema(failureCollector);
@@ -64,7 +64,7 @@ public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
@Override
public void prepareRun(BatchSourceContext context) {
FailureCollector failureCollector = context.getFailureCollector();
- config.validate(failureCollector);
+ config.getValidationResult(failureCollector);
failureCollector.getOrThrowException();
Schema configSchema = config.getSchema(failureCollector);
diff --git a/src/main/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfig.java b/src/main/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfig.java
index 161835d9..1ccfbbac 100644
--- a/src/main/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfig.java
+++ b/src/main/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfig.java
@@ -318,8 +318,8 @@ private boolean shouldGetSchema() {
* @param collector the failure collector is provided
* @return The ValidationResult
*/
- public ValidationResult validate(FailureCollector collector) {
- ValidationResult validationResult = super.validate(collector);
+ public ValidationResult getValidationResult(FailureCollector collector) {
+ ValidationResult validationResult = super.getValidationResult(collector);
// reset current headers info
dataSchemaInfo = new LinkedHashMap<>();
diff --git a/src/main/java/io/cdap/plugin/google/sheets/source/SheetTransformer.java b/src/main/java/io/cdap/plugin/google/sheets/source/SheetTransformer.java
index 33bdba90..55ed8f9a 100644
--- a/src/main/java/io/cdap/plugin/google/sheets/source/SheetTransformer.java
+++ b/src/main/java/io/cdap/plugin/google/sheets/source/SheetTransformer.java
@@ -69,8 +69,8 @@ public static StructuredRecord transform(RowRecord rowRecord, Schema schema, boo
builder.set(metadataRecordName, rowRecord.getMetadata());
} else {
ComplexSingleValueColumn complexSingleValueColumn = rowRecord.getHeaderedCells().get(name);
- if (complexSingleValueColumn == null || (complexSingleValueColumn.getData() == null
- && complexSingleValueColumn.getSubColumns().isEmpty())) {
+ if (complexSingleValueColumn == null || complexSingleValueColumn.getData() == null
+ || complexSingleValueColumn.getSubColumns() == null || complexSingleValueColumn.getSubColumns().isEmpty()) {
builder.set(name, null);
} else {
processCellData(builder, field, complexSingleValueColumn);
@@ -129,7 +129,7 @@ private static StructuredRecord processRecord(Schema fieldSchema, ComplexSingleV
for (Schema.Field subField : fieldSchema.getFields()) {
String subFieldName = subField.getName();
ComplexSingleValueColumn complexSubColumn = complexSingleValueColumn.getSubColumns().get(subFieldName);
- if (complexSubColumn.getData() == null) {
+ if (complexSubColumn == null || complexSubColumn.getData() == null) {
builder.set(subFieldName, null);
} else {
processCellData(builder, subField, complexSubColumn);
diff --git a/src/test/java/io/cdap/plugin/google/common/GoogleAuthBaseConfigTest.java b/src/test/java/io/cdap/plugin/google/common/GoogleAuthBaseConfigTest.java
index 2c5454db..ed22afb2 100644
--- a/src/test/java/io/cdap/plugin/google/common/GoogleAuthBaseConfigTest.java
+++ b/src/test/java/io/cdap/plugin/google/common/GoogleAuthBaseConfigTest.java
@@ -60,7 +60,7 @@ public void testValidationErrorFilePath() {
config.setIdentifierType(IdentifierType.FILE_IDENTIFIER.name());
config.setFileIdentifier("fileId");
FailureCollector collector = new DefaultFailureCollector("stageConfig", Collections.EMPTY_MAP);
- config.validate(collector);
+ config.getValidationResult(collector);
Assert.assertEquals(1, collector.getValidationFailures().size());
Assert.assertEquals("Service Account File Path is not available.",
collector.getValidationFailures().get(0).getMessage());
@@ -81,7 +81,7 @@ public void testValidationErrorJSON() {
config.setFileIdentifier("fileId");
config.setIdentifierType(IdentifierType.FILE_IDENTIFIER.name());
FailureCollector collector = new DefaultFailureCollector("stageConfig", Collections.EMPTY_MAP);
- config.validate(collector);
+ config.getValidationResult(collector);
Assert.assertEquals(1, collector.getValidationFailures().size());
Assert.assertEquals("Service Account JSON can not be empty.",
collector.getValidationFailures().get(0).getMessage());
@@ -101,7 +101,7 @@ public void testValidationOauthWithoutAccessToken() {
config.setIdentifierType(IdentifierType.FILE_IDENTIFIER.name());
config.setFileIdentifier("fileId");
FailureCollector collector = new DefaultFailureCollector("stageConfig", Collections.EMPTY_MAP);
- config.validate(collector);
+ config.getValidationResult(collector);
Assert.assertEquals(1, collector.getValidationFailures().size());
Assert.assertEquals("'Access Token' property is empty or macro is not available.",
collector.getValidationFailures().get(0).getMessage());
@@ -121,7 +121,7 @@ public void testValidationOauthWithoutRefreshToken() {
config.setFileIdentifier("fileId");
config.setIdentifierType(IdentifierType.FILE_IDENTIFIER.name());
FailureCollector collector = new DefaultFailureCollector("stageConfig", Collections.EMPTY_MAP);
- config.validate(collector);
+ config.getValidationResult(collector);
Assert.assertEquals(3, collector.getValidationFailures().size());
Assert.assertEquals("'Client ID' property is empty or macro is not available.",
collector.getValidationFailures().get(0).getMessage());
@@ -148,7 +148,7 @@ public void testWithDirectoryIdAsNull() {
config.setAccessToken("access");
config.setoAuthMethod(OAuthMethod.ACCESS_TOKEN.name());
FailureCollector collector = new DefaultFailureCollector("stageConfig", Collections.EMPTY_MAP);
- config.validate(collector);
+ config.getValidationResult(collector);
Assert.assertEquals(2, collector.getValidationFailures().size());
Assert.assertEquals("Directory Identifier can not be null.",
collector.getValidationFailures().get(0).getMessage());
@@ -171,7 +171,7 @@ public void testWithFileIdAsNull() {
config.setIdentifierType(IdentifierType.FILE_IDENTIFIER.name());
config.setoAuthMethod(OAuthMethod.ACCESS_TOKEN.name());
FailureCollector collector = new DefaultFailureCollector("stageConfig", Collections.EMPTY_MAP);
- config.validate(collector);
+ config.getValidationResult(collector);
Assert.assertEquals(2, collector.getValidationFailures().size());
Assert.assertEquals("File Identifier can not be null.",
collector.getValidationFailures().get(0).getMessage());
diff --git a/src/test/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfigTest.java b/src/test/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfigTest.java
index 65d8b633..c6135594 100644
--- a/src/test/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfigTest.java
+++ b/src/test/java/io/cdap/plugin/google/sheets/source/GoogleSheetsSourceConfigTest.java
@@ -104,7 +104,7 @@ public void testGetMetadataCoordinates() throws NoSuchFieldException, IllegalAcc
}
@Test
- public void testValidateMetadataCellsOnlyHeader() throws NoSuchFieldException, IllegalAccessException,
+ public void testGetValidationResultMetadataCellsOnlyHeader() throws NoSuchFieldException, IllegalAccessException,
NoSuchMethodException, InvocationTargetException {
Method validateMetadataCellsMethod =
config.getClass().getDeclaredMethod("validateMetadataCells", FailureCollector.class);
@@ -141,7 +141,7 @@ public void testValidateMetadataCellsOnlyHeader() throws NoSuchFieldException, I
}
@Test
- public void testValidateMetadataCellsOnlyFooter() throws NoSuchFieldException, IllegalAccessException,
+ public void testGetValidationResultMetadataCellsOnlyFooter() throws NoSuchFieldException, IllegalAccessException,
NoSuchMethodException, InvocationTargetException {
Method validateMetadataCellsMethod =
config.getClass().getDeclaredMethod("validateMetadataCells", FailureCollector.class);
@@ -180,7 +180,7 @@ public void testValidateMetadataCellsOnlyFooter() throws NoSuchFieldException, I
}
@Test
- public void testValidateMetadataCellsHeaderAndFooter() throws NoSuchFieldException, IllegalAccessException,
+ public void testGetValidationResultMetadataCellsHeaderAndFooter() throws NoSuchFieldException, IllegalAccessException,
NoSuchMethodException, InvocationTargetException {
Method validateMetadataCellsMethod =
config.getClass().getDeclaredMethod("validateMetadataCells", FailureCollector.class);
diff --git a/widgets/GoogleDrive-batchsource.json b/widgets/GoogleDrive-batchsource.json
index c657a281..713c3046 100644
--- a/widgets/GoogleDrive-batchsource.json
+++ b/widgets/GoogleDrive-batchsource.json
@@ -4,6 +4,104 @@
},
"display-name": "Google Drive Source",
"configuration-groups": [
+ {
+ "label": "Authentication",
+ "properties": [
+ {
+ "widget-type": "radio-group",
+ "label": "Authentication Type",
+ "name": "authType",
+ "widget-attributes": {
+ "layout": "inline",
+ "default": "oAuth2",
+ "options": [
+ {
+ "id": "oAuth2",
+ "label": "OAuth2"
+ },
+ {
+ "id": "serviceAccount",
+ "label": "Service account"
+ }
+ ]
+ }
+ },
+ {
+ "name": "oAuthMethod",
+ "label": "OAuth Method",
+ "widget-type": "radio-group",
+ "widget-attributes": {
+ "layout": "inline",
+ "default": "REFRESH_TOKEN",
+ "options": [
+ {
+ "id": "REFRESH_TOKEN",
+ "label": "Refresh Token"
+ },
+ {
+ "id": "ACCESS_TOKEN",
+ "label": "Access Token"
+ }
+ ]
+ }
+ },
+ {
+ "name": "accessToken",
+ "label": "Access Token",
+ "widget-type": "textbox",
+ "widget-attributes": {
+ "placeholder": "${oauthAccessToken(provider,credential)}"
+ }
+ },
+ {
+ "widget-type": "textbox",
+ "label": "Client ID",
+ "name": "clientId"
+ },
+ {
+ "widget-type": "password",
+ "label": "Client Secret",
+ "name": "clientSecret"
+ },
+ {
+ "widget-type": "password",
+ "label": "Refresh Token",
+ "name": "refreshToken"
+ },
+ {
+ "name": "serviceAccountType",
+ "label": "Service Account Type",
+ "widget-type": "radio-group",
+ "widget-attributes": {
+ "layout": "inline",
+ "default": "filePath",
+ "options": [
+ {
+ "id": "filePath",
+ "label": "File Path"
+ },
+ {
+ "id": "JSON",
+ "label": "JSON"
+ }
+ ]
+ }
+ },
+ {
+ "widget-type": "textbox",
+ "label": "Service Account File Path",
+ "name": "accountFilePath",
+ "widget-attributes": {
+ "default": "auto-detect"
+ }
+ },
+ {
+ "widget-type": "textbox",
+ "label": "Service Account JSON",
+ "name": "serviceAccountJSON"
+ }
+ ]
+ },
{
"label": "Basic",
"properties": [
@@ -226,6 +324,186 @@
}
]
}
+ },
+ {
+ "name": "structuredSchemaRequired",
+ "label": "Structured Schema Required",
+ "widget-type": "toggle",
+ "widget-attributes": {
+ "on": {
+ "value": "true",
+ "label": "YES"
+ },
+ "off": {
+ "value": "false",
+ "label": "NO"
+ },
+ "default": "true"
+ }
+ },
+ {
+ "widget-type": "select",
+ "label": "Format",
+ "name": "format",
+ "widget-attributes": {
+ "values": [
+ "avro",
+ "blob",
+ "delimited",
+ "tsv",
+ "csv",
+ "json",
+ "text",
+ "xls"
+ ],
+ "default": "blob"
+ }
+ },
+ {
+ "widget-type": "get-schema",
+ "widget-category": "plugin"
+ },
+ {
+ "widget-type": "hidden",
+ "name": "recursive",
+ "label": "Read Files Recursively",
+ "widget-attributes": {
+ "layout": "inline",
+ "default": "false",
+ "options": [
+ {
+ "id": "true",
+ "label": "True"
+ },
+ {
+ "id": "false",
+ "label": "False"
+ }
+ ]
+ }
+ },
+ {
+ "widget-type": "hidden",
+ "label": "Allow Empty Input",
+ "name": "ignoreNonExistingFolders",
+ "widget-attributes": {
+ "layout": "inline",
+ "default": "false",
+ "options": [
+ {
+ "id": "true",
+ "label": "True"
+ },
+ {
+ "id": "false",
+ "label": "False"
+ }
+ ]
+ }
+ },
+ {
+ "widget-type": "number",
+ "label": "Sample Size",
+ "name": "sampleSize",
+ "widget-attributes": {
+ "default": "1000",
+ "minimum": "1"
+ }
+ },
+ {
+ "widget-type": "keyvalue-dropdown",
+ "label": "Override",
+ "name": "override",
+ "widget-attributes": {
+ "key-placeholder": "Field Name",
+ "value-placeholder": "Data Type",
+ "dropdownOptions": [
+ "boolean",
+ "bytes",
+ "double",
+ "float",
+ "int",
+ "long",
+ "string",
+ "timestamp"
+ ]
+ }
+ },
+ {
+ "widget-type": "textbox",
+ "label": "Delimiter",
+ "name": "delimiter",
+ "widget-attributes": {
+ "placeholder": "Delimiter if the format is 'delimited'"
+ }
+ },
+ {
+ "widget-type": "toggle",
+ "name": "enableQuotedValues",
+ "label": "Enable Quoted Values",
+ "widget-attributes": {
+ "default": "false",
+ "on": {
+ "value": "true",
+ "label": "True"
+ },
+ "off": {
+ "value": "false",
+ "label": "False"
+ }
+ }
+ },
+ {
+ "widget-type": "toggle",
+ "name": "skipHeader",
+ "label": "Use First Row as Header",
+ "widget-attributes": {
+ "default": "false",
+ "on": {
+ "value": "true",
+ "label": "True"
+ },
+ "off": {
+ "value": "false",
+ "label": "False"
+ }
+ }
+ },
+ {
+ "widget-type": "toggle",
+ "label": "Terminate Reading After Empty Row",
+ "name": "terminateIfEmptyRow",
+ "widget-attributes": {
+ "default": "false",
+ "on": {
+ "value": "true",
+ "label": "True"
+ },
+ "off": {
+ "value": "false",
+ "label": "False"
+ }
+ }
+ },
+ {
+ "widget-type": "select",
+ "label": "Select Sheet Using",
+ "name": "sheet",
+ "widget-attributes": {
+ "values": [
+ "Sheet Name",
+ "Sheet Number"
+ ],
+ "default": "Sheet Number"
+ }
+ },
+ {
+ "widget-type": "textbox",
+ "label": "Sheet Value",
+ "name": "sheetValue",
+ "widget-attributes": {
+ "default": "0"
+ }
}
]
},
@@ -313,104 +591,6 @@
}
]
},
- {
- "label": "Authentication",
- "properties": [
- {
- "widget-type": "radio-group",
- "label": "Authentication Type",
- "name": "authType",
- "widget-attributes": {
- "layout": "inline",
- "default": "oAuth2",
- "options": [
- {
- "id": "oAuth2",
- "label": "OAuth2"
- },
- {
- "id": "serviceAccount",
- "label": "Service account"
- }
- ]
- }
- },
- {
- "name": "oAuthMethod",
- "label": "OAuth Method",
- "widget-type": "radio-group",
- "widget-attributes": {
- "layout": "inline",
- "default": "REFRESH_TOKEN",
- "options": [
- {
- "id": "REFRESH_TOKEN",
- "label": "Refresh Token"
- },
- {
- "id": "ACCESS_TOKEN",
- "label": "Access Token"
- }
- ]
- }
- },
- {
- "name": "accessToken",
- "label": "Access Token",
- "widget-type": "textbox",
- "widget-attributes": {
- "placeholder": "${oauthAccessToken(provider,credential)}"
- }
- },
- {
- "widget-type": "textbox",
- "label": "Client ID",
- "name": "clientId"
- },
- {
- "widget-type": "password",
- "label": "Client Secret",
- "name": "clientSecret"
- },
- {
- "widget-type": "password",
- "label": "Refresh Token",
- "name": "refreshToken"
- },
- {
- "name": "serviceAccountType",
- "label": "Service Account Type",
- "widget-type": "radio-group",
- "widget-attributes": {
- "layout": "inline",
- "default": "filePath",
- "options": [
- {
- "id": "filePath",
- "label": "File Path"
- },
- {
- "id": "JSON",
- "label": "JSON"
- }
- ]
- }
- },
- {
- "widget-type": "textbox",
- "label": "Service Account File Path",
- "name": "accountFilePath",
- "widget-attributes": {
- "default": "auto-detect"
- }
- },
- {
- "widget-type": "textbox",
- "label": "Service Account JSON",
- "name": "serviceAccountJSON"
- }
- ]
- },
{
"label": "Advanced",
"properties": [
@@ -441,6 +621,285 @@
}
]
}
+ },
+ {
+ "widget-type": "json-editor",
+ "label": "File System Properties",
+ "name": "fileSystemProperties"
+ },
+ {
+ "widget-type": "select",
+ "label": "File encoding",
+ "name": "fileEncoding",
+ "widget-attributes": {
+ "values": [
+ {
+ "label": "UTF-8",
+ "value": "UTF-8"
+ },
+ {
+ "label": "UTF-32",
+ "value": "UTF-32"
+ },
+ {
+ "label": "ISO-8859-1 (Latin-1 Western European)",
+ "value": "ISO-8859-1"
+ },
+ {
+ "label": "ISO-8859-2 (Latin-2 Central European)",
+ "value": "ISO-8859-2"
+ },
+ {
+ "label": "ISO-8859-3 (Latin-3 South European)",
+ "value": "ISO-8859-3"
+ },
+ {
+ "label": "ISO-8859-4 (Latin-4 North European)",
+ "value": "ISO-8859-4"
+ },
+ {
+ "label": "ISO-8859-5 (Latin/Cyrillic)",
+ "value": "ISO-8859-5"
+ },
+ {
+ "label": "ISO-8859-6 (Latin/Arabic)",
+ "value": "ISO-8859-6"
+ },
+ {
+ "label": "ISO-8859-7 (Latin/Greek)",
+ "value": "ISO-8859-7"
+ },
+ {
+ "label": "ISO-8859-8 (Latin/Hebrew)",
+ "value": "ISO-8859-8"
+ },
+ {
+ "label": "ISO-8859-9 (Latin-5 Turkish)",
+ "value": "ISO-8859-9"
+ },
+ {
+ "label": "ISO-8859-11 (Latin/Thai)",
+ "value": "ISO-8859-11"
+ },
+ {
+ "label": "ISO-8859-13 (Latin-7 Baltic Rim)",
+ "value": "ISO-8859-13"
+ },
+ {
+ "label": "ISO-8859-15 (Latin-9)",
+ "value": "ISO-8859-15"
+ },
+ {
+ "label": "Windows-1250",
+ "value": "Windows-1250"
+ },
+ {
+ "label": "Windows-1251",
+ "value": "Windows-1251"
+ },
+ {
+ "label": "Windows-1252",
+ "value": "Windows-1252"
+ },
+ {
+ "label": "Windows-1253",
+ "value": "Windows-1253"
+ },
+ {
+ "label": "Windows-1254",
+ "value": "Windows-1254"
+ },
+ {
+ "label": "Windows-1255",
+ "value": "Windows-1255"
+ },
+ {
+ "label": "Windows-1256",
+ "value": "Windows-1256"
+ },
+ {
+ "label": "Windows-1257",
+ "value": "Windows-1257"
+ },
+ {
+ "label": "Windows-1258",
+ "value": "Windows-1258"
+ },
+ {
+ "label": "IBM00858",
+ "value": "IBM00858"
+ },
+ {
+ "label": "IBM01140",
+ "value": "IBM01140"
+ },
+ {
+ "label": "IBM01141",
+ "value": "IBM01141"
+ },
+ {
+ "label": "IBM01142",
+ "value": "IBM01142"
+ },
+ {
+ "label": "IBM01143",
+ "value": "IBM01143"
+ },
+ {
+ "label": "IBM01144",
+ "value": "IBM01144"
+ },
+ {
+ "label": "IBM01145",
+ "value": "IBM01145"
+ },
+ {
+ "label": "IBM01146",
+ "value": "IBM01146"
+ },
+ {
+ "label": "IBM01147",
+ "value": "IBM01147"
+ },
+ {
+ "label": "IBM01148",
+ "value": "IBM01148"
+ },
+ {
+ "label": "IBM01149",
+ "value": "IBM01149"
+ },
+ {
+ "label": "IBM037",
+ "value": "IBM037"
+ },
+ {
+ "label": "IBM1026",
+ "value": "IBM1026"
+ },
+ {
+ "label": "IBM1047",
+ "value": "IBM1047"
+ },
+ {
+ "label": "IBM273",
+ "value": "IBM273"
+ },
+ {
+ "label": "IBM277",
+ "value": "IBM277"
+ },
+ {
+ "label": "IBM278",
+ "value": "IBM278"
+ },
+ {
+ "label": "IBM280",
+ "value": "IBM280"
+ },
+ {
+ "label": "IBM284",
+ "value": "IBM284"
+ },
+ {
+ "label": "IBM285",
+ "value": "IBM285"
+ },
+ {
+ "label": "IBM290",
+ "value": "IBM290"
+ },
+ {
+ "label": "IBM297",
+ "value": "IBM297"
+ },
+ {
+ "label": "IBM420",
+ "value": "IBM420"
+ },
+ {
+ "label": "IBM424",
+ "value": "IBM424"
+ },
+ {
+ "label": "IBM437",
+ "value": "IBM437"
+ },
+ {
+ "label": "IBM500",
+ "value": "IBM500"
+ },
+ {
+ "label": "IBM775",
+ "value": "IBM775"
+ },
+ {
+ "label": "IBM850",
+ "value": "IBM850"
+ },
+ {
+ "label": "IBM852",
+ "value": "IBM852"
+ },
+ {
+ "label": "IBM855",
+ "value": "IBM855"
+ },
+ {
+ "label": "IBM857",
+ "value": "IBM857"
+ },
+ {
+ "label": "IBM860",
+ "value": "IBM860"
+ },
+ {
+ "label": "IBM861",
+ "value": "IBM861"
+ },
+ {
+ "label": "IBM862",
+ "value": "IBM862"
+ },
+ {
+ "label": "IBM863",
+ "value": "IBM863"
+ },
+ {
+ "label": "IBM864",
+ "value": "IBM864"
+ },
+ {
+ "label": "IBM865",
+ "value": "IBM865"
+ },
+ {
+ "label": "IBM866",
+ "value": "IBM866"
+ },
+ {
+ "label": "IBM868",
+ "value": "IBM868"
+ },
+ {
+ "label": "IBM869",
+ "value": "IBM869"
+ },
+ {
+ "label": "IBM870",
+ "value": "IBM870"
+ },
+ {
+ "label": "IBM871",
+ "value": "IBM871"
+ },
+ {
+ "label": "IBM918",
+ "value": "IBM918"
+ }
+ ],
+ "default": "UTF-8"
+ }
}
]
},
@@ -512,7 +971,28 @@
]
}
],
- "outputs": [],
+ "outputs": [
+ {
+ "name": "schema",
+ "widget-type": "schema",
+ "widget-attributes": {
+ "default-schema": {
+ "name": "fileRecord",
+ "type": "record",
+ "fields": [
+ {
+ "name": "offset",
+ "type": "long"
+ },
+ {
+ "name": "body",
+ "type": "string"
+ }
+ ]
+ }
+ }
+ }
+ ],
"filters": [
{
"name": "Select modification date range",
@@ -657,6 +1137,111 @@
"type": "property"
}
]
+ },
+ {
+ "name": "Show property delimiter",
+ "condition": {
+ "expression": "format == 'delimited' && structuredSchemaRequired == true"
+ },
+ "show": [
+ {
+ "name": "delimiter"
+ }
+ ]
+ },
+ {
+ "name": "Show property enableQuotedValues",
+ "condition": {
+ "expression": "(format == 'delimited' || format == 'csv' || format == 'tsv') && structuredSchemaRequired == true"
+ },
+ "show": [
+ {
+ "name": "enableQuotedValues"
+ }
+ ]
+ },
+ {
+ "name": "Show property skipHeader",
+ "condition": {
+ "expression": "(format == 'delimited' || format == 'csv' || format == 'tsv' || format == 'xls') && structuredSchemaRequired == true"
+ },
+ "show": [
+ {
+ "name": "skipHeader"
+ }
+ ]
+ },
+ {
+ "name": "Show property sheet, sheetValue, terminateIfEmptyRow",
+ "condition": {
+ "expression": "format == 'xls' && structuredSchemaRequired == true"
+ },
+ "show": [
+ {
+ "name": "sheet"
+ },
+ {
+ "name": "sheetValue"
+ },
+ {
+ "name": "terminateIfEmptyRow"
+ }
+ ]
+ },
+ {
+ "name": "Show property format, get-schema, override, fileEncoding, sampleSize",
+ "condition": {
+ "expression": "structuredSchemaRequired == true"
+ },
+ "show": [
+ {
+ "name": "format"
+ },
+ {
+ "widget-type": "get-schema"
+ },
+ {
+ "name": "override"
+ },
+ {
+ "name": "fileEncoding"
+ },
+ {
+ "name": "sampleSize"
+ }
+ ]
+ },
+ {
+ "name": "Show old properties when structuredSchemaRequired is false or null",
+ "condition": {
+ "expression": "structuredSchemaRequired != true"
+ },
+ "show": [
+ {
+ "name": "docsExportingFormat"
+ },
+ {
+ "name": "sheetsExportingFormat"
+ },
+ {
+ "name": "drawingsExportingFormat"
+ },
+ {
+ "name": "presentationsExportingFormat"
+ },
+ {
+ "name": "maxPartitionSize"
+ },
+ {
+ "name": "bodyFormat"
+ },
+ {
+ "name": "fileMetadataProperties"
+ },
+ {
+ "name": "fileTypesToPull"
+ }
+ ]
}
]
}