diff --git a/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelErrorDetailsProvider.java b/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelErrorDetailsProvider.java
new file mode 100644
index 000000000..e73529e2d
--- /dev/null
+++ b/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelErrorDetailsProvider.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright © 2025 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.plugin.batch.source;
+
+import com.github.pjfanning.xlsx.exceptions.MissingSheetException;
+import com.github.pjfanning.xlsx.exceptions.ReadException;
+import com.google.common.base.Throwables;
+import io.cdap.cdap.api.exception.ErrorCategory;
+import io.cdap.cdap.api.exception.ErrorType;
+import io.cdap.cdap.api.exception.ErrorUtils;
+import io.cdap.cdap.api.exception.ProgramFailureException;
+import io.cdap.cdap.etl.api.exception.ErrorContext;
+import io.cdap.cdap.etl.api.exception.ErrorDetailsProvider;
+import org.apache.poi.EmptyFileException;
+
+import java.util.List;
+import javax.annotation.Nullable;
+
+/**
+ * Error details provider for the Excel source: converts known exceptions found in a causal chain
+ * into {@link ProgramFailureException}s carrying an error category, error type and message.
+ */
+public class ExcelErrorDetailsProvider implements ErrorDetailsProvider {
+
+  private static final String ERROR_MESSAGE_FORMAT = "Error occurred in the phase: '%s'. Error message: %s";
+  private static final String SUBCATEGORY_CONFIGURATION = "Configuration";
+  private static final String SUBCATEGORY_DATA_MISSING = "Data Missing";
+  private static final String SUBCATEGORY_FILE_READ_ERROR = "File Read Error";
+
+  /**
+   * Walks the causal chain of {@code e} and converts the first recognised exception into a
+   * {@link ProgramFailureException}.
+   *
+   * @param e The exception whose causal chain is inspected.
+   * @param errorContext The context whose phase is included in the error message.
+   * @return The wrapped exception, or {@code null} when the chain already contains a
+   *   {@link ProgramFailureException} or no recognised exception type is found.
+   */
+  @Nullable
+  @Override
+  public ProgramFailureException getExceptionDetails(Exception e, ErrorContext errorContext) {
+    List<Throwable> causalChain = Throwables.getCausalChain(e);
+    for (Throwable t : causalChain) {
+      if (t instanceof ProgramFailureException) {
+        // if causal chain already has program failure exception, return null to avoid double wrap.
+        return null;
+      }
+      if (t instanceof MissingSheetException) {
+        return getProgramFailureException((MissingSheetException) t, errorContext,
+          ErrorType.USER, SUBCATEGORY_DATA_MISSING);
+      }
+      if (t instanceof ReadException) {
+        return getProgramFailureException((ReadException) t, errorContext,
+          ErrorType.USER, SUBCATEGORY_FILE_READ_ERROR);
+      }
+      if (t instanceof EmptyFileException) {
+        return getProgramFailureException((EmptyFileException) t, errorContext,
+          ErrorType.USER, SUBCATEGORY_DATA_MISSING);
+      }
+      if (t instanceof IllegalArgumentException) {
+        return getProgramFailureException((IllegalArgumentException) t, errorContext,
+          ErrorType.USER, SUBCATEGORY_CONFIGURATION);
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Get a ProgramFailureException with the given error information from {@link Exception}.
+   *
+   * @param exception The Exception to get the error information from.
+   * @param errorContext The context providing the phase in which the error occurred.
+   * @param errorType The type of the error.
+   * @param subCategory The sub-category attached to the plugin error category.
+   * @return A ProgramFailureException with the given error information.
+   */
+  private ProgramFailureException getProgramFailureException(Exception exception, ErrorContext errorContext,
+    ErrorType errorType, String subCategory) {
+    String errorMessage = exception.getMessage();
+    if (errorMessage == null || errorMessage.isEmpty()) {
+      // Some exceptions carry no message; fall back to the class name so the reason is never "null".
+      errorMessage = exception.getClass().getName();
+    }
+    return ErrorUtils.getProgramFailureException(
+      new ErrorCategory(ErrorCategory.ErrorCategoryEnum.PLUGIN, subCategory), errorMessage,
+      String.format(ERROR_MESSAGE_FORMAT, errorContext.getPhase(), errorMessage), errorType,
+      false, exception);
+  }
+}
diff --git a/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelInputFormat.java b/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelInputFormat.java
index a7e46a30f..2db71a674 100644
--- a/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelInputFormat.java
+++ b/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelInputFormat.java
@@ -19,6 +19,9 @@
 import com.github.pjfanning.xlsx.StreamingReader;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
+import io.cdap.cdap.api.exception.ErrorCategory;
+import io.cdap.cdap.api.exception.ErrorType;
+import io.cdap.cdap.api.exception.ErrorUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -193,7 +196,9 @@ public void initialize(InputSplit genericSplit, TaskAttemptContext context) thro
           workSheet = workbook.getSheetAt(Integer.parseInt(sheetValue));
         }
       } catch (Exception e) {
-        throw new IllegalArgumentException("Exception while reading excel sheet. " + e.getMessage(), e);
+        String error = String.format("Exception while reading excel sheet: %s", e.getMessage());
+        throw ErrorUtils.getProgramFailureException(new ErrorCategory(ErrorCategory.ErrorCategoryEnum.PLUGIN),
+          error, error, ErrorType.USER, false, e);
       }
 
       // As we cannot get the number of rows in a sheet while streaming.
diff --git a/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelInputReader.java b/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelInputReader.java index 38157d85e..39e2b479c 100644 --- a/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelInputReader.java +++ b/core-plugins/src/main/java/io/cdap/plugin/batch/source/ExcelInputReader.java @@ -35,12 +35,16 @@ import io.cdap.cdap.api.dataset.lib.KeyValue; import io.cdap.cdap.api.dataset.lib.KeyValueTable; import io.cdap.cdap.api.dataset.table.Table; +import io.cdap.cdap.api.exception.ErrorCategory; +import io.cdap.cdap.api.exception.ErrorType; +import io.cdap.cdap.api.exception.ErrorUtils; import io.cdap.cdap.etl.api.Emitter; import io.cdap.cdap.etl.api.FailureCollector; import io.cdap.cdap.etl.api.PipelineConfigurer; import io.cdap.cdap.etl.api.batch.BatchRuntimeContext; import io.cdap.cdap.etl.api.batch.BatchSource; import io.cdap.cdap.etl.api.batch.BatchSourceContext; +import io.cdap.cdap.etl.api.exception.ErrorDetailsProviderSpec; import io.cdap.plugin.common.LineageRecorder; import io.cdap.plugin.common.Properties; import io.cdap.plugin.common.ReferencePluginConfig; @@ -192,10 +196,11 @@ public void transform(KeyValue input, Emitter 1 && excelInputreaderConfig.terminateIfEmptyRow.equalsIgnoreCase("true")) { - throw new ExecutionException("Encountered empty row while reading Excel file :" + fileName + - " . Terminating processing", new Throwable()); + String error = String.format("Encountered empty row while reading Excel file :%s." + + " Terminating processing", fileName); + throw ErrorUtils.getProgramFailureException(new ErrorCategory(ErrorCategory.ErrorCategoryEnum.PLUGIN), + error, error, ErrorType.USER, false, null); } - prevRowNum = currentRowNum; Map excelColumnValueMap = new HashMap<>(); @@ -212,6 +217,9 @@ public void transform(KeyValue input, Emitter