diff --git a/jmix-search/search/search.gradle b/jmix-search/search/search.gradle index d3cde7ec0d..186c9a27c1 100644 --- a/jmix-search/search/search.gradle +++ b/jmix-search/search/search.gradle @@ -69,8 +69,9 @@ dependencies { testImplementation 'org.junit.jupiter:junit-jupiter-engine' testImplementation 'org.junit.jupiter:junit-jupiter-params' testImplementation 'org.junit.vintage:junit-vintage-engine' + testImplementation 'org.spockframework:spock-core' testImplementation 'org.mockito:mockito-core' - testImplementation "org.spockframework:spock-core" + testImplementation 'ch.qos.logback:logback-classic' testRuntimeOnly 'org.slf4j:slf4j-simple' testRuntimeOnly 'org.hsqldb:hsqldb' testRuntimeOnly 'org.junit.platform:junit-platform-launcher' diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java index bdb461687e..1b958547ab 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java @@ -16,13 +16,29 @@ package io.jmix.search.exception; -import org.apache.commons.io.FilenameUtils; +import java.util.List; +/** + * An exception that is thrown when a user added some file of the type that is not supported + * and there are no any known parser for. + */ public class UnsupportedFileFormatException extends Exception { - public static final String MESSAGE = "The file %s with the '%s' extension is not supported."; + private static final String MESSAGE = "The file %s can't be parsed. 
" + + "Only the following file parsing criteria are supported:\n -%s"; + + /** + * @param fileName the name of the file which type is not supported + * @param supportedExtensions the list of the criteria that are supported in the application + */ + public UnsupportedFileFormatException(String fileName, List supportedExtensions) { + super(String.format( + MESSAGE, + fileName, + getSupportedExtensionsString(supportedExtensions))); + } - public UnsupportedFileFormatException(String fileName) { - super(String.format(MESSAGE, fileName, FilenameUtils.getExtension(fileName))); + protected static String getSupportedExtensionsString(List supportedExtensions) { + return String.join("\n -", supportedExtensions); } } diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java new file mode 100644 index 0000000000..e4f14bd916 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java @@ -0,0 +1,104 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.jmix.search.index.fileparsing; + +import com.google.common.base.Strings; +import io.jmix.core.FileRef; +import org.apache.commons.io.FilenameUtils; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.BodyContentHandler; +import org.xml.sax.ContentHandler; + +import java.io.StringWriter; +import java.util.Set; +import java.util.function.Function; + +/** + * Implements the common logic for all extension based file parser resolvers. + */ +public abstract class AbstractExtensionBasedFileParserResolver implements FileParserResolver { + + /** + * Returns a collection of supported extensions of the supported file type. + * Note that the extension checking mechanism is case-sensitive. So in order to support + * the both uppercase one and lowercase option of the extension they should be defined explicitly. + * E.g. ["xlsx", "XLSX", "docx", "DOCX"]. + * + * @return collection of supported extensions + */ + public abstract Set getSupportedExtensions(); + + @Override + public String getCriteriaDescription() { + return String.format( + "File parser resolver: %s. Supported extensions: %s.", + this.getClass().getSimpleName(), + getSupportedExtensionsString(getSupportedExtensions())); + } + + @Override + public boolean supports(FileRef fileRef) { + String fileName = fileRef.getFileName(); + String fileExtension = FilenameUtils.getExtension(fileName); + if (Strings.isNullOrEmpty(fileExtension)) { + return false; + } + + return getSupportedExtensions().contains(fileExtension); + } + + protected String getSupportedExtensionsString(Set supportedExtensions) { + return String.join(", ", supportedExtensions); + } + + @Override + public FileParserKit getParserKit() { + return new FileParserKit( + getParser(), + getContentHandlerGenerator(), + getMetadata(), + getParseContext()); + } + + /** + * Returns a parser for the supported file type. 
+ */ + protected abstract Parser getParser(); + + /** + * Returns a function for the ContentHandler generating that is necessary for the given file parsing. + */ + protected Function getContentHandlerGenerator() { + return stringWriter -> new BodyContentHandler(stringWriter); + } + + /** + * Returns a Metadata object for the given file parsing. + */ + protected Metadata getMetadata() { + return new Metadata(); + } + + /** + * Returns a ParseContext object for the given file parsing. + */ + protected ParseContext getParseContext() { + return new ParseContext(); + } +} diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserKit.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserKit.java new file mode 100644 index 0000000000..2c6ecbc218 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserKit.java @@ -0,0 +1,32 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.jmix.search.index.fileparsing; + +import jakarta.validation.constraints.NotNull; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.xml.sax.ContentHandler; + +import java.io.StringWriter; +import java.util.function.Function; + +public record FileParserKit( + @NotNull Parser parser, + @NotNull Function contentHandlerGenerator, + @NotNull Metadata metadata, + @NotNull ParseContext parseContext) {} diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java new file mode 100644 index 0000000000..3cb44b3110 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java @@ -0,0 +1,52 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.index.fileparsing; + +import io.jmix.core.FileRef; + +/** + * Interface to be implemented for adding a custom file parser resolver + * or modifying the behavior of the existing file parser resolvers. It gives an ability to define the exact parser + * for the exact file types with a custom implementation of the file checking logic. These parsers are used to extract + * file content for sending it to the search server and indexing. 
+ */ +public interface FileParserResolver { + + /** + * Returns the description of the criteria for the files that are supported with this resolver. + * This text is used for generating the log message that is written into the log + * while no one of the resolvers supports the processing of the given file. + * + * @return criteria description + */ + String getCriteriaDescription(); + + /** + * Returns a complex object that contains all necessary objects for the supported file type parsing. + * + * @return an instance of a file parser kit + */ + FileParserKit getParserKit(); + + /** + * Returns the result of the checking if the file with the given fileRef is supported by the resolver or not. + * + * @param fileRef object with the file information + * @return the given FileRef's checking result + */ + boolean supports(FileRef fileRef); +} diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/package-info.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/package-info.java new file mode 100644 index 0000000000..823145b6d0 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright 2020 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +@NonNullApi +package io.jmix.search.index.fileparsing; + +import org.springframework.lang.NonNullApi; \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/LegacyMSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/LegacyMSOfficeDocumentsParserResolver.java new file mode 100644 index 0000000000..141c4d4725 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/LegacyMSOfficeDocumentsParserResolver.java @@ -0,0 +1,40 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.jmix.search.index.fileparsing.resolvers; + +import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.microsoft.OfficeParser; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import java.util.Set; + +@Component("search_LegacyMSOfficeDocumentsParserResolver") +@Order(100) +public class LegacyMSOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { + + @Override + public Set getSupportedExtensions() { + return Set.of("doc", "xls", "DOC", "XLS"); + } + + @Override + public Parser getParser() { + return new OfficeParser(); + } +} \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java new file mode 100644 index 0000000000..23c9d74314 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java @@ -0,0 +1,53 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.jmix.search.index.fileparsing.resolvers; + +import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.microsoft.OfficeParserConfig; +import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import java.util.Set; + +@Component("search_OfficeDocumentsParserResolver") +@Order(100) +public class MSOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { + + @Override + public Set getSupportedExtensions() { + return Set.of("docx", "xlsx", "DOCX", "XLSX"); + } + + @Override + public Parser getParser() { + return new OOXMLParser(); + } + + @Override + protected ParseContext getParseContext() { + ParseContext parseContext = super.getParseContext(); + + OfficeParserConfig officeParserConfig = new OfficeParserConfig(); + officeParserConfig.setIncludeHeadersAndFooters(false); + parseContext.set(OfficeParserConfig.class, officeParserConfig); + + return parseContext; + } +} \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java new file mode 100644 index 0000000000..9668ae3a2d --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java @@ -0,0 +1,40 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.index.fileparsing.resolvers; + +import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.odf.OpenDocumentParser; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import java.util.Set; + +@Component("search_OpenOfficeDocumentsParserResolver") +@Order(100) +public class OpenOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { + + @Override + public Set getSupportedExtensions() { + return Set.of("odt", "ods", "ODT", "ODS"); + } + + @Override + public Parser getParser() { + return new OpenDocumentParser(); + } +} \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java new file mode 100644 index 0000000000..ca50cb685e --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java @@ -0,0 +1,40 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.index.fileparsing.resolvers; + +import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.pdf.PDFParser; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import java.util.Set; + +@Component("search_PDFParserResolver") +@Order(100) +public class PDFParserResolver extends AbstractExtensionBasedFileParserResolver { + + @Override + public Set getSupportedExtensions() { + return Set.of("pdf", "PDF"); + } + + @Override + public Parser getParser() { + return new PDFParser(); + } +} \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java new file mode 100644 index 0000000000..0ee3b86f87 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java @@ -0,0 +1,40 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.index.fileparsing.resolvers; + +import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.rtf.RTFParser; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import java.util.Set; + +@Component("search_RTFParserResolver") +@Order(100) +public class RTFParserResolver extends AbstractExtensionBasedFileParserResolver { + + @Override + public Set getSupportedExtensions() { + return Set.of("rtf", "RTF"); + } + + @Override + public Parser getParser() { + return new RTFParser(); + } +} \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java new file mode 100644 index 0000000000..b5fb962604 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java @@ -0,0 +1,40 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.index.fileparsing.resolvers; + +import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.txt.TXTParser; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import java.util.Set; + +@Component("search_TXTParserResolver") +@Order(100) +public class TXTParserResolver extends AbstractExtensionBasedFileParserResolver { + + @Override + public Set getSupportedExtensions() { + return Set.of("txt", "TXT"); + } + + @Override + public Parser getParser() { + return new TXTParser(); + } +} \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/package-info.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/package-info.java new file mode 100644 index 0000000000..99a9c324df --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright 2020 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@NonNullApi +package io.jmix.search.index.fileparsing.resolvers; + +import org.springframework.lang.NonNullApi; \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java new file mode 100644 index 0000000000..a9b871bd37 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java @@ -0,0 +1,62 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils; + +import io.jmix.core.FileRef; +import io.jmix.search.exception.UnsupportedFileFormatException; +import io.jmix.search.index.fileparsing.FileParserResolver; +import io.jmix.search.index.fileparsing.FileParserKit; +import org.springframework.stereotype.Component; + +import java.util.ArrayList; +import java.util.List; + +/** + * The service that searches appropriate file parsers for the supported file types. + * A search principle is based on the sequential applying FileParserResolver objects' checks for the given file. 
+ */ +@Component("search_FileParserProvider") +public class FileParserProvider { + + private static final String EMPTY_FILE_PARSER_RESOLVERS_LIST_MESSAGE + = "There are no any file parser resolvers in the application."; + + protected List fileParserResolvers; + + public FileParserProvider(List fileParserResolvers) { + this.fileParserResolvers = fileParserResolvers; + } + + public FileParserKit getParserKit(FileRef fileRef) throws UnsupportedFileFormatException { + if (fileParserResolvers.isEmpty()) { + throw new IllegalStateException(EMPTY_FILE_PARSER_RESOLVERS_LIST_MESSAGE); + } + + String fileName = fileRef.getFileName(); + + List messages = new ArrayList<>(); + + for (FileParserResolver resolver : fileParserResolvers) { + if (resolver.supports(fileRef)) { + return resolver.getParserKit(); + } + messages.add(resolver.getCriteriaDescription()); + } + + throw new UnsupportedFileFormatException(fileName, messages); + } +} diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index ad8f96e0e1..f1964b53f1 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -16,14 +16,13 @@ package io.jmix.search.utils; -import com.google.common.base.Strings; import io.jmix.core.FileRef; import io.jmix.core.FileStorage; import io.jmix.core.FileStorageLocator; import io.jmix.core.common.util.Preconditions; import io.jmix.search.exception.FileParseException; import io.jmix.search.exception.UnsupportedFileFormatException; -import org.apache.commons.io.FilenameUtils; +import io.jmix.search.index.fileparsing.FileParserKit; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; @@ -31,10 +30,6 @@ import org.apache.tika.parser.microsoft.OfficeParser; import 
org.apache.tika.parser.microsoft.OfficeParserConfig; import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; -import org.apache.tika.parser.odf.OpenDocumentParser; -import org.apache.tika.parser.pdf.PDFParser; -import org.apache.tika.parser.rtf.RTFParser; -import org.apache.tika.parser.txt.TXTParser; import org.apache.tika.sax.BodyContentHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,7 +37,6 @@ import java.io.InputStream; import java.io.StringWriter; -import java.util.Optional; @Component public class FileProcessor { @@ -50,28 +44,35 @@ public class FileProcessor { private static final Logger log = LoggerFactory.getLogger(FileProcessor.class); protected FileStorageLocator fileStorageLocator; + protected FileParserProvider fileParserProvider; - public FileProcessor(FileStorageLocator fileStorageLocator) { + public FileProcessor(FileStorageLocator fileStorageLocator, FileParserProvider fileParserProvider) { this.fileStorageLocator = fileStorageLocator; + this.fileParserProvider = fileParserProvider; } public String extractFileContent(FileRef fileRef) throws FileParseException, UnsupportedFileFormatException { Preconditions.checkNotNullArgument(fileRef); log.debug("Extract content of file {}", fileRef); FileStorage fileStorage = fileStorageLocator.getByName(fileRef.getStorageName()); - Parser parser = getParser(fileRef); + FileParserKit parsingBundle = getParserKit(fileRef); + Parser parser = parsingBundle.parser(); log.debug("Parser for file {}: {}", fileRef, parser); StringWriter stringWriter = new StringWriter(); - ParseContext parseContext = createParseContext(); try (InputStream stream = fileStorage.openStream(fileRef)) { - parser.parse(stream, new BodyContentHandler(stringWriter), new Metadata(), parseContext); + parser.parse( + stream, + parsingBundle.contentHandlerGenerator().apply(stringWriter), + parsingBundle.metadata(), + parsingBundle.parseContext()); } catch (OfficeXmlFileException e) { + //Protection from Office 2007 
documents with old .doc extension. if (parser instanceof OfficeParser) { parser = new OOXMLParser(); try (InputStream secondStream = fileStorage.openStream(fileRef)) { stringWriter = new StringWriter(); - parser.parse(secondStream, new BodyContentHandler(stringWriter), new Metadata(), parseContext); + parser.parse(secondStream, new BodyContentHandler(stringWriter), new Metadata(), parsingBundle.parseContext()); } catch (Exception e1) { log.error("Unable to parse OOXML file '{}'", fileRef.getFileName(), e1); throw new FileParseException(fileRef.getFileName(), "Fail to parse OOXML file via OOXMLParser", e); @@ -85,55 +86,7 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Uns return stringWriter.toString(); } - protected Parser getParser(FileRef fileRef) throws UnsupportedFileFormatException { - Optional parserOpt = getParserOpt(fileRef); - return parserOpt.orElseThrow(() -> new UnsupportedFileFormatException(fileRef.getFileName())); - } - - protected ParseContext createParseContext() { - ParseContext parseContext = new ParseContext(); - - OfficeParserConfig officeParserConfig = new OfficeParserConfig(); - officeParserConfig.setIncludeHeadersAndFooters(false); - parseContext.set(OfficeParserConfig.class, officeParserConfig); - - return parseContext; - } - - protected Optional getParserOpt(FileRef fileRef) { - Parser parser; - String ext = FilenameUtils.getExtension(fileRef.getFileName()).toLowerCase(); - if (Strings.isNullOrEmpty(ext)) { - log.warn("Unable to create a parser for a file without extension"); - parser = null; - } else { - switch (ext) { - case "pdf": - parser = new PDFParser(); - break; - case "doc": - case "xls": - parser = new OfficeParser(); - break; - case "docx": - case "xlsx": - parser = new OOXMLParser(); - break; - case "odt": - case "ods": - parser = new OpenDocumentParser(); - break; - case "rtf": - parser = new RTFParser(); - break; - case "txt": - parser = new TXTParser(); - break; - default: - 
log.warn("Unsupported file extension: {}", ext); - parser = null; - } - } - return Optional.ofNullable(parser); + protected FileParserKit getParserKit(FileRef fileRef) throws UnsupportedFileFormatException { + return fileParserProvider.getParserKit(fileRef); } } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileFormatExceptionTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileFormatExceptionTest.groovy new file mode 100644 index 0000000000..d0e319b7bf --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileFormatExceptionTest.groovy @@ -0,0 +1,51 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.exception + +import spock.lang.Specification + +class UnsupportedFileFormatExceptionTest extends Specification { + + private static final String MESSAGE_1 = "The file another-file.smt can't be parsed. " + + "Only the following file parsing criteria are supported:\n" + + " -The only one criteria." + private static final String MESSAGE_2 = "The file the-file-with-not-supported-extension.sql can't be parsed. " + + "Only the following file parsing criteria are supported:\n" + + " -The first criteria.\n" + + " -The second criteria." + + private static final String MESSAGE_3 = "The file anyfile can't be parsed. 
" + + "Only the following file parsing criteria are supported:\n" + + " -line1\n" + + " -line2\n" + + " -line3\n" + + " -line4" + + def "message test"() { + when: + def exception = new UnsupportedFileFormatException(fileName, supportedTypes) + + then: + exception.getMessage() == message + + where: + fileName | supportedTypes | message + "another-file.smt" | ["The only one criteria."] | MESSAGE_1 + "the-file-with-not-supported-extension.sql" | ["The first criteria.", "The second criteria."] | MESSAGE_2 + "anyfile" | ["line1", "line2", "line3", "line4"] | MESSAGE_3 + } +} diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy new file mode 100644 index 0000000000..d681460fba --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -0,0 +1,106 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.index.fileparsing + +import io.jmix.core.FileRef +import org.apache.tika.parser.Parser +import spock.lang.Specification + +class AbstractExtensionBasedFileParserResolverTest extends Specification { + + def "getParserKit. 
Different kits are returned each time"() { + given: + def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), Mock(Set)) + + expect: + resolver.getParserKit() != resolver.getParserKit() + } + + def "getParserKit. Not the same objects inside of the kits"() { + given: + def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), Mock(Set)) + + when: + def kit1 = resolver.getParserKit() + def kit2 = resolver.getParserKit() + + then: + kit1.contentHandlerGenerator() != null + !kit1.metadata().is(kit2.metadata()) + !kit1.parseContext().is(kit2.parseContext()) + } + + def "GetCriteriaDescription"() { + given: + def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), extensions as Set) + + expect: + resolver.getCriteriaDescription() == criteriaDescription + + where: + extensions | criteriaDescription + ["ext1"] | "File parser resolver: TestExtensionBasedFileParserResolver. Supported extensions: ext1." + ["ext1", "ext2"] | "File parser resolver: TestExtensionBasedFileParserResolver. Supported extensions: ext1, ext2." + ["ext1", "ext2", "ext3"] | "File parser resolver: TestExtensionBasedFileParserResolver. Supported extensions: ext1, ext2, ext3." + } + + def "Supports"() { + given: + def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), extensions as Set) + + and: + def fileRef = Mock(FileRef) + fileRef.getFileName() >> fileName + + expect: + resolver.supports(fileRef) == supports + + where: + fileName | extensions | supports + "file1.ext1" | ["ext1"] | true + "file1.ext11" | ["ext1"] | false + "file1..ext1" | ["ext1"] | true + "file1..ext" | ["ext1"] | false + "file1.ext1" | ["ext1", "ext2"] | true + "file1.ext2" | ["ext1", "ext2"] | true + "file1.ext3" | ["ext1", "ext2", "ext3"] | true + "file1.ext33" | ["ext1", "ext2", "ext3"] | false + "file1.doc" | ["docx"] | false + "file1." 
| ["docx"] | false + "file" | ["ext1"] | false + } + + private static class TestExtensionBasedFileParserResolver extends AbstractExtensionBasedFileParserResolver { + private Parser parser + private Set extensions + + TestExtensionBasedFileParserResolver(Parser parser, Set extensions) { + this.parser = parser + this.extensions = extensions + } + + @Override + Set getSupportedExtensions() { + return extensions; + } + + @Override + Parser getParser() { + return parser + } + } +} diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy new file mode 100644 index 0000000000..ddff5856f8 --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy @@ -0,0 +1,69 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.jmix.search.index.mapping.propertyvalue.impl + +import ch.qos.logback.classic.Level +import ch.qos.logback.classic.spi.ILoggingEvent +import ch.qos.logback.core.read.ListAppender +import io.jmix.core.FileRef +import io.jmix.search.exception.UnsupportedFileFormatException +import io.jmix.search.utils.FileProcessor +import spock.lang.Specification + +import static io.jmix.search.utils.LogbackMocker.cleanUpAppender +import static io.jmix.search.utils.LogbackMocker.createAttachedAppender + + +class FilePropertyValueExtractorTest extends Specification { + + private ListAppender appender + + void setup() { + appender = createAttachedAppender( + FilePropertyValueExtractor.class, + Level.WARN) + } + + def "nothing should be thrown if fileProcessor throws a ParserResolvingException but should be logged"() { + given: + FileRef fileRef = Mock() + + and: + def exceptionMock = Mock(UnsupportedFileFormatException) + exceptionMock.getMessage() >> "Some exception message." + + and: + FileProcessor fileProcessor = Mock() + fileProcessor.extractFileContent(fileRef) >> { throw exceptionMock } + + and: + FilePropertyValueExtractor extractor = new FilePropertyValueExtractor(fileProcessor) + + when: + extractor.addFileContent(null, fileRef) + + then: + this.appender.list.size() == 1 + def loggingEvent = this.appender.list.get(0) + loggingEvent.getLevel() == Level.WARN + loggingEvent.getMessage() == exceptionMock.getMessage() + } + + void cleanup() { + cleanUpAppender(FilePropertyValueExtractor.class, appender) + } +} diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest2.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy similarity index 62% rename from jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest2.groovy rename to jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy index 0a870213f1..3ed1995328 100644 --- 
a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest2.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy @@ -21,25 +21,26 @@ import io.jmix.core.FileStorageLocator import io.jmix.search.exception.UnsupportedFileFormatException import spock.lang.Specification -class FileProcessorTest2 extends Specification { - def "ExtractFileContent"() { +class FileProcessorTest extends Specification { + + def "should throw the UnsupportedFileTypeException that have been thrown by the FileParserResolver"() { given: FileStorageLocator storageLocatorMock = Mock() + + and: + def exception = Mock(UnsupportedFileFormatException) + + and: + FileParserProvider fileParserProvider = Mock() FileRef fileRefMock = Mock() - fileRefMock.getFileName() >> fileName - FileProcessor fileProcessor = new FileProcessor(storageLocatorMock) + fileParserProvider.getParserKit(fileRefMock) >> { throw exception } + FileProcessor fileProcessor = new FileProcessor(storageLocatorMock, fileParserProvider) when: fileProcessor.extractFileContent(fileRefMock) then: - def exception = thrown(UnsupportedFileFormatException) - exception.getMessage() == message - - where: - fileName | message - "file-name.sql" | "The file file-name.sql with the 'sql' extension is not supported." - "any-file.abc" | "The file any-file.abc with the 'abc' extension is not supported." - "any-file-without-extension" | "The file any-file-without-extension with the '' extension is not supported." + UnsupportedFileFormatException throwable = thrown() + throwable == exception } } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java b/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java new file mode 100644 index 0000000000..4d5056b677 --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java @@ -0,0 +1,43 @@ +/* + * Copyright 2024 Haulmont. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils; + +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.Logger; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.Appender; +import ch.qos.logback.core.read.ListAppender; +import org.slf4j.LoggerFactory; + +public class LogbackMocker { + + public static ListAppender createAttachedAppender(Class classForLogging, + Level loggingLevel) { + Logger logger = (Logger) LoggerFactory.getLogger(classForLogging); + + ListAppender appender = new ListAppender<>(); + logger.addAppender(appender); + logger.setLevel(loggingLevel); + appender.start(); + return appender; + } + + public static void cleanUpAppender(Class classForLogging, Appender appender) { + Logger logger = (Logger) LoggerFactory.getLogger(classForLogging); + logger.detachAppender(appender); + } +} diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy new file mode 100644 index 0000000000..be391054ca --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy @@ -0,0 +1,100 @@ +/* + * Copyright 2024 Haulmont. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils.parserresolving + +import io.jmix.core.FileRef +import io.jmix.search.exception.UnsupportedFileFormatException +import io.jmix.search.index.fileparsing.FileParserResolver +import io.jmix.search.index.fileparsing.resolvers.MSOfficeDocumentsParserResolver +import io.jmix.search.index.fileparsing.resolvers.LegacyMSOfficeDocumentsParserResolver +import io.jmix.search.index.fileparsing.resolvers.OpenOfficeDocumentsParserResolver +import io.jmix.search.index.fileparsing.resolvers.PDFParserResolver +import io.jmix.search.index.fileparsing.resolvers.RTFParserResolver +import io.jmix.search.index.fileparsing.resolvers.TXTParserResolver +import io.jmix.search.utils.FileParserProvider +import org.apache.tika.parser.microsoft.OfficeParser +import org.apache.tika.parser.microsoft.ooxml.OOXMLParser +import org.apache.tika.parser.odf.OpenDocumentParser +import org.apache.tika.parser.pdf.PDFParser +import org.apache.tika.parser.rtf.RTFParser +import org.apache.tika.parser.txt.TXTParser +import spock.lang.Specification + +class FileParserProviderIntegrationTest extends Specification { + + def "there is appropriate resolver for the file"() { + given: + def provider = new FileParserProvider(getResolvers()) + + and: + def fileRef = Mock(FileRef) + fileRef.getFileName() >> "filename." 
+ extension + + expect: + provider.getParserKit(fileRef).parser().getClass() == theClass + + where: + extension | theClass + "txt" | TXTParser + "TXT" | TXTParser + "pdf" | PDFParser + "PDF" | PDFParser + "rtf" | RTFParser + "RTF" | RTFParser + "odt" | OpenDocumentParser + "ODT" | OpenDocumentParser + "ods" | OpenDocumentParser + "ODS" | OpenDocumentParser + "doc" | OfficeParser + "DOC" | OfficeParser + "xls" | OfficeParser + "XLS" | OfficeParser + "docx" | OOXMLParser + "DOCX" | OOXMLParser + "xlsx" | OOXMLParser + "XLSX" | OOXMLParser + } + + def "there is not appropriate resolver for the file"() { + given: + def provider = new FileParserProvider(getResolvers()) + + and: + def fileRef = Mock(FileRef) + fileRef.getFileName() >> "filename." + extension + + when: + provider.getParserKit(fileRef) + + then: + thrown(UnsupportedFileFormatException) + + where: + extension << ["txt1", "ems", "", "od", "ods2"] + } + + List getResolvers() { + List.of( + new MSOfficeDocumentsParserResolver(), + new LegacyMSOfficeDocumentsParserResolver(), + new OpenOfficeDocumentsParserResolver(), + new PDFParserResolver(), + new RTFParserResolver(), + new TXTParserResolver() + ) + } +} diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy new file mode 100644 index 0000000000..9987eb5881 --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy @@ -0,0 +1,126 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils.parserresolving + +import io.jmix.core.FileRef +import io.jmix.search.exception.UnsupportedFileFormatException +import io.jmix.search.index.fileparsing.FileParserResolver +import io.jmix.search.index.fileparsing.FileParserKit +import io.jmix.search.utils.FileParserProvider +import org.apache.tika.metadata.Metadata +import org.apache.tika.parser.ParseContext +import org.apache.tika.parser.Parser +import spock.lang.Specification +import java.util.function.Function + +import static java.util.Collections.emptyList + +class FileParserProviderTest extends Specification { + + def "should throw UnsupportedFileExtensionException when the given file of unsupported type"() { + given: + FileRef fileRef = Mock() + fileRef.getFileName() >> fileName + + and: + def resolver = Mock(FileParserResolver) + resolver.supports(fileRef) >> false + def resolver2 = Mock(FileParserResolver) + resolver2.supports(fileRef) >> false + + and: + def parserProvider = new FileParserProvider(List.of(resolver, resolver2)) + + when: + parserProvider.getParserKit(fileRef) + + then: + def exception = thrown(UnsupportedFileFormatException) + exception.getMessage().contains(fileName) + + where: + fileName << ["abc.def", "def.zxc"] + } + + def "should return parser of the type that is supported with exact resolver"() { + given: + FileRef fileRef = Mock() + fileRef.getFileName() >> fileName + + and: + def resolver1 = createExtensionBasedResolver("txt", parser1) + def resolver2 = createExtensionBasedResolver("rtf", parser2) + def resolver3 = 
Mock(FileParserResolver) + resolver3.supports(_ as FileRef) >> true; + resolver3.getParserKit() >> new FileParserKit(parser3, + Mock(Function), + Mock(Metadata), + Mock(ParseContext)) + + and: + def parserProvider = new FileParserProvider(List.of(resolver1, resolver2, resolver3)) + + when: + def resolvedParser = parserProvider.getParserKit(fileRef).parser() + + then: + resolvedParser != null + resolvedParser == expectedResolvedParser + + where: + fileName | parser1 | parser2 | parser3 | expectedResolvedParser + "file.txt" | Mock(Parser) | null | null | parser1 + "file.rtf" | null | Mock(Parser) | null | parser2 + "another.rtf" | null | Mock(Parser) | null | parser2 + "another.txt" | Mock(Parser) | null | null | parser1 + "file.eps" | null | null | Mock(Parser) | parser3 + "file" | null | null | Mock(Parser) | parser3 + } + + def "should throw an exception when there are no any resolver"() { + given: + FileRef fileRef = Mock() + + and: + def resolverManager = new FileParserProvider(emptyList()) + + when: + resolverManager.getParserKit(fileRef) + + then: + def exception = thrown(IllegalStateException) + exception.getMessage() == "There are no any file parser resolvers in the application." + } + + private FileParserResolver createExtensionBasedResolver(String fileExtension, Parser parser) { + def resolver = Mock(FileParserResolver) + resolver.supports(_ as FileRef) >> { FileRef fileRef1 -> + { + if (fileRef1.getFileName().contains(fileExtension)) { + return true + } + return false + } + } + resolver.getParserKit() >> new FileParserKit( + parser, + Mock(Function), + Mock(Metadata), + Mock(ParseContext)) + resolver + } +} diff --git a/jmix-search/search/src/test/java/io/jmix/search/utils/FileProcessorTest.java b/jmix-search/search/src/test/java/io/jmix/search/utils/FileProcessorTest.java new file mode 100644 index 0000000000..e69de29bb2