From 62c03839ea15245f924420f4d165fa431da5bad2 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 6 Sep 2024 16:38:42 +0400 Subject: [PATCH 01/81] Message text correction --- .../UnsupportedFileFormatException.java | 2 +- .../jmix/search/utils/FileProcessorTest.java | 44 +++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 jmix-search/search/src/test/java/io/jmix/search/utils/FileProcessorTest.java diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java index bdb461687e..fee7894d04 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java @@ -20,7 +20,7 @@ public class UnsupportedFileFormatException extends Exception { - public static final String MESSAGE = "The file %s with the '%s' extension is not supported."; + public static final String MESSAGE = "The file %s with '%s' extension is not supported."; public UnsupportedFileFormatException(String fileName) { super(String.format(MESSAGE, fileName, FilenameUtils.getExtension(fileName))); diff --git a/jmix-search/search/src/test/java/io/jmix/search/utils/FileProcessorTest.java b/jmix-search/search/src/test/java/io/jmix/search/utils/FileProcessorTest.java new file mode 100644 index 0000000000..ec53887d6a --- /dev/null +++ b/jmix-search/search/src/test/java/io/jmix/search/utils/FileProcessorTest.java @@ -0,0 +1,44 @@ + +import io.jmix.core.FileRef; +import io.jmix.core.FileStorageLocator; +import io.jmix.search.exception.UnsupportedFileFormatException; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +class FileProcessorTest { + + public static final String FILE_NAME_EXAMPLE = "the-file-with-not-supported-extension.sql"; + public static final String FILE_NAME_EXAMPLE_2 = "another-file.smt"; + + @Test + void extractFileContent_1() { + FileStorageLocator storageLocatorMock = mock(FileStorageLocator.class); + FileProcessor fileProcessor = new FileProcessor(storageLocatorMock); + FileRef fileRefMock = mock(FileRef.class); + when(fileRefMock.getFileName()).thenReturn(FILE_NAME_EXAMPLE); + UnsupportedFileFormatException exception = assertThrows( + UnsupportedFileFormatException.class, + () -> fileProcessor.extractFileContent(fileRefMock)); + assertEquals( + "The file the-file-with-not-supported-extension.sql with 'sql' extension is not supported.", + exception.getMessage()); + } + + @Test + void extractFileContent_2() { + FileStorageLocator storageLocatorMock = mock(FileStorageLocator.class); + FileProcessor fileProcessor = new FileProcessor(storageLocatorMock); + FileRef fileRefMock = mock(FileRef.class); + when(fileRefMock.getFileName()).thenReturn(FILE_NAME_EXAMPLE_2); + UnsupportedFileFormatException exception = assertThrows( + UnsupportedFileFormatException.class, + () -> fileProcessor.extractFileContent(fileRefMock)); + assertEquals( + "The file another-file.smt with 'smt' extension is not supported.", + exception.getMessage()); + } + +} \ No newline at end of file From 56af7483e17021dcb37bdc0089d18ff79c3da2a5 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Sat, 7 Sep 2024 19:31:25 +0400 Subject: [PATCH 02/81] Parser resolving mechanism refactoring. --- ....java => EmptyFileExtensionException.java} | 10 +-- .../exception/ParserResolvingException.java | 23 +++++++ .../UnsupportedFileExtensionException.java | 40 +++++++++++ .../impl/FilePropertyValueExtractor.java | 5 +- .../jmix/search/utils/FileParserResolver.java | 44 ++++++++++++ .../io/jmix/search/utils/FileProcessor.java | 69 ++----------------- .../search/utils/SupportedFileExtensions.java | 55 +++++++++++++++ 7 files changed, 176 insertions(+), 70 deletions(-) rename jmix-search/search/src/main/java/io/jmix/search/exception/{UnsupportedFileFormatException.java => EmptyFileExtensionException.java} (64%) create mode 100644 jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java create mode 100644 jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java create mode 100644 jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolver.java create mode 100644 jmix-search/search/src/main/java/io/jmix/search/utils/SupportedFileExtensions.java diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java similarity index 64% rename from jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java rename to jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java index fee7894d04..5e741efdf0 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java @@ -16,13 +16,13 @@ package io.jmix.search.exception; -import org.apache.commons.io.FilenameUtils; +import io.jmix.core.FileRef; -public class UnsupportedFileFormatException extends Exception { +public class EmptyFileExtensionException extends ParserResolvingException { - public static final String MESSAGE = "The file %s with '%s' extension is not supported."; + public static final String MESSAGE = "Extension of the file %s is empty"; - public UnsupportedFileFormatException(String fileName) { - super(String.format(MESSAGE, fileName, FilenameUtils.getExtension(fileName))); + public EmptyFileExtensionException(String fileName) { + super(String.format(MESSAGE, fileName)); } } diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java new file mode 100644 index 0000000000..ddbeccf6a8 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java @@ -0,0 +1,23 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.exception; + +public abstract class ParserResolvingException extends Exception { + public ParserResolvingException(String message) { + super(message); + } +} diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java new file mode 100644 index 0000000000..d720ea226b --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java @@ -0,0 +1,40 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.exception; + +import io.jmix.search.utils.SupportedFileExtensions; +import org.apache.commons.io.FilenameUtils; + +import java.util.Arrays; + +import static java.util.stream.Collectors.joining; + +public class UnsupportedFileExtensionException extends ParserResolvingException { + + public static final String MESSAGE = "The file %s with '%s' extension is not supported. " + + "Only following file extensions are supported %s."; + + public UnsupportedFileExtensionException(String fileName) { + super(String.format( + MESSAGE, + fileName, + FilenameUtils.getExtension(fileName), + Arrays.stream(SupportedFileExtensions.values()) + .map(SupportedFileExtensions::getSymbols) + .collect(joining(", ")))); + } +} diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java b/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java index a0b2453f6d..3a55a057b2 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java @@ -25,7 +25,8 @@ import io.jmix.core.metamodel.datatype.impl.FileRefDatatype; import io.jmix.core.metamodel.model.MetaPropertyPath; import io.jmix.search.exception.FileParseException; -import io.jmix.search.exception.UnsupportedFileFormatException; +import io.jmix.search.exception.ParserResolvingException; +import io.jmix.search.exception.UnsupportedFileExtensionException; import io.jmix.search.index.mapping.ParameterKeys; import io.jmix.search.utils.BooleanParser; import io.jmix.search.utils.FileProcessor; @@ -91,7 +92,7 @@ protected void addFileContent(ObjectNode node, FileRef fileRef) { try { String content = fileProcessor.extractFileContent(fileRef); node.put("_content", content); - } catch (UnsupportedFileFormatException e) { + } catch (ParserResolvingException e) { log.warn(e.getMessage()); } catch (FileParseException e) { log.error("Unable to index file content", e); diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolver.java new file mode 100644 index 0000000000..d6bdeb4da8 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolver.java @@ -0,0 +1,44 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils; + +import com.google.common.base.Strings; +import io.jmix.core.FileRef; +import io.jmix.search.exception.EmptyFileExtensionException; +import io.jmix.search.exception.ParserResolvingException; +import io.jmix.search.exception.UnsupportedFileExtensionException; +import org.apache.commons.io.FilenameUtils; +import org.apache.tika.parser.Parser; +import org.springframework.stereotype.Component; + +@Component("search_FileParserResolver") +public class FileParserResolver { + + public Parser getParser(FileRef fileRef) throws ParserResolvingException { + String fileName = fileRef.getFileName(); + String fileExtension = FilenameUtils.getExtension(fileName); + if (Strings.isNullOrEmpty(fileExtension)){ + throw new EmptyFileExtensionException(fileName); + } + for (SupportedFileExtensions extension : SupportedFileExtensions.values()) { + if (extension.getSymbols().equals(fileExtension)) { + return extension.getParser(); + } + } + throw new UnsupportedFileExtensionException(fileName); + } +} diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index ad8f96e0e1..b8589ca2ee 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -16,14 +16,12 @@ package io.jmix.search.utils; -import com.google.common.base.Strings; import io.jmix.core.FileRef; import io.jmix.core.FileStorage; import io.jmix.core.FileStorageLocator; import io.jmix.core.common.util.Preconditions; import io.jmix.search.exception.FileParseException; -import io.jmix.search.exception.UnsupportedFileFormatException; -import org.apache.commons.io.FilenameUtils; +import io.jmix.search.exception.ParserResolvingException; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; @@ -31,10 +29,6 @@ import org.apache.tika.parser.microsoft.OfficeParser; import org.apache.tika.parser.microsoft.OfficeParserConfig; import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; -import org.apache.tika.parser.odf.OpenDocumentParser; -import org.apache.tika.parser.pdf.PDFParser; -import org.apache.tika.parser.rtf.RTFParser; -import org.apache.tika.parser.txt.TXTParser; import org.apache.tika.sax.BodyContentHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,7 +36,6 @@ import java.io.InputStream; import java.io.StringWriter; -import java.util.Optional; @Component public class FileProcessor { @@ -50,16 +43,18 @@ public class FileProcessor { private static final Logger log = LoggerFactory.getLogger(FileProcessor.class); protected FileStorageLocator fileStorageLocator; + private final FileParserResolver fileParserResolver; - public FileProcessor(FileStorageLocator fileStorageLocator) { + public FileProcessor(FileStorageLocator fileStorageLocator, FileParserResolver fileParserResolver) { this.fileStorageLocator = fileStorageLocator; + this.fileParserResolver = fileParserResolver; } - public String extractFileContent(FileRef fileRef) throws FileParseException, UnsupportedFileFormatException { + public String extractFileContent(FileRef fileRef) throws FileParseException, ParserResolvingException { Preconditions.checkNotNullArgument(fileRef); log.debug("Extract content of file {}", fileRef); FileStorage fileStorage = fileStorageLocator.getByName(fileRef.getStorageName()); - Parser parser = getParser(fileRef); + Parser parser = fileParserResolver.getParser(fileRef); log.debug("Parser for file {}: {}", fileRef, parser); StringWriter stringWriter = new StringWriter(); @@ -84,56 +79,4 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Uns } return stringWriter.toString(); } - - protected Parser getParser(FileRef fileRef) throws UnsupportedFileFormatException { - Optional parserOpt = getParserOpt(fileRef); - return parserOpt.orElseThrow(() -> new UnsupportedFileFormatException(fileRef.getFileName())); - } - - protected ParseContext createParseContext() { - ParseContext parseContext = new ParseContext(); - - OfficeParserConfig officeParserConfig = new OfficeParserConfig(); - officeParserConfig.setIncludeHeadersAndFooters(false); - parseContext.set(OfficeParserConfig.class, officeParserConfig); - - return parseContext; - } - - protected Optional getParserOpt(FileRef fileRef) { - Parser parser; - String ext = FilenameUtils.getExtension(fileRef.getFileName()).toLowerCase(); - if (Strings.isNullOrEmpty(ext)) { - log.warn("Unable to create a parser for a file without extension"); - parser = null; - } else { - switch (ext) { - case "pdf": - parser = new PDFParser(); - break; - case "doc": - case "xls": - parser = new OfficeParser(); - break; - case "docx": - case "xlsx": - parser = new OOXMLParser(); - break; - case "odt": - case "ods": - parser = new OpenDocumentParser(); - break; - case "rtf": - parser = new RTFParser(); - break; - case "txt": - parser = new TXTParser(); - break; - default: - log.warn("Unsupported file extension: {}", ext); - parser = null; - } - } - return Optional.ofNullable(parser); - } } diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/SupportedFileExtensions.java b/jmix-search/search/src/main/java/io/jmix/search/utils/SupportedFileExtensions.java new file mode 100644 index 0000000000..e4a08e6fd6 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/SupportedFileExtensions.java @@ -0,0 +1,55 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils; + +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.microsoft.OfficeParser; +import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; +import org.apache.tika.parser.odf.OpenDocumentParser; +import org.apache.tika.parser.pdf.PDFParser; +import org.apache.tika.parser.rtf.RTFParser; +import org.apache.tika.parser.txt.TXTParser; + +import java.util.function.Supplier; + +public enum SupportedFileExtensions { + PDF("pdf", PDFParser::new), + DOC("doc", OfficeParser::new), + XLS("xls", OfficeParser::new), + DOCX("docx", OOXMLParser::new), + XLSX("xlsx", OOXMLParser::new), + ODT("odt", OpenDocumentParser::new), + ODS("ods", OpenDocumentParser::new), + RTF("rtf", RTFParser::new), + TXT("rtf", TXTParser::new); + + private final String symbols; + private final Supplier parserSupplier; + + SupportedFileExtensions(String symbols, Supplier parserSupplier) { + this.symbols = symbols; + this.parserSupplier = parserSupplier; + } + + public String getSymbols() { + return symbols; + } + + public Parser getParser() { + return parserSupplier.get(); + } +} From 23a2f66c84adea7d43bd0d0da1edc85c2a81e68b Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Sat, 7 Sep 2024 19:39:37 +0400 Subject: [PATCH 03/81] UnsupportedFileExtensionExceptionTest --- jmix-search/search/search.gradle | 1 + ...supportedFileExtensionExceptionTest.groovy | 42 +++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy diff --git a/jmix-search/search/search.gradle b/jmix-search/search/search.gradle index d3cde7ec0d..0c0622747a 100644 --- a/jmix-search/search/search.gradle +++ b/jmix-search/search/search.gradle @@ -69,6 +69,7 @@ dependencies { testImplementation 'org.junit.jupiter:junit-jupiter-engine' testImplementation 'org.junit.jupiter:junit-jupiter-params' testImplementation 'org.junit.vintage:junit-vintage-engine' + testImplementation 'org.spockframework:spock-core' testImplementation 'org.mockito:mockito-core' testImplementation "org.spockframework:spock-core" testRuntimeOnly 'org.slf4j:slf4j-simple' diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy new file mode 100644 index 0000000000..6599ba1eb0 --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy @@ -0,0 +1,42 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.exception + +import spock.lang.Specification + +class UnsupportedFileExtensionExceptionTest extends Specification { + + + public static final String MESSAGE_1 = "The file the-file-with-not-supported-extension.sql with 'sql' extension " + + "is not supported. " + + "Only following file extensions are supported pdf, doc, xls, docx, xlsx, odt, ods, rtf, rtf." + public static final String MESSAGE_2 = "The file another-file.smt with 'smt' extension is not supported. " + + "Only following file extensions are supported pdf, doc, xls, docx, xlsx, odt, ods, rtf, rtf." + + def "message test"(){ + when: + def exception = new UnsupportedFileExtensionException(a) + + then: + exception.getMessage() == b + + where: + a | b + "the-file-with-not-supported-extension.sql" | MESSAGE_1 + "another-file.smt" | MESSAGE_2 + } +} From e5ed2cebef2effb3c564f40930eddb9f55351e2c Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Sat, 7 Sep 2024 22:55:40 +0400 Subject: [PATCH 04/81] FileParserResolverTest --- .../utils/FileParserResolverTest.groovy | 110 ++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy new file mode 100644 index 0000000000..c7d0b02286 --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy @@ -0,0 +1,110 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils + +import io.jmix.core.FileRef +import io.jmix.search.exception.EmptyFileExtensionException +import io.jmix.search.exception.UnsupportedFileExtensionException +import org.apache.tika.parser.microsoft.OfficeParser +import org.apache.tika.parser.microsoft.ooxml.OOXMLParser +import org.apache.tika.parser.odf.OpenDocumentParser +import org.apache.tika.parser.pdf.PDFParser +import org.apache.tika.parser.rtf.RTFParser +import org.apache.tika.parser.txt.TXTParser +import spock.lang.Specification + +class FileParserResolverTest extends Specification { + def "should throw EmptyFileExtensionException when the given file name has no extension"() { + given: + FileRef fileRef = Mock() + fileRef.getFileName() >> a + + and: + def parserResolver = new FileParserResolver() + + when: + parserResolver.getParser(fileRef) + + then: + thrown(EmptyFileExtensionException) + + where: + a << ["abc", "def", "abc.", "abc.."] + } + + def "should throw EmptyFileExtensionException when the given file name with unsupported extension"() { + given: + FileRef fileRef = Mock() + fileRef.getFileName() >> a + + and: + def parserResolver = new FileParserResolver() + + when: + parserResolver.getParser(fileRef) + + then: + thrown(UnsupportedFileExtensionException) + + where: + a << ["abc.def", "def.zxc"] + } + + def "should throw EmptyFileExtensionException with detailed description of the problem"() { + given: + FileRef fileRef = Mock() + fileRef.getFileName() >> "abc.def" + + and: + def parserResolver = new FileParserResolver() + + when: + parserResolver.getParser(fileRef) + + then: + def exception = thrown(UnsupportedFileExtensionException) + exception.getMessage() == "The file abc.def with 'def' extension is not supported. " + + "Only following file extensions are supported pdf, doc, xls, docx, xlsx, odt, ods, rtf, txt." + } + + def "should return parser of "() { + given: + FileRef fileRef = Mock() + fileRef.getFileName() >> "filename." + fileExtension + + and: + def parserResolver = new FileParserResolver() + + when: + def parser = parserResolver.getParser(fileRef) + + then: + parser.getClass() == parserClass + + where: + fileExtension | parserClass + "pdf"| PDFParser + "doc"| OfficeParser + "xls"| OfficeParser + "docx"| OOXMLParser + "xlsx"| OOXMLParser + "odt"| OpenDocumentParser + "ods"| OpenDocumentParser + "rtf"| RTFParser + "txt"| TXTParser + } +} From 0b630ac66db5491e37da4ebd0ea92ff135ea4b44 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Sat, 7 Sep 2024 22:58:27 +0400 Subject: [PATCH 05/81] FileParserResolverTest --- .../utils/FileParserResolverTest.groovy | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy index c7d0b02286..2f8b95788b 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy @@ -3,7 +3,7 @@ * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * You may obtain fileName copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -31,7 +31,7 @@ class FileParserResolverTest extends Specification { def "should throw EmptyFileExtensionException when the given file name has no extension"() { given: FileRef fileRef = Mock() - fileRef.getFileName() >> a + fileRef.getFileName() >> fileName and: def parserResolver = new FileParserResolver() @@ -43,13 +43,13 @@ class FileParserResolverTest extends Specification { thrown(EmptyFileExtensionException) where: - a << ["abc", "def", "abc.", "abc.."] + fileName << ["abc", "def", "abc.", "abc.."] } def "should throw EmptyFileExtensionException when the given file name with unsupported extension"() { given: FileRef fileRef = Mock() - fileRef.getFileName() >> a + fileRef.getFileName() >> fileName and: def parserResolver = new FileParserResolver() @@ -61,7 +61,7 @@ class FileParserResolverTest extends Specification { thrown(UnsupportedFileExtensionException) where: - a << ["abc.def", "def.zxc"] + fileName << ["abc.def", "def.zxc"] } def "should throw EmptyFileExtensionException with detailed description of the problem"() { @@ -97,14 +97,14 @@ class FileParserResolverTest extends Specification { where: fileExtension | parserClass - "pdf"| PDFParser - "doc"| OfficeParser - "xls"| OfficeParser - "docx"| OOXMLParser - "xlsx"| OOXMLParser - "odt"| OpenDocumentParser - "ods"| OpenDocumentParser - "rtf"| RTFParser - "txt"| TXTParser + "pdf" | PDFParser + "doc" | OfficeParser + "xls" | OfficeParser + "docx" | OOXMLParser + "xlsx" | OOXMLParser + "odt" | OpenDocumentParser + "ods" | OpenDocumentParser + "rtf" | RTFParser + "txt" | TXTParser } } From c51699db4c4fd093fd4c163d0cfea2c896484805 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Sat, 7 Sep 2024 22:58:55 +0400 Subject: [PATCH 06/81] File type correction --- .../main/java/io/jmix/search/utils/SupportedFileExtensions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/SupportedFileExtensions.java b/jmix-search/search/src/main/java/io/jmix/search/utils/SupportedFileExtensions.java index e4a08e6fd6..51a6f6540d 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/SupportedFileExtensions.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/SupportedFileExtensions.java @@ -35,7 +35,7 @@ public enum SupportedFileExtensions { ODT("odt", OpenDocumentParser::new), ODS("ods", OpenDocumentParser::new), RTF("rtf", RTFParser::new), - TXT("rtf", TXTParser::new); + TXT("txt", TXTParser::new); private final String symbols; private final Supplier parserSupplier; From 230e41ac9b1ab63a5fba4ee9c5da106c69169d05 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Sat, 7 Sep 2024 22:59:35 +0400 Subject: [PATCH 07/81] UnsupportedFileExtensionExceptionTest --- .../UnsupportedFileExtensionExceptionTest.groovy | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy index 6599ba1eb0..a98b609d9c 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy @@ -3,7 +3,7 @@ * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * You may obtain fileName copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -27,16 +27,16 @@ class UnsupportedFileExtensionExceptionTest extends Specification { public static final String MESSAGE_2 = "The file another-file.smt with 'smt' extension is not supported. " + "Only following file extensions are supported pdf, doc, xls, docx, xlsx, odt, ods, rtf, rtf." - def "message test"(){ + def "message test"() { when: - def exception = new UnsupportedFileExtensionException(a) + def exception = new UnsupportedFileExtensionException(fileName) then: exception.getMessage() == b where: - a | b + fileName | b "the-file-with-not-supported-extension.sql" | MESSAGE_1 - "another-file.smt" | MESSAGE_2 + "another-file.smt" | MESSAGE_2 } } From a8d720b67ea62a4da0d6af6036500c89b5166844 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Sat, 7 Sep 2024 23:32:31 +0400 Subject: [PATCH 08/81] Groovy tests correction --- ...supportedFileExtensionExceptionTest.groovy | 6 +-- .../utils/FileParserResolverTest.groovy | 25 ++-------- .../search/utils/FileProcessorTest.groovy | 47 +++++++++++++++++++ .../jmix/search/utils/FileProcessorTest.java | 44 ----------------- 4 files changed, 54 insertions(+), 68 deletions(-) create mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy index a98b609d9c..9429b7b303 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy @@ -23,9 +23,9 @@ class UnsupportedFileExtensionExceptionTest extends Specification { public static final String MESSAGE_1 = "The file the-file-with-not-supported-extension.sql with 'sql' extension " + "is not supported. " + - "Only following file extensions are supported pdf, doc, xls, docx, xlsx, odt, ods, rtf, rtf." + "Only following file extensions are supported pdf, doc, xls, docx, xlsx, odt, ods, rtf, txt." public static final String MESSAGE_2 = "The file another-file.smt with 'smt' extension is not supported. " + - "Only following file extensions are supported pdf, doc, xls, docx, xlsx, odt, ods, rtf, rtf." + "Only following file extensions are supported pdf, doc, xls, docx, xlsx, odt, ods, rtf, txt." def "message test"() { when: @@ -35,7 +35,7 @@ class UnsupportedFileExtensionExceptionTest extends Specification { exception.getMessage() == b where: - fileName | b + fileName | b "the-file-with-not-supported-extension.sql" | MESSAGE_1 "another-file.smt" | MESSAGE_2 } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy index 2f8b95788b..ffdef6d9da 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy @@ -46,7 +46,7 @@ class FileParserResolverTest extends Specification { fileName << ["abc", "def", "abc.", "abc.."] } - def "should throw EmptyFileExtensionException when the given file name with unsupported extension"() { + def "should throw UnsupportedFileExtensionException when the given file name with unsupported extension"() { given: FileRef fileRef = Mock() fileRef.getFileName() >> fileName @@ -58,30 +58,13 @@ class FileParserResolverTest extends Specification { parserResolver.getParser(fileRef) then: - thrown(UnsupportedFileExtensionException) - + def exception = thrown(UnsupportedFileExtensionException) + exception.getMessage().contains(fileName) where: fileName << ["abc.def", "def.zxc"] } - def "should throw EmptyFileExtensionException with detailed description of the problem"() { - given: - FileRef fileRef = Mock() - fileRef.getFileName() >> "abc.def" - - and: - def parserResolver = new FileParserResolver() - - when: - parserResolver.getParser(fileRef) - - then: - def exception = thrown(UnsupportedFileExtensionException) - exception.getMessage() == "The file abc.def with 'def' extension is not supported. " + - "Only following file extensions are supported pdf, doc, xls, docx, xlsx, odt, ods, rtf, txt." - } - - def "should return parser of "() { + def "should return parser of the type that corresponds to the file extension"() { given: FileRef fileRef = Mock() fileRef.getFileName() >> "filename." + fileExtension diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy new file mode 100644 index 0000000000..f2bd53a3d9 --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy @@ -0,0 +1,47 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils + +import io.jmix.core.FileRef +import io.jmix.core.FileStorageLocator +import io.jmix.search.exception.EmptyFileExtensionException +import io.jmix.search.exception.ParserResolvingException +import io.jmix.search.exception.UnsupportedFileExtensionException +import spock.lang.Specification + +class FileProcessorTest extends Specification { + def "should throw the ParserResolvingException that have been thrown by the FileParserResolver"() { + given: + FileStorageLocator storageLocatorMock = Mock() + + and: + FileParserResolver fileParserResolver = Mock() + FileRef fileRefMock = Mock() + fileParserResolver.getParser(fileRefMock) >> { throw exception } + FileProcessor fileProcessor = new FileProcessor(storageLocatorMock, fileParserResolver) + + when: + fileProcessor.extractFileContent(fileRefMock) + + then: + ParserResolvingException throwable = thrown() + throwable == exception + + where: + exception << [new UnsupportedFileExtensionException("any.name"), new EmptyFileExtensionException("any")] + } +} diff --git a/jmix-search/search/src/test/java/io/jmix/search/utils/FileProcessorTest.java b/jmix-search/search/src/test/java/io/jmix/search/utils/FileProcessorTest.java index ec53887d6a..e69de29bb2 100644 --- a/jmix-search/search/src/test/java/io/jmix/search/utils/FileProcessorTest.java +++ b/jmix-search/search/src/test/java/io/jmix/search/utils/FileProcessorTest.java @@ -1,44 +0,0 @@ - -import io.jmix.core.FileRef; -import io.jmix.core.FileStorageLocator; -import io.jmix.search.exception.UnsupportedFileFormatException; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -class FileProcessorTest { - - public static final String FILE_NAME_EXAMPLE = "the-file-with-not-supported-extension.sql"; - public static final String FILE_NAME_EXAMPLE_2 = "another-file.smt"; - - @Test - void extractFileContent_1() { - FileStorageLocator storageLocatorMock = mock(FileStorageLocator.class); - FileProcessor fileProcessor = new FileProcessor(storageLocatorMock); - FileRef fileRefMock = mock(FileRef.class); - when(fileRefMock.getFileName()).thenReturn(FILE_NAME_EXAMPLE); - UnsupportedFileFormatException exception = assertThrows( - UnsupportedFileFormatException.class, - () -> fileProcessor.extractFileContent(fileRefMock)); - assertEquals( - "The file the-file-with-not-supported-extension.sql with 'sql' extension is not supported.", - exception.getMessage()); - } - - @Test - void extractFileContent_2() { - FileStorageLocator storageLocatorMock = mock(FileStorageLocator.class); - FileProcessor fileProcessor = new FileProcessor(storageLocatorMock); - FileRef fileRefMock = mock(FileRef.class); - when(fileRefMock.getFileName()).thenReturn(FILE_NAME_EXAMPLE_2); - UnsupportedFileFormatException exception = assertThrows( - UnsupportedFileFormatException.class, - () -> fileProcessor.extractFileContent(fileRefMock)); - assertEquals( - "The file another-file.smt with 'smt' extension is not supported.", - exception.getMessage()); - } - -} \ No newline at end of file From 5f81bf0cdd2ad9012f9f7f3b19462c0ae0890891 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Sat, 7 Sep 2024 23:33:47 +0400 Subject: [PATCH 09/81] minor change --- .../src/main/java/io/jmix/search/utils/FileProcessor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index b8589ca2ee..6ee306ae3b 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -37,7 +37,7 @@ import java.io.InputStream; import java.io.StringWriter; -@Component +@Component("search_FileProcessor") public class FileProcessor { private static final Logger log = LoggerFactory.getLogger(FileProcessor.class); From 078543bec960e5e54b6d5d65db544767ed6a0890 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Sat, 7 Sep 2024 23:35:06 +0400 Subject: [PATCH 10/81] minor change --- .../io/jmix/search/exception/EmptyFileExtensionException.java | 2 -- .../mapping/propertyvalue/impl/FilePropertyValueExtractor.java | 1 - 2 files changed, 3 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java index 5e741efdf0..c50df267b2 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java @@ -16,8 +16,6 @@ package io.jmix.search.exception; -import io.jmix.core.FileRef; - public class EmptyFileExtensionException extends ParserResolvingException { public static final String MESSAGE = "Extension of the file %s is empty"; diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java b/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java index 3a55a057b2..a4d30979a9 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java @@ -26,7 +26,6 @@ import io.jmix.core.metamodel.model.MetaPropertyPath; import io.jmix.search.exception.FileParseException; import io.jmix.search.exception.ParserResolvingException; -import io.jmix.search.exception.UnsupportedFileExtensionException; import io.jmix.search.index.mapping.ParameterKeys; import io.jmix.search.utils.BooleanParser; import io.jmix.search.utils.FileProcessor; From 856366c0782919c2c232681dc623f21b44aa4f1e Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Sun, 8 Sep 2024 00:38:40 +0400 Subject: [PATCH 11/81] FilePropertyValueExtractorTest --- .../FilePropertyValueExtractorTest.groovy | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy new file mode 100644 index 0000000000..5b3bbbbcc6 --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy @@ -0,0 +1,48 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.index.mapping.propertyvalue.impl + +import io.jmix.core.FileRef +import io.jmix.search.exception.EmptyFileExtensionException +import io.jmix.search.exception.UnsupportedFileExtensionException +import io.jmix.search.utils.FileProcessor +import spock.lang.Specification + +class FilePropertyValueExtractorTest extends Specification { + + + def "nothing should be thrown if fileProcessor throws a ParserResolvingException"() { + given: + FileRef fileRef = Mock() + + and: + FileProcessor fileProcessor = Mock() + fileProcessor.extractFileContent(fileRef) >> {throw exception} + + and: + FilePropertyValueExtractor extractor = new FilePropertyValueExtractor(fileProcessor) + + when: + extractor.addFileContent(null, fileRef) + + then: + true + + where: + exception<<[new UnsupportedFileExtensionException("any.file"), new EmptyFileExtensionException("any")] + } +} From 939b5606efb17de97d587375a54b5a70977da04b Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Mon, 9 Sep 2024 17:55:31 +0400 Subject: [PATCH 12/81] FilePropertyValueExtractorTest enhancement --- .../FilePropertyValueExtractorTest.groovy | 23 +++++++++- .../io/jmix/search/utils/LogbackMocker.java | 44 +++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy index 5b3bbbbcc6..787f6969fb 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy @@ -16,14 +16,28 @@ package io.jmix.search.index.mapping.propertyvalue.impl +import ch.qos.logback.classic.Level +import ch.qos.logback.classic.spi.ILoggingEvent +import ch.qos.logback.core.read.ListAppender import io.jmix.core.FileRef import io.jmix.search.exception.EmptyFileExtensionException import io.jmix.search.exception.UnsupportedFileExtensionException import io.jmix.search.utils.FileProcessor import spock.lang.Specification +import static io.jmix.search.utils.LogbackMocker.cleanUpAppender +import static io.jmix.search.utils.LogbackMocker.createAttachedAppender + + class FilePropertyValueExtractorTest extends Specification { + private ListAppender appender + + void setup() { + appender = createAttachedAppender( + FilePropertyValueExtractor.class, + Level.WARN) + } def "nothing should be thrown if fileProcessor throws a ParserResolvingException"() { given: @@ -40,9 +54,16 @@ class FilePropertyValueExtractorTest extends Specification { extractor.addFileContent(null, fileRef) then: - true + this.appender.list.size() == 1 + def loggingEvent = this.appender.list.get(0) + loggingEvent.getLevel() == Level.WARN + loggingEvent.getMessage() == exception.getMessage() where: exception<<[new UnsupportedFileExtensionException("any.file"), new EmptyFileExtensionException("any")] } + + void cleanup() { + cleanUpAppender(FilePropertyValueExtractor.class, appender) + } } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java b/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java new file mode 100644 index 0000000000..cf20b96fe5 --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java @@ -0,0 +1,44 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils; + +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.Logger; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.Appender; +import ch.qos.logback.core.read.ListAppender; +import io.jmix.search.index.mapping.propertyvalue.impl.FilePropertyValueExtractor; +import org.slf4j.LoggerFactory; + +public class LogbackMocker { + + public static ListAppender createAttachedAppender(Class classForLogging, + Level loggingLevel) { + Logger logger = (Logger) LoggerFactory.getLogger(classForLogging); + + ListAppender appender = new ListAppender<>(); + logger.addAppender(appender); + logger.setLevel(loggingLevel); + appender.start(); + return appender; + } + + public static void cleanUpAppender(Class classForLogging, Appender appender){ + Logger logger = (Logger) LoggerFactory.getLogger(classForLogging); + logger.detachAppender(appender); + } +} From f5b0bf0a66ff183927f3f10ade6809e98c9d6e4f Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Mon, 9 Sep 2024 17:59:40 +0400 Subject: [PATCH 13/81] FilePropertyValueExtractorTest enhancement --- .../src/test/groovy/io/jmix/search/utils/LogbackMocker.java | 1 - 1 file changed, 1 deletion(-) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java b/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java index cf20b96fe5..f122e7be43 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java @@ -21,7 +21,6 @@ import ch.qos.logback.classic.spi.ILoggingEvent; import ch.qos.logback.core.Appender; import ch.qos.logback.core.read.ListAppender; -import io.jmix.search.index.mapping.propertyvalue.impl.FilePropertyValueExtractor; import org.slf4j.LoggerFactory; public class LogbackMocker { From 659d3a326b005fa0b5b16a3bbb38fcb24b8c70cf Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Mon, 9 Sep 2024 18:15:43 +0400 Subject: [PATCH 14/81] dependencies adding --- jmix-search/search/search.gradle | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jmix-search/search/search.gradle b/jmix-search/search/search.gradle index 0c0622747a..ec3597c066 100644 --- a/jmix-search/search/search.gradle +++ b/jmix-search/search/search.gradle @@ -71,7 +71,8 @@ dependencies { testImplementation 'org.junit.vintage:junit-vintage-engine' testImplementation 'org.spockframework:spock-core' testImplementation 'org.mockito:mockito-core' - testImplementation "org.spockframework:spock-core" + testImplementation 'org.spockframework:spock-core' + testImplementation 'ch.qos.logback:logback-classic' testRuntimeOnly 'org.slf4j:slf4j-simple' testRuntimeOnly 'org.hsqldb:hsqldb' testRuntimeOnly 'org.junit.platform:junit-platform-launcher' From 68642551e7fb622dfe3697c26182aee607b78d64 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Tue, 10 Sep 2024 18:54:50 +0400 Subject: [PATCH 15/81] Review correction --- jmix-search/search/search.gradle | 1 - 1 file changed, 1 deletion(-) diff --git a/jmix-search/search/search.gradle b/jmix-search/search/search.gradle index ec3597c066..32c344c0f4 100644 --- a/jmix-search/search/search.gradle +++ b/jmix-search/search/search.gradle @@ -72,7 +72,6 @@ dependencies { testImplementation 'org.spockframework:spock-core' testImplementation 'org.mockito:mockito-core' testImplementation 'org.spockframework:spock-core' - testImplementation 'ch.qos.logback:logback-classic' testRuntimeOnly 'org.slf4j:slf4j-simple' testRuntimeOnly 'org.hsqldb:hsqldb' testRuntimeOnly 'org.junit.platform:junit-platform-launcher' From 7936b6b10748d259bee5e0de8790def7ce501e32 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Tue, 10 Sep 2024 19:19:04 +0400 Subject: [PATCH 16/81] Review correction(exceptions) --- .../jmix/search/exception/EmptyFileExtensionException.java | 6 +++++- .../io/jmix/search/exception/ParserResolvingException.java | 4 ++++ .../search/exception/UnsupportedFileExtensionException.java | 5 ++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java index c50df267b2..57eb6dc2b1 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java @@ -16,9 +16,13 @@ package io.jmix.search.exception; +/** + * An exception that is thrown when a user added some file without any extension. + * In this case any parser for this file couldn't be resolved. + */ public class EmptyFileExtensionException extends ParserResolvingException { - public static final String MESSAGE = "Extension of the file %s is empty"; + private static final String MESSAGE = "Extension of the file %s is empty"; public EmptyFileExtensionException(String fileName) { super(String.format(MESSAGE, fileName)); diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java index ddbeccf6a8..abec2588e5 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java @@ -16,6 +16,10 @@ package io.jmix.search.exception; +/** + * An exception that is thrown when the problem with parser resolving is occurred. The parser is need for + * the fields of the "File" type indexing. + */ public abstract class ParserResolvingException extends Exception { public ParserResolvingException(String message) { super(message); diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java index d720ea226b..d6da069298 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java @@ -23,9 +23,12 @@ import static java.util.stream.Collectors.joining; +/** + * An exception that is thrown when a user added some file with extension that there are no any known parser for. + */ public class UnsupportedFileExtensionException extends ParserResolvingException { - public static final String MESSAGE = "The file %s with '%s' extension is not supported. " + + private static final String MESSAGE = "The file %s with '%s' extension is not supported. " + "Only following file extensions are supported %s."; public UnsupportedFileExtensionException(String fileName) { From 8b2b1bd6f5a27234db7ca4c594f4ac4da683547e Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Tue, 10 Sep 2024 20:05:06 +0400 Subject: [PATCH 17/81] Review correction(exceptions) --- .../main/java/io/jmix/search/utils/FileParserResolver.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolver.java index d6bdeb4da8..982fa42e50 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolver.java @@ -31,14 +31,16 @@ public class FileParserResolver { public Parser getParser(FileRef fileRef) throws ParserResolvingException { String fileName = fileRef.getFileName(); String fileExtension = FilenameUtils.getExtension(fileName); - if (Strings.isNullOrEmpty(fileExtension)){ + if (Strings.isNullOrEmpty(fileExtension)) { throw new EmptyFileExtensionException(fileName); } + for (SupportedFileExtensions extension : SupportedFileExtensions.values()) { if (extension.getSymbols().equals(fileExtension)) { return extension.getParser(); } } + throw new UnsupportedFileExtensionException(fileName); } } From dd94a3d825530f1c2b3128c391705042c43bf92b Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Tue, 10 Sep 2024 20:08:55 +0400 Subject: [PATCH 18/81] Review correction(exceptions) --- .../src/main/java/io/jmix/search/utils/FileProcessor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index 6ee306ae3b..b8589ca2ee 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -37,7 +37,7 @@ import java.io.InputStream; import java.io.StringWriter; -@Component("search_FileProcessor") +@Component public class FileProcessor { private static final Logger log = LoggerFactory.getLogger(FileProcessor.class); From 927fd474c7a7b247bf5f77406051270135b8216b Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Tue, 10 Sep 2024 20:11:33 +0400 Subject: [PATCH 19/81] Review correction(exceptions) --- .../src/main/java/io/jmix/search/utils/FileProcessor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index b8589ca2ee..50e1de4e35 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -43,7 +43,7 @@ public class FileProcessor { private static final Logger log = LoggerFactory.getLogger(FileProcessor.class); protected FileStorageLocator fileStorageLocator; - private final FileParserResolver fileParserResolver; + protected FileParserResolver fileParserResolver; public FileProcessor(FileStorageLocator fileStorageLocator, FileParserResolver fileParserResolver) { this.fileStorageLocator = fileStorageLocator; From dccb7fc6082506756925842364a4417126e186c3 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Tue, 10 Sep 2024 21:09:57 +0400 Subject: [PATCH 20/81] Review correction(exceptions) --- .../src/main/java/io/jmix/search/utils/FileProcessor.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index 50e1de4e35..82898a3c16 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -54,7 +54,7 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Par Preconditions.checkNotNullArgument(fileRef); log.debug("Extract content of file {}", fileRef); FileStorage fileStorage = fileStorageLocator.getByName(fileRef.getStorageName()); - Parser parser = fileParserResolver.getParser(fileRef); + Parser parser = getParser(fileRef); log.debug("Parser for file {}: {}", fileRef, parser); StringWriter stringWriter = new StringWriter(); @@ -79,4 +79,8 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Par } return stringWriter.toString(); } + + protected Parser getParser(FileRef fileRef) throws ParserResolvingException { + return fileParserResolver.getParser(fileRef); + } } From b34675213c270daab59bc3954093061e2bca1f92 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 12 Sep 2024 15:18:18 +0400 Subject: [PATCH 21/81] Parser resolvers --- .../FileParserResolver.java} | 30 +++++---------- .../OfficeDocumentsParserResolver.java | 37 ++++++++++++++++++ .../OldOfficeDocumentsParserResolver.java | 37 ++++++++++++++++++ .../OpenOfficeDocumentsParserResolver.java | 38 +++++++++++++++++++ .../resolvers/PDFParserResolver.java | 37 ++++++++++++++++++ .../resolvers/RTFParserResolver.java | 37 ++++++++++++++++++ .../resolvers/TXTParserResolver.java | 37 ++++++++++++++++++ 7 files changed, 233 insertions(+), 20 deletions(-) rename jmix-search/search/src/main/java/io/jmix/search/utils/{SupportedFileExtensions.java => parserresolving/FileParserResolver.java} (58%) create mode 100644 jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java create mode 100644 jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java create mode 100644 jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java create mode 100644 jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/PDFParserResolver.java create mode 100644 jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/RTFParserResolver.java create mode 100644 jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/TXTParserResolver.java diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/SupportedFileExtensions.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolver.java similarity index 58% rename from jmix-search/search/src/main/java/io/jmix/search/utils/SupportedFileExtensions.java rename to jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolver.java index 51a6f6540d..3aaa04e980 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/SupportedFileExtensions.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolver.java @@ -14,20 +14,14 @@ * limitations under the License. */ -package io.jmix.search.utils; +package io.jmix.search.utils.parserresolving; import org.apache.tika.parser.Parser; -import org.apache.tika.parser.microsoft.OfficeParser; -import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; -import org.apache.tika.parser.odf.OpenDocumentParser; -import org.apache.tika.parser.pdf.PDFParser; -import org.apache.tika.parser.rtf.RTFParser; -import org.apache.tika.parser.txt.TXTParser; -import java.util.function.Supplier; +import java.util.List; -public enum SupportedFileExtensions { - PDF("pdf", PDFParser::new), +public interface FileParserResolver { + /* PDF("pdf", PDFParser::new), DOC("doc", OfficeParser::new), XLS("xls", OfficeParser::new), DOCX("docx", OOXMLParser::new), @@ -35,21 +29,17 @@ public enum SupportedFileExtensions { ODT("odt", OpenDocumentParser::new), ODS("ods", OpenDocumentParser::new), RTF("rtf", RTFParser::new), - TXT("txt", TXTParser::new); + TXT("txt", TXTParser::new);*/ - private final String symbols; + /*private final String symbols; private final Supplier parserSupplier; - SupportedFileExtensions(String symbols, Supplier parserSupplier) { + FileParserResolver(String symbols, Supplier parserSupplier) { this.symbols = symbols; this.parserSupplier = parserSupplier; } +*/ + List getExtension(); - public String getSymbols() { - return symbols; - } - - public Parser getParser() { - return parserSupplier.get(); - } + Parser getParser(); } diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java new file mode 100644 index 0000000000..bc362efe06 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java @@ -0,0 +1,37 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils.parserresolving.resolvers; + +import io.jmix.search.utils.parserresolving.FileParserResolver; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; +import org.springframework.stereotype.Component; + +import java.util.List; + +@Component("search_OfficeDocumentsResolver") +public class OfficeDocumentsParserResolver implements FileParserResolver { + @Override + public List getExtension() { + return List.of("docx, xlsx"); + } + + @Override + public Parser getParser() { + return new OOXMLParser(); + } +} \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java new file mode 100644 index 0000000000..4ac9c51de8 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java @@ -0,0 +1,37 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils.parserresolving.resolvers; + +import io.jmix.search.utils.parserresolving.FileParserResolver; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.microsoft.OfficeParser; +import org.springframework.stereotype.Component; + +import java.util.List; + +@Component("search_OldOfficeDocumentsResolver") +public class OldOfficeDocumentsParserResolver implements FileParserResolver { + @Override + public List getExtension() { + return List.of("doc, xls"); + } + + @Override + public Parser getParser() { + return new OfficeParser(); + } +} \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java new file mode 100644 index 0000000000..842303f3d3 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java @@ -0,0 +1,38 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils.parserresolving.resolvers; + +import io.jmix.search.utils.parserresolving.FileParserResolver; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; +import org.apache.tika.parser.odf.OpenDocumentParser; +import org.springframework.stereotype.Component; + +import java.util.List; + +@Component("search_OfficeDocumentsResolver") +public class OpenOfficeDocumentsParserResolver implements FileParserResolver { + @Override + public List getExtension() { + return List.of("odt, ods"); + } + + @Override + public Parser getParser() { + return new OpenDocumentParser(); + } +} \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/PDFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/PDFParserResolver.java new file mode 100644 index 0000000000..42fcef52ca --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/PDFParserResolver.java @@ -0,0 +1,37 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils.parserresolving.resolvers; + +import io.jmix.search.utils.parserresolving.FileParserResolver; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.rtf.RTFParser; +import org.springframework.stereotype.Component; + +import java.util.List; + +@Component("search_PDFParserResolver") +public class PDFParserResolver implements FileParserResolver { + @Override + public List getExtension() { + return List.of("rtf"); + } + + @Override + public Parser getParser() { + return new RTFParser(); + } +} \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/RTFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/RTFParserResolver.java new file mode 100644 index 0000000000..7c064d6cd1 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/RTFParserResolver.java @@ -0,0 +1,37 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils.parserresolving.resolvers; + +import io.jmix.search.utils.parserresolving.FileParserResolver; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.pdf.PDFParser; +import org.springframework.stereotype.Component; + +import java.util.List; + +@Component("search_RTFParserResolver") +public class RTFParserResolver implements FileParserResolver { + @Override + public List getExtension() { + return List.of("pdf"); + } + + @Override + public Parser getParser() { + return new PDFParser(); + } +} \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/TXTParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/TXTParserResolver.java new file mode 100644 index 0000000000..38a227049f --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/TXTParserResolver.java @@ -0,0 +1,37 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils.parserresolving.resolvers; + +import io.jmix.search.utils.parserresolving.FileParserResolver; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.pdf.PDFParser; +import org.springframework.stereotype.Component; + +import java.util.List; + +@Component("search_TXTParserResolver") +public class TXTParserResolver implements FileParserResolver { + @Override + public List getExtension() { + return List.of("txt"); + } + + @Override + public Parser getParser() { + return new PDFParser(); + } +} \ No newline at end of file From 5b39a4f67aee12be7af4dfa35969e769f8926920 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 12 Sep 2024 15:28:00 +0400 Subject: [PATCH 22/81] Parser resolvers --- .../resolvers/OfficeDocumentsParserResolver.java | 2 ++ .../resolvers/OldOfficeDocumentsParserResolver.java | 2 ++ .../resolvers/OpenOfficeDocumentsParserResolver.java | 2 ++ .../utils/parserresolving/resolvers/PDFParserResolver.java | 2 ++ .../utils/parserresolving/resolvers/RTFParserResolver.java | 2 ++ .../utils/parserresolving/resolvers/TXTParserResolver.java | 2 ++ 6 files changed, 12 insertions(+) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java index bc362efe06..3793156c7d 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java @@ -19,11 +19,13 @@ import io.jmix.search.utils.parserresolving.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; +import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; import java.util.List; @Component("search_OfficeDocumentsResolver") +@Order(100) public class OfficeDocumentsParserResolver implements FileParserResolver { @Override public List getExtension() { diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java index 4ac9c51de8..aa2885a747 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java @@ -19,11 +19,13 @@ import io.jmix.search.utils.parserresolving.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.OfficeParser; +import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; import java.util.List; @Component("search_OldOfficeDocumentsResolver") +@Order(100) public class OldOfficeDocumentsParserResolver implements FileParserResolver { @Override public List getExtension() { diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java index 842303f3d3..0fe5f2a167 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java @@ -20,11 +20,13 @@ import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; import org.apache.tika.parser.odf.OpenDocumentParser; +import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; import java.util.List; @Component("search_OfficeDocumentsResolver") +@Order(100) public class OpenOfficeDocumentsParserResolver implements FileParserResolver { @Override public List getExtension() { diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/PDFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/PDFParserResolver.java index 42fcef52ca..1754008172 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/PDFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/PDFParserResolver.java @@ -19,11 +19,13 @@ import io.jmix.search.utils.parserresolving.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.rtf.RTFParser; +import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; import java.util.List; @Component("search_PDFParserResolver") +@Order(100) public class PDFParserResolver implements FileParserResolver { @Override public List getExtension() { diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/RTFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/RTFParserResolver.java index 7c064d6cd1..7e2d1e3d58 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/RTFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/RTFParserResolver.java @@ -19,11 +19,13 @@ import io.jmix.search.utils.parserresolving.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.pdf.PDFParser; +import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; import java.util.List; @Component("search_RTFParserResolver") +@Order(100) public class RTFParserResolver implements FileParserResolver { @Override public List getExtension() { diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/TXTParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/TXTParserResolver.java index 38a227049f..b8775b6f79 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/TXTParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/TXTParserResolver.java @@ -19,11 +19,13 @@ import io.jmix.search.utils.parserresolving.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.pdf.PDFParser; +import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; import java.util.List; @Component("search_TXTParserResolver") +@Order(100) public class TXTParserResolver implements FileParserResolver { @Override public List getExtension() { From a64133d5e77cf85dd24b99f69f6eae2b915b21aa Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 12 Sep 2024 18:10:10 +0400 Subject: [PATCH 23/81] FileParserResolverManager --- .../FileParserResolverManager.java} | 29 ++++-- .../FileParserResolverManagerTest.groovy | 95 +++++++++++++++++++ 2 files changed, 117 insertions(+), 7 deletions(-) rename jmix-search/search/src/main/java/io/jmix/search/utils/{FileParserResolver.java => parserresolving/FileParserResolverManager.java} (60%) create mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolverManager.java similarity index 60% rename from jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolver.java rename to jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolverManager.java index 982fa42e50..4b64ed11e3 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolverManager.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package io.jmix.search.utils; +package io.jmix.search.utils.parserresolving; import com.google.common.base.Strings; import io.jmix.core.FileRef; @@ -25,8 +25,16 @@ import org.apache.tika.parser.Parser; import org.springframework.stereotype.Component; -@Component("search_FileParserResolver") -public class FileParserResolver { +import java.util.List; + +@Component("search_FileParserResolverManager") +public class FileParserResolverManager { + + protected List fileParserResolvers; + + public FileParserResolverManager(List fileParserResolvers) { + this.fileParserResolvers = fileParserResolvers; + } public Parser getParser(FileRef fileRef) throws ParserResolvingException { String fileName = fileRef.getFileName(); @@ -35,12 +43,19 @@ public Parser getParser(FileRef fileRef) throws ParserResolvingException { throw new EmptyFileExtensionException(fileName); } - for (SupportedFileExtensions extension : SupportedFileExtensions.values()) { - if (extension.getSymbols().equals(fileExtension)) { - return extension.getParser(); + for (FileParserResolver resolver : fileParserResolvers) { + if (resolver.getExtension().contains(fileExtension)) { + return resolver.getParser(); } } - throw new UnsupportedFileExtensionException(fileName); + throw new UnsupportedFileExtensionException(fileName, getSupportedExtensions()); + } + + private List getSupportedExtensions() { + return fileParserResolvers + .stream() + .flatMap(fileParserResolver -> fileParserResolver.getExtension().stream()) + .toList(); } } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy new file mode 100644 index 0000000000..4089d8de03 --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy @@ -0,0 +1,95 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils.parserresolving + +import io.jmix.core.FileRef +import io.jmix.search.exception.EmptyFileExtensionException +import io.jmix.search.exception.UnsupportedFileExtensionException +import org.apache.tika.parser.Parser +import spock.lang.Specification + +class FileParserResolverManagerTest extends Specification { + def "should throw EmptyFileExtensionException when the given file name has no extension"() { + given: + FileRef fileRef = Mock() + fileRef.getFileName() >> fileName + + and: + def parserResolver = new FileParserResolverManager(Collections.emptyList()) + + when: + parserResolver.getParser(fileRef) + + then: + thrown(EmptyFileExtensionException) + + where: + fileName << ["abc", "def", "abc.", "abc.."] + } + + def "should throw UnsupportedFileExtensionException when the given file name with unsupported extension"() { + given: + FileRef fileRef = Mock() + fileRef.getFileName() >> fileName + + and: + def resolver = Mock(FileParserResolver) + resolver.getExtension() >> List.of("docx", "xlsx") + def resolver2 = Mock(FileParserResolver) + resolver2.getExtension() >> List.of("doc", "xls") + + and: + def parserResolver = new FileParserResolverManager(List.of(resolver, resolver2)) + + when: + parserResolver.getParser(fileRef) + + then: + def exception = thrown(UnsupportedFileExtensionException) + exception.getMessage().contains(fileName) + where: + fileName << ["abc.def", "def.zxc"] + } + + def "should return parser of the type that corresponds to the file extension"() { + given: + def resolver = Mock(FileParserResolver) + resolver.getExtension() >> List.of("docx", "xlsx") + def parser1 = Mock(Parser) + resolver.getParser() >> parser1 + def resolver2 = Mock(FileParserResolver) + resolver2.getExtension() >> List.of("doc", "xls") + def parser2 = Mock(Parser) + resolver2.getParser() >> parser2 + + and: + def parserResolver = new FileParserResolverManager(List.of(resolver, resolver2)) + + expect: + parserResolver.getParser(createFileRefMock("docx")) == parser1 + parserResolver.getParser(createFileRefMock("xlsx")) == parser1 + parserResolver.getParser(createFileRefMock("doc")) == parser2 + parserResolver.getParser(createFileRefMock("xls")) == parser2 + + } + + private FileRef createFileRefMock(String extension) { + def fileRef = Mock(FileRef) + fileRef.getFileName() >> "filename." + extension + fileRef + } +} From b3444e36cce501217c1d883c8c9de19de7c670db Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 12 Sep 2024 18:19:01 +0400 Subject: [PATCH 24/81] FileParserResolverManager --- .../utils/FileParserResolverTest.groovy | 93 ------------------- 1 file changed, 93 deletions(-) delete mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy deleted file mode 100644 index ffdef6d9da..0000000000 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileParserResolverTest.groovy +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright 2024 Haulmont. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain fileName copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.jmix.search.utils - -import io.jmix.core.FileRef -import io.jmix.search.exception.EmptyFileExtensionException -import io.jmix.search.exception.UnsupportedFileExtensionException -import org.apache.tika.parser.microsoft.OfficeParser -import org.apache.tika.parser.microsoft.ooxml.OOXMLParser -import org.apache.tika.parser.odf.OpenDocumentParser -import org.apache.tika.parser.pdf.PDFParser -import org.apache.tika.parser.rtf.RTFParser -import org.apache.tika.parser.txt.TXTParser -import spock.lang.Specification - -class FileParserResolverTest extends Specification { - def "should throw EmptyFileExtensionException when the given file name has no extension"() { - given: - FileRef fileRef = Mock() - fileRef.getFileName() >> fileName - - and: - def parserResolver = new FileParserResolver() - - when: - parserResolver.getParser(fileRef) - - then: - thrown(EmptyFileExtensionException) - - where: - fileName << ["abc", "def", "abc.", "abc.."] - } - - def "should throw UnsupportedFileExtensionException when the given file name with unsupported extension"() { - given: - FileRef fileRef = Mock() - fileRef.getFileName() >> fileName - - and: - def parserResolver = new FileParserResolver() - - when: - parserResolver.getParser(fileRef) - - then: - def exception = thrown(UnsupportedFileExtensionException) - exception.getMessage().contains(fileName) - where: - fileName << ["abc.def", "def.zxc"] - } - - def "should return parser of the type that corresponds to the file extension"() { - given: - FileRef fileRef = Mock() - fileRef.getFileName() >> "filename." + fileExtension - - and: - def parserResolver = new FileParserResolver() - - when: - def parser = parserResolver.getParser(fileRef) - - then: - parser.getClass() == parserClass - - where: - fileExtension | parserClass - "pdf" | PDFParser - "doc" | OfficeParser - "xls" | OfficeParser - "docx" | OOXMLParser - "xlsx" | OOXMLParser - "odt" | OpenDocumentParser - "ods" | OpenDocumentParser - "rtf" | RTFParser - "txt" | TXTParser - } -} From 5c6dc63ae963a34dba906ef79268736724c1a142 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 13:59:29 +0400 Subject: [PATCH 25/81] FileProcessorTest --- .../main/java/io/jmix/search/utils/FileProcessor.java | 9 +++++---- .../groovy/io/jmix/search/utils/FileProcessorTest.groovy | 6 ++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index 82898a3c16..e90f44419f 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -22,6 +22,7 @@ import io.jmix.core.common.util.Preconditions; import io.jmix.search.exception.FileParseException; import io.jmix.search.exception.ParserResolvingException; +import io.jmix.search.utils.parserresolving.FileParserResolverManager; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; @@ -43,11 +44,11 @@ public class FileProcessor { private static final Logger log = LoggerFactory.getLogger(FileProcessor.class); protected FileStorageLocator fileStorageLocator; - protected FileParserResolver fileParserResolver; + protected FileParserResolverManager fileParserResolverManager; - public FileProcessor(FileStorageLocator fileStorageLocator, FileParserResolver fileParserResolver) { + public FileProcessor(FileStorageLocator fileStorageLocator, FileParserResolverManager fileParserResolverManager) { this.fileStorageLocator = fileStorageLocator; - this.fileParserResolver = fileParserResolver; + this.fileParserResolverManager = fileParserResolverManager; } public String extractFileContent(FileRef fileRef) throws FileParseException, ParserResolvingException { @@ -81,6 +82,6 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Par } protected Parser getParser(FileRef fileRef) throws ParserResolvingException { - return fileParserResolver.getParser(fileRef); + return fileParserResolverManager.getParser(fileRef); } } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy index f2bd53a3d9..83269133b5 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy @@ -21,6 +21,7 @@ import io.jmix.core.FileStorageLocator import io.jmix.search.exception.EmptyFileExtensionException import io.jmix.search.exception.ParserResolvingException import io.jmix.search.exception.UnsupportedFileExtensionException +import io.jmix.search.utils.parserresolving.FileParserResolverManager import spock.lang.Specification class FileProcessorTest extends Specification { @@ -29,7 +30,7 @@ class FileProcessorTest extends Specification { FileStorageLocator storageLocatorMock = Mock() and: - FileParserResolver fileParserResolver = Mock() + FileParserResolverManager fileParserResolver = Mock() FileRef fileRefMock = Mock() fileParserResolver.getParser(fileRefMock) >> { throw exception } FileProcessor fileProcessor = new FileProcessor(storageLocatorMock, fileParserResolver) @@ -42,6 +43,7 @@ class FileProcessorTest extends Specification { throwable == exception where: - exception << [new UnsupportedFileExtensionException("any.name"), new EmptyFileExtensionException("any")] + exception << [new UnsupportedFileExtensionException("any.name", List.of("txt, rtf")), + new EmptyFileExtensionException("any")] } } From 16bc24d50a06a2d559a0489888eca9cefa5f4536 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 14:03:24 +0400 Subject: [PATCH 26/81] UnsupportedFileExtensionException --- .../UnsupportedFileExtensionException.java | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java index d6da069298..aa9d9a769c 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java @@ -16,12 +16,9 @@ package io.jmix.search.exception; -import io.jmix.search.utils.SupportedFileExtensions; import org.apache.commons.io.FilenameUtils; -import java.util.Arrays; - -import static java.util.stream.Collectors.joining; +import java.util.List; /** * An exception that is thrown when a user added some file with extension that there are no any known parser for. @@ -31,13 +28,15 @@ public class UnsupportedFileExtensionException extends ParserResolvingException private static final String MESSAGE = "The file %s with '%s' extension is not supported. " + "Only following file extensions are supported %s."; - public UnsupportedFileExtensionException(String fileName) { + /** + * @param fileName - the name of the file which type is not supported + * @param supportedExtensions - the list of the supported extensions + */ + public UnsupportedFileExtensionException(String fileName, List supportedExtensions) { super(String.format( MESSAGE, fileName, FilenameUtils.getExtension(fileName), - Arrays.stream(SupportedFileExtensions.values()) - .map(SupportedFileExtensions::getSymbols) - .collect(joining(", ")))); + String.join(", ", supportedExtensions))); } } From 89aa4912acc571b021a6ee197bb437b7d727cd79 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 14:10:15 +0400 Subject: [PATCH 27/81] UnsupportedFileExtensionExceptionTest --- ...supportedFileExtensionExceptionTest.groovy | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy index 9429b7b303..3d604a9aac 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy @@ -21,22 +21,21 @@ import spock.lang.Specification class UnsupportedFileExtensionExceptionTest extends Specification { - public static final String MESSAGE_1 = "The file the-file-with-not-supported-extension.sql with 'sql' extension " + - "is not supported. " + - "Only following file extensions are supported pdf, doc, xls, docx, xlsx, odt, ods, rtf, txt." - public static final String MESSAGE_2 = "The file another-file.smt with 'smt' extension is not supported. " + - "Only following file extensions are supported pdf, doc, xls, docx, xlsx, odt, ods, rtf, txt." + private static final String MESSAGE_1 = "The file the-file-with-not-supported-extension.sql with 'sql' " + + "extension is not supported. Only following file extensions are supported txt, rtf." + private static final String MESSAGE_2 = "The file another-file.smt with 'smt' extension is not supported. " + + "Only following file extensions are supported abc, def." def "message test"() { when: - def exception = new UnsupportedFileExtensionException(fileName) + def exception = new UnsupportedFileExtensionException(fileName, supportedTypes) then: - exception.getMessage() == b + exception.getMessage() == message where: - fileName | b - "the-file-with-not-supported-extension.sql" | MESSAGE_1 - "another-file.smt" | MESSAGE_2 + fileName |supportedTypes| message + "the-file-with-not-supported-extension.sql" |["txt", "rtf"]| MESSAGE_1 + "another-file.smt" |["abc", "def"]| MESSAGE_2 } } From 7b3e26f78e93425fe1beb70980958a922945bb0c Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 14:13:30 +0400 Subject: [PATCH 28/81] FilePropertyValueExtractorTest --- .../propertyvalue/impl/FilePropertyValueExtractorTest.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy index 787f6969fb..10d92ce6fb 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy @@ -60,7 +60,7 @@ class FilePropertyValueExtractorTest extends Specification { loggingEvent.getMessage() == exception.getMessage() where: - exception<<[new UnsupportedFileExtensionException("any.file"), new EmptyFileExtensionException("any")] + exception<<[new UnsupportedFileExtensionException("any.file", ["txt"]), new EmptyFileExtensionException("any")] } void cleanup() { From 664c9ce9c90ba5832ec401ba9e0121c0544bea20 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 14:21:27 +0400 Subject: [PATCH 29/81] OpenOfficeDocumentsParserResolver correction --- .../resolvers/OpenOfficeDocumentsParserResolver.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java index 0fe5f2a167..06d9f49f80 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java @@ -25,7 +25,7 @@ import java.util.List; -@Component("search_OfficeDocumentsResolver") +@Component("search_OpenOfficeDocumentsParserResolver") @Order(100) public class OpenOfficeDocumentsParserResolver implements FileParserResolver { @Override From a35004a9de8e75e31a16ea67db2c814171a20e00 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 14:24:13 +0400 Subject: [PATCH 30/81] adding necessary dependency for testing purposes --- jmix-search/search/search.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/jmix-search/search/search.gradle b/jmix-search/search/search.gradle index 32c344c0f4..ec3597c066 100644 --- a/jmix-search/search/search.gradle +++ b/jmix-search/search/search.gradle @@ -72,6 +72,7 @@ dependencies { testImplementation 'org.spockframework:spock-core' testImplementation 'org.mockito:mockito-core' testImplementation 'org.spockframework:spock-core' + testImplementation 'ch.qos.logback:logback-classic' testRuntimeOnly 'org.slf4j:slf4j-simple' testRuntimeOnly 'org.hsqldb:hsqldb' testRuntimeOnly 'org.junit.platform:junit-platform-launcher' From 2f960553736faf069fec7e2318ea05127ee18ebc Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 14:27:45 +0400 Subject: [PATCH 31/81] Resolvers correction --- .../resolvers/OfficeDocumentsParserResolver.java | 2 +- .../resolvers/OldOfficeDocumentsParserResolver.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java index 3793156c7d..bea2e4a830 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java @@ -24,7 +24,7 @@ import java.util.List; -@Component("search_OfficeDocumentsResolver") +@Component("search_OfficeDocumentsParserResolver") @Order(100) public class OfficeDocumentsParserResolver implements FileParserResolver { @Override diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java index aa2885a747..7d3fe4e2c6 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java @@ -24,7 +24,7 @@ import java.util.List; -@Component("search_OldOfficeDocumentsResolver") +@Component("search_OldOfficeDocumentsParserResolver") @Order(100) public class OldOfficeDocumentsParserResolver implements FileParserResolver { @Override From 788f231522027b53ba0bac27193b1edc67ad31bb Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 14:35:19 +0400 Subject: [PATCH 32/81] test correction --- .../utils/parserresolving/FileParserResolverManagerTest.groovy | 2 ++ 1 file changed, 2 insertions(+) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy index 4089d8de03..0889861247 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy @@ -71,6 +71,8 @@ class FileParserResolverManagerTest extends Specification { resolver.getExtension() >> List.of("docx", "xlsx") def parser1 = Mock(Parser) resolver.getParser() >> parser1 + + and: def resolver2 = Mock(FileParserResolver) resolver2.getExtension() >> List.of("doc", "xls") def parser2 = Mock(Parser) From 2d0f663ea840a2b4625d83e9991087edab7fd612 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 15:50:40 +0400 Subject: [PATCH 33/81] Removing not necessary lines --- .../parserresolving/FileParserResolver.java | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolver.java index 3aaa04e980..16c4d87476 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolver.java @@ -21,24 +21,7 @@ import java.util.List; public interface FileParserResolver { - /* PDF("pdf", PDFParser::new), - DOC("doc", OfficeParser::new), - XLS("xls", OfficeParser::new), - DOCX("docx", OOXMLParser::new), - XLSX("xlsx", OOXMLParser::new), - ODT("odt", OpenDocumentParser::new), - ODS("ods", OpenDocumentParser::new), - RTF("rtf", RTFParser::new), - TXT("txt", TXTParser::new);*/ - /*private final String symbols; - private final Supplier parserSupplier; - - FileParserResolver(String symbols, Supplier parserSupplier) { - this.symbols = symbols; - this.parserSupplier = parserSupplier; - } -*/ List getExtension(); Parser getParser(); From 96f5f6d2db9475e8d995f38495040166a983ec6c Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 16:45:06 +0400 Subject: [PATCH 34/81] Extensions problem --- .../resolvers/OfficeDocumentsParserResolver.java | 2 +- .../resolvers/OldOfficeDocumentsParserResolver.java | 2 +- .../resolvers/OpenOfficeDocumentsParserResolver.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java index bea2e4a830..9e38def769 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java @@ -29,7 +29,7 @@ public class OfficeDocumentsParserResolver implements FileParserResolver { @Override public List getExtension() { - return List.of("docx, xlsx"); + return List.of("docx", "xlsx"); } @Override diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java index 7d3fe4e2c6..3994ca3fa9 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java @@ -29,7 +29,7 @@ public class OldOfficeDocumentsParserResolver implements FileParserResolver { @Override public List getExtension() { - return List.of("doc, xls"); + return List.of("doc", "xls"); } @Override diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java index 06d9f49f80..5ea71e3674 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java @@ -30,7 +30,7 @@ public class OpenOfficeDocumentsParserResolver implements FileParserResolver { @Override public List getExtension() { - return List.of("odt, ods"); + return List.of("odt", "ods"); } @Override From ed5865c8e88b985f45bd3127859cdeb3ec6b536a Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 17:32:13 +0400 Subject: [PATCH 35/81] Packages reorganizing --- .../fileparsing}/FileParserResolver.java | 2 +- .../index/fileparsing/package-info.java | 20 +++++++++++++++++++ .../OfficeDocumentsParserResolver.java | 4 ++-- .../OldOfficeDocumentsParserResolver.java | 4 ++-- .../OpenOfficeDocumentsParserResolver.java | 5 ++--- .../resolvers/PDFParserResolver.java | 4 ++-- .../resolvers/RTFParserResolver.java | 4 ++-- .../resolvers/TXTParserResolver.java | 4 ++-- .../fileparsing/resolvers/package-info.java | 20 +++++++++++++++++++ .../FileParserResolverManager.java | 5 +++-- .../io/jmix/search/utils/FileProcessor.java | 1 - .../search/utils/FileProcessorTest.groovy | 1 - .../FileParserResolverManagerTest.groovy | 2 ++ 13 files changed, 58 insertions(+), 18 deletions(-) rename jmix-search/search/src/main/java/io/jmix/search/{utils/parserresolving => index/fileparsing}/FileParserResolver.java (94%) create mode 100644 jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/package-info.java rename jmix-search/search/src/main/java/io/jmix/search/{utils/parserresolving => index/fileparsing}/resolvers/OfficeDocumentsParserResolver.java (90%) rename jmix-search/search/src/main/java/io/jmix/search/{utils/parserresolving => index/fileparsing}/resolvers/OldOfficeDocumentsParserResolver.java (90%) rename jmix-search/search/src/main/java/io/jmix/search/{utils/parserresolving => index/fileparsing}/resolvers/OpenOfficeDocumentsParserResolver.java (86%) rename jmix-search/search/src/main/java/io/jmix/search/{utils/parserresolving => index/fileparsing}/resolvers/PDFParserResolver.java (90%) rename jmix-search/search/src/main/java/io/jmix/search/{utils/parserresolving => index/fileparsing}/resolvers/RTFParserResolver.java (90%) rename jmix-search/search/src/main/java/io/jmix/search/{utils/parserresolving => index/fileparsing}/resolvers/TXTParserResolver.java (90%) create mode 100644 jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/package-info.java rename jmix-search/search/src/main/java/io/jmix/search/utils/{parserresolving => }/FileParserResolverManager.java (93%) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java similarity index 94% rename from jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolver.java rename to jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java index 16c4d87476..283b08ce65 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package io.jmix.search.utils.parserresolving; +package io.jmix.search.index.fileparsing; import org.apache.tika.parser.Parser; diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/package-info.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/package-info.java new file mode 100644 index 0000000000..823145b6d0 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright 2020 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@NonNullApi +package io.jmix.search.index.fileparsing; + +import org.springframework.lang.NonNullApi; \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OfficeDocumentsParserResolver.java similarity index 90% rename from jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java rename to jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OfficeDocumentsParserResolver.java index 9e38def769..1f8aa8aae7 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OfficeDocumentsParserResolver.java @@ -14,9 +14,9 @@ * limitations under the License. */ -package io.jmix.search.utils.parserresolving.resolvers; +package io.jmix.search.index.fileparsing.resolvers; -import io.jmix.search.utils.parserresolving.FileParserResolver; +import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; import org.springframework.core.annotation.Order; diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldOfficeDocumentsParserResolver.java similarity index 90% rename from jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java rename to jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldOfficeDocumentsParserResolver.java index 3994ca3fa9..968ab774af 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OldOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldOfficeDocumentsParserResolver.java @@ -14,9 +14,9 @@ * limitations under the License. */ -package io.jmix.search.utils.parserresolving.resolvers; +package io.jmix.search.index.fileparsing.resolvers; -import io.jmix.search.utils.parserresolving.FileParserResolver; +import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.OfficeParser; import org.springframework.core.annotation.Order; diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java similarity index 86% rename from jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java rename to jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java index 5ea71e3674..7526787867 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/OpenOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java @@ -14,11 +14,10 @@ * limitations under the License. */ -package io.jmix.search.utils.parserresolving.resolvers; +package io.jmix.search.index.fileparsing.resolvers; -import io.jmix.search.utils.parserresolving.FileParserResolver; +import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; -import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; import org.apache.tika.parser.odf.OpenDocumentParser; import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/PDFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java similarity index 90% rename from jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/PDFParserResolver.java rename to jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java index 1754008172..9b79d462ea 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/PDFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java @@ -14,9 +14,9 @@ * limitations under the License. */ -package io.jmix.search.utils.parserresolving.resolvers; +package io.jmix.search.index.fileparsing.resolvers; -import io.jmix.search.utils.parserresolving.FileParserResolver; +import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.rtf.RTFParser; import org.springframework.core.annotation.Order; diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/RTFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java similarity index 90% rename from jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/RTFParserResolver.java rename to jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java index 7e2d1e3d58..e3e8e885b6 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/RTFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java @@ -14,9 +14,9 @@ * limitations under the License. */ -package io.jmix.search.utils.parserresolving.resolvers; +package io.jmix.search.index.fileparsing.resolvers; -import io.jmix.search.utils.parserresolving.FileParserResolver; +import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.pdf.PDFParser; import org.springframework.core.annotation.Order; diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/TXTParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java similarity index 90% rename from jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/TXTParserResolver.java rename to jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java index b8775b6f79..8884272e40 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/resolvers/TXTParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java @@ -14,9 +14,9 @@ * limitations under the License. */ -package io.jmix.search.utils.parserresolving.resolvers; +package io.jmix.search.index.fileparsing.resolvers; -import io.jmix.search.utils.parserresolving.FileParserResolver; +import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.pdf.PDFParser; import org.springframework.core.annotation.Order; diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/package-info.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/package-info.java new file mode 100644 index 0000000000..99a9c324df --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright 2020 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@NonNullApi +package io.jmix.search.index.fileparsing.resolvers; + +import org.springframework.lang.NonNullApi; \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolverManager.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java similarity index 93% rename from jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolverManager.java rename to jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java index 4b64ed11e3..8ca6f0b4d7 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/parserresolving/FileParserResolverManager.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java @@ -14,13 +14,14 @@ * limitations under the License. */ -package io.jmix.search.utils.parserresolving; +package io.jmix.search.utils; import com.google.common.base.Strings; import io.jmix.core.FileRef; import io.jmix.search.exception.EmptyFileExtensionException; import io.jmix.search.exception.ParserResolvingException; import io.jmix.search.exception.UnsupportedFileExtensionException; +import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.commons.io.FilenameUtils; import org.apache.tika.parser.Parser; import org.springframework.stereotype.Component; @@ -52,7 +53,7 @@ public Parser getParser(FileRef fileRef) throws ParserResolvingException { throw new UnsupportedFileExtensionException(fileName, getSupportedExtensions()); } - private List getSupportedExtensions() { + protected List getSupportedExtensions() { return fileParserResolvers .stream() .flatMap(fileParserResolver -> fileParserResolver.getExtension().stream()) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index e90f44419f..b0b6201a4c 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -22,7 +22,6 @@ import io.jmix.core.common.util.Preconditions; import io.jmix.search.exception.FileParseException; import io.jmix.search.exception.ParserResolvingException; -import io.jmix.search.utils.parserresolving.FileParserResolverManager; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy index 83269133b5..50e79a15c7 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy @@ -21,7 +21,6 @@ import io.jmix.core.FileStorageLocator import io.jmix.search.exception.EmptyFileExtensionException import io.jmix.search.exception.ParserResolvingException import io.jmix.search.exception.UnsupportedFileExtensionException -import io.jmix.search.utils.parserresolving.FileParserResolverManager import spock.lang.Specification class FileProcessorTest extends Specification { diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy index 0889861247..9a07e0e45b 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy @@ -19,6 +19,8 @@ package io.jmix.search.utils.parserresolving import io.jmix.core.FileRef import io.jmix.search.exception.EmptyFileExtensionException import io.jmix.search.exception.UnsupportedFileExtensionException +import io.jmix.search.index.fileparsing.FileParserResolver +import io.jmix.search.utils.FileParserResolverManager import org.apache.tika.parser.Parser import spock.lang.Specification From 7a15aa355777c87d819a0671a640914c784eb0c8 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 17:38:36 +0400 Subject: [PATCH 36/81] Message correction. --- .../search/exception/UnsupportedFileExtensionException.java | 2 +- .../exception/UnsupportedFileExtensionExceptionTest.groovy | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java index aa9d9a769c..d38669471e 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java @@ -26,7 +26,7 @@ public class UnsupportedFileExtensionException extends ParserResolvingException { private static final String MESSAGE = "The file %s with '%s' extension is not supported. " + - "Only following file extensions are supported %s."; + "Only following file extensions are supported: %s."; /** * @param fileName - the name of the file which type is not supported diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy index 3d604a9aac..91f6adeef3 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy @@ -22,9 +22,9 @@ class UnsupportedFileExtensionExceptionTest extends Specification { private static final String MESSAGE_1 = "The file the-file-with-not-supported-extension.sql with 'sql' " + - "extension is not supported. Only following file extensions are supported txt, rtf." + "extension is not supported. Only following file extensions are supported: txt, rtf." private static final String MESSAGE_2 = "The file another-file.smt with 'smt' extension is not supported. " + - "Only following file extensions are supported abc, def." + "Only following file extensions are supported: abc, def." def "message test"() { when: From 317c50cb43fc6fc83cb88b84c50a7d67b96b6667 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 17:54:57 +0400 Subject: [PATCH 37/81] EmptyFileExtensionException message extending --- .../EmptyFileExtensionException.java | 9 ++-- .../exception/ParserResolvingException.java | 8 ++++ .../UnsupportedFileExtensionException.java | 3 +- .../utils/FileParserResolverManager.java | 2 +- .../EmptyFileExtensionExceptionTest.groovy | 41 +++++++++++++++++++ .../FilePropertyValueExtractorTest.groovy | 2 +- .../search/utils/FileProcessorTest.groovy | 2 +- 7 files changed, 60 insertions(+), 7 deletions(-) create mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/exception/EmptyFileExtensionExceptionTest.groovy diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java index 57eb6dc2b1..1472d5dfc4 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java @@ -16,15 +16,18 @@ package io.jmix.search.exception; +import java.util.List; + /** * An exception that is thrown when a user added some file without any extension. * In this case any parser for this file couldn't be resolved. */ public class EmptyFileExtensionException extends ParserResolvingException { - private static final String MESSAGE = "Extension of the file %s is empty"; + private static final String MESSAGE = "Extension of the file %s is empty. " + + "Only following file extensions are supported: %s."; - public EmptyFileExtensionException(String fileName) { - super(String.format(MESSAGE, fileName)); + public EmptyFileExtensionException(String fileName, List supportedExtensions) { + super(String.format(MESSAGE, fileName, getSupportedExtensionsString(supportedExtensions))); } } diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java index abec2588e5..8a8c903fe6 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java @@ -16,11 +16,19 @@ package io.jmix.search.exception; +import java.util.List; + /** * An exception that is thrown when the problem with parser resolving is occurred. The parser is need for * the fields of the "File" type indexing. */ public abstract class ParserResolvingException extends Exception { + + protected static String getSupportedExtensionsString(List supportedExtensions){ + return String.join(", ", supportedExtensions); + + } + public ParserResolvingException(String message) { super(message); } diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java index d38669471e..02c73a1347 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java @@ -37,6 +37,7 @@ public UnsupportedFileExtensionException(String fileName, List supported MESSAGE, fileName, FilenameUtils.getExtension(fileName), - String.join(", ", supportedExtensions))); + getSupportedExtensionsString(supportedExtensions))); } + } diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java index 8ca6f0b4d7..ffe272bc99 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java @@ -41,7 +41,7 @@ public Parser getParser(FileRef fileRef) throws ParserResolvingException { String fileName = fileRef.getFileName(); String fileExtension = FilenameUtils.getExtension(fileName); if (Strings.isNullOrEmpty(fileExtension)) { - throw new EmptyFileExtensionException(fileName); + throw new EmptyFileExtensionException(fileName, getSupportedExtensions()); } for (FileParserResolver resolver : fileParserResolvers) { diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/exception/EmptyFileExtensionExceptionTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/exception/EmptyFileExtensionExceptionTest.groovy new file mode 100644 index 0000000000..0063f079de --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/exception/EmptyFileExtensionExceptionTest.groovy @@ -0,0 +1,41 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.exception + +import spock.lang.Specification + +class EmptyFileExtensionExceptionTest extends Specification { + + private static final String MESSAGE_1 = "Extension of the file someName is empty. " + + "Only following file extensions are supported: txt, xls." + private static final String MESSAGE_2 = "Extension of the file someName2 is empty. " + + "Only following file extensions are supported: rtf, pdf." + + def "message test"() { + given: + def exception = new EmptyFileExtensionException(fileName, extensions) + + expect: + exception.getMessage() == message + + where: + fileName | extensions | message + "someName" | ["txt", "xls"] | MESSAGE_1 + "someName2" | ["rtf", "pdf"] | MESSAGE_2 + + } +} diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy index 10d92ce6fb..8e19db18f2 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy @@ -60,7 +60,7 @@ class FilePropertyValueExtractorTest extends Specification { loggingEvent.getMessage() == exception.getMessage() where: - exception<<[new UnsupportedFileExtensionException("any.file", ["txt"]), new EmptyFileExtensionException("any")] + exception<<[new UnsupportedFileExtensionException("any.file", ["txt"]), new EmptyFileExtensionException("any", ["txt"])] } void cleanup() { diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy index 50e79a15c7..40fee7c4d1 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy @@ -43,6 +43,6 @@ class FileProcessorTest extends Specification { where: exception << [new UnsupportedFileExtensionException("any.name", List.of("txt, rtf")), - new EmptyFileExtensionException("any")] + new EmptyFileExtensionException("any", ["txt"])] } } From 9f5d67c65f1cc8bc194eb930a813b186a01b9933 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 18:09:59 +0400 Subject: [PATCH 38/81] Java doc --- .../index/fileparsing/FileParserResolver.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java index 283b08ce65..a1175f45a3 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java @@ -20,9 +20,22 @@ import java.util.List; +/** + * Is a part of the extendable engine the gives an ability to implement custom file parser resolvers and to support + * custom file types or to modify behavior of existing file parser resolvers. + */ public interface FileParserResolver { + /** + * Returns a collection of supported extensions of the supported file type. E.g. ["xlsx", "XLSX", "DOCX", "DOCX"]. + * @return collection of supported extensions + */ List getExtension(); + /** + * Returns an instance of a file parser that is returned for the extensions being returned by + * {@link #getExtension()} method. + * @return an instance of a file parser + */ Parser getParser(); } From edf2b10367c3d487d69801ece367795f648dde3a Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 13 Sep 2024 18:13:13 +0400 Subject: [PATCH 39/81] Java doc --- .../java/io/jmix/search/utils/FileParserResolverManager.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java index ffe272bc99..36cf2da7ad 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java @@ -28,6 +28,10 @@ import java.util.List; +/** + * The service that searches appropriate file parsers for the supported file types. + * Search principle is based on a file extension analysing. + */ @Component("search_FileParserResolverManager") public class FileParserResolverManager { From f4d6daec86b9c4e29e21a8621f3517adeab73373 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Wed, 18 Sep 2024 11:41:56 +0400 Subject: [PATCH 40/81] FileParserResolverManagerIntegrationTest creation and resolvers correction --- ...a => MSOfficeDocumentsParserResolver.java} | 2 +- ...> OldMSOfficeDocumentsParserResolver.java} | 2 +- .../resolvers/PDFParserResolver.java | 5 +- .../resolvers/RTFParserResolver.java | 5 +- .../resolvers/TXTParserResolver.java | 3 +- ...arserResolverManagerIntegrationTest.groovy | 72 +++++++++++++++++++ 6 files changed, 82 insertions(+), 7 deletions(-) rename jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/{OfficeDocumentsParserResolver.java => MSOfficeDocumentsParserResolver.java} (93%) rename jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/{OldOfficeDocumentsParserResolver.java => OldMSOfficeDocumentsParserResolver.java} (93%) create mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java similarity index 93% rename from jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OfficeDocumentsParserResolver.java rename to jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java index 1f8aa8aae7..add566bed3 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java @@ -26,7 +26,7 @@ @Component("search_OfficeDocumentsParserResolver") @Order(100) -public class OfficeDocumentsParserResolver implements FileParserResolver { +public class MSOfficeDocumentsParserResolver implements FileParserResolver { @Override public List getExtension() { return List.of("docx", "xlsx"); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java similarity index 93% rename from jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldOfficeDocumentsParserResolver.java rename to jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java index 968ab774af..15aa2aacf1 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java @@ -26,7 +26,7 @@ @Component("search_OldOfficeDocumentsParserResolver") @Order(100) -public class OldOfficeDocumentsParserResolver implements FileParserResolver { +public class OldMSOfficeDocumentsParserResolver implements FileParserResolver { @Override public List getExtension() { return List.of("doc", "xls"); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java index 9b79d462ea..7f151ad1e0 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java @@ -18,6 +18,7 @@ import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; +import org.apache.tika.parser.pdf.PDFParser; import org.apache.tika.parser.rtf.RTFParser; import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; @@ -29,11 +30,11 @@ public class PDFParserResolver implements FileParserResolver { @Override public List getExtension() { - return List.of("rtf"); + return List.of("pdf"); } @Override public Parser getParser() { - return new RTFParser(); + return new PDFParser(); } } \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java index e3e8e885b6..11b727a92c 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java @@ -19,6 +19,7 @@ import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.pdf.PDFParser; +import org.apache.tika.parser.rtf.RTFParser; import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; @@ -29,11 +30,11 @@ public class RTFParserResolver implements FileParserResolver { @Override public List getExtension() { - return List.of("pdf"); + return List.of("rtf"); } @Override public Parser getParser() { - return new PDFParser(); + return new RTFParser(); } } \ No newline at end of file diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java index 8884272e40..49f440b072 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java @@ -19,6 +19,7 @@ import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.pdf.PDFParser; +import org.apache.tika.parser.txt.TXTParser; import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; @@ -34,6 +35,6 @@ public List getExtension() { @Override public Parser getParser() { - return new PDFParser(); + return new TXTParser(); } } \ No newline at end of file diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy new file mode 100644 index 0000000000..89c4adafe1 --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy @@ -0,0 +1,72 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.utils.parserresolving + +import io.jmix.core.FileRef +import io.jmix.search.index.fileparsing.FileParserResolver +import io.jmix.search.index.fileparsing.resolvers.MSOfficeDocumentsParserResolver +import io.jmix.search.index.fileparsing.resolvers.OldMSOfficeDocumentsParserResolver +import io.jmix.search.index.fileparsing.resolvers.OpenOfficeDocumentsParserResolver +import io.jmix.search.index.fileparsing.resolvers.PDFParserResolver +import io.jmix.search.index.fileparsing.resolvers.RTFParserResolver +import io.jmix.search.index.fileparsing.resolvers.TXTParserResolver +import io.jmix.search.utils.FileParserResolverManager +import org.apache.tika.parser.microsoft.OfficeParser +import org.apache.tika.parser.microsoft.ooxml.OOXMLParser +import org.apache.tika.parser.odf.OpenDocumentParser +import org.apache.tika.parser.pdf.PDFParser +import org.apache.tika.parser.rtf.RTFParser +import org.apache.tika.parser.txt.TXTParser +import spock.lang.Specification + +class FileParserResolverManagerIntegrationTest extends Specification { + + def "resolvers test"() { + given: + def manager = new FileParserResolverManager(getResolvers()) + + and: + def fileRef = Mock(FileRef) + fileRef.getFileName() >> "filename." + extension + + expect: + manager.getParser(fileRef).getClass() == theClass + + where: + extension | theClass + "txt" | TXTParser + "pdf" | PDFParser + "rtf" | RTFParser + "odt" | OpenDocumentParser + "ods" | OpenDocumentParser + "doc" | OfficeParser + "xls" | OfficeParser + "docx" | OOXMLParser + "xlsx" | OOXMLParser + } + + List getResolvers() { + List.of( + new MSOfficeDocumentsParserResolver(), + new OldMSOfficeDocumentsParserResolver(), + new OpenOfficeDocumentsParserResolver(), + new PDFParserResolver(), + new RTFParserResolver(), + new TXTParserResolver() + ) + } +} From 5fd00765c582863d3f710f72e76a774eb8a64995 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Wed, 18 Sep 2024 12:05:17 +0400 Subject: [PATCH 41/81] a not necessary extra dependency --- jmix-search/search/search.gradle | 1 - 1 file changed, 1 deletion(-) diff --git a/jmix-search/search/search.gradle b/jmix-search/search/search.gradle index ec3597c066..186c9a27c1 100644 --- a/jmix-search/search/search.gradle +++ b/jmix-search/search/search.gradle @@ -71,7 +71,6 @@ dependencies { testImplementation 'org.junit.vintage:junit-vintage-engine' testImplementation 'org.spockframework:spock-core' testImplementation 'org.mockito:mockito-core' - testImplementation 'org.spockframework:spock-core' testImplementation 'ch.qos.logback:logback-classic' testRuntimeOnly 'org.slf4j:slf4j-simple' testRuntimeOnly 'org.hsqldb:hsqldb' From 1f1ef1b13ce3b26fcd0ed17aa8f6d333886e310a Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Wed, 18 Sep 2024 12:40:55 +0400 Subject: [PATCH 42/81] Method renaming --- .../jmix/search/index/fileparsing/FileParserResolver.java | 6 +++--- .../resolvers/MSOfficeDocumentsParserResolver.java | 2 +- .../resolvers/OldMSOfficeDocumentsParserResolver.java | 2 +- .../resolvers/OpenOfficeDocumentsParserResolver.java | 2 +- .../index/fileparsing/resolvers/PDFParserResolver.java | 2 +- .../index/fileparsing/resolvers/RTFParserResolver.java | 2 +- .../index/fileparsing/resolvers/TXTParserResolver.java | 2 +- .../io/jmix/search/utils/FileParserResolverManager.java | 4 ++-- .../parserresolving/FileParserResolverManagerTest.groovy | 8 ++++---- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java index a1175f45a3..63ff8fed1e 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java @@ -27,14 +27,14 @@ public interface FileParserResolver { /** - * Returns a collection of supported extensions of the supported file type. E.g. ["xlsx", "XLSX", "DOCX", "DOCX"]. + * Returns a collection of supported extensions of the supported file type. E.g. ["xlsx", "XLSX", "docx", "DOCX"]. * @return collection of supported extensions */ - List getExtension(); + List getSupportedExtensions(); /** * Returns an instance of a file parser that is returned for the extensions being returned by - * {@link #getExtension()} method. + * {@link #getSupportedExtensions()} method. * @return an instance of a file parser */ Parser getParser(); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java index add566bed3..207d5efc50 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java @@ -28,7 +28,7 @@ @Order(100) public class MSOfficeDocumentsParserResolver implements FileParserResolver { @Override - public List getExtension() { + public List getSupportedExtensions() { return List.of("docx", "xlsx"); } diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java index 15aa2aacf1..d4e32c4770 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java @@ -28,7 +28,7 @@ @Order(100) public class OldMSOfficeDocumentsParserResolver implements FileParserResolver { @Override - public List getExtension() { + public List getSupportedExtensions() { return List.of("doc", "xls"); } diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java index 7526787867..21a6f0aace 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java @@ -28,7 +28,7 @@ @Order(100) public class OpenOfficeDocumentsParserResolver implements FileParserResolver { @Override - public List getExtension() { + public List getSupportedExtensions() { return List.of("odt", "ods"); } diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java index 7f151ad1e0..369bbb5bea 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java @@ -29,7 +29,7 @@ @Order(100) public class PDFParserResolver implements FileParserResolver { @Override - public List getExtension() { + public List getSupportedExtensions() { return List.of("pdf"); } diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java index 11b727a92c..302c6bc42a 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java @@ -29,7 +29,7 @@ @Order(100) public class RTFParserResolver implements FileParserResolver { @Override - public List getExtension() { + public List getSupportedExtensions() { return List.of("rtf"); } diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java index 49f440b072..92614d9c51 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java @@ -29,7 +29,7 @@ @Order(100) public class TXTParserResolver implements FileParserResolver { @Override - public List getExtension() { + public List getSupportedExtensions() { return List.of("txt"); } diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java index 36cf2da7ad..340d96f2a5 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java @@ -49,7 +49,7 @@ public Parser getParser(FileRef fileRef) throws ParserResolvingException { } for (FileParserResolver resolver : fileParserResolvers) { - if (resolver.getExtension().contains(fileExtension)) { + if (resolver.getSupportedExtensions().contains(fileExtension)) { return resolver.getParser(); } } @@ -60,7 +60,7 @@ public Parser getParser(FileRef fileRef) throws ParserResolvingException { protected List getSupportedExtensions() { return fileParserResolvers .stream() - .flatMap(fileParserResolver -> fileParserResolver.getExtension().stream()) + .flatMap(fileParserResolver -> fileParserResolver.getSupportedExtensions().stream()) .toList(); } } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy index 9a07e0e45b..d2fbb08832 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy @@ -50,9 +50,9 @@ class FileParserResolverManagerTest extends Specification { and: def resolver = Mock(FileParserResolver) - resolver.getExtension() >> List.of("docx", "xlsx") + resolver.getSupportedExtensions() >> List.of("docx", "xlsx") def resolver2 = Mock(FileParserResolver) - resolver2.getExtension() >> List.of("doc", "xls") + resolver2.getSupportedExtensions() >> List.of("doc", "xls") and: def parserResolver = new FileParserResolverManager(List.of(resolver, resolver2)) @@ -70,13 +70,13 @@ class FileParserResolverManagerTest extends Specification { def "should return parser of the type that corresponds to the file extension"() { given: def resolver = Mock(FileParserResolver) - resolver.getExtension() >> List.of("docx", "xlsx") + resolver.getSupportedExtensions() >> List.of("docx", "xlsx") def parser1 = Mock(Parser) resolver.getParser() >> parser1 and: def resolver2 = Mock(FileParserResolver) - resolver2.getExtension() >> List.of("doc", "xls") + resolver2.getSupportedExtensions() >> List.of("doc", "xls") def parser2 = Mock(Parser) resolver2.getParser() >> parser2 From 784a3534fa16f75a063100448374a0646c9d0fbc Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Wed, 18 Sep 2024 15:36:22 +0400 Subject: [PATCH 43/81] FileParserResolver class's signature changing --- .../EmptyFileExtensionException.java | 33 ------------ ...java => UnsupportedFileTypeException.java} | 16 +++--- ...tractExtensionBasedFileParserResolver.java | 53 +++++++++++++++++++ .../index/fileparsing/FileParserResolver.java | 9 ++-- .../MSOfficeDocumentsParserResolver.java | 4 +- .../OldMSOfficeDocumentsParserResolver.java | 3 +- .../OpenOfficeDocumentsParserResolver.java | 3 +- .../resolvers/PDFParserResolver.java | 3 +- .../resolvers/RTFParserResolver.java | 3 +- .../resolvers/TXTParserResolver.java | 3 +- .../impl/FilePropertyValueExtractor.java | 4 +- .../utils/FileParserResolverManager.java | 27 +++------- .../io/jmix/search/utils/FileProcessor.java | 6 +-- 13 files changed, 90 insertions(+), 77 deletions(-) delete mode 100644 jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java rename jmix-search/search/src/main/java/io/jmix/search/exception/{UnsupportedFileExtensionException.java => UnsupportedFileTypeException.java} (65%) create mode 100644 jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java deleted file mode 100644 index 1472d5dfc4..0000000000 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileExtensionException.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2024 Haulmont. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.jmix.search.exception; - -import java.util.List; - -/** - * An exception that is thrown when a user added some file without any extension. - * In this case any parser for this file couldn't be resolved. - */ -public class EmptyFileExtensionException extends ParserResolvingException { - - private static final String MESSAGE = "Extension of the file %s is empty. " + - "Only following file extensions are supported: %s."; - - public EmptyFileExtensionException(String fileName, List supportedExtensions) { - super(String.format(MESSAGE, fileName, getSupportedExtensionsString(supportedExtensions))); - } -} diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileTypeException.java similarity index 65% rename from jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java rename to jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileTypeException.java index 02c73a1347..7670502cdf 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileExtensionException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileTypeException.java @@ -16,28 +16,28 @@ package io.jmix.search.exception; -import org.apache.commons.io.FilenameUtils; - import java.util.List; /** * An exception that is thrown when a user added some file with extension that there are no any known parser for. */ -public class UnsupportedFileExtensionException extends ParserResolvingException { +public class UnsupportedFileTypeException extends Exception { - private static final String MESSAGE = "The file %s with '%s' extension is not supported. " + - "Only following file extensions are supported: %s."; + private static final String MESSAGE = "The file %s can't be parsed. " + + "Only the following file parsing criteria are supported: %s"; /** * @param fileName - the name of the file which type is not supported - * @param supportedExtensions - the list of the supported extensions + * @param supportedExtensions - the list of the supported file parsing cri */ - public UnsupportedFileExtensionException(String fileName, List supportedExtensions) { + public UnsupportedFileTypeException(String fileName, List supportedExtensions) { super(String.format( MESSAGE, fileName, - FilenameUtils.getExtension(fileName), getSupportedExtensionsString(supportedExtensions))); } + protected static String getSupportedExtensionsString(List supportedExtensions){ + return String.join("\n", supportedExtensions); + } } diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java new file mode 100644 index 0000000000..ff95a4a152 --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java @@ -0,0 +1,53 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.index.fileparsing; + +import com.google.common.base.Strings; +import io.jmix.core.FileRef; +import org.apache.commons.io.FilenameUtils; + +import java.util.List; + + +public abstract class AbstractExtensionBasedFileParserResolver implements FileParserResolver { + + /** + * Returns a collection of supported extensions of the supported file type. E.g. ["xlsx", "XLSX", "docx", "DOCX"]. + * @return collection of supported extensions + */ + public abstract List getSupportedExtensions(); + + @Override + public String getCriteriaDescription() { + return String.format("The file extension should be one of following: %s", getSupportedExtensionsString(getSupportedExtensions())); + } + + @Override + public boolean supports(FileRef fileRef) { + String fileName = fileRef.getFileName(); + String fileExtension = FilenameUtils.getExtension(fileName); + if (Strings.isNullOrEmpty(fileExtension)) { + return false; + } + + return getSupportedExtensions().contains(fileExtension); + } + + protected String getSupportedExtensionsString(List supportedExtensions){ + return String.join(", ", supportedExtensions); + } +} diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java index 63ff8fed1e..d56e8348dd 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java @@ -16,6 +16,7 @@ package io.jmix.search.index.fileparsing; +import io.jmix.core.FileRef; import org.apache.tika.parser.Parser; import java.util.List; @@ -26,11 +27,7 @@ */ public interface FileParserResolver { - /** - * Returns a collection of supported extensions of the supported file type. E.g. ["xlsx", "XLSX", "docx", "DOCX"]. - * @return collection of supported extensions - */ - List getSupportedExtensions(); + String getCriteriaDescription(); /** * Returns an instance of a file parser that is returned for the extensions being returned by @@ -38,4 +35,6 @@ public interface FileParserResolver { * @return an instance of a file parser */ Parser getParser(); + + boolean supports(FileRef fileRef); } diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java index 207d5efc50..7e81904c8f 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java @@ -16,7 +16,7 @@ package io.jmix.search.index.fileparsing.resolvers; -import io.jmix.search.index.fileparsing.FileParserResolver; +import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; import org.springframework.core.annotation.Order; @@ -26,7 +26,7 @@ @Component("search_OfficeDocumentsParserResolver") @Order(100) -public class MSOfficeDocumentsParserResolver implements FileParserResolver { +public class MSOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { @Override public List getSupportedExtensions() { return List.of("docx", "xlsx"); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java index d4e32c4770..d1eb841b14 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java @@ -16,6 +16,7 @@ package io.jmix.search.index.fileparsing.resolvers; +import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.OfficeParser; @@ -26,7 +27,7 @@ @Component("search_OldOfficeDocumentsParserResolver") @Order(100) -public class OldMSOfficeDocumentsParserResolver implements FileParserResolver { +public class OldMSOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { @Override public List getSupportedExtensions() { return List.of("doc", "xls"); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java index 21a6f0aace..5944a0631a 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java @@ -16,6 +16,7 @@ package io.jmix.search.index.fileparsing.resolvers; +import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.odf.OpenDocumentParser; @@ -26,7 +27,7 @@ @Component("search_OpenOfficeDocumentsParserResolver") @Order(100) -public class OpenOfficeDocumentsParserResolver implements FileParserResolver { +public class OpenOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { @Override public List getSupportedExtensions() { return List.of("odt", "ods"); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java index 369bbb5bea..3e56a082ff 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java @@ -16,6 +16,7 @@ package io.jmix.search.index.fileparsing.resolvers; +import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.pdf.PDFParser; @@ -27,7 +28,7 @@ @Component("search_PDFParserResolver") @Order(100) -public class PDFParserResolver implements FileParserResolver { +public class PDFParserResolver extends AbstractExtensionBasedFileParserResolver { @Override public List getSupportedExtensions() { return List.of("pdf"); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java index 302c6bc42a..df132c612d 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java @@ -16,6 +16,7 @@ package io.jmix.search.index.fileparsing.resolvers; +import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.pdf.PDFParser; @@ -27,7 +28,7 @@ @Component("search_RTFParserResolver") @Order(100) -public class RTFParserResolver implements FileParserResolver { +public class RTFParserResolver extends AbstractExtensionBasedFileParserResolver { @Override public List getSupportedExtensions() { return List.of("rtf"); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java index 92614d9c51..81505d7a75 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java @@ -16,6 +16,7 @@ package io.jmix.search.index.fileparsing.resolvers; +import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.pdf.PDFParser; @@ -27,7 +28,7 @@ @Component("search_TXTParserResolver") @Order(100) -public class TXTParserResolver implements FileParserResolver { +public class TXTParserResolver extends AbstractExtensionBasedFileParserResolver { @Override public List getSupportedExtensions() { return List.of("txt"); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java b/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java index a4d30979a9..18f304faec 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java @@ -25,7 +25,7 @@ import io.jmix.core.metamodel.datatype.impl.FileRefDatatype; import io.jmix.core.metamodel.model.MetaPropertyPath; import io.jmix.search.exception.FileParseException; -import io.jmix.search.exception.ParserResolvingException; +import io.jmix.search.exception.UnsupportedFileTypeException; import io.jmix.search.index.mapping.ParameterKeys; import io.jmix.search.utils.BooleanParser; import io.jmix.search.utils.FileProcessor; @@ -91,7 +91,7 @@ protected void addFileContent(ObjectNode node, FileRef fileRef) { try { String content = fileProcessor.extractFileContent(fileRef); node.put("_content", content); - } catch (ParserResolvingException e) { + } catch (UnsupportedFileTypeException e) { log.warn(e.getMessage()); } catch (FileParseException e) { log.error("Unable to index file content", e); diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java index 340d96f2a5..5105f48f40 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java @@ -16,16 +16,13 @@ package io.jmix.search.utils; -import com.google.common.base.Strings; import io.jmix.core.FileRef; -import io.jmix.search.exception.EmptyFileExtensionException; -import io.jmix.search.exception.ParserResolvingException; -import io.jmix.search.exception.UnsupportedFileExtensionException; +import io.jmix.search.exception.UnsupportedFileTypeException; import io.jmix.search.index.fileparsing.FileParserResolver; -import org.apache.commons.io.FilenameUtils; import org.apache.tika.parser.Parser; import org.springframework.stereotype.Component; +import java.util.ArrayList; import java.util.List; /** @@ -41,26 +38,18 @@ public FileParserResolverManager(List fileParserResolvers) { this.fileParserResolvers = fileParserResolvers; } - public Parser getParser(FileRef fileRef) throws ParserResolvingException { + public Parser getParser(FileRef fileRef) throws UnsupportedFileTypeException { String fileName = fileRef.getFileName(); - String fileExtension = FilenameUtils.getExtension(fileName); - if (Strings.isNullOrEmpty(fileExtension)) { - throw new EmptyFileExtensionException(fileName, getSupportedExtensions()); - } + + List messages = new ArrayList<>(); for (FileParserResolver resolver : fileParserResolvers) { - if (resolver.getSupportedExtensions().contains(fileExtension)) { + if (resolver.supports(fileRef)) { return resolver.getParser(); } + messages.add(resolver.getCriteriaDescription()); } - throw new UnsupportedFileExtensionException(fileName, getSupportedExtensions()); - } - - protected List getSupportedExtensions() { - return fileParserResolvers - .stream() - .flatMap(fileParserResolver -> fileParserResolver.getSupportedExtensions().stream()) - .toList(); + throw new UnsupportedFileTypeException(fileName, messages); } } diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index b0b6201a4c..e496f880c9 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -21,7 +21,7 @@ import io.jmix.core.FileStorageLocator; import io.jmix.core.common.util.Preconditions; import io.jmix.search.exception.FileParseException; -import io.jmix.search.exception.ParserResolvingException; +import io.jmix.search.exception.UnsupportedFileTypeException; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; @@ -50,7 +50,7 @@ public FileProcessor(FileStorageLocator fileStorageLocator, FileParserResolverMa this.fileParserResolverManager = fileParserResolverManager; } - public String extractFileContent(FileRef fileRef) throws FileParseException, ParserResolvingException { + public String extractFileContent(FileRef fileRef) throws FileParseException, UnsupportedFileTypeException { Preconditions.checkNotNullArgument(fileRef); log.debug("Extract content of file {}", fileRef); FileStorage fileStorage = fileStorageLocator.getByName(fileRef.getStorageName()); @@ -80,7 +80,7 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Par return stringWriter.toString(); } - protected Parser getParser(FileRef fileRef) throws ParserResolvingException { + protected Parser getParser(FileRef fileRef) throws UnsupportedFileTypeException { return fileParserResolverManager.getParser(fileRef); } } From 62569354d354e746c3a424074d9112cb4274a3ea Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Wed, 18 Sep 2024 15:56:17 +0400 Subject: [PATCH 44/81] UnsupportedFileTypeException correction --- .../UnsupportedFileTypeException.java | 13 ++--- ...supportedFileExtensionExceptionTest.groovy | 41 --------------- .../UnsupportedFileTypeExceptionTest.groovy | 52 +++++++++++++++++++ 3 files changed, 59 insertions(+), 47 deletions(-) delete mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy create mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileTypeExceptionTest.groovy diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileTypeException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileTypeException.java index 7670502cdf..6e395ec46c 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileTypeException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileTypeException.java @@ -19,16 +19,17 @@ import java.util.List; /** - * An exception that is thrown when a user added some file with extension that there are no any known parser for. + * An exception that is thrown when a user added some file of the type that is not supported + * and there are no any known parser for. */ public class UnsupportedFileTypeException extends Exception { private static final String MESSAGE = "The file %s can't be parsed. " + - "Only the following file parsing criteria are supported: %s"; + "Only the following file parsing criteria are supported:\n -%s"; /** - * @param fileName - the name of the file which type is not supported - * @param supportedExtensions - the list of the supported file parsing cri + * @param fileName - the name of the file which type is not supported + * @param supportedExtensions - the list of the criteria that are supported in the application */ public UnsupportedFileTypeException(String fileName, List supportedExtensions) { super(String.format( @@ -37,7 +38,7 @@ public UnsupportedFileTypeException(String fileName, List supportedExten getSupportedExtensionsString(supportedExtensions))); } - protected static String getSupportedExtensionsString(List supportedExtensions){ - return String.join("\n", supportedExtensions); + protected static String getSupportedExtensionsString(List supportedExtensions) { + return String.join("\n -", supportedExtensions); } } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy deleted file mode 100644 index 91f6adeef3..0000000000 --- a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileExtensionExceptionTest.groovy +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2024 Haulmont. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain fileName copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.jmix.search.exception - -import spock.lang.Specification - -class UnsupportedFileExtensionExceptionTest extends Specification { - - - private static final String MESSAGE_1 = "The file the-file-with-not-supported-extension.sql with 'sql' " + - "extension is not supported. Only following file extensions are supported: txt, rtf." - private static final String MESSAGE_2 = "The file another-file.smt with 'smt' extension is not supported. " + - "Only following file extensions are supported: abc, def." - - def "message test"() { - when: - def exception = new UnsupportedFileExtensionException(fileName, supportedTypes) - - then: - exception.getMessage() == message - - where: - fileName |supportedTypes| message - "the-file-with-not-supported-extension.sql" |["txt", "rtf"]| MESSAGE_1 - "another-file.smt" |["abc", "def"]| MESSAGE_2 - } -} diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileTypeExceptionTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileTypeExceptionTest.groovy new file mode 100644 index 0000000000..3d7e26e4c5 --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileTypeExceptionTest.groovy @@ -0,0 +1,52 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain fileName copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.exception + +import spock.lang.Specification + +class UnsupportedFileTypeExceptionTest extends Specification { + + + private static final String MESSAGE_1 = "The file another-file.smt can't be parsed. " + + "Only the following file parsing criteria are supported:\n" + + " -The only one criteria." + private static final String MESSAGE_2 = "The file the-file-with-not-supported-extension.sql can't be parsed. " + + "Only the following file parsing criteria are supported:\n" + + " -The first criteria.\n" + + " -The second criteria." + + private static final String MESSAGE_3 = "The file anyfile can't be parsed. " + + "Only the following file parsing criteria are supported:\n" + + " -line1\n" + + " -line2\n" + + " -line3\n" + + " -line4" + + def "message test"() { + when: + def exception = new UnsupportedFileTypeException(fileName, supportedTypes) + + then: + exception.getMessage() == message + + where: + fileName | supportedTypes | message + "another-file.smt" | ["The only one criteria."] | MESSAGE_1 + "the-file-with-not-supported-extension.sql" | ["The first criteria.", "The second criteria."] | MESSAGE_2 + "anyfile" | ["line1", "line2", "line3", "line4"] | MESSAGE_3 + } +} From e793ca815c08a04f8dd012c3cbbeacd403bdb5ae Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Wed, 18 Sep 2024 16:08:17 +0400 Subject: [PATCH 45/81] FilePropertyValueExtractorTest correction --- .../impl/FilePropertyValueExtractorTest.groovy | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy index 8e19db18f2..09aafe2aa8 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy @@ -20,8 +20,7 @@ import ch.qos.logback.classic.Level import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.core.read.ListAppender import io.jmix.core.FileRef -import io.jmix.search.exception.EmptyFileExtensionException -import io.jmix.search.exception.UnsupportedFileExtensionException +import io.jmix.search.exception.UnsupportedFileTypeException import io.jmix.search.utils.FileProcessor import spock.lang.Specification @@ -39,13 +38,17 @@ class FilePropertyValueExtractorTest extends Specification { Level.WARN) } - def "nothing should be thrown if fileProcessor throws a ParserResolvingException"() { + def "nothing should be thrown if fileProcessor throws a ParserResolvingException but should be logged"() { given: FileRef fileRef = Mock() + and: + def exceptionMock = Mock(UnsupportedFileTypeException) + exceptionMock.getMessage() >> "Some exception message." + and: FileProcessor fileProcessor = Mock() - fileProcessor.extractFileContent(fileRef) >> {throw exception} + fileProcessor.extractFileContent(fileRef) >> {throw exceptionMock} and: FilePropertyValueExtractor extractor = new FilePropertyValueExtractor(fileProcessor) @@ -57,10 +60,7 @@ class FilePropertyValueExtractorTest extends Specification { this.appender.list.size() == 1 def loggingEvent = this.appender.list.get(0) loggingEvent.getLevel() == Level.WARN - loggingEvent.getMessage() == exception.getMessage() - - where: - exception<<[new UnsupportedFileExtensionException("any.file", ["txt"]), new EmptyFileExtensionException("any", ["txt"])] + loggingEvent.getMessage() == exceptionMock.getMessage() } void cleanup() { From 945962a01b6880c0e59dfecafda009b07c745d65 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Wed, 18 Sep 2024 16:09:01 +0400 Subject: [PATCH 46/81] FileProcessorTest correction --- .../jmix/search/utils/FileProcessorTest.groovy | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy index 40fee7c4d1..94aafeb8b1 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy @@ -18,16 +18,18 @@ package io.jmix.search.utils import io.jmix.core.FileRef import io.jmix.core.FileStorageLocator -import io.jmix.search.exception.EmptyFileExtensionException -import io.jmix.search.exception.ParserResolvingException -import io.jmix.search.exception.UnsupportedFileExtensionException +import io.jmix.search.exception.UnsupportedFileTypeException import spock.lang.Specification class FileProcessorTest extends Specification { - def "should throw the ParserResolvingException that have been thrown by the FileParserResolver"() { + def "should throw the UnsupportedFileTypeException that have been thrown by the FileParserResolver"() { given: FileStorageLocator storageLocatorMock = Mock() + and: + + def exception = Mock (UnsupportedFileTypeException) + and: FileParserResolverManager fileParserResolver = Mock() FileRef fileRefMock = Mock() @@ -38,11 +40,7 @@ class FileProcessorTest extends Specification { fileProcessor.extractFileContent(fileRefMock) then: - ParserResolvingException throwable = thrown() + UnsupportedFileTypeException throwable = thrown() throwable == exception - - where: - exception << [new UnsupportedFileExtensionException("any.name", List.of("txt, rtf")), - new EmptyFileExtensionException("any", ["txt"])] } } From a71d1032e244d98036e00d86487becec58f708b0 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Wed, 18 Sep 2024 16:10:58 +0400 Subject: [PATCH 47/81] The tests correction --- .../EmptyFileExtensionExceptionTest.groovy | 41 ------------------- 1 file changed, 41 deletions(-) delete mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/exception/EmptyFileExtensionExceptionTest.groovy diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/exception/EmptyFileExtensionExceptionTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/exception/EmptyFileExtensionExceptionTest.groovy deleted file mode 100644 index 0063f079de..0000000000 --- a/jmix-search/search/src/test/groovy/io/jmix/search/exception/EmptyFileExtensionExceptionTest.groovy +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2024 Haulmont. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.jmix.search.exception - -import spock.lang.Specification - -class EmptyFileExtensionExceptionTest extends Specification { - - private static final String MESSAGE_1 = "Extension of the file someName is empty. " + - "Only following file extensions are supported: txt, xls." - private static final String MESSAGE_2 = "Extension of the file someName2 is empty. " + - "Only following file extensions are supported: rtf, pdf." - - def "message test"() { - given: - def exception = new EmptyFileExtensionException(fileName, extensions) - - expect: - exception.getMessage() == message - - where: - fileName | extensions | message - "someName" | ["txt", "xls"] | MESSAGE_1 - "someName2" | ["rtf", "pdf"] | MESSAGE_2 - - } -} From 85fed7cae7266a86d22a15ca94045c1e02bc6704 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Wed, 18 Sep 2024 18:48:53 +0400 Subject: [PATCH 48/81] FileParserResolverManager and the test correction --- .../EmptyFileParserResolversList.java | 23 +++++ .../utils/FileParserResolverManager.java | 5 ++ .../FileParserResolverManagerTest.groovy | 90 +++++++++++-------- 3 files changed, 81 insertions(+), 37 deletions(-) create mode 100644 jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java new file mode 100644 index 0000000000..42d4a592dc --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java @@ -0,0 +1,23 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.exception; + +public class EmptyFileParserResolversList extends RuntimeException{ + public EmptyFileParserResolversList() { + super("There are no any file parser resolvers in the application."); + } +} diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java index 5105f48f40..38530c870d 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java @@ -17,6 +17,7 @@ package io.jmix.search.utils; import io.jmix.core.FileRef; +import io.jmix.search.exception.EmptyFileParserResolversList; import io.jmix.search.exception.UnsupportedFileTypeException; import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; @@ -39,6 +40,10 @@ public FileParserResolverManager(List fileParserResolvers) { } public Parser getParser(FileRef fileRef) throws UnsupportedFileTypeException { + if(fileParserResolvers.isEmpty()){ + throw new EmptyFileParserResolversList(); + } + String fileName = fileRef.getFileName(); List messages = new ArrayList<>(); diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy index d2fbb08832..44be57acca 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy @@ -17,83 +17,99 @@ package io.jmix.search.utils.parserresolving import io.jmix.core.FileRef -import io.jmix.search.exception.EmptyFileExtensionException -import io.jmix.search.exception.UnsupportedFileExtensionException +import io.jmix.search.exception.EmptyFileParserResolversList +import io.jmix.search.exception.UnsupportedFileTypeException import io.jmix.search.index.fileparsing.FileParserResolver import io.jmix.search.utils.FileParserResolverManager import org.apache.tika.parser.Parser import spock.lang.Specification +import static java.util.Collections.emptyList + class FileParserResolverManagerTest extends Specification { - def "should throw EmptyFileExtensionException when the given file name has no extension"() { + + def "should throw UnsupportedFileExtensionException when the given file of unsupported type"() { given: FileRef fileRef = Mock() fileRef.getFileName() >> fileName and: - def parserResolver = new FileParserResolverManager(Collections.emptyList()) + def resolver = Mock(FileParserResolver) + resolver.supports(fileRef) >> false + def resolver2 = Mock(FileParserResolver) + resolver2.supports(fileRef) >> false + + and: + def parserResolver = new FileParserResolverManager(List.of(resolver, resolver2)) when: parserResolver.getParser(fileRef) then: - thrown(EmptyFileExtensionException) + def exception = thrown(UnsupportedFileTypeException) + exception.getMessage().contains(fileName) where: - fileName << ["abc", "def", "abc.", "abc.."] + fileName << ["abc.def", "def.zxc"] } - def "should throw UnsupportedFileExtensionException when the given file name with unsupported extension"() { + def "should return parser of the type that is supported with exact resolver"() { given: FileRef fileRef = Mock() fileRef.getFileName() >> fileName and: - def resolver = Mock(FileParserResolver) - resolver.getSupportedExtensions() >> List.of("docx", "xlsx") - def resolver2 = Mock(FileParserResolver) - resolver2.getSupportedExtensions() >> List.of("doc", "xls") + def resolver1 = createExtensionBasedResolverResolver("txt", parser1) + def resolver2 = createExtensionBasedResolverResolver("rtf", parser2) + def resolver3 = Mock(FileParserResolver) + resolver3.supports(_ as FileRef) >> true; + resolver3.getParser() >> parser3 and: - def parserResolver = new FileParserResolverManager(List.of(resolver, resolver2)) + def resolverManager = new FileParserResolverManager(List.of(resolver1, resolver2, resolver3)) when: - parserResolver.getParser(fileRef) + def resolvedParser = resolverManager.getParser(fileRef) then: - def exception = thrown(UnsupportedFileExtensionException) - exception.getMessage().contains(fileName) + resolvedParser != null + resolvedParser == expectedResolvedParser + where: - fileName << ["abc.def", "def.zxc"] + fileName | parser1 | parser2 | parser3 | expectedResolvedParser + "file.txt" | Mock(Parser) | null | null | parser1 + "file.rtf" | null | Mock(Parser) | null | parser2 + "another.rtf" | null | Mock(Parser) | null | parser2 + "another.txt" | Mock(Parser) | null | null | parser1 + "file.eps" | null | null | Mock(Parser) | parser3 + "file" | null | null | Mock(Parser) | parser3 } - def "should return parser of the type that corresponds to the file extension"() { + def "should throw an exception when there are no any resolver"() { given: - def resolver = Mock(FileParserResolver) - resolver.getSupportedExtensions() >> List.of("docx", "xlsx") - def parser1 = Mock(Parser) - resolver.getParser() >> parser1 - - and: - def resolver2 = Mock(FileParserResolver) - resolver2.getSupportedExtensions() >> List.of("doc", "xls") - def parser2 = Mock(Parser) - resolver2.getParser() >> parser2 + FileRef fileRef = Mock() and: - def parserResolver = new FileParserResolverManager(List.of(resolver, resolver2)) + def resolverManager = new FileParserResolverManager(emptyList()) - expect: - parserResolver.getParser(createFileRefMock("docx")) == parser1 - parserResolver.getParser(createFileRefMock("xlsx")) == parser1 - parserResolver.getParser(createFileRefMock("doc")) == parser2 - parserResolver.getParser(createFileRefMock("xls")) == parser2 + when: + resolverManager.getParser(fileRef) + then: + thrown(EmptyFileParserResolversList) } - private FileRef createFileRefMock(String extension) { - def fileRef = Mock(FileRef) - fileRef.getFileName() >> "filename." + extension - fileRef + FileParserResolver createExtensionBasedResolverResolver(String fileExtension, Parser parser) { + def resolver = Mock(FileParserResolver) + resolver.supports(_ as FileRef) >> { FileRef fileRef1 -> + { + if (fileRef1.getFileName().contains(fileExtension)) { + return true + } + return false + } + } + resolver.getParser() >> parser + resolver } } From 0054c3e1f4f9bf7c0d02c5303099c0e56aa852a8 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 19 Sep 2024 00:15:30 +0400 Subject: [PATCH 49/81] AbstractExtensionBasedFileParserResolverTest --- .../exception/ParserResolvingException.java | 35 --------- ...tractExtensionBasedFileParserResolver.java | 2 +- ...xtensionBasedFileParserResolverTest.groovy | 76 +++++++++++++++++++ 3 files changed, 77 insertions(+), 36 deletions(-) delete mode 100644 jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java create mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java deleted file mode 100644 index 8a8c903fe6..0000000000 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/ParserResolvingException.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2024 Haulmont. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.jmix.search.exception; - -import java.util.List; - -/** - * An exception that is thrown when the problem with parser resolving is occurred. The parser is need for - * the fields of the "File" type indexing. - */ -public abstract class ParserResolvingException extends Exception { - - protected static String getSupportedExtensionsString(List supportedExtensions){ - return String.join(", ", supportedExtensions); - - } - - public ParserResolvingException(String message) { - super(message); - } -} diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java index ff95a4a152..33e41dc8eb 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java @@ -33,7 +33,7 @@ public abstract class AbstractExtensionBasedFileParserResolver implements FilePa @Override public String getCriteriaDescription() { - return String.format("The file extension should be one of following: %s", getSupportedExtensionsString(getSupportedExtensions())); + return String.format("The file extension should be one of following: %s.", getSupportedExtensionsString(getSupportedExtensions())); } @Override diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy new file mode 100644 index 0000000000..e69a45aaf5 --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -0,0 +1,76 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.index.fileparsing + +import io.jmix.core.FileRef +import org.apache.tika.parser.Parser +import spock.lang.Specification + +class AbstractExtensionBasedFileParserResolverTest extends Specification { + def "GetCriteriaDescription"() { + given: + def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), extensions as List) + + expect: + resolver.getCriteriaDescription() == criteriaDescription + + where: + extensions | criteriaDescription + ["ext1"] | "The file extension should be one of following: ext1." + ["ext1", "ext2"] | "The file extension should be one of following: ext1, ext2." + ["ext1", "ext2", "ext3"] | "The file extension should be one of following: ext1, ext2, ext3." + } + + def "Supports"() { + given: + def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), extensions as List) + + and: + def fileRef = Mock(FileRef) + fileRef.getFileName() >> fileName + + expect: + resolver.supports(fileRef) == supports + + where: + fileName | extensions | supports + "file1.ext1" | ["ext1"] | true + "file1.ext1" | ["ext1", "ext2"] | true + "file1.doc" | ["docx"] | false + "file" | ["ext1"] | false + } + + private static class TestExtensionBasedFileParserResolver extends AbstractExtensionBasedFileParserResolver { + private Parser innerParser + private List extensions + + TestExtensionBasedFileParserResolver(Parser innerParser, List extensions) { + this.innerParser = innerParser + this.extensions = extensions + } + + @Override + List getSupportedExtensions() { + return extensions; + } + + @Override + Parser getParser() { + return innerParser + } + } +} From 7dd1ab46adbf733ae1d1a7c44212fe140a5824c2 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 19 Sep 2024 00:18:00 +0400 Subject: [PATCH 50/81] AbstractExtensionBasedFileParserResolverTest --- .../AbstractExtensionBasedFileParserResolverTest.groovy | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy index e69a45aaf5..0f06b56b33 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -55,11 +55,11 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { } private static class TestExtensionBasedFileParserResolver extends AbstractExtensionBasedFileParserResolver { - private Parser innerParser + private Parser parser private List extensions - TestExtensionBasedFileParserResolver(Parser innerParser, List extensions) { - this.innerParser = innerParser + TestExtensionBasedFileParserResolver(Parser parser, List extensions) { + this.parser = parser this.extensions = extensions } @@ -70,7 +70,7 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { @Override Parser getParser() { - return innerParser + return parser } } } From 8c1c3b9898e23a88ce5949b17491a45f95903902 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 19 Sep 2024 00:33:21 +0400 Subject: [PATCH 51/81] JavaDoc --- ...stractExtensionBasedFileParserResolver.java | 7 +++++-- .../index/fileparsing/FileParserResolver.java | 18 ++++++++++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java index 33e41dc8eb..8d62e2a7ac 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java @@ -22,11 +22,14 @@ import java.util.List; - +/** + * Implements the common logic for all extension based file parser resolvers. + */ public abstract class AbstractExtensionBasedFileParserResolver implements FileParserResolver { /** * Returns a collection of supported extensions of the supported file type. E.g. ["xlsx", "XLSX", "docx", "DOCX"]. + * * @return collection of supported extensions */ public abstract List getSupportedExtensions(); @@ -47,7 +50,7 @@ public boolean supports(FileRef fileRef) { return getSupportedExtensions().contains(fileExtension); } - protected String getSupportedExtensionsString(List supportedExtensions){ + protected String getSupportedExtensionsString(List supportedExtensions) { return String.join(", ", supportedExtensions); } } diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java index d56e8348dd..36d0134b41 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java @@ -27,14 +27,28 @@ */ public interface FileParserResolver { + /** + * This method should return the description that describes the constraints or the constraint for the files + * that are supported with this resolver. This message is used for generating the log message that + * is written into the log while no one of the resolvers supports the processed file. + * + * @return criteria description + */ String getCriteriaDescription(); /** - * Returns an instance of a file parser that is returned for the extensions being returned by - * {@link #getSupportedExtensions()} method. + * Returns an instance of a file parser for the supported file types. + * * @return an instance of a file parser */ Parser getParser(); + /** + * This method should implement the logic for checking + * if the file with given fileRef is supported by the resolver or not. + * + * @param fileRef object with the file information + * @return the given FileRef's checking result + */ boolean supports(FileRef fileRef); } From d6b228e8f3671d75a0fd7836d0c6a13819e8b524 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 19 Sep 2024 00:36:59 +0400 Subject: [PATCH 52/81] JavaDoc --- .../java/io/jmix/search/utils/FileParserResolverManager.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java index 38530c870d..9ee4dcea14 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java @@ -28,7 +28,7 @@ /** * The service that searches appropriate file parsers for the supported file types. - * Search principle is based on a file extension analysing. + * A search principle is based on the sequential applying FileParserResolver objects' checks for the given file. */ @Component("search_FileParserResolverManager") public class FileParserResolverManager { @@ -40,7 +40,7 @@ public FileParserResolverManager(List fileParserResolvers) { } public Parser getParser(FileRef fileRef) throws UnsupportedFileTypeException { - if(fileParserResolvers.isEmpty()){ + if (fileParserResolvers.isEmpty()) { throw new EmptyFileParserResolversList(); } From 7b7b8d58fa9865e8a6e13234a9a7386395e3114a Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 19 Sep 2024 00:37:43 +0400 Subject: [PATCH 53/81] minor change --- .../jmix/search/exception/EmptyFileParserResolversList.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java index 42d4a592dc..6ee6faaf5c 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java @@ -17,7 +17,10 @@ package io.jmix.search.exception; public class EmptyFileParserResolversList extends RuntimeException{ + + private static final String MESSAGE = "There are no any file parser resolvers in the application."; + public EmptyFileParserResolversList() { - super("There are no any file parser resolvers in the application."); + super(MESSAGE); } } From 4b4582c59244460cff28b53389e970062dc5efcd Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 19 Sep 2024 00:42:36 +0400 Subject: [PATCH 54/81] minor change --- .../AbstractExtensionBasedFileParserResolverTest.groovy | 1 + 1 file changed, 1 insertion(+) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy index 0f06b56b33..9c78964d72 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -51,6 +51,7 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { "file1.ext1" | ["ext1"] | true "file1.ext1" | ["ext1", "ext2"] | true "file1.doc" | ["docx"] | false + "file1." | ["docx"] | false "file" | ["ext1"] | false } From 5f5fc3375d4c88c65f5cce72c3c09c91385bbbb4 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 19 Sep 2024 00:44:45 +0400 Subject: [PATCH 55/81] minor change --- ...ExtensionBasedFileParserResolverTest.groovy | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy index 9c78964d72..07919a1db1 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -47,12 +47,18 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { resolver.supports(fileRef) == supports where: - fileName | extensions | supports - "file1.ext1" | ["ext1"] | true - "file1.ext1" | ["ext1", "ext2"] | true - "file1.doc" | ["docx"] | false - "file1." | ["docx"] | false - "file" | ["ext1"] | false + fileName | extensions | supports + "file1.ext1" | ["ext1"] | true + "file1.ext11" | ["ext1"] | false + "file1..ext1" | ["ext1"] | true + "file1..ext" | ["ext1"] | false + "file1.ext1" | ["ext1", "ext2"] | true + "file1.ext2" | ["ext1", "ext2"] | true + "file1.ext3" | ["ext1", "ext2", "ext3"] | true + "file1.ext33" | ["ext1", "ext2", "ext3"] | false + "file1.doc" | ["docx"] | false + "file1." | ["docx"] | false + "file" | ["ext1"] | false } private static class TestExtensionBasedFileParserResolver extends AbstractExtensionBasedFileParserResolver { From 2be1c419dfdea6e298a06606378d3630d215fa9c Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 19 Sep 2024 00:49:56 +0400 Subject: [PATCH 56/81] minor change --- .../AbstractExtensionBasedFileParserResolver.java | 2 +- .../AbstractExtensionBasedFileParserResolverTest.groovy | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java index 8d62e2a7ac..4cbb7719f1 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java @@ -36,7 +36,7 @@ public abstract class AbstractExtensionBasedFileParserResolver implements FilePa @Override public String getCriteriaDescription() { - return String.format("The file extension should be one of following: %s.", getSupportedExtensionsString(getSupportedExtensions())); + return String.format("The file extension should be one of the following: %s.", getSupportedExtensionsString(getSupportedExtensions())); } @Override diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy index 07919a1db1..bc956184e2 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -30,9 +30,9 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { where: extensions | criteriaDescription - ["ext1"] | "The file extension should be one of following: ext1." - ["ext1", "ext2"] | "The file extension should be one of following: ext1, ext2." - ["ext1", "ext2", "ext3"] | "The file extension should be one of following: ext1, ext2, ext3." + ["ext1"] | "The file extension should be one of the following: ext1." + ["ext1", "ext2"] | "The file extension should be one of the following: ext1, ext2." + ["ext1", "ext2", "ext3"] | "The file extension should be one of the following: ext1, ext2, ext3." } def "Supports"() { From 1f374ab6de006501cdff6b7584ae0a3b6ebf48b5 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 19 Sep 2024 00:51:10 +0400 Subject: [PATCH 57/81] FileParserResolverManagerIntegrationTest extending --- ...arserResolverManagerIntegrationTest.groovy | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy index 89c4adafe1..a71a5fbda1 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy @@ -17,6 +17,7 @@ package io.jmix.search.utils.parserresolving import io.jmix.core.FileRef +import io.jmix.search.exception.UnsupportedFileTypeException import io.jmix.search.index.fileparsing.FileParserResolver import io.jmix.search.index.fileparsing.resolvers.MSOfficeDocumentsParserResolver import io.jmix.search.index.fileparsing.resolvers.OldMSOfficeDocumentsParserResolver @@ -35,7 +36,7 @@ import spock.lang.Specification class FileParserResolverManagerIntegrationTest extends Specification { - def "resolvers test"() { + def "there is appropriate resolver for the file"() { given: def manager = new FileParserResolverManager(getResolvers()) @@ -59,6 +60,24 @@ class FileParserResolverManagerIntegrationTest extends Specification { "xlsx" | OOXMLParser } + def "there is not appropriate resolver for the file"() { + given: + def manager = new FileParserResolverManager(getResolvers()) + + and: + def fileRef = Mock(FileRef) + fileRef.getFileName() >> "filename." + extension + + when: + manager.getParser(fileRef) + + then: + thrown(UnsupportedFileTypeException) + + where: + extension<<["txt1", "ems", "", "od", "ods2"] + } + List getResolvers() { List.of( new MSOfficeDocumentsParserResolver(), From 2d891cda36f34cf3d6701323a44f3318e20b5c36 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 19 Sep 2024 00:55:55 +0400 Subject: [PATCH 58/81] minor change --- .../test/groovy/io/jmix/search/utils/FileProcessorTest.groovy | 1 - 1 file changed, 1 deletion(-) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy index 94aafeb8b1..efe7b51a4a 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy @@ -27,7 +27,6 @@ class FileProcessorTest extends Specification { FileStorageLocator storageLocatorMock = Mock() and: - def exception = Mock (UnsupportedFileTypeException) and: From 67753ffae7f165118f5dd21e3d585dab84e32b35 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 19 Sep 2024 09:05:52 +0400 Subject: [PATCH 59/81] code formatting --- .../io/jmix/search/exception/EmptyFileParserResolversList.java | 2 +- .../propertyvalue/impl/FilePropertyValueExtractorTest.groovy | 2 +- .../test/groovy/io/jmix/search/utils/FileProcessorTest.groovy | 2 +- .../src/test/groovy/io/jmix/search/utils/LogbackMocker.java | 2 +- .../FileParserResolverManagerIntegrationTest.groovy | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java index 6ee6faaf5c..0390b7ddf3 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java @@ -16,7 +16,7 @@ package io.jmix.search.exception; -public class EmptyFileParserResolversList extends RuntimeException{ +public class EmptyFileParserResolversList extends RuntimeException { private static final String MESSAGE = "There are no any file parser resolvers in the application."; diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy index 09aafe2aa8..bfbf8f4f9a 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy @@ -48,7 +48,7 @@ class FilePropertyValueExtractorTest extends Specification { and: FileProcessor fileProcessor = Mock() - fileProcessor.extractFileContent(fileRef) >> {throw exceptionMock} + fileProcessor.extractFileContent(fileRef) >> { throw exceptionMock } and: FilePropertyValueExtractor extractor = new FilePropertyValueExtractor(fileProcessor) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy index efe7b51a4a..0408df7ddf 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy @@ -27,7 +27,7 @@ class FileProcessorTest extends Specification { FileStorageLocator storageLocatorMock = Mock() and: - def exception = Mock (UnsupportedFileTypeException) + def exception = Mock(UnsupportedFileTypeException) and: FileParserResolverManager fileParserResolver = Mock() diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java b/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java index f122e7be43..4d5056b677 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/LogbackMocker.java @@ -36,7 +36,7 @@ public static ListAppender createAttachedAppender(Class classF return appender; } - public static void cleanUpAppender(Class classForLogging, Appender appender){ + public static void cleanUpAppender(Class classForLogging, Appender appender) { Logger logger = (Logger) LoggerFactory.getLogger(classForLogging); logger.detachAppender(appender); } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy index a71a5fbda1..a424e48152 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy @@ -75,7 +75,7 @@ class FileParserResolverManagerIntegrationTest extends Specification { thrown(UnsupportedFileTypeException) where: - extension<<["txt1", "ems", "", "od", "ods2"] + extension << ["txt1", "ems", "", "od", "ods2"] } List getResolvers() { From 4f0d0538732aa323787b1bea3722f032a241b99f Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 26 Sep 2024 16:26:20 +0400 Subject: [PATCH 60/81] Capital letters checking --- ...ParserResolverManagerIntegrationTest.groovy | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy index a424e48152..fd013dd508 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy @@ -60,6 +60,24 @@ class FileParserResolverManagerIntegrationTest extends Specification { "xlsx" | OOXMLParser } + def "there is no appropriate resolver for the file if the capital letters"() { + given: + def manager = new FileParserResolverManager(getResolvers()) + + and: + def fileRef = Mock(FileRef) + fileRef.getFileName() >> "filename." + extension + + when: + manager.getParser(fileRef) + + then: + thrown(UnsupportedFileTypeException) + + where: + extension << ["TXT", "PDF", "RTF", "ODT", "ODS", "DOC", "XLS", "DOCX", "XLSX"] + } + def "there is not appropriate resolver for the file"() { given: def manager = new FileParserResolverManager(getResolvers()) From 545302bec220f5e552b75ff37a2f85e2aaa5f874 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 26 Sep 2024 16:34:42 +0400 Subject: [PATCH 61/81] Removing not necessary custom exception --- .../EmptyFileParserResolversList.java | 26 ------------------- .../utils/FileParserResolverManager.java | 6 +++-- .../FileParserResolverManagerTest.groovy | 4 +-- 3 files changed, 6 insertions(+), 30 deletions(-) delete mode 100644 jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java b/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java deleted file mode 100644 index 0390b7ddf3..0000000000 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/EmptyFileParserResolversList.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright 2024 Haulmont. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.jmix.search.exception; - -public class EmptyFileParserResolversList extends RuntimeException { - - private static final String MESSAGE = "There are no any file parser resolvers in the application."; - - public EmptyFileParserResolversList() { - super(MESSAGE); - } -} diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java index 9ee4dcea14..3b73aef77f 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java @@ -17,7 +17,6 @@ package io.jmix.search.utils; import io.jmix.core.FileRef; -import io.jmix.search.exception.EmptyFileParserResolversList; import io.jmix.search.exception.UnsupportedFileTypeException; import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; @@ -33,6 +32,9 @@ @Component("search_FileParserResolverManager") public class FileParserResolverManager { + private static final String EMPTY_FILE_PARSER_RESOLVERS_LIST_MESSAGE + = "There are no any file parser resolvers in the application."; + protected List fileParserResolvers; public FileParserResolverManager(List fileParserResolvers) { @@ -41,7 +43,7 @@ public FileParserResolverManager(List fileParserResolvers) { public Parser getParser(FileRef fileRef) throws UnsupportedFileTypeException { if (fileParserResolvers.isEmpty()) { - throw new EmptyFileParserResolversList(); + throw new IllegalStateException(EMPTY_FILE_PARSER_RESOLVERS_LIST_MESSAGE); } String fileName = fileRef.getFileName(); diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy index 44be57acca..2d765d06e5 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy @@ -17,7 +17,6 @@ package io.jmix.search.utils.parserresolving import io.jmix.core.FileRef -import io.jmix.search.exception.EmptyFileParserResolversList import io.jmix.search.exception.UnsupportedFileTypeException import io.jmix.search.index.fileparsing.FileParserResolver import io.jmix.search.utils.FileParserResolverManager @@ -96,7 +95,8 @@ class FileParserResolverManagerTest extends Specification { resolverManager.getParser(fileRef) then: - thrown(EmptyFileParserResolversList) + def exception = thrown(IllegalStateException) + exception.getMessage() == "There are no any file parser resolvers in the application." } FileParserResolver createExtensionBasedResolverResolver(String fileExtension, Parser parser) { From c7475c7f66307355ccbf2e1e48c8b69fcb5ece86 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 26 Sep 2024 16:40:12 +0400 Subject: [PATCH 62/81] Renaming the exception --- ...peException.java => UnsupportedFileFormatException.java} | 4 ++-- .../propertyvalue/impl/FilePropertyValueExtractor.java | 4 ++-- .../io/jmix/search/utils/FileParserResolverManager.java | 6 +++--- .../src/main/java/io/jmix/search/utils/FileProcessor.java | 6 +++--- ...est.groovy => UnsupportedFileFormatExceptionTest.groovy} | 4 ++-- .../impl/FilePropertyValueExtractorTest.groovy | 4 ++-- .../groovy/io/jmix/search/utils/FileProcessorTest.groovy | 6 +++--- .../FileParserResolverManagerIntegrationTest.groovy | 6 +++--- .../parserresolving/FileParserResolverManagerTest.groovy | 4 ++-- 9 files changed, 22 insertions(+), 22 deletions(-) rename jmix-search/search/src/main/java/io/jmix/search/exception/{UnsupportedFileTypeException.java => UnsupportedFileFormatException.java} (90%) rename jmix-search/search/src/test/groovy/io/jmix/search/exception/{UnsupportedFileTypeExceptionTest.groovy => UnsupportedFileFormatExceptionTest.groovy} (92%) diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileTypeException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java similarity index 90% rename from jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileTypeException.java rename to jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java index 6e395ec46c..34e74fddb2 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileTypeException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java @@ -22,7 +22,7 @@ * An exception that is thrown when a user added some file of the type that is not supported * and there are no any known parser for. */ -public class UnsupportedFileTypeException extends Exception { +public class UnsupportedFileFormatException extends Exception { private static final String MESSAGE = "The file %s can't be parsed. " + "Only the following file parsing criteria are supported:\n -%s"; @@ -31,7 +31,7 @@ public class UnsupportedFileTypeException extends Exception { * @param fileName - the name of the file which type is not supported * @param supportedExtensions - the list of the criteria that are supported in the application */ - public UnsupportedFileTypeException(String fileName, List supportedExtensions) { + public UnsupportedFileFormatException(String fileName, List supportedExtensions) { super(String.format( MESSAGE, fileName, diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java b/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java index 18f304faec..a0b2453f6d 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractor.java @@ -25,7 +25,7 @@ import io.jmix.core.metamodel.datatype.impl.FileRefDatatype; import io.jmix.core.metamodel.model.MetaPropertyPath; import io.jmix.search.exception.FileParseException; -import io.jmix.search.exception.UnsupportedFileTypeException; +import io.jmix.search.exception.UnsupportedFileFormatException; import io.jmix.search.index.mapping.ParameterKeys; import io.jmix.search.utils.BooleanParser; import io.jmix.search.utils.FileProcessor; @@ -91,7 +91,7 @@ protected void addFileContent(ObjectNode node, FileRef fileRef) { try { String content = fileProcessor.extractFileContent(fileRef); node.put("_content", content); - } catch (UnsupportedFileTypeException e) { + } catch (UnsupportedFileFormatException e) { log.warn(e.getMessage()); } catch (FileParseException e) { log.error("Unable to index file content", e); diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java index 3b73aef77f..1f841ce55f 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java @@ -17,7 +17,7 @@ package io.jmix.search.utils; import io.jmix.core.FileRef; -import io.jmix.search.exception.UnsupportedFileTypeException; +import io.jmix.search.exception.UnsupportedFileFormatException; import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.springframework.stereotype.Component; @@ -41,7 +41,7 @@ public FileParserResolverManager(List fileParserResolvers) { this.fileParserResolvers = fileParserResolvers; } - public Parser getParser(FileRef fileRef) throws UnsupportedFileTypeException { + public Parser getParser(FileRef fileRef) throws UnsupportedFileFormatException { if (fileParserResolvers.isEmpty()) { throw new IllegalStateException(EMPTY_FILE_PARSER_RESOLVERS_LIST_MESSAGE); } @@ -57,6 +57,6 @@ public Parser getParser(FileRef fileRef) throws UnsupportedFileTypeException { messages.add(resolver.getCriteriaDescription()); } - throw new UnsupportedFileTypeException(fileName, messages); + throw new UnsupportedFileFormatException(fileName, messages); } } diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index e496f880c9..c3c2a9fa19 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -21,7 +21,7 @@ import io.jmix.core.FileStorageLocator; import io.jmix.core.common.util.Preconditions; import io.jmix.search.exception.FileParseException; -import io.jmix.search.exception.UnsupportedFileTypeException; +import io.jmix.search.exception.UnsupportedFileFormatException; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; @@ -50,7 +50,7 @@ public FileProcessor(FileStorageLocator fileStorageLocator, FileParserResolverMa this.fileParserResolverManager = fileParserResolverManager; } - public String extractFileContent(FileRef fileRef) throws FileParseException, UnsupportedFileTypeException { + public String extractFileContent(FileRef fileRef) throws FileParseException, UnsupportedFileFormatException { Preconditions.checkNotNullArgument(fileRef); log.debug("Extract content of file {}", fileRef); FileStorage fileStorage = fileStorageLocator.getByName(fileRef.getStorageName()); @@ -80,7 +80,7 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Uns return stringWriter.toString(); } - protected Parser getParser(FileRef fileRef) throws UnsupportedFileTypeException { + protected Parser getParser(FileRef fileRef) throws UnsupportedFileFormatException { return fileParserResolverManager.getParser(fileRef); } } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileTypeExceptionTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileFormatExceptionTest.groovy similarity index 92% rename from jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileTypeExceptionTest.groovy rename to jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileFormatExceptionTest.groovy index 3d7e26e4c5..0626af61b1 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileTypeExceptionTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileFormatExceptionTest.groovy @@ -18,7 +18,7 @@ package io.jmix.search.exception import spock.lang.Specification -class UnsupportedFileTypeExceptionTest extends Specification { +class UnsupportedFileFormatExceptionTest extends Specification { private static final String MESSAGE_1 = "The file another-file.smt can't be parsed. " + @@ -38,7 +38,7 @@ class UnsupportedFileTypeExceptionTest extends Specification { def "message test"() { when: - def exception = new UnsupportedFileTypeException(fileName, supportedTypes) + def exception = new UnsupportedFileFormatException(fileName, supportedTypes) then: exception.getMessage() == message diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy index bfbf8f4f9a..ddff5856f8 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/mapping/propertyvalue/impl/FilePropertyValueExtractorTest.groovy @@ -20,7 +20,7 @@ import ch.qos.logback.classic.Level import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.core.read.ListAppender import io.jmix.core.FileRef -import io.jmix.search.exception.UnsupportedFileTypeException +import io.jmix.search.exception.UnsupportedFileFormatException import io.jmix.search.utils.FileProcessor import spock.lang.Specification @@ -43,7 +43,7 @@ class FilePropertyValueExtractorTest extends Specification { FileRef fileRef = Mock() and: - def exceptionMock = Mock(UnsupportedFileTypeException) + def exceptionMock = Mock(UnsupportedFileFormatException) exceptionMock.getMessage() >> "Some exception message." and: diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy index 0408df7ddf..19b318c9ce 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy @@ -18,7 +18,7 @@ package io.jmix.search.utils import io.jmix.core.FileRef import io.jmix.core.FileStorageLocator -import io.jmix.search.exception.UnsupportedFileTypeException +import io.jmix.search.exception.UnsupportedFileFormatException import spock.lang.Specification class FileProcessorTest extends Specification { @@ -27,7 +27,7 @@ class FileProcessorTest extends Specification { FileStorageLocator storageLocatorMock = Mock() and: - def exception = Mock(UnsupportedFileTypeException) + def exception = Mock(UnsupportedFileFormatException) and: FileParserResolverManager fileParserResolver = Mock() @@ -39,7 +39,7 @@ class FileProcessorTest extends Specification { fileProcessor.extractFileContent(fileRefMock) then: - UnsupportedFileTypeException throwable = thrown() + UnsupportedFileFormatException throwable = thrown() throwable == exception } } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy index fd013dd508..ab95aa2d84 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy @@ -17,7 +17,7 @@ package io.jmix.search.utils.parserresolving import io.jmix.core.FileRef -import io.jmix.search.exception.UnsupportedFileTypeException +import io.jmix.search.exception.UnsupportedFileFormatException import io.jmix.search.index.fileparsing.FileParserResolver import io.jmix.search.index.fileparsing.resolvers.MSOfficeDocumentsParserResolver import io.jmix.search.index.fileparsing.resolvers.OldMSOfficeDocumentsParserResolver @@ -72,7 +72,7 @@ class FileParserResolverManagerIntegrationTest extends Specification { manager.getParser(fileRef) then: - thrown(UnsupportedFileTypeException) + thrown(UnsupportedFileFormatException) where: extension << ["TXT", "PDF", "RTF", "ODT", "ODS", "DOC", "XLS", "DOCX", "XLSX"] @@ -90,7 +90,7 @@ class FileParserResolverManagerIntegrationTest extends Specification { manager.getParser(fileRef) then: - thrown(UnsupportedFileTypeException) + thrown(UnsupportedFileFormatException) where: extension << ["txt1", "ems", "", "od", "ods2"] diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy index 2d765d06e5..1ddaaa7cca 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy @@ -17,7 +17,7 @@ package io.jmix.search.utils.parserresolving import io.jmix.core.FileRef -import io.jmix.search.exception.UnsupportedFileTypeException +import io.jmix.search.exception.UnsupportedFileFormatException import io.jmix.search.index.fileparsing.FileParserResolver import io.jmix.search.utils.FileParserResolverManager import org.apache.tika.parser.Parser @@ -45,7 +45,7 @@ class FileParserResolverManagerTest extends Specification { parserResolver.getParser(fileRef) then: - def exception = thrown(UnsupportedFileTypeException) + def exception = thrown(UnsupportedFileFormatException) exception.getMessage().contains(fileName) where: From d04cf63f7f88670ec654ea11958c98db7334a7de Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 26 Sep 2024 16:48:28 +0400 Subject: [PATCH 63/81] Code style changes --- .../fileparsing/resolvers/MSOfficeDocumentsParserResolver.java | 1 + .../resolvers/OldMSOfficeDocumentsParserResolver.java | 2 +- .../resolvers/OpenOfficeDocumentsParserResolver.java | 2 +- .../search/index/fileparsing/resolvers/PDFParserResolver.java | 3 +-- .../search/index/fileparsing/resolvers/RTFParserResolver.java | 3 +-- .../search/index/fileparsing/resolvers/TXTParserResolver.java | 3 +-- .../search/exception/UnsupportedFileFormatExceptionTest.groovy | 1 - .../AbstractExtensionBasedFileParserResolverTest.groovy | 1 + .../test/groovy/io/jmix/search/utils/FileProcessorTest.groovy | 1 + 9 files changed, 8 insertions(+), 9 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java index 7e81904c8f..1f7a971fca 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java @@ -27,6 +27,7 @@ @Component("search_OfficeDocumentsParserResolver") @Order(100) public class MSOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { + @Override public List getSupportedExtensions() { return List.of("docx", "xlsx"); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java index d1eb841b14..73120af92e 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java @@ -17,7 +17,6 @@ package io.jmix.search.index.fileparsing.resolvers; import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; -import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.microsoft.OfficeParser; import org.springframework.core.annotation.Order; @@ -28,6 +27,7 @@ @Component("search_OldOfficeDocumentsParserResolver") @Order(100) public class OldMSOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { + @Override public List getSupportedExtensions() { return List.of("doc", "xls"); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java index 5944a0631a..4b1dff5623 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java @@ -17,7 +17,6 @@ package io.jmix.search.index.fileparsing.resolvers; import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; -import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.odf.OpenDocumentParser; import org.springframework.core.annotation.Order; @@ -28,6 +27,7 @@ @Component("search_OpenOfficeDocumentsParserResolver") @Order(100) public class OpenOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { + @Override public List getSupportedExtensions() { return List.of("odt", "ods"); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java index 3e56a082ff..5ff65aa825 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java @@ -17,10 +17,8 @@ package io.jmix.search.index.fileparsing.resolvers; import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; -import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; import org.apache.tika.parser.pdf.PDFParser; -import org.apache.tika.parser.rtf.RTFParser; import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; @@ -29,6 +27,7 @@ @Component("search_PDFParserResolver") @Order(100) public class PDFParserResolver extends AbstractExtensionBasedFileParserResolver { + @Override public List getSupportedExtensions() { return List.of("pdf"); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java index df132c612d..d85d4f2891 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java @@ -17,9 +17,7 @@ package io.jmix.search.index.fileparsing.resolvers; import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; -import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; -import org.apache.tika.parser.pdf.PDFParser; import org.apache.tika.parser.rtf.RTFParser; import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; @@ -29,6 +27,7 @@ @Component("search_RTFParserResolver") @Order(100) public class RTFParserResolver extends AbstractExtensionBasedFileParserResolver { + @Override public List getSupportedExtensions() { return List.of("rtf"); diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java index 81505d7a75..6ee0cb41be 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java @@ -17,9 +17,7 @@ package io.jmix.search.index.fileparsing.resolvers; import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; -import io.jmix.search.index.fileparsing.FileParserResolver; import org.apache.tika.parser.Parser; -import org.apache.tika.parser.pdf.PDFParser; import org.apache.tika.parser.txt.TXTParser; import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; @@ -29,6 +27,7 @@ @Component("search_TXTParserResolver") @Order(100) public class TXTParserResolver extends AbstractExtensionBasedFileParserResolver { + @Override public List getSupportedExtensions() { return List.of("txt"); diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileFormatExceptionTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileFormatExceptionTest.groovy index 0626af61b1..d0e319b7bf 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileFormatExceptionTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/exception/UnsupportedFileFormatExceptionTest.groovy @@ -20,7 +20,6 @@ import spock.lang.Specification class UnsupportedFileFormatExceptionTest extends Specification { - private static final String MESSAGE_1 = "The file another-file.smt can't be parsed. " + "Only the following file parsing criteria are supported:\n" + " -The only one criteria." diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy index bc956184e2..d512ab7bad 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -21,6 +21,7 @@ import org.apache.tika.parser.Parser import spock.lang.Specification class AbstractExtensionBasedFileParserResolverTest extends Specification { + def "GetCriteriaDescription"() { given: def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), extensions as List) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy index 19b318c9ce..2f62bc02ff 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy @@ -22,6 +22,7 @@ import io.jmix.search.exception.UnsupportedFileFormatException import spock.lang.Specification class FileProcessorTest extends Specification { + def "should throw the UnsupportedFileTypeException that have been thrown by the FileParserResolver"() { given: FileStorageLocator storageLocatorMock = Mock() From 11c0b80b711c65c607f980a9538fb7c1a13daf00 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 26 Sep 2024 17:49:41 +0400 Subject: [PATCH 64/81] JavaDocs correction --- .../UnsupportedFileFormatException.java | 4 ++-- .../index/fileparsing/FileParserResolver.java | 17 ++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java index 34e74fddb2..1b958547ab 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java +++ b/jmix-search/search/src/main/java/io/jmix/search/exception/UnsupportedFileFormatException.java @@ -28,8 +28,8 @@ public class UnsupportedFileFormatException extends Exception { "Only the following file parsing criteria are supported:\n -%s"; /** - * @param fileName - the name of the file which type is not supported - * @param supportedExtensions - the list of the criteria that are supported in the application + * @param fileName the name of the file which type is not supported + * @param supportedExtensions the list of the criteria that are supported in the application */ public UnsupportedFileFormatException(String fileName, List supportedExtensions) { super(String.format( diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java index 36d0134b41..ce06936937 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java @@ -19,18 +19,18 @@ import io.jmix.core.FileRef; import org.apache.tika.parser.Parser; -import java.util.List; - /** - * Is a part of the extendable engine the gives an ability to implement custom file parser resolvers and to support - * custom file types or to modify behavior of existing file parser resolvers. + * Interface to be implemented for adding a custom file parser resolver + * or modifying the behavior of the existing file parser resolvers. It gives an ability to define the exact parser + * for the exact file types with a custom implementation of the file checking logic. These parsers are used to extract + * file content for sending it to the search server and indexing. */ public interface FileParserResolver { /** - * This method should return the description that describes the constraints or the constraint for the files - * that are supported with this resolver. This message is used for generating the log message that - * is written into the log while no one of the resolvers supports the processed file. + * Returns the description of the criteria for the files that are supported with this resolver. + * This message is used for generating the log message that is written into the log + * while no one of the resolvers supports the processing file. * * @return criteria description */ @@ -44,8 +44,7 @@ public interface FileParserResolver { Parser getParser(); /** - * This method should implement the logic for checking - * if the file with given fileRef is supported by the resolver or not. + * Returns the result of the checking if the file with the given fileRef is supported by the resolver or not. * * @param fileRef object with the file information * @return the given FileRef's checking result From 0bf2067be37f02c5225a6afba0a14db6338e5078 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 26 Sep 2024 17:53:55 +0400 Subject: [PATCH 65/81] JavaDocs correction --- .../io/jmix/search/index/fileparsing/FileParserResolver.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java index ce06936937..71913beedf 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java @@ -29,8 +29,8 @@ public interface FileParserResolver { /** * Returns the description of the criteria for the files that are supported with this resolver. - * This message is used for generating the log message that is written into the log - * while no one of the resolvers supports the processing file. + * This text is used for generating the log message that is written into the log + * while no one of the resolvers supports the processing of the given file. * * @return criteria description */ From 8292c8850459e31ac29c1304a2aa9ae0fb3bbff7 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 26 Sep 2024 18:00:37 +0400 Subject: [PATCH 66/81] Test correction --- .../FileParserResolverManagerIntegrationTest.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy index ab95aa2d84..c16ae2d8e0 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy @@ -60,7 +60,7 @@ class FileParserResolverManagerIntegrationTest extends Specification { "xlsx" | OOXMLParser } - def "there is no appropriate resolver for the file if the capital letters"() { + def "there is no appropriate resolver for the file if the file is with the capital letters"() { given: def manager = new FileParserResolverManager(getResolvers()) From 33fcc94ff70166260c98397e4a7ee293331ca05a Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 26 Sep 2024 18:18:02 +0400 Subject: [PATCH 67/81] List to Set changing --- .../AbstractExtensionBasedFileParserResolver.java | 6 +++--- .../resolvers/MSOfficeDocumentsParserResolver.java | 6 +++--- .../resolvers/OldMSOfficeDocumentsParserResolver.java | 6 +++--- .../resolvers/OpenOfficeDocumentsParserResolver.java | 6 +++--- .../index/fileparsing/resolvers/PDFParserResolver.java | 6 +++--- .../index/fileparsing/resolvers/RTFParserResolver.java | 6 +++--- .../index/fileparsing/resolvers/TXTParserResolver.java | 6 +++--- ...AbstractExtensionBasedFileParserResolverTest.groovy | 10 +++++----- 8 files changed, 26 insertions(+), 26 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java index 4cbb7719f1..28e76ce99c 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java @@ -20,7 +20,7 @@ import io.jmix.core.FileRef; import org.apache.commons.io.FilenameUtils; -import java.util.List; +import java.util.Set; /** * Implements the common logic for all extension based file parser resolvers. @@ -32,7 +32,7 @@ public abstract class AbstractExtensionBasedFileParserResolver implements FilePa * * @return collection of supported extensions */ - public abstract List getSupportedExtensions(); + public abstract Set getSupportedExtensions(); @Override public String getCriteriaDescription() { @@ -50,7 +50,7 @@ public boolean supports(FileRef fileRef) { return getSupportedExtensions().contains(fileExtension); } - protected String getSupportedExtensionsString(List supportedExtensions) { + protected String getSupportedExtensionsString(Set supportedExtensions) { return String.join(", ", supportedExtensions); } } diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java index 1f7a971fca..4c91513203 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java @@ -22,15 +22,15 @@ import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; -import java.util.List; +import java.util.Set; @Component("search_OfficeDocumentsParserResolver") @Order(100) public class MSOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { @Override - public List getSupportedExtensions() { - return List.of("docx", "xlsx"); + public Set getSupportedExtensions() { + return Set.of("docx", "xlsx"); } @Override diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java index 73120af92e..a2173839f8 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java @@ -22,15 +22,15 @@ import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; -import java.util.List; +import java.util.Set; @Component("search_OldOfficeDocumentsParserResolver") @Order(100) public class OldMSOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { @Override - public List getSupportedExtensions() { - return List.of("doc", "xls"); + public Set getSupportedExtensions() { + return Set.of("doc", "xls"); } @Override diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java index 4b1dff5623..42c65b6a48 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java @@ -22,15 +22,15 @@ import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; -import java.util.List; +import java.util.Set; @Component("search_OpenOfficeDocumentsParserResolver") @Order(100) public class OpenOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { @Override - public List getSupportedExtensions() { - return List.of("odt", "ods"); + public Set getSupportedExtensions() { + return Set.of("odt", "ods"); } @Override diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java index 5ff65aa825..588a3b83de 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java @@ -22,15 +22,15 @@ import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; -import java.util.List; +import java.util.Set; @Component("search_PDFParserResolver") @Order(100) public class PDFParserResolver extends AbstractExtensionBasedFileParserResolver { @Override - public List getSupportedExtensions() { - return List.of("pdf"); + public Set getSupportedExtensions() { + return Set.of("pdf"); } @Override diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java index d85d4f2891..f95e9f4f15 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java @@ -22,15 +22,15 @@ import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; -import java.util.List; +import java.util.Set; @Component("search_RTFParserResolver") @Order(100) public class RTFParserResolver extends AbstractExtensionBasedFileParserResolver { @Override - public List getSupportedExtensions() { - return List.of("rtf"); + public Set getSupportedExtensions() { + return Set.of("rtf"); } @Override diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java index 6ee0cb41be..146fb7ef9d 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java @@ -22,15 +22,15 @@ import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; -import java.util.List; +import java.util.Set; @Component("search_TXTParserResolver") @Order(100) public class TXTParserResolver extends AbstractExtensionBasedFileParserResolver { @Override - public List getSupportedExtensions() { - return List.of("txt"); + public Set getSupportedExtensions() { + return Set.of("txt"); } @Override diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy index d512ab7bad..4cb394a9e5 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -24,7 +24,7 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { def "GetCriteriaDescription"() { given: - def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), extensions as List) + def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), extensions as Set) expect: resolver.getCriteriaDescription() == criteriaDescription @@ -38,7 +38,7 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { def "Supports"() { given: - def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), extensions as List) + def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), extensions as Set) and: def fileRef = Mock(FileRef) @@ -64,15 +64,15 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { private static class TestExtensionBasedFileParserResolver extends AbstractExtensionBasedFileParserResolver { private Parser parser - private List extensions + private Set extensions - TestExtensionBasedFileParserResolver(Parser parser, List extensions) { + TestExtensionBasedFileParserResolver(Parser parser, Set extensions) { this.parser = parser this.extensions = extensions } @Override - List getSupportedExtensions() { + Set getSupportedExtensions() { return extensions; } From 6b0786409d3f7e62ec388557ce6615ebce0de9a1 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 26 Sep 2024 18:29:25 +0400 Subject: [PATCH 68/81] FileParserResolverManager -> FileParserProvider --- ...olverManager.java => FileParserProvider.java} | 6 +++--- .../java/io/jmix/search/utils/FileProcessor.java | 8 ++++---- .../jmix/search/utils/FileProcessorTest.groovy | 6 +++--- ... => FileParserProviderIntegrationTest.groovy} | 16 ++++++++-------- ...Test.groovy => FileParserProviderTest.groovy} | 14 +++++++------- 5 files changed, 25 insertions(+), 25 deletions(-) rename jmix-search/search/src/main/java/io/jmix/search/utils/{FileParserResolverManager.java => FileParserProvider.java} (92%) rename jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/{FileParserResolverManagerIntegrationTest.groovy => FileParserProviderIntegrationTest.groovy} (87%) rename jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/{FileParserResolverManagerTest.groovy => FileParserProviderTest.groovy} (87%) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java similarity index 92% rename from jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java rename to jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java index 1f841ce55f..c351000955 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserResolverManager.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java @@ -29,15 +29,15 @@ * The service that searches appropriate file parsers for the supported file types. * A search principle is based on the sequential applying FileParserResolver objects' checks for the given file. */ -@Component("search_FileParserResolverManager") -public class FileParserResolverManager { +@Component("search_FileParserProvider") +public class FileParserProvider { private static final String EMPTY_FILE_PARSER_RESOLVERS_LIST_MESSAGE = "There are no any file parser resolvers in the application."; protected List fileParserResolvers; - public FileParserResolverManager(List fileParserResolvers) { + public FileParserProvider(List fileParserResolvers) { this.fileParserResolvers = fileParserResolvers; } diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index c3c2a9fa19..4f23c7df5c 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -43,11 +43,11 @@ public class FileProcessor { private static final Logger log = LoggerFactory.getLogger(FileProcessor.class); protected FileStorageLocator fileStorageLocator; - protected FileParserResolverManager fileParserResolverManager; + protected FileParserProvider fileParserProvider; - public FileProcessor(FileStorageLocator fileStorageLocator, FileParserResolverManager fileParserResolverManager) { + public FileProcessor(FileStorageLocator fileStorageLocator, FileParserProvider fileParserProvider) { this.fileStorageLocator = fileStorageLocator; - this.fileParserResolverManager = fileParserResolverManager; + this.fileParserProvider = fileParserProvider; } public String extractFileContent(FileRef fileRef) throws FileParseException, UnsupportedFileFormatException { @@ -81,6 +81,6 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Uns } protected Parser getParser(FileRef fileRef) throws UnsupportedFileFormatException { - return fileParserResolverManager.getParser(fileRef); + return fileParserProvider.getParser(fileRef); } } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy index 2f62bc02ff..60ef3267a2 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy @@ -31,10 +31,10 @@ class FileProcessorTest extends Specification { def exception = Mock(UnsupportedFileFormatException) and: - FileParserResolverManager fileParserResolver = Mock() + FileParserProvider fileParserProvider = Mock() FileRef fileRefMock = Mock() - fileParserResolver.getParser(fileRefMock) >> { throw exception } - FileProcessor fileProcessor = new FileProcessor(storageLocatorMock, fileParserResolver) + fileParserProvider.getParser(fileRefMock) >> { throw exception } + FileProcessor fileProcessor = new FileProcessor(storageLocatorMock, fileParserProvider) when: fileProcessor.extractFileContent(fileRefMock) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy similarity index 87% rename from jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy rename to jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy index c16ae2d8e0..a6bdb4206a 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerIntegrationTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy @@ -25,7 +25,7 @@ import io.jmix.search.index.fileparsing.resolvers.OpenOfficeDocumentsParserResol import io.jmix.search.index.fileparsing.resolvers.PDFParserResolver import io.jmix.search.index.fileparsing.resolvers.RTFParserResolver import io.jmix.search.index.fileparsing.resolvers.TXTParserResolver -import io.jmix.search.utils.FileParserResolverManager +import io.jmix.search.utils.FileParserProvider import org.apache.tika.parser.microsoft.OfficeParser import org.apache.tika.parser.microsoft.ooxml.OOXMLParser import org.apache.tika.parser.odf.OpenDocumentParser @@ -34,18 +34,18 @@ import org.apache.tika.parser.rtf.RTFParser import org.apache.tika.parser.txt.TXTParser import spock.lang.Specification -class FileParserResolverManagerIntegrationTest extends Specification { +class FileParserProviderIntegrationTest extends Specification { def "there is appropriate resolver for the file"() { given: - def manager = new FileParserResolverManager(getResolvers()) + def provider = new FileParserProvider(getResolvers()) and: def fileRef = Mock(FileRef) fileRef.getFileName() >> "filename." + extension expect: - manager.getParser(fileRef).getClass() == theClass + provider.getParser(fileRef).getClass() == theClass where: extension | theClass @@ -62,14 +62,14 @@ class FileParserResolverManagerIntegrationTest extends Specification { def "there is no appropriate resolver for the file if the file is with the capital letters"() { given: - def manager = new FileParserResolverManager(getResolvers()) + def provider = new FileParserProvider(getResolvers()) and: def fileRef = Mock(FileRef) fileRef.getFileName() >> "filename." + extension when: - manager.getParser(fileRef) + provider.getParser(fileRef) then: thrown(UnsupportedFileFormatException) @@ -80,14 +80,14 @@ class FileParserResolverManagerIntegrationTest extends Specification { def "there is not appropriate resolver for the file"() { given: - def manager = new FileParserResolverManager(getResolvers()) + def provider = new FileParserProvider(getResolvers()) and: def fileRef = Mock(FileRef) fileRef.getFileName() >> "filename." + extension when: - manager.getParser(fileRef) + provider.getParser(fileRef) then: thrown(UnsupportedFileFormatException) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy similarity index 87% rename from jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy rename to jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy index 1ddaaa7cca..43eff0b6fc 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverManagerTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy @@ -19,13 +19,13 @@ package io.jmix.search.utils.parserresolving import io.jmix.core.FileRef import io.jmix.search.exception.UnsupportedFileFormatException import io.jmix.search.index.fileparsing.FileParserResolver -import io.jmix.search.utils.FileParserResolverManager +import io.jmix.search.utils.FileParserProvider import org.apache.tika.parser.Parser import spock.lang.Specification import static java.util.Collections.emptyList -class FileParserResolverManagerTest extends Specification { +class FileParserProviderTest extends Specification { def "should throw UnsupportedFileExtensionException when the given file of unsupported type"() { given: @@ -39,10 +39,10 @@ class FileParserResolverManagerTest extends Specification { resolver2.supports(fileRef) >> false and: - def parserResolver = new FileParserResolverManager(List.of(resolver, resolver2)) + def parserProvider = new FileParserProvider(List.of(resolver, resolver2)) when: - parserResolver.getParser(fileRef) + parserProvider.getParser(fileRef) then: def exception = thrown(UnsupportedFileFormatException) @@ -65,10 +65,10 @@ class FileParserResolverManagerTest extends Specification { resolver3.getParser() >> parser3 and: - def resolverManager = new FileParserResolverManager(List.of(resolver1, resolver2, resolver3)) + def parserProvider = new FileParserProvider(List.of(resolver1, resolver2, resolver3)) when: - def resolvedParser = resolverManager.getParser(fileRef) + def resolvedParser = parserProvider.getParser(fileRef) then: resolvedParser != null @@ -89,7 +89,7 @@ class FileParserResolverManagerTest extends Specification { FileRef fileRef = Mock() and: - def resolverManager = new FileParserResolverManager(emptyList()) + def resolverManager = new FileParserProvider(emptyList()) when: resolverManager.getParser(fileRef) From 51ee3fd30e978c3079c320c80bcbbd3dbb9a33d0 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 26 Sep 2024 23:51:25 +0400 Subject: [PATCH 69/81] Message text correction --- .../AbstractExtensionBasedFileParserResolver.java | 5 ++++- .../AbstractExtensionBasedFileParserResolverTest.groovy | 6 +++--- .../FileParserProviderIntegrationTest.groovy | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java index 28e76ce99c..f5460f01c8 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java @@ -36,7 +36,10 @@ public abstract class AbstractExtensionBasedFileParserResolver implements FilePa @Override public String getCriteriaDescription() { - return String.format("The file extension should be one of the following: %s.", getSupportedExtensionsString(getSupportedExtensions())); + return String.format( + "Parser: %s. Supported extensions: %s.", + this.getClass().getSimpleName(), + getSupportedExtensionsString(getSupportedExtensions())); } @Override diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy index 4cb394a9e5..c23a43c93d 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -31,9 +31,9 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { where: extensions | criteriaDescription - ["ext1"] | "The file extension should be one of the following: ext1." - ["ext1", "ext2"] | "The file extension should be one of the following: ext1, ext2." - ["ext1", "ext2", "ext3"] | "The file extension should be one of the following: ext1, ext2, ext3." + ["ext1"] | "Parser: TestExtensionBasedFileParserResolver. Supported extensions: ext1." + ["ext1", "ext2"] | "Parser: TestExtensionBasedFileParserResolver. Supported extensions: ext1, ext2." + ["ext1", "ext2", "ext3"] | "Parser: TestExtensionBasedFileParserResolver. Supported extensions: ext1, ext2, ext3." } def "Supports"() { diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy index a6bdb4206a..45a9fd5af9 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy @@ -20,7 +20,7 @@ import io.jmix.core.FileRef import io.jmix.search.exception.UnsupportedFileFormatException import io.jmix.search.index.fileparsing.FileParserResolver import io.jmix.search.index.fileparsing.resolvers.MSOfficeDocumentsParserResolver -import io.jmix.search.index.fileparsing.resolvers.OldMSOfficeDocumentsParserResolver +import io.jmix.search.index.fileparsing.resolvers.LegacyMSOfficeDocumentsParserResolver import io.jmix.search.index.fileparsing.resolvers.OpenOfficeDocumentsParserResolver import io.jmix.search.index.fileparsing.resolvers.PDFParserResolver import io.jmix.search.index.fileparsing.resolvers.RTFParserResolver @@ -99,7 +99,7 @@ class FileParserProviderIntegrationTest extends Specification { List getResolvers() { List.of( new MSOfficeDocumentsParserResolver(), - new OldMSOfficeDocumentsParserResolver(), + new LegacyMSOfficeDocumentsParserResolver(), new OpenOfficeDocumentsParserResolver(), new PDFParserResolver(), new RTFParserResolver(), From e8016ffa9e0ac652d02cc997aac3dee6454f062f Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 26 Sep 2024 23:51:50 +0400 Subject: [PATCH 70/81] OldMSOfficeDocumentsParserResolver > LegacyMSOfficeDocumentsParserResolver --- ...solver.java => LegacyMSOfficeDocumentsParserResolver.java} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/{OldMSOfficeDocumentsParserResolver.java => LegacyMSOfficeDocumentsParserResolver.java} (87%) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/LegacyMSOfficeDocumentsParserResolver.java similarity index 87% rename from jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java rename to jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/LegacyMSOfficeDocumentsParserResolver.java index a2173839f8..b5c953626e 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OldMSOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/LegacyMSOfficeDocumentsParserResolver.java @@ -24,9 +24,9 @@ import java.util.Set; -@Component("search_OldOfficeDocumentsParserResolver") +@Component("search_LegacyMSOfficeDocumentsParserResolver") @Order(100) -public class OldMSOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { +public class LegacyMSOfficeDocumentsParserResolver extends AbstractExtensionBasedFileParserResolver { @Override public Set getSupportedExtensions() { From b6bf51f2615b9295ad928482f0760c150cd13783 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 27 Sep 2024 00:09:27 +0400 Subject: [PATCH 71/81] JavaDoc --- .../AbstractExtensionBasedFileParserResolver.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java index f5460f01c8..1f38b88341 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java @@ -28,7 +28,10 @@ public abstract class AbstractExtensionBasedFileParserResolver implements FileParserResolver { /** - * Returns a collection of supported extensions of the supported file type. E.g. ["xlsx", "XLSX", "docx", "DOCX"]. + * Returns a collection of supported extensions of the supported file type. + * Note that the extension checking mechanism is case-sensitive. So in order to support + * the both uppercase one and lowercase option of the extension they should be defined explicitly. + * E.g. ["xlsx", "XLSX", "docx", "DOCX"]. * * @return collection of supported extensions */ From 6da446f1ed6427196b24ab5bf4a73207897e4289 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 27 Sep 2024 01:51:17 +0400 Subject: [PATCH 72/81] Getting FileParsingBundle with FileParserResolver --- ...tractExtensionBasedFileParserResolver.java | 41 +++++++++++++++++++ .../index/fileparsing/FileParserResolver.java | 5 +-- .../index/fileparsing/FileParsingBundle.java | 32 +++++++++++++++ .../jmix/search/utils/FileParserProvider.java | 6 +-- .../io/jmix/search/utils/FileProcessor.java | 14 +++++-- ...xtensionBasedFileParserResolverTest.groovy | 24 +++++++++++ .../index/fileparsing/ObjectsComparator.java | 23 +++++++++++ .../search/utils/FileProcessorTest.groovy | 2 +- .../FileParserProviderIntegrationTest.groovy | 6 +-- .../FileParserProviderTest.groovy | 27 ++++++++---- 10 files changed, 158 insertions(+), 22 deletions(-) create mode 100644 jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParsingBundle.java create mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/ObjectsComparator.java diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java index 1f38b88341..23a4708d86 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java @@ -19,8 +19,14 @@ import com.google.common.base.Strings; import io.jmix.core.FileRef; import org.apache.commons.io.FilenameUtils; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.BodyContentHandler; +import java.io.StringWriter; import java.util.Set; +import java.util.function.Function; /** * Implements the common logic for all extension based file parser resolvers. @@ -59,4 +65,39 @@ public boolean supports(FileRef fileRef) { protected String getSupportedExtensionsString(Set supportedExtensions) { return String.join(", ", supportedExtensions); } + + @Override + public FileParsingBundle getParsingBundle() { + return new FileParsingBundle( + getParser(), + getBodyContentHandlerGenerator(), + getMetadata(), + getParseContext()); + } + + /** + * Returns a parser for the supported file type. + */ + protected abstract Parser getParser(); + + /** + * Returns a function for the BodyContentHandler generating that is necessary for the given file parsing. + */ + protected Function getBodyContentHandlerGenerator() { + return stringWriter -> new BodyContentHandler(stringWriter); + } + + /** + * Returns a Metadata object for the given file parsing. + */ + protected Metadata getMetadata() { + return new Metadata(); + } + + /** + * Returns a ParseContext object for the given file parsing. + */ + protected ParseContext getParseContext() { + return new ParseContext(); + } } diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java index 71913beedf..9fa26c9ccc 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java @@ -17,7 +17,6 @@ package io.jmix.search.index.fileparsing; import io.jmix.core.FileRef; -import org.apache.tika.parser.Parser; /** * Interface to be implemented for adding a custom file parser resolver @@ -37,11 +36,11 @@ public interface FileParserResolver { String getCriteriaDescription(); /** - * Returns an instance of a file parser for the supported file types. + * Returns a complex object that contains the all necessary objects for the supported file type parsing. * * @return an instance of a file parser */ - Parser getParser(); + FileParsingBundle getParsingBundle(); /** * Returns the result of the checking if the file with the given fileRef is supported by the resolver or not. diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParsingBundle.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParsingBundle.java new file mode 100644 index 0000000000..60629b424c --- /dev/null +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParsingBundle.java @@ -0,0 +1,32 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.index.fileparsing; + +import jakarta.validation.constraints.NotNull; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.BodyContentHandler; + +import java.io.StringWriter; +import java.util.function.Function; + +public record FileParsingBundle( + @NotNull Parser parser, + @NotNull Function bodyContentHandlerGenerator, + @NotNull Metadata metadata, + @NotNull ParseContext parseContext) {} diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java index c351000955..ce280c0d56 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java @@ -19,7 +19,7 @@ import io.jmix.core.FileRef; import io.jmix.search.exception.UnsupportedFileFormatException; import io.jmix.search.index.fileparsing.FileParserResolver; -import org.apache.tika.parser.Parser; +import io.jmix.search.index.fileparsing.FileParsingBundle; import org.springframework.stereotype.Component; import java.util.ArrayList; @@ -41,7 +41,7 @@ public FileParserProvider(List fileParserResolvers) { this.fileParserResolvers = fileParserResolvers; } - public Parser getParser(FileRef fileRef) throws UnsupportedFileFormatException { + public FileParsingBundle getParsingBundle(FileRef fileRef) throws UnsupportedFileFormatException { if (fileParserResolvers.isEmpty()) { throw new IllegalStateException(EMPTY_FILE_PARSER_RESOLVERS_LIST_MESSAGE); } @@ -52,7 +52,7 @@ public Parser getParser(FileRef fileRef) throws UnsupportedFileFormatException { for (FileParserResolver resolver : fileParserResolvers) { if (resolver.supports(fileRef)) { - return resolver.getParser(); + return resolver.getParsingBundle(); } messages.add(resolver.getCriteriaDescription()); } diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index 4f23c7df5c..3017a1c089 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -22,6 +22,7 @@ import io.jmix.core.common.util.Preconditions; import io.jmix.search.exception.FileParseException; import io.jmix.search.exception.UnsupportedFileFormatException; +import io.jmix.search.index.fileparsing.FileParsingBundle; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; @@ -54,13 +55,18 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Uns Preconditions.checkNotNullArgument(fileRef); log.debug("Extract content of file {}", fileRef); FileStorage fileStorage = fileStorageLocator.getByName(fileRef.getStorageName()); - Parser parser = getParser(fileRef); + FileParsingBundle parsingBundle = getParsingBundle(fileRef); + Parser parser = parsingBundle.parser(); log.debug("Parser for file {}: {}", fileRef, parser); StringWriter stringWriter = new StringWriter(); ParseContext parseContext = createParseContext(); try (InputStream stream = fileStorage.openStream(fileRef)) { - parser.parse(stream, new BodyContentHandler(stringWriter), new Metadata(), parseContext); + parser.parse( + stream, + parsingBundle.bodyContentHandlerGenerator().apply(stringWriter), + parsingBundle.metadata(), + parsingBundle.parseContext()); } catch (OfficeXmlFileException e) { if (parser instanceof OfficeParser) { parser = new OOXMLParser(); @@ -80,7 +86,7 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Uns return stringWriter.toString(); } - protected Parser getParser(FileRef fileRef) throws UnsupportedFileFormatException { - return fileParserProvider.getParser(fileRef); + protected FileParsingBundle getParsingBundle(FileRef fileRef) throws UnsupportedFileFormatException { + return fileParserProvider.getParsingBundle(fileRef); } } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy index c23a43c93d..ec6b844d81 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -20,8 +20,32 @@ import io.jmix.core.FileRef import org.apache.tika.parser.Parser import spock.lang.Specification +import static io.jmix.search.index.fileparsing.ObjectsComparator.isTheSame + class AbstractExtensionBasedFileParserResolverTest extends Specification { + def "getParsingBundle. Different bundles are returned each time"() { + given: + def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), Mock(Set)) + + expect: + resolver.getParsingBundle() != resolver.getParsingBundle() + } + + def "getParsingBundle. Not the same objects inside of the bundles"() { + given: + def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), Mock(Set)) + + when: + def bundle1 = resolver.getParsingBundle() + def bundle2 = resolver.getParsingBundle() + + then: + bundle1.bodyContentHandlerGenerator() != null + !isTheSame(bundle1.metadata(), bundle2.metadata()) + !isTheSame(bundle1.parseContext(), bundle2.parseContext()) + } + def "GetCriteriaDescription"() { given: def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), extensions as Set) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/ObjectsComparator.java b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/ObjectsComparator.java new file mode 100644 index 0000000000..b2697e07a7 --- /dev/null +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/ObjectsComparator.java @@ -0,0 +1,23 @@ +/* + * Copyright 2024 Haulmont. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.jmix.search.index.fileparsing; + +public class ObjectsComparator { + public static boolean isTheSame(Object object, Object object2){ + return object == object2; + } +} diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy index 60ef3267a2..4d6087da34 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy @@ -33,7 +33,7 @@ class FileProcessorTest extends Specification { and: FileParserProvider fileParserProvider = Mock() FileRef fileRefMock = Mock() - fileParserProvider.getParser(fileRefMock) >> { throw exception } + fileParserProvider.getParsingBundle(fileRefMock) >> { throw exception } FileProcessor fileProcessor = new FileProcessor(storageLocatorMock, fileParserProvider) when: diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy index 45a9fd5af9..aef72381f7 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy @@ -45,7 +45,7 @@ class FileParserProviderIntegrationTest extends Specification { fileRef.getFileName() >> "filename." + extension expect: - provider.getParser(fileRef).getClass() == theClass + provider.getParsingBundle(fileRef).parser().getClass() == theClass where: extension | theClass @@ -69,7 +69,7 @@ class FileParserProviderIntegrationTest extends Specification { fileRef.getFileName() >> "filename." + extension when: - provider.getParser(fileRef) + provider.getParsingBundle(fileRef) then: thrown(UnsupportedFileFormatException) @@ -87,7 +87,7 @@ class FileParserProviderIntegrationTest extends Specification { fileRef.getFileName() >> "filename." + extension when: - provider.getParser(fileRef) + provider.getParsingBundle(fileRef) then: thrown(UnsupportedFileFormatException) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy index 43eff0b6fc..67d7b80365 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy @@ -19,9 +19,13 @@ package io.jmix.search.utils.parserresolving import io.jmix.core.FileRef import io.jmix.search.exception.UnsupportedFileFormatException import io.jmix.search.index.fileparsing.FileParserResolver +import io.jmix.search.index.fileparsing.FileParsingBundle import io.jmix.search.utils.FileParserProvider +import org.apache.tika.metadata.Metadata +import org.apache.tika.parser.ParseContext import org.apache.tika.parser.Parser import spock.lang.Specification +import java.util.function.Function import static java.util.Collections.emptyList @@ -42,7 +46,7 @@ class FileParserProviderTest extends Specification { def parserProvider = new FileParserProvider(List.of(resolver, resolver2)) when: - parserProvider.getParser(fileRef) + parserProvider.getParsingBundle(fileRef) then: def exception = thrown(UnsupportedFileFormatException) @@ -58,17 +62,20 @@ class FileParserProviderTest extends Specification { fileRef.getFileName() >> fileName and: - def resolver1 = createExtensionBasedResolverResolver("txt", parser1) - def resolver2 = createExtensionBasedResolverResolver("rtf", parser2) + def resolver1 = createExtensionBasedResolver("txt", parser1) + def resolver2 = createExtensionBasedResolver("rtf", parser2) def resolver3 = Mock(FileParserResolver) resolver3.supports(_ as FileRef) >> true; - resolver3.getParser() >> parser3 + resolver3.getParsingBundle() >> new FileParsingBundle(parser3, + Mock(Function), + Mock(Metadata), + Mock(ParseContext)) and: def parserProvider = new FileParserProvider(List.of(resolver1, resolver2, resolver3)) when: - def resolvedParser = parserProvider.getParser(fileRef) + def resolvedParser = parserProvider.getParsingBundle(fileRef).parser() then: resolvedParser != null @@ -92,14 +99,14 @@ class FileParserProviderTest extends Specification { def resolverManager = new FileParserProvider(emptyList()) when: - resolverManager.getParser(fileRef) + resolverManager.getParsingBundle(fileRef) then: def exception = thrown(IllegalStateException) exception.getMessage() == "There are no any file parser resolvers in the application." } - FileParserResolver createExtensionBasedResolverResolver(String fileExtension, Parser parser) { + FileParserResolver createExtensionBasedResolver(String fileExtension, Parser parser) { def resolver = Mock(FileParserResolver) resolver.supports(_ as FileRef) >> { FileRef fileRef1 -> { @@ -109,7 +116,11 @@ class FileParserProviderTest extends Specification { return false } } - resolver.getParser() >> parser + resolver.getParsingBundle() >> new FileParsingBundle( + parser, + Mock(Function), + Mock(Metadata), + Mock(ParseContext)) resolver } } From b744d63acfcf418ba965b68b002b6c2595b17e93 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 27 Sep 2024 01:52:38 +0400 Subject: [PATCH 73/81] Comment adding --- .../search/src/main/java/io/jmix/search/utils/FileProcessor.java | 1 + 1 file changed, 1 insertion(+) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index 3017a1c089..2646113177 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -68,6 +68,7 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Uns parsingBundle.metadata(), parsingBundle.parseContext()); } catch (OfficeXmlFileException e) { + //Protection from Office 2007 documents with old .doc extension. if (parser instanceof OfficeParser) { parser = new OOXMLParser(); try (InputStream secondStream = fileStorage.openStream(fileRef)) { From 1d056e65b6116919ecef654326671d70f11179a2 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 27 Sep 2024 18:12:31 +0400 Subject: [PATCH 74/81] Uppercase extensions' support --- ...LegacyMSOfficeDocumentsParserResolver.java | 2 +- .../MSOfficeDocumentsParserResolver.java | 2 +- .../OpenOfficeDocumentsParserResolver.java | 2 +- .../resolvers/PDFParserResolver.java | 2 +- .../resolvers/RTFParserResolver.java | 2 +- .../resolvers/TXTParserResolver.java | 2 +- .../FileParserProviderIntegrationTest.groovy | 27 +++++++------------ 7 files changed, 15 insertions(+), 24 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/LegacyMSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/LegacyMSOfficeDocumentsParserResolver.java index b5c953626e..141c4d4725 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/LegacyMSOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/LegacyMSOfficeDocumentsParserResolver.java @@ -30,7 +30,7 @@ public class LegacyMSOfficeDocumentsParserResolver extends AbstractExtensionBase @Override public Set getSupportedExtensions() { - return Set.of("doc", "xls"); + return Set.of("doc", "xls", "DOC", "XLS"); } @Override diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java index 4c91513203..e9fdade13c 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java @@ -30,7 +30,7 @@ public class MSOfficeDocumentsParserResolver extends AbstractExtensionBasedFileP @Override public Set getSupportedExtensions() { - return Set.of("docx", "xlsx"); + return Set.of("docx", "xlsx", "DOCX", "XLSX"); } @Override diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java index 42c65b6a48..9668ae3a2d 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/OpenOfficeDocumentsParserResolver.java @@ -30,7 +30,7 @@ public class OpenOfficeDocumentsParserResolver extends AbstractExtensionBasedFil @Override public Set getSupportedExtensions() { - return Set.of("odt", "ods"); + return Set.of("odt", "ods", "ODT", "ODS"); } @Override diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java index 588a3b83de..ca50cb685e 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/PDFParserResolver.java @@ -30,7 +30,7 @@ public class PDFParserResolver extends AbstractExtensionBasedFileParserResolver @Override public Set getSupportedExtensions() { - return Set.of("pdf"); + return Set.of("pdf", "PDF"); } @Override diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java index f95e9f4f15..0ee3b86f87 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/RTFParserResolver.java @@ -30,7 +30,7 @@ public class RTFParserResolver extends AbstractExtensionBasedFileParserResolver @Override public Set getSupportedExtensions() { - return Set.of("rtf"); + return Set.of("rtf", "RTF"); } @Override diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java index 146fb7ef9d..b5fb962604 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/TXTParserResolver.java @@ -30,7 +30,7 @@ public class TXTParserResolver extends AbstractExtensionBasedFileParserResolver @Override public Set getSupportedExtensions() { - return Set.of("txt"); + return Set.of("txt", "TXT"); } @Override diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy index aef72381f7..155395ca2c 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy @@ -50,32 +50,23 @@ class FileParserProviderIntegrationTest extends Specification { where: extension | theClass "txt" | TXTParser + "TXT" | TXTParser "pdf" | PDFParser + "PDF" | PDFParser "rtf" | RTFParser + "RTF" | RTFParser "odt" | OpenDocumentParser + "ODT" | OpenDocumentParser "ods" | OpenDocumentParser + "ODS" | OpenDocumentParser "doc" | OfficeParser + "DOC" | OfficeParser "xls" | OfficeParser + "XLS" | OfficeParser "docx" | OOXMLParser + "DOCX" | OOXMLParser "xlsx" | OOXMLParser - } - - def "there is no appropriate resolver for the file if the file is with the capital letters"() { - given: - def provider = new FileParserProvider(getResolvers()) - - and: - def fileRef = Mock(FileRef) - fileRef.getFileName() >> "filename." + extension - - when: - provider.getParsingBundle(fileRef) - - then: - thrown(UnsupportedFileFormatException) - - where: - extension << ["TXT", "PDF", "RTF", "ODT", "ODS", "DOC", "XLS", "DOCX", "XLSX"] + "XLSX" | OOXMLParser } def "there is not appropriate resolver for the file"() { From be96ef6520eddc437ea1c790a014b1a2d92cb433 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 27 Sep 2024 19:18:30 +0400 Subject: [PATCH 75/81] FileParsingBundle -> FileParserKit --- .../AbstractExtensionBasedFileParserResolver.java | 4 ++-- .../{FileParsingBundle.java => FileParserKit.java} | 2 +- .../search/index/fileparsing/FileParserResolver.java | 6 +++--- .../io/jmix/search/utils/FileParserProvider.java | 6 +++--- .../java/io/jmix/search/utils/FileProcessor.java | 8 ++++---- ...stractExtensionBasedFileParserResolverTest.groovy | 12 ++++++------ .../io/jmix/search/utils/FileProcessorTest.groovy | 2 +- .../FileParserProviderIntegrationTest.groovy | 4 ++-- .../parserresolving/FileParserProviderTest.groovy | 12 ++++++------ 9 files changed, 28 insertions(+), 28 deletions(-) rename jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/{FileParsingBundle.java => FileParserKit.java} (97%) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java index 23a4708d86..21e735a7c0 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java @@ -67,8 +67,8 @@ protected String getSupportedExtensionsString(Set supportedExtensions) { } @Override - public FileParsingBundle getParsingBundle() { - return new FileParsingBundle( + public FileParserKit getParserKit() { + return new FileParserKit( getParser(), getBodyContentHandlerGenerator(), getMetadata(), diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParsingBundle.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserKit.java similarity index 97% rename from jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParsingBundle.java rename to jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserKit.java index 60629b424c..c7ca0b7d72 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParsingBundle.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserKit.java @@ -25,7 +25,7 @@ import java.io.StringWriter; import java.util.function.Function; -public record FileParsingBundle( +public record FileParserKit( @NotNull Parser parser, @NotNull Function bodyContentHandlerGenerator, @NotNull Metadata metadata, diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java index 9fa26c9ccc..3cb44b3110 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserResolver.java @@ -36,11 +36,11 @@ public interface FileParserResolver { String getCriteriaDescription(); /** - * Returns a complex object that contains the all necessary objects for the supported file type parsing. + * Returns a complex object that contains all necessary objects for the supported file type parsing. * - * @return an instance of a file parser + * @return an instance of a file parser kit */ - FileParsingBundle getParsingBundle(); + FileParserKit getParserKit(); /** * Returns the result of the checking if the file with the given fileRef is supported by the resolver or not. diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java index ce280c0d56..a9b871bd37 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileParserProvider.java @@ -19,7 +19,7 @@ import io.jmix.core.FileRef; import io.jmix.search.exception.UnsupportedFileFormatException; import io.jmix.search.index.fileparsing.FileParserResolver; -import io.jmix.search.index.fileparsing.FileParsingBundle; +import io.jmix.search.index.fileparsing.FileParserKit; import org.springframework.stereotype.Component; import java.util.ArrayList; @@ -41,7 +41,7 @@ public FileParserProvider(List fileParserResolvers) { this.fileParserResolvers = fileParserResolvers; } - public FileParsingBundle getParsingBundle(FileRef fileRef) throws UnsupportedFileFormatException { + public FileParserKit getParserKit(FileRef fileRef) throws UnsupportedFileFormatException { if (fileParserResolvers.isEmpty()) { throw new IllegalStateException(EMPTY_FILE_PARSER_RESOLVERS_LIST_MESSAGE); } @@ -52,7 +52,7 @@ public FileParsingBundle getParsingBundle(FileRef fileRef) throws UnsupportedFil for (FileParserResolver resolver : fileParserResolvers) { if (resolver.supports(fileRef)) { - return resolver.getParsingBundle(); + return resolver.getParserKit(); } messages.add(resolver.getCriteriaDescription()); } diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index 2646113177..94d16ab8ce 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -22,7 +22,7 @@ import io.jmix.core.common.util.Preconditions; import io.jmix.search.exception.FileParseException; import io.jmix.search.exception.UnsupportedFileFormatException; -import io.jmix.search.index.fileparsing.FileParsingBundle; +import io.jmix.search.index.fileparsing.FileParserKit; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; @@ -55,7 +55,7 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Uns Preconditions.checkNotNullArgument(fileRef); log.debug("Extract content of file {}", fileRef); FileStorage fileStorage = fileStorageLocator.getByName(fileRef.getStorageName()); - FileParsingBundle parsingBundle = getParsingBundle(fileRef); + FileParserKit parsingBundle = getParserKit(fileRef); Parser parser = parsingBundle.parser(); log.debug("Parser for file {}: {}", fileRef, parser); @@ -87,7 +87,7 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Uns return stringWriter.toString(); } - protected FileParsingBundle getParsingBundle(FileRef fileRef) throws UnsupportedFileFormatException { - return fileParserProvider.getParsingBundle(fileRef); + protected FileParserKit getParserKit(FileRef fileRef) throws UnsupportedFileFormatException { + return fileParserProvider.getParserKit(fileRef); } } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy index ec6b844d81..058d472cca 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -29,7 +29,7 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), Mock(Set)) expect: - resolver.getParsingBundle() != resolver.getParsingBundle() + resolver.getParserKit() != resolver.getParserKit() } def "getParsingBundle. Not the same objects inside of the bundles"() { @@ -37,13 +37,13 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), Mock(Set)) when: - def bundle1 = resolver.getParsingBundle() - def bundle2 = resolver.getParsingBundle() + def kit1 = resolver.getParserKit() + def kit2 = resolver.getParserKit() then: - bundle1.bodyContentHandlerGenerator() != null - !isTheSame(bundle1.metadata(), bundle2.metadata()) - !isTheSame(bundle1.parseContext(), bundle2.parseContext()) + kit1.bodyContentHandlerGenerator() != null + !isTheSame(kit1.metadata(), kit2.metadata()) + !isTheSame(kit1.parseContext(), kit2.parseContext()) } def "GetCriteriaDescription"() { diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy index 4d6087da34..3ed1995328 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest.groovy @@ -33,7 +33,7 @@ class FileProcessorTest extends Specification { and: FileParserProvider fileParserProvider = Mock() FileRef fileRefMock = Mock() - fileParserProvider.getParsingBundle(fileRefMock) >> { throw exception } + fileParserProvider.getParserKit(fileRefMock) >> { throw exception } FileProcessor fileProcessor = new FileProcessor(storageLocatorMock, fileParserProvider) when: diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy index 155395ca2c..be391054ca 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderIntegrationTest.groovy @@ -45,7 +45,7 @@ class FileParserProviderIntegrationTest extends Specification { fileRef.getFileName() >> "filename." + extension expect: - provider.getParsingBundle(fileRef).parser().getClass() == theClass + provider.getParserKit(fileRef).parser().getClass() == theClass where: extension | theClass @@ -78,7 +78,7 @@ class FileParserProviderIntegrationTest extends Specification { fileRef.getFileName() >> "filename." + extension when: - provider.getParsingBundle(fileRef) + provider.getParserKit(fileRef) then: thrown(UnsupportedFileFormatException) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy index 67d7b80365..d2bdbd6ede 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy @@ -19,7 +19,7 @@ package io.jmix.search.utils.parserresolving import io.jmix.core.FileRef import io.jmix.search.exception.UnsupportedFileFormatException import io.jmix.search.index.fileparsing.FileParserResolver -import io.jmix.search.index.fileparsing.FileParsingBundle +import io.jmix.search.index.fileparsing.FileParserKit import io.jmix.search.utils.FileParserProvider import org.apache.tika.metadata.Metadata import org.apache.tika.parser.ParseContext @@ -46,7 +46,7 @@ class FileParserProviderTest extends Specification { def parserProvider = new FileParserProvider(List.of(resolver, resolver2)) when: - parserProvider.getParsingBundle(fileRef) + parserProvider.getParserKit(fileRef) then: def exception = thrown(UnsupportedFileFormatException) @@ -66,7 +66,7 @@ class FileParserProviderTest extends Specification { def resolver2 = createExtensionBasedResolver("rtf", parser2) def resolver3 = Mock(FileParserResolver) resolver3.supports(_ as FileRef) >> true; - resolver3.getParsingBundle() >> new FileParsingBundle(parser3, + resolver3.getParserKit() >> new FileParserKit(parser3, Mock(Function), Mock(Metadata), Mock(ParseContext)) @@ -75,7 +75,7 @@ class FileParserProviderTest extends Specification { def parserProvider = new FileParserProvider(List.of(resolver1, resolver2, resolver3)) when: - def resolvedParser = parserProvider.getParsingBundle(fileRef).parser() + def resolvedParser = parserProvider.getParserKit(fileRef).parser() then: resolvedParser != null @@ -99,7 +99,7 @@ class FileParserProviderTest extends Specification { def resolverManager = new FileParserProvider(emptyList()) when: - resolverManager.getParsingBundle(fileRef) + resolverManager.getParserKit(fileRef) then: def exception = thrown(IllegalStateException) @@ -116,7 +116,7 @@ class FileParserProviderTest extends Specification { return false } } - resolver.getParsingBundle() >> new FileParsingBundle( + resolver.getParserKit() >> new FileParserKit( parser, Mock(Function), Mock(Metadata), From 11453d86d28a752e4c86ff9012c5003e14ed246f Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 27 Sep 2024 19:32:26 +0400 Subject: [PATCH 76/81] BodyContentHandler -> ContentHandler --- .../AbstractExtensionBasedFileParserResolver.java | 7 ++++--- .../io/jmix/search/index/fileparsing/FileParserKit.java | 4 ++-- .../src/main/java/io/jmix/search/utils/FileProcessor.java | 2 +- .../AbstractExtensionBasedFileParserResolverTest.groovy | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java index 21e735a7c0..ea2c0893f3 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java @@ -23,6 +23,7 @@ import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.sax.BodyContentHandler; +import org.xml.sax.ContentHandler; import java.io.StringWriter; import java.util.Set; @@ -70,7 +71,7 @@ protected String getSupportedExtensionsString(Set supportedExtensions) { public FileParserKit getParserKit() { return new FileParserKit( getParser(), - getBodyContentHandlerGenerator(), + getContentHandlerGenerator(), getMetadata(), getParseContext()); } @@ -81,9 +82,9 @@ public FileParserKit getParserKit() { protected abstract Parser getParser(); /** - * Returns a function for the BodyContentHandler generating that is necessary for the given file parsing. + * Returns a function for the ContentHandler generating that is necessary for the given file parsing. */ - protected Function getBodyContentHandlerGenerator() { + protected Function getContentHandlerGenerator() { return stringWriter -> new BodyContentHandler(stringWriter); } diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserKit.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserKit.java index c7ca0b7d72..2c6ecbc218 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserKit.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/FileParserKit.java @@ -20,13 +20,13 @@ import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; -import org.apache.tika.sax.BodyContentHandler; +import org.xml.sax.ContentHandler; import java.io.StringWriter; import java.util.function.Function; public record FileParserKit( @NotNull Parser parser, - @NotNull Function bodyContentHandlerGenerator, + @NotNull Function contentHandlerGenerator, @NotNull Metadata metadata, @NotNull ParseContext parseContext) {} diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index 94d16ab8ce..4d3adbd2a5 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -64,7 +64,7 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Uns try (InputStream stream = fileStorage.openStream(fileRef)) { parser.parse( stream, - parsingBundle.bodyContentHandlerGenerator().apply(stringWriter), + parsingBundle.contentHandlerGenerator().apply(stringWriter), parsingBundle.metadata(), parsingBundle.parseContext()); } catch (OfficeXmlFileException e) { diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy index 058d472cca..aaa56c62d3 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -41,7 +41,7 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { def kit2 = resolver.getParserKit() then: - kit1.bodyContentHandlerGenerator() != null + kit1.contentHandlerGenerator() != null !isTheSame(kit1.metadata(), kit2.metadata()) !isTheSame(kit1.parseContext(), kit2.parseContext()) } From 59c6a8811df8ae9d78a3387b9fc43a7cd5e3a391 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 27 Sep 2024 20:06:40 +0400 Subject: [PATCH 77/81] MSOfficeDocumentsParserResolver --- .../resolvers/MSOfficeDocumentsParserResolver.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java index e9fdade13c..23c9d74314 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/resolvers/MSOfficeDocumentsParserResolver.java @@ -17,7 +17,9 @@ package io.jmix.search.index.fileparsing.resolvers; import io.jmix.search.index.fileparsing.AbstractExtensionBasedFileParserResolver; +import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; +import org.apache.tika.parser.microsoft.OfficeParserConfig; import org.apache.tika.parser.microsoft.ooxml.OOXMLParser; import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; @@ -37,4 +39,15 @@ public Set getSupportedExtensions() { public Parser getParser() { return new OOXMLParser(); } + + @Override + protected ParseContext getParseContext() { + ParseContext parseContext = super.getParseContext(); + + OfficeParserConfig officeParserConfig = new OfficeParserConfig(); + officeParserConfig.setIncludeHeadersAndFooters(false); + parseContext.set(OfficeParserConfig.class, officeParserConfig); + + return parseContext; + } } \ No newline at end of file From 5319a28aa1545f37946819fcfc735ac7a5c78833 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 18 Sep 2025 18:48:47 +0400 Subject: [PATCH 78/81] A fixing of the wrong merging --- .../src/main/java/io/jmix/search/utils/FileProcessor.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java index 4d3adbd2a5..f1964b53f1 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java +++ b/jmix-search/search/src/main/java/io/jmix/search/utils/FileProcessor.java @@ -60,7 +60,6 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Uns log.debug("Parser for file {}: {}", fileRef, parser); StringWriter stringWriter = new StringWriter(); - ParseContext parseContext = createParseContext(); try (InputStream stream = fileStorage.openStream(fileRef)) { parser.parse( stream, @@ -73,7 +72,7 @@ public String extractFileContent(FileRef fileRef) throws FileParseException, Uns parser = new OOXMLParser(); try (InputStream secondStream = fileStorage.openStream(fileRef)) { stringWriter = new StringWriter(); - parser.parse(secondStream, new BodyContentHandler(stringWriter), new Metadata(), parseContext); + parser.parse(secondStream, new BodyContentHandler(stringWriter), new Metadata(), parsingBundle.parseContext()); } catch (Exception e1) { log.error("Unable to parse OOXML file '{}'", fileRef.getFileName(), e1); throw new FileParseException(fileRef.getFileName(), "Fail to parse OOXML file via OOXMLParser", e); From b24d7d3321281e99e5d1292f11c61959dd05c63c Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Thu, 18 Sep 2025 18:49:22 +0400 Subject: [PATCH 79/81] Not necessary util method --- ...xtensionBasedFileParserResolverTest.groovy | 6 ++--- .../index/fileparsing/ObjectsComparator.java | 23 ------------------- 2 files changed, 2 insertions(+), 27 deletions(-) delete mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/ObjectsComparator.java diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy index aaa56c62d3..aa87a8f538 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -20,8 +20,6 @@ import io.jmix.core.FileRef import org.apache.tika.parser.Parser import spock.lang.Specification -import static io.jmix.search.index.fileparsing.ObjectsComparator.isTheSame - class AbstractExtensionBasedFileParserResolverTest extends Specification { def "getParsingBundle. Different bundles are returned each time"() { @@ -42,8 +40,8 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { then: kit1.contentHandlerGenerator() != null - !isTheSame(kit1.metadata(), kit2.metadata()) - !isTheSame(kit1.parseContext(), kit2.parseContext()) + !kit1.metadata().is(kit2.metadata()) + !kit1.parseContext().is(kit2.parseContext()) } def "GetCriteriaDescription"() { diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/ObjectsComparator.java b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/ObjectsComparator.java deleted file mode 100644 index b2697e07a7..0000000000 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/ObjectsComparator.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2024 Haulmont. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.jmix.search.index.fileparsing; - -public class ObjectsComparator { - public static boolean isTheSame(Object object, Object object2){ - return object == object2; - } -} From 6f600d98227ba6415f1bcfbbd5b294d7f1c39ce7 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 19 Sep 2025 15:22:32 +0400 Subject: [PATCH 80/81] Tests correction --- ...tractExtensionBasedFileParserResolver.java | 2 +- ...xtensionBasedFileParserResolverTest.groovy | 10 ++--- .../search/utils/FileProcessorTest2.groovy | 45 ------------------- ...t.groovy => FileParserResolverTest.groovy} | 4 +- 4 files changed, 8 insertions(+), 53 deletions(-) delete mode 100644 jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest2.groovy rename jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/{FileParserProviderTest.groovy => FileParserResolverTest.groovy} (96%) diff --git a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java index ea2c0893f3..e4f14bd916 100644 --- a/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java +++ b/jmix-search/search/src/main/java/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolver.java @@ -47,7 +47,7 @@ public abstract class AbstractExtensionBasedFileParserResolver implements FilePa @Override public String getCriteriaDescription() { return String.format( - "Parser: %s. Supported extensions: %s.", + "File parser resolver: %s. Supported extensions: %s.", this.getClass().getSimpleName(), getSupportedExtensionsString(getSupportedExtensions())); } diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy index aa87a8f538..d681460fba 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/index/fileparsing/AbstractExtensionBasedFileParserResolverTest.groovy @@ -22,7 +22,7 @@ import spock.lang.Specification class AbstractExtensionBasedFileParserResolverTest extends Specification { - def "getParsingBundle. Different bundles are returned each time"() { + def "getParserKit. Different kits are returned each time"() { given: def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), Mock(Set)) @@ -30,7 +30,7 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { resolver.getParserKit() != resolver.getParserKit() } - def "getParsingBundle. Not the same objects inside of the bundles"() { + def "getParserKit. Not the same objects inside of the kits"() { given: def resolver = new TestExtensionBasedFileParserResolver(Mock(Parser), Mock(Set)) @@ -53,9 +53,9 @@ class AbstractExtensionBasedFileParserResolverTest extends Specification { where: extensions | criteriaDescription - ["ext1"] | "Parser: TestExtensionBasedFileParserResolver. Supported extensions: ext1." - ["ext1", "ext2"] | "Parser: TestExtensionBasedFileParserResolver. Supported extensions: ext1, ext2." - ["ext1", "ext2", "ext3"] | "Parser: TestExtensionBasedFileParserResolver. Supported extensions: ext1, ext2, ext3." + ["ext1"] | "File parser resolver: TestExtensionBasedFileParserResolver. Supported extensions: ext1." + ["ext1", "ext2"] | "File parser resolver: TestExtensionBasedFileParserResolver. Supported extensions: ext1, ext2." + ["ext1", "ext2", "ext3"] | "File parser resolver: TestExtensionBasedFileParserResolver. Supported extensions: ext1, ext2, ext3." } def "Supports"() { diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest2.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest2.groovy deleted file mode 100644 index 0a870213f1..0000000000 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/FileProcessorTest2.groovy +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2024 Haulmont. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.jmix.search.utils - -import io.jmix.core.FileRef -import io.jmix.core.FileStorageLocator -import io.jmix.search.exception.UnsupportedFileFormatException -import spock.lang.Specification - -class FileProcessorTest2 extends Specification { - def "ExtractFileContent"() { - given: - FileStorageLocator storageLocatorMock = Mock() - FileRef fileRefMock = Mock() - fileRefMock.getFileName() >> fileName - FileProcessor fileProcessor = new FileProcessor(storageLocatorMock) - - when: - fileProcessor.extractFileContent(fileRefMock) - - then: - def exception = thrown(UnsupportedFileFormatException) - exception.getMessage() == message - - where: - fileName | message - "file-name.sql" | "The file file-name.sql with the 'sql' extension is not supported." - "any-file.abc" | "The file any-file.abc with the 'abc' extension is not supported." - "any-file-without-extension" | "The file any-file-without-extension with the '' extension is not supported." - } -} diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverTest.groovy similarity index 96% rename from jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy rename to jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverTest.groovy index d2bdbd6ede..78dca0e0d2 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverTest.groovy @@ -29,7 +29,7 @@ import java.util.function.Function import static java.util.Collections.emptyList -class FileParserProviderTest extends Specification { +class FileParserResolverTest extends Specification { def "should throw UnsupportedFileExtensionException when the given file of unsupported type"() { given: @@ -106,7 +106,7 @@ class FileParserProviderTest extends Specification { exception.getMessage() == "There are no any file parser resolvers in the application." } - FileParserResolver createExtensionBasedResolver(String fileExtension, Parser parser) { + private FileParserResolver createExtensionBasedResolver(String fileExtension, Parser parser) { def resolver = Mock(FileParserResolver) resolver.supports(_ as FileRef) >> { FileRef fileRef1 -> { From b5633752c7a259930a4def97b62b4c69cbdbfcf0 Mon Sep 17 00:00:00 2001 From: aleksandrovpv Date: Fri, 19 Sep 2025 20:35:44 +0400 Subject: [PATCH 81/81] =?UTF-8?q?=D0=9E=D1=82=D0=BA=D0=B0=D1=82=20=D0=BD?= =?UTF-8?q?=D0=B5=D0=BA=D0=BE=D1=80=D1=80=D0=B5=D0=BA=D1=82=D0=BD=D0=BE?= =?UTF-8?q?=D0=B3=D0=BE=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B8=D0=BC=D0=B5=D0=BD?= =?UTF-8?q?=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...eParserResolverTest.groovy => FileParserProviderTest.groovy} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/{FileParserResolverTest.groovy => FileParserProviderTest.groovy} (98%) diff --git a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverTest.groovy b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy similarity index 98% rename from jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverTest.groovy rename to jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy index 78dca0e0d2..9987eb5881 100644 --- a/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserResolverTest.groovy +++ b/jmix-search/search/src/test/groovy/io/jmix/search/utils/parserresolving/FileParserProviderTest.groovy @@ -29,7 +29,7 @@ import java.util.function.Function import static java.util.Collections.emptyList -class FileParserResolverTest extends Specification { +class FileParserProviderTest extends Specification { def "should throw UnsupportedFileExtensionException when the given file of unsupported type"() { given: