diff --git a/pom.xml b/pom.xml index e2947ada..252e797f 100644 --- a/pom.xml +++ b/pom.xml @@ -33,16 +33,16 @@ - - snapshots - https://repository.apache.org/content/repositories/snapshots/ - - false - - - true - - + + snapshots + https://repository.apache.org/content/repositories/snapshots/ + + false + + + true + + @@ -109,20 +109,20 @@ - org.apache.maven.plugins - maven-javadoc-plugin - 3.2.0 - - 8 - - - - attach-javadocs - - jar - - - + org.apache.maven.plugins + maven-javadoc-plugin + 3.2.0 + + 8 + + + + attach-javadocs + + jar + + + org.apache.maven.plugins @@ -136,10 +136,7 @@ sign - - --pinentry-mode - loopback - + true @@ -159,166 +156,166 @@ technology.tabula.CommandLineApp - - - jar-with-dependencies - + + + jar-with-dependencies + - - - org.apache.maven.plugins - maven-surefire-plugin - 2.22.2 - - - -Xms1024m -Xmx2048m - - - - - org.apache.maven.plugins - maven-eclipse-plugin - 2.10 - - true - true - - - - - - - - release - - + org.apache.maven.plugins - maven-javadoc-plugin - 3.2.0 + maven-surefire-plugin + 2.22.2 - 8 + + -Xms1024m -Xmx2048m - - - attach-javadocs - - jar - - - - - org.apache.maven.plugins - maven-source-plugin - 3.2.1 - - - attach-sources - - jar-no-fork - - - - - - org.apache.maven.plugins - maven-gpg-plugin - 1.6 - - - sign-artifacts - verify - - sign - - - - - - - - + + + org.apache.maven.plugins + maven-eclipse-plugin + 2.10 + + true + true + + + + + + + + release + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.2.0 + + 8 + + + + attach-javadocs + + jar + + + + + + org.apache.maven.plugins + maven-source-plugin + 3.2.1 + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-gpg-plugin + 1.6 + + + sign-artifacts + verify + + sign + + + + + + + + - - - org.locationtech.jts - jts-core - 1.18.1 - + + + org.locationtech.jts + jts-core + 1.18.1 + - - org.slf4j - slf4j-api - 1.7.35 - + + org.slf4j + slf4j-api + 1.7.35 + - - org.slf4j - slf4j-simple - 1.7.32 - + + org.slf4j + slf4j-simple + 1.7.32 + - - 
org.apache.pdfbox - pdfbox - 2.0.25 - + + org.apache.pdfbox + pdfbox + 2.0.25 + - - org.bouncycastle - bcprov-jdk15on - 1.69 - + + org.bouncycastle + bcprov-jdk15on + 1.69 + - - org.bouncycastle - bcmail-jdk15on - 1.69 - + + org.bouncycastle + bcmail-jdk15on + 1.69 + - - junit - junit - 4.13.2 - test - + + junit + junit + 4.13.2 + test + - - commons-cli - commons-cli - 1.4 - + + commons-cli + commons-cli + 1.4 + - - org.apache.commons - commons-csv - 1.9.0 - + + org.apache.commons + commons-csv + 1.9.0 + - - com.google.code.gson - gson - 2.8.7 - + + com.google.code.gson + gson + 2.8.7 + - - com.github.jai-imageio - jai-imageio-core - 1.4.0 - + + com.github.jai-imageio + jai-imageio-core + 1.4.0 + - - com.github.jai-imageio - jai-imageio-jpeg2000 - 1.4.0 - + + com.github.jai-imageio + jai-imageio-jpeg2000 + 1.4.0 + - - org.apache.pdfbox - jbig2-imageio - 3.0.3 - - + + org.apache.pdfbox + jbig2-imageio + 3.0.3 + + - + \ No newline at end of file diff --git a/src/main/java/technology/tabula/CloneFactoryTabula.java b/src/main/java/technology/tabula/CloneFactoryTabula.java new file mode 100644 index 00000000..64a95ca1 --- /dev/null +++ b/src/main/java/technology/tabula/CloneFactoryTabula.java @@ -0,0 +1,7 @@ +package technology.tabula; + +public class CloneFactoryTabula { + public TabulaInterface makeDuplicate(TabulaInterface tabulaInterface){ + return tabulaInterface.newDuplicate(); + } +} diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 3a6773a9..618b2f5b 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -21,10 +21,7 @@ import technology.tabula.detectors.NurminenDetectionAlgorithm; import technology.tabula.extractors.BasicExtractionAlgorithm; import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; -import technology.tabula.writers.CSVWriter; -import technology.tabula.writers.JSONWriter; -import 
technology.tabula.writers.TSVWriter; -import technology.tabula.writers.Writer; +import technology.tabula.writers.*; public class CommandLineApp { @@ -460,12 +457,13 @@ public List extractTablesSpreadsheet(Page page) { private void writeTables(List
tables, Appendable out) throws IOException { Writer writer = null; + FactoryWriter factoryWriter= new FactoryWriter(); switch (outputFormat) { case CSV: - writer = new CSVWriter(); + writer = factoryWriter.getInstance("CSVWriter"); break; case JSON: - writer = new JSONWriter(); + writer= factoryWriter.getInstance("JsonWriter"); break; case TSV: writer = new TSVWriter(); diff --git a/src/main/java/technology/tabula/Line.java b/src/main/java/technology/tabula/Line.java index 31d10529..df3aed43 100644 --- a/src/main/java/technology/tabula/Line.java +++ b/src/main/java/technology/tabula/Line.java @@ -6,7 +6,7 @@ // TODO this class seems superfluous - get rid of it @SuppressWarnings("serial") -public class Line extends Rectangle { +public class Line extends Rectangle implements TabulaInterface { List textChunks = new ArrayList<>(); public static final Character[] WHITE_SPACE_CHARS = { ' ', '\t', '\r', '\n', '\f' }; @@ -73,4 +73,16 @@ static Line removeRepeatedCharacters(Line line, Character c, int minRunLength) { return rv; } + + /** + * Returns a copy of this line made with super.clone() that owns a separate textChunks list. + * @return a new Line whose textChunks list holds the same chunk objects as this line's list + */ + @Override + public TabulaInterface newDuplicate() { + Line line = (Line) super.clone(); + // super.clone() is shallow: without this the copy and the original would share one mutable list. + line.textChunks = new ArrayList<>(this.textChunks); + return line; + } } diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index ed74d14a..0e2655b7 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -222,7 +222,7 @@ public List getTexts() { public Rectangle getTextBounds() { List texts = this.getText(); if (!texts.isEmpty()) { - return Utils.bounds(texts); + return Utils.getInstance().bounds(texts); } else { return new Rectangle(); } @@ -249,7 +249,7 @@ public List getRulings() { } // TODO: Move as a static method to the Ruling class? 
- Utils.snapPoints(rulings, minCharWidth, minCharHeight); + Utils.getInstance().snapPoints(rulings, minCharWidth, minCharHeight); verticalRulingLines = getCollapsedVerticalRulings(); horizontalRulingLines = getCollapsedHorizontalRulings(); diff --git a/src/main/java/technology/tabula/TableWithRulingLines.java b/src/main/java/technology/tabula/TableWithRulingLines.java index cde0ce72..61f95bd1 100644 --- a/src/main/java/technology/tabula/TableWithRulingLines.java +++ b/src/main/java/technology/tabula/TableWithRulingLines.java @@ -81,7 +81,7 @@ public int compare(Cell arg0, Cell arg1) { while (iter.hasNext()) { c = iter.next(); - if (!Utils.feq(c.getTop(), lastTop)) { + if (!Utils.getInstance().feq(c.getTop(), lastTop)) { lastRow = new ArrayList<>(); rv.add(lastRow); } diff --git a/src/main/java/technology/tabula/TabulaInterface.java b/src/main/java/technology/tabula/TabulaInterface.java new file mode 100644 index 00000000..c065d31a --- /dev/null +++ b/src/main/java/technology/tabula/TabulaInterface.java @@ -0,0 +1,5 @@ +package technology.tabula; + +public interface TabulaInterface { + public TabulaInterface newDuplicate(); +} diff --git a/src/main/java/technology/tabula/TextChunk.java b/src/main/java/technology/tabula/TextChunk.java index 9f5adbd5..662130f5 100644 --- a/src/main/java/technology/tabula/TextChunk.java +++ b/src/main/java/technology/tabula/TextChunk.java @@ -10,6 +10,7 @@ @SuppressWarnings("serial") public class TextChunk extends RectangularTextContainer { public static final TextChunk EMPTY = new TextChunk(0, 0, 0, 0); + private static Line cloneLine; // List textElements = new ArrayList<>(); public TextChunk(float top, float left, float width, float height) { @@ -330,7 +331,10 @@ public static List groupByLines(List textChunks) { float bbwidth = Rectangle.boundingBoxOf(textChunks).width; + //Line l = new Line(); + CloneFactoryTabula cloneFactoryTabula = new CloneFactoryTabula(); Line l = new Line(); + cloneLine = 
(Line)cloneFactoryTabula.makeDuplicate(l); l.addTextChunk(textChunks.get(0)); textChunks.remove(0); lines.add(l); diff --git a/src/main/java/technology/tabula/TextStripper.java b/src/main/java/technology/tabula/TextStripper.java index 557fa439..24f6bf10 100644 --- a/src/main/java/technology/tabula/TextStripper.java +++ b/src/main/java/technology/tabula/TextStripper.java @@ -63,7 +63,7 @@ protected void writeString(String string, List textPositions) thro float wos = textPosition.getWidthOfSpace(); - TextElement te = new TextElement(Utils.round(textPosition.getYDirAdj() - h, 2), + TextElement te = new TextElement(Utils.getInstance().round(textPosition.getYDirAdj() - h, 2), Utils.round(textPosition.getXDirAdj(), 2), Utils.round(textPosition.getWidthDirAdj(), 2), Utils.round(textPosition.getHeightDir(), 2), textPosition.getFont(), textPosition.getFontSizeInPt(), c, // workaround a possible bug in PDFBox: diff --git a/src/main/java/technology/tabula/Utils.java b/src/main/java/technology/tabula/Utils.java index 00814429..d7510352 100644 --- a/src/main/java/technology/tabula/Utils.java +++ b/src/main/java/technology/tabula/Utils.java @@ -19,6 +19,19 @@ * @author manuel */ public class Utils { + + private Utils(){ + + } + + private static Utils instance; + public static Utils getInstance(){ + if (instance == null){ + instance = new Utils(); + } + return instance; + } + public static boolean within(double first, double second, double variance) { return second < first + variance && second > first - variance; } diff --git a/src/main/java/technology/tabula/writers/FactoryWriter.java b/src/main/java/technology/tabula/writers/FactoryWriter.java new file mode 100644 index 00000000..047b1a52 --- /dev/null +++ b/src/main/java/technology/tabula/writers/FactoryWriter.java @@ -0,0 +1,19 @@ +package technology.tabula.writers; + +/** Creates {@link Writer} instances from a writer name. */ +public class FactoryWriter { + /** + * Looks up a writer by name; names are compared by value (equals), not by reference. + * @param string writer name, either "JsonWriter" or "CSVWriter" + * @return a new JSONWriter or CSVWriter, or null when the name is anything else (including null) + */ + public Writer getInstance(String string){ + if("JsonWriter".equals(string)){ + return new JSONWriter(); + } + if (
"CSVWriter".equals(string)){ + return new CSVWriter(); + } + return null; + } +} diff --git a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java b/src/test/java/technology/tabula/TestSpreadsheetExtractor.java index f8bd4074..ebd1a15b 100644 --- a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java +++ b/src/test/java/technology/tabula/TestSpreadsheetExtractor.java @@ -21,6 +21,7 @@ import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; import technology.tabula.writers.CSVWriter; +import technology.tabula.writers.FactoryWriter; import technology.tabula.writers.JSONWriter; public class TestSpreadsheetExtractor { @@ -203,7 +204,7 @@ public void testSpanningCells() throws IOException { StringBuilder sb = new StringBuilder(); - (new JSONWriter()).write(sb, tables); + (new FactoryWriter().getInstance("JsonWriter")).write(sb, tables); assertEquals(expectedJson, sb.toString()); page.getPDDoc().close(); } diff --git a/src/test/java/technology/tabula/TestWriters.java b/src/test/java/technology/tabula/TestWriters.java index 961d57af..eed350ef 100644 --- a/src/test/java/technology/tabula/TestWriters.java +++ b/src/test/java/technology/tabula/TestWriters.java @@ -13,6 +13,7 @@ import technology.tabula.extractors.BasicExtractionAlgorithm; import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; import technology.tabula.writers.CSVWriter; +import technology.tabula.writers.FactoryWriter; import technology.tabula.writers.JSONWriter; import technology.tabula.writers.TSVWriter; @@ -63,7 +64,7 @@ public void testJSONWriter() throws IOException { String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json"); Table table = this.getTable(); StringBuilder sb = new StringBuilder(); - (new JSONWriter()).write(sb, table); + (new FactoryWriter().getInstance("JsonWriter")).write(sb, table); String s = sb.toString(); assertEquals(expectedJson, s); } @@ -76,7 +77,7 @@ public void 
testJSONSerializeInfinity() throws IOException { Table table = sea.extract(page).get(0); StringBuilder sb = new StringBuilder(); - (new JSONWriter()).write(sb, table); + (new FactoryWriter().getInstance("JsonWriter")).write(sb, table); String s = sb.toString(); assertEquals(expectedJson, s); } @@ -99,7 +100,7 @@ public void testJSONSerializeTwoTables() throws IOException { String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/twotables.json"); List
tables = this.getTables(); StringBuilder sb = new StringBuilder(); - (new JSONWriter()).write(sb, tables); + (new FactoryWriter().getInstance("JsonWriter")).write(sb, tables); String s = sb.toString(); assertEquals(expectedJson, s);