From e5d8ca836c319037c743c12eebbe3101c6998f7b Mon Sep 17 00:00:00 2001 From: Alex Wilson Date: Wed, 7 Sep 2016 11:17:41 -0400 Subject: [PATCH 001/200] Add logging when an error occurs in batch mode --- src/main/java/technology/tabula/CommandLineApp.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 940bb4d4..e2c94aba 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -113,7 +113,12 @@ public boolean accept(File dir, String name) { for (File pdfFile : pdfs) { File outputFile = new File(getOutputFilename(pdfFile)); - extractFileInto(pdfFile, outputFile); + try { + extractFileInto(pdfFile, outputFile); + } catch (ParseException e) { + System.err.println("Caught exception while processing file: " + pdfFile.toString()); + throw e; + } } } From a747549eb66bab9992a97957e0cbf98f8026a477 Mon Sep 17 00:00:00 2001 From: Alex Wilson Date: Wed, 7 Sep 2016 12:57:23 -0400 Subject: [PATCH 002/200] Add -b option to README list of start-up mitagations --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 755497df..eed9b0ca 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,7 @@ You can also integrate `tabula-java` with any JVM language. For Java examples, s JVM start-up time is a lot of the cost of the `tabula` command, so if you're trying to extract many tables from PDFs, you have a few options for speeding it up: + - the -b option, which allows you to convert all pdfs in a given directory - the [drip](https://github.com/ninjudd/drip) utility - the [Ruby](http://github.com/tabulapdf/tabula-extractor), [R](https://github.com/leeper/tabulizer), and [Node.js](https://github.com/ezodude/tabula-js) bindings - writing your own program in any JVM language (Java, JRuby, Scala) that imports tabula-java. 
From 3fa8a324e4200b80010d3ebd95696cd361d8e483 Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Tue, 29 Aug 2017 21:04:44 +0200 Subject: [PATCH 003/200] Merged the implementations of compareTo in Rectangle and TextChunk Since both classes implement Comparable, their compareTo implementations have to be compatible (previously the result of comparing a Rectangle with an RTL TextChunk depended on which compareTo implementation happened to be used). --- .../java/technology/tabula/Rectangle.java | 24 ++++++++++++----- .../java/technology/tabula/TextChunk.java | 27 ------------------- 2 files changed, 17 insertions(+), 34 deletions(-) diff --git a/src/main/java/technology/tabula/Rectangle.java b/src/main/java/technology/tabula/Rectangle.java index 41b79374..16510b73 100644 --- a/src/main/java/technology/tabula/Rectangle.java +++ b/src/main/java/technology/tabula/Rectangle.java @@ -19,20 +19,30 @@ public Rectangle(float top, float left, float width, float height) { } @Override + /* + We're comparing based on ordering in the logical ordering of text here. + Assuming identical Y-axis positions, if TextChunk A has a lower X-axis + than TextChunk B, then A is "before" it -- iff this is LTR text. Otherwise, + it is A is after B. 
+ */ public int compareTo(Rectangle other) { double thisBottom = this.getBottom(); double otherBottom = other.getBottom(); int rv; - if (this.equals(other)) return 0; + if (this.equals(other)) return 0; - if (this.verticalOverlap(other) > VERTICAL_COMPARISON_THRESHOLD) { + if (this.verticalOverlap(other) > VERTICAL_COMPARISON_THRESHOLD) { rv = java.lang.Double.compare(this.getX(), other.getX()); - } - else { - rv = java.lang.Double.compare(thisBottom, otherBottom); - } - return rv; + + // reverse the ordering if both TextChunks are RTL + if (this.isLtrDominant() == -1 && other.isLtrDominant() == -1) { + rv = -1 * rv; + } + } else { + rv = java.lang.Double.compare(thisBottom, otherBottom); + } + return rv; } // I'm bad at Java and need this for fancy sorting in technology.tabula.TextChunk. diff --git a/src/main/java/technology/tabula/TextChunk.java b/src/main/java/technology/tabula/TextChunk.java index 046f10d9..ed8614db 100644 --- a/src/main/java/technology/tabula/TextChunk.java +++ b/src/main/java/technology/tabula/TextChunk.java @@ -126,33 +126,6 @@ public TextChunk groupByDirectionality(Boolean isLtrDominant) { return new TextChunk(everything); } - @Override - /* - We're comparing based on ordering in the logical ordering of text here. - Assuming identical Y-axis positions, if TextChunk A has a lower X-axis - than TextChunk B, then A is "before" it -- iff this is LTR text. Otherwise, - it is A is after B. 
- */ - public int compareTo(Rectangle other) { - double thisBottom = this.getBottom(); - double otherBottom = other.getBottom(); - int rv; - - if (this.equals(other)) return 0; - - if (this.verticalOverlap(other) > VERTICAL_COMPARISON_THRESHOLD) { - rv = java.lang.Double.compare(this.getX(), other.getX()); - - // reverse the ordering if both TextChunks are RTL - if (this.isLtrDominant() == -1 && other.isLtrDominant() == -1) { - rv = -1 * rv; - } - } else { - rv = java.lang.Double.compare(thisBottom, otherBottom); - } - return rv; - } - public int isLtrDominant() { int ltrCnt = 0; int rtlCnt = 0; From 6fe0c8948de52564de2002b991d0bd32e0972abc Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Tue, 29 Aug 2017 22:51:22 +0200 Subject: [PATCH 004/200] Added test cases for the Rectangle.coparateTo() transitivity issue --- .../java/technology/tabula/TestRectangle.java | 41 ++++++++++++++++++- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/src/test/java/technology/tabula/TestRectangle.java b/src/test/java/technology/tabula/TestRectangle.java index c763fcbc..5e156f1d 100644 --- a/src/test/java/technology/tabula/TestRectangle.java +++ b/src/test/java/technology/tabula/TestRectangle.java @@ -234,6 +234,43 @@ public void testGetBoundingBox() { } - - + + @Test + public void testTransitiveComparison1() { + // +-------+ + // | | + // | A | +-------+ + // | | | | + // +-------+ | B | +-------+ + // | | | | + // +-------+ | C | + // | | + // +-------+ + Rectangle a = new Rectangle(0,0,2,2); + Rectangle b = new Rectangle(1,1,2,2); + Rectangle c = new Rectangle(2,2,2,2); + assertTrue(a.compareTo(b) < 0); + assertTrue(b.compareTo(c) < 0); + assertTrue(a.compareTo(c) < 0); + } + + @Test + public void testTransitiveComparison2() { + // +-------+ + // | | + // +-------+ | C | + // | | | | + // +-------+ | B | +-------+ + // | | | | + // | A | +-------+ + // | | + // +-------+ + Rectangle a = new Rectangle(2,0,2,2); + Rectangle b = new Rectangle(1,1,2,2); + Rectangle c = 
new Rectangle(0,2,2,2); + assertTrue(a.compareTo(b) < 0); + assertTrue(b.compareTo(c) < 0); + assertTrue(a.compareTo(c) < 0); + } + } From 3310c0e6f919bc8649ef9599fe00a7ccd9c98c23 Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Tue, 29 Aug 2017 23:10:21 +0200 Subject: [PATCH 005/200] Cleaned up classes CSVWriter and TSVWriter --- .../technology/tabula/writers/CSVWriter.java | 62 ++++++++----------- .../technology/tabula/writers/TSVWriter.java | 20 ++---- 2 files changed, 31 insertions(+), 51 deletions(-) diff --git a/src/main/java/technology/tabula/writers/CSVWriter.java b/src/main/java/technology/tabula/writers/CSVWriter.java index 16382585..2466cc6e 100644 --- a/src/main/java/technology/tabula/writers/CSVWriter.java +++ b/src/main/java/technology/tabula/writers/CSVWriter.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import org.apache.commons.csv.CSVPrinter; @@ -11,47 +12,34 @@ import technology.tabula.Table; public class CSVWriter implements Writer { - - CSVPrinter printer; - private boolean useLineReturns = true; - -// public CSVWriter() { -// super(); -// } -// -// public CSVWriter(boolean useLineReturns) { -// super(); -// this.useLineReturns = useLineReturns; -// } - - void createWriter(Appendable out) { - try { - this.printer = new CSVPrinter(out, CSVFormat.EXCEL); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - - @Override - public void write(Appendable out, Table table) throws IOException { - this.createWriter(out); - for (List row: table.getRows()) { - List cells = new ArrayList(row.size()); - for (RectangularTextContainer tc: row) { - cells.add(tc.getText()); - } - this.printer.printRecord(cells); - } - printer.flush(); - } + + public CSVWriter() { + this(CSVFormat.EXCEL); + } + + protected CSVWriter(CSVFormat format) { + this.format = format; + } + + private final CSVFormat format; + + @Override + public void 
write(Appendable out, Table table) throws IOException { + write(out, Collections.singletonList(table)); + } @Override public void write(Appendable out, List tables) throws IOException { - for (Table table : tables) { - write(out, table); + try (CSVPrinter printer = new CSVPrinter(out, format)) { + for (Table table : tables) { + for (List row : table.getRows()) { + List cells = new ArrayList<>(row.size()); + for (RectangularTextContainer tc : row) cells.add(tc.getText()); + printer.printRecord(cells); + } + } + printer.flush(); } - } } diff --git a/src/main/java/technology/tabula/writers/TSVWriter.java b/src/main/java/technology/tabula/writers/TSVWriter.java index 225ba980..16f05ad3 100644 --- a/src/main/java/technology/tabula/writers/TSVWriter.java +++ b/src/main/java/technology/tabula/writers/TSVWriter.java @@ -1,20 +1,12 @@ package technology.tabula.writers; -import java.io.IOException; - import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVPrinter; public class TSVWriter extends CSVWriter { - - @Override - void createWriter(Appendable out) { - try { - this.printer = new CSVPrinter(out, CSVFormat.TDF); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - + + public TSVWriter() { + super(CSVFormat.TDF); + } + + } From 90c52327a9f9865542619c305928a8798a03b97c Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Tue, 29 Aug 2017 23:31:44 +0200 Subject: [PATCH 006/200] Cleanup of JSONWriter and related classes --- .../RectangularTextContainerSerializer.java | 31 +++++++++ .../tabula/json/TableSerializer.java | 64 ++++++++++--------- .../tabula/json/TextChunkSerializer.java | 27 -------- .../technology/tabula/writers/JSONWriter.java | 62 ++++++++---------- .../technology/tabula/writers/TSVWriter.java | 1 - 5 files changed, 92 insertions(+), 93 deletions(-) create mode 100644 src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java delete mode 100644 
src/main/java/technology/tabula/json/TextChunkSerializer.java diff --git a/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java b/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java new file mode 100644 index 00000000..31bf1060 --- /dev/null +++ b/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java @@ -0,0 +1,31 @@ +package technology.tabula.json; + +import java.lang.reflect.Type; + +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonSerializationContext; +import com.google.gson.JsonSerializer; + +import technology.tabula.RectangularTextContainer; + +public final class RectangularTextContainerSerializer implements JsonSerializer { + + public static final RectangularTextContainerSerializer INSTANCE = new RectangularTextContainerSerializer(); + + private RectangularTextContainerSerializer() { + // singleton + } + + @Override + public JsonElement serialize(RectangularTextContainer src, Type typeOfSrc, JsonSerializationContext context) { + JsonObject result = new JsonObject(); + result.addProperty("top", src.getTop()); + result.addProperty("left", src.getLeft()); + result.addProperty("width", src.getWidth()); + result.addProperty("height", src.getHeight()); + result.addProperty("text", src.getText()); + return result; + } + +} \ No newline at end of file diff --git a/src/main/java/technology/tabula/json/TableSerializer.java b/src/main/java/technology/tabula/json/TableSerializer.java index 970c7310..e3f211c9 100644 --- a/src/main/java/technology/tabula/json/TableSerializer.java +++ b/src/main/java/technology/tabula/json/TableSerializer.java @@ -12,33 +12,39 @@ import com.google.gson.JsonSerializationContext; import com.google.gson.JsonSerializer; -public class TableSerializer implements JsonSerializer
{ - - @Override - public JsonElement serialize(Table table, Type type, - JsonSerializationContext context) { - - JsonObject object = new JsonObject(); - if( table.getExtractionAlgorithm() == null){ - object.addProperty("extraction_method", ""); - }else{ - object.addProperty("extraction_method", (table.getExtractionAlgorithm()).toString()); - } - object.addProperty("top", table.getTop()); - object.addProperty("left", table.getLeft()); - object.addProperty("width", table.getWidth()); - object.addProperty("height", table.getHeight()); - - JsonArray jsonDataArray = new JsonArray(); - for (List row: table.getRows()) { - JsonArray jsonRowArray = new JsonArray(); - for (RectangularTextContainer textChunk: row) { - jsonRowArray.add(context.serialize(textChunk)); - } - jsonDataArray.add(jsonRowArray); - } - object.add("data", jsonDataArray); - - return object; - } +public final class TableSerializer implements JsonSerializer
{ + + public static final TableSerializer INSTANCE = new TableSerializer(); + + private TableSerializer() { + // singleton + } + + @Override + public JsonElement serialize(Table src, Type typeOfSrc, JsonSerializationContext context) { + + JsonObject result = new JsonObject(); + + if (src.getExtractionAlgorithm() == null) { + result.addProperty("extraction_method", ""); + } else { + result.addProperty("extraction_method", (src.getExtractionAlgorithm()).toString()); + } + result.addProperty("top", src.getTop()); + result.addProperty("left", src.getLeft()); + result.addProperty("width", src.getWidth()); + result.addProperty("height", src.getHeight()); + + JsonArray data; + result.add("data", data = new JsonArray()); + + for (List srcRow : src.getRows()) { + JsonArray row = new JsonArray(); + for (RectangularTextContainer textChunk : srcRow) row.add(context.serialize(textChunk)); + data.add(row); + } + + return result; + } + } diff --git a/src/main/java/technology/tabula/json/TextChunkSerializer.java b/src/main/java/technology/tabula/json/TextChunkSerializer.java deleted file mode 100644 index 5f4252c1..00000000 --- a/src/main/java/technology/tabula/json/TextChunkSerializer.java +++ /dev/null @@ -1,27 +0,0 @@ -package technology.tabula.json; - -import java.lang.reflect.Type; - -import technology.tabula.RectangularTextContainer; - -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import com.google.gson.JsonSerializationContext; -import com.google.gson.JsonSerializer; - -public class TextChunkSerializer implements JsonSerializer { - - @Override - public JsonElement serialize(RectangularTextContainer textChunk, Type arg1, - JsonSerializationContext context) { - JsonObject object = new JsonObject(); - - object.addProperty("top", textChunk.getTop()); - object.addProperty("left", textChunk.getLeft()); - object.addProperty("width", textChunk.getWidth()); - object.addProperty("height", textChunk.getHeight()); - object.addProperty("text", 
textChunk.getText()); - - return object; - } -} \ No newline at end of file diff --git a/src/main/java/technology/tabula/writers/JSONWriter.java b/src/main/java/technology/tabula/writers/JSONWriter.java index cf9a843b..7d2fd4eb 100644 --- a/src/main/java/technology/tabula/writers/JSONWriter.java +++ b/src/main/java/technology/tabula/writers/JSONWriter.java @@ -9,55 +9,45 @@ import technology.tabula.Table; import technology.tabula.TextChunk; import technology.tabula.json.TableSerializer; -import technology.tabula.json.TextChunkSerializer; +import technology.tabula.json.RectangularTextContainerSerializer; import com.google.gson.ExclusionStrategy; import com.google.gson.FieldAttributes; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonArray; +import com.google.gson.JsonSerializer; public class JSONWriter implements Writer { - class TableSerializerExclusionStrategy implements ExclusionStrategy { + private static final ExclusionStrategy ALLCLASSES_SKIPNONPUBLIC = new ExclusionStrategy() { + @Override public boolean shouldSkipClass(Class c) { return false; } + @Override public boolean shouldSkipField(FieldAttributes fa) { return !fa.hasModifier(Modifier.PUBLIC); } + }; - @Override - public boolean shouldSkipClass(Class arg0) { - return false; - } + + final Gson gson; - @Override - public boolean shouldSkipField(FieldAttributes fa) { - return !fa.hasModifier(Modifier.PUBLIC); - } - } + public JSONWriter() { + gson = new GsonBuilder().addSerializationExclusionStrategy(ALLCLASSES_SKIPNONPUBLIC) + .registerTypeAdapter(Table.class, TableSerializer.INSTANCE) + .registerTypeAdapter(RectangularTextContainer.class, new RectangularTextContainerSerializer()) + .registerTypeAdapter(Cell.class, RectangularTextContainerSerializer.INSTANCE) + .registerTypeAdapter(TextChunk.class, RectangularTextContainerSerializer.INSTANCE).create(); + } + @Override + public void write(Appendable out, Table table) throws IOException { + 
out.append(gson.toJson(table, Table.class)); + } - final Gson gson; + public void write(Appendable out, List
tables) throws IOException { + JsonArray array = new JsonArray(); + for (Table table : tables) { + array.add(gson.toJsonTree(table, Table.class)); + } + out.append(gson.toJson(array)); - public JSONWriter() { - gson = new GsonBuilder() - .addSerializationExclusionStrategy(new TableSerializerExclusionStrategy()) - .registerTypeAdapter(Table.class, new TableSerializer()) - .registerTypeAdapter(RectangularTextContainer.class, new TextChunkSerializer()) - .registerTypeAdapter(Cell.class, new TextChunkSerializer()) - .registerTypeAdapter(TextChunk.class, new TextChunkSerializer()) - .create(); - } + } - @Override - public void write(Appendable out, Table table) throws IOException { - - out.append(gson.toJson(table, Table.class)); - } - - public void write(Appendable out, List
tables) throws IOException { - - JsonArray array = new JsonArray(); - for (Table table : tables) { - array.add(gson.toJsonTree(table, Table.class)); - } - out.append(gson.toJson(array)); - - } } diff --git a/src/main/java/technology/tabula/writers/TSVWriter.java b/src/main/java/technology/tabula/writers/TSVWriter.java index 16f05ad3..c3ddb737 100644 --- a/src/main/java/technology/tabula/writers/TSVWriter.java +++ b/src/main/java/technology/tabula/writers/TSVWriter.java @@ -8,5 +8,4 @@ public TSVWriter() { super(CSVFormat.TDF); } - } From d5761a381ef4e5568b2fe6a30652f6fef969630c Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Tue, 29 Aug 2017 23:33:16 +0200 Subject: [PATCH 007/200] JSONWriter clanup --- .../technology/tabula/writers/JSONWriter.java | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/main/java/technology/tabula/writers/JSONWriter.java b/src/main/java/technology/tabula/writers/JSONWriter.java index 7d2fd4eb..eaf77e77 100644 --- a/src/main/java/technology/tabula/writers/JSONWriter.java +++ b/src/main/java/technology/tabula/writers/JSONWriter.java @@ -4,19 +4,18 @@ import java.lang.reflect.Modifier; import java.util.List; -import technology.tabula.Cell; -import technology.tabula.RectangularTextContainer; -import technology.tabula.Table; -import technology.tabula.TextChunk; -import technology.tabula.json.TableSerializer; -import technology.tabula.json.RectangularTextContainerSerializer; - import com.google.gson.ExclusionStrategy; import com.google.gson.FieldAttributes; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonArray; -import com.google.gson.JsonSerializer; + +import technology.tabula.Cell; +import technology.tabula.RectangularTextContainer; +import technology.tabula.Table; +import technology.tabula.TextChunk; +import technology.tabula.json.RectangularTextContainerSerializer; +import technology.tabula.json.TableSerializer; public class JSONWriter implements Writer 
{ @@ -31,7 +30,7 @@ public class JSONWriter implements Writer { public JSONWriter() { gson = new GsonBuilder().addSerializationExclusionStrategy(ALLCLASSES_SKIPNONPUBLIC) .registerTypeAdapter(Table.class, TableSerializer.INSTANCE) - .registerTypeAdapter(RectangularTextContainer.class, new RectangularTextContainerSerializer()) + .registerTypeAdapter(RectangularTextContainer.class, RectangularTextContainerSerializer.INSTANCE) .registerTypeAdapter(Cell.class, RectangularTextContainerSerializer.INSTANCE) .registerTypeAdapter(TextChunk.class, RectangularTextContainerSerializer.INSTANCE).create(); } @@ -41,7 +40,7 @@ public void write(Appendable out, Table table) throws IOException { out.append(gson.toJson(table, Table.class)); } - public void write(Appendable out, List
tables) throws IOException { + @Override public void write(Appendable out, List
tables) throws IOException { JsonArray array = new JsonArray(); for (Table table : tables) { array.add(gson.toJsonTree(table, Table.class)); From 6bfbe07f8b750028092456e014ebf23d845ddf48 Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Wed, 30 Aug 2017 00:14:23 +0200 Subject: [PATCH 008/200] More cleanup --- .../java/technology/tabula/Rectangle.java | 1 + .../tabula/RectangularTextContainer.java | 55 ++++++++++--------- .../tabula/json/RulingSerializer.java | 13 ++--- .../technology/tabula/writers/JSONWriter.java | 27 ++++----- 4 files changed, 46 insertions(+), 50 deletions(-) diff --git a/src/main/java/technology/tabula/Rectangle.java b/src/main/java/technology/tabula/Rectangle.java index 16510b73..d188944f 100644 --- a/src/main/java/technology/tabula/Rectangle.java +++ b/src/main/java/technology/tabula/Rectangle.java @@ -26,6 +26,7 @@ public Rectangle(float top, float left, float width, float height) { it is A is after B. */ public int compareTo(Rectangle other) { + // FIXME this needs fixing - see https://github.com/tabulapdf/tabula-java/issues/116#issuecomment-325798979 double thisBottom = this.getBottom(); double otherBottom = other.getBottom(); int rv; diff --git a/src/main/java/technology/tabula/RectangularTextContainer.java b/src/main/java/technology/tabula/RectangularTextContainer.java index f9e0036f..5f4d3716 100644 --- a/src/main/java/technology/tabula/RectangularTextContainer.java +++ b/src/main/java/technology/tabula/RectangularTextContainer.java @@ -5,31 +5,32 @@ @SuppressWarnings("serial") public abstract class RectangularTextContainer extends Rectangle { - public RectangularTextContainer(float top, float left, float width, float height) { - super(top, left, width, height); - } - - public String toString() { - StringBuilder sb = new StringBuilder(); - String s = super.toString(); - sb.append(s.substring(0, s.length() - 1)); - sb.append(String.format(",text=%s]", this.getText() == null ? 
"null" : "\"" + this.getText() + "\"")); - return sb.toString(); - } - - public RectangularTextContainer merge(RectangularTextContainer other) { - if (this.compareTo(other) < 0) { - this.getTextElements().addAll(other.getTextElements()); - - } - else { - this.getTextElements().addAll(0, other.getTextElements()); - } - super.merge(other); - return this; - } - - public abstract String getText(); - public abstract String getText(boolean useLineReturns); - public abstract List getTextElements(); + public RectangularTextContainer(float top, float left, float width, float height) { + super(top, left, width, height); + } + + public RectangularTextContainer merge(RectangularTextContainer other) { + if (compareTo(other) < 0) { + this.getTextElements().addAll(other.getTextElements()); + } else { + this.getTextElements().addAll(0, other.getTextElements()); + } + super.merge(other); + return this; + } + + public abstract String getText(); + + public abstract String getText(boolean useLineReturns); + + public abstract List getTextElements(); + + @Override public String toString() { + StringBuilder sb = new StringBuilder(); + String s = super.toString(); + sb.append(s.substring(0, s.length() - 1)); + sb.append(String.format(",text=%s]", this.getText() == null ? 
"null" : "\"" + this.getText() + "\"")); + return sb.toString(); + } + } diff --git a/src/main/java/technology/tabula/json/RulingSerializer.java b/src/main/java/technology/tabula/json/RulingSerializer.java index 8a3fe297..19fe8bcd 100644 --- a/src/main/java/technology/tabula/json/RulingSerializer.java +++ b/src/main/java/technology/tabula/json/RulingSerializer.java @@ -2,21 +2,18 @@ import java.lang.reflect.Type; -import technology.tabula.Ruling; - import com.google.gson.JsonElement; -import com.google.gson.JsonObject; import com.google.gson.JsonSerializationContext; import com.google.gson.JsonSerializer; +import technology.tabula.Ruling; + +@Deprecated +/** @deprecated This class is unused (Aug 2017) and will be removed at some later point */ public class RulingSerializer implements JsonSerializer { @Override - public JsonElement serialize(Ruling arg0, Type arg1, - JsonSerializationContext arg2) { - - JsonObject object = new JsonObject(); - + public JsonElement serialize(Ruling src, Type typeOfSrc, JsonSerializationContext context) { return null; } diff --git a/src/main/java/technology/tabula/writers/JSONWriter.java b/src/main/java/technology/tabula/writers/JSONWriter.java index eaf77e77..59e9b274 100644 --- a/src/main/java/technology/tabula/writers/JSONWriter.java +++ b/src/main/java/technology/tabula/writers/JSONWriter.java @@ -24,29 +24,26 @@ public class JSONWriter implements Writer { @Override public boolean shouldSkipField(FieldAttributes fa) { return !fa.hasModifier(Modifier.PUBLIC); } }; - - final Gson gson; - - public JSONWriter() { - gson = new GsonBuilder().addSerializationExclusionStrategy(ALLCLASSES_SKIPNONPUBLIC) - .registerTypeAdapter(Table.class, TableSerializer.INSTANCE) - .registerTypeAdapter(RectangularTextContainer.class, RectangularTextContainerSerializer.INSTANCE) - .registerTypeAdapter(Cell.class, RectangularTextContainerSerializer.INSTANCE) - .registerTypeAdapter(TextChunk.class, RectangularTextContainerSerializer.INSTANCE).create(); - } - 
@Override public void write(Appendable out, Table table) throws IOException { - out.append(gson.toJson(table, Table.class)); + out.append(gson().toJson(table, Table.class)); } @Override public void write(Appendable out, List
tables) throws IOException { + Gson gson = gson(); JsonArray array = new JsonArray(); - for (Table table : tables) { - array.add(gson.toJsonTree(table, Table.class)); - } + for (Table table : tables) array.add(gson.toJsonTree(table, Table.class)); out.append(gson.toJson(array)); + } + private static Gson gson() { + return new GsonBuilder() + .addSerializationExclusionStrategy(ALLCLASSES_SKIPNONPUBLIC) + .registerTypeAdapter(Table.class, TableSerializer.INSTANCE) + .registerTypeAdapter(RectangularTextContainer.class, RectangularTextContainerSerializer.INSTANCE) + .registerTypeAdapter(Cell.class, RectangularTextContainerSerializer.INSTANCE) + .registerTypeAdapter(TextChunk.class, RectangularTextContainerSerializer.INSTANCE) + .create(); } } From d299ceebf9079f91e4690b7b824aeda61b157984 Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Wed, 30 Aug 2017 00:21:07 +0200 Subject: [PATCH 009/200] Even more cleanup --- .../tabula/json/RectangularTextContainerSerializer.java | 4 ++-- src/test/java/technology/tabula/TestRectangle.java | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java b/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java index 31bf1060..f6359b91 100644 --- a/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java +++ b/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java @@ -9,7 +9,7 @@ import technology.tabula.RectangularTextContainer; -public final class RectangularTextContainerSerializer implements JsonSerializer { +public final class RectangularTextContainerSerializer implements JsonSerializer> { public static final RectangularTextContainerSerializer INSTANCE = new RectangularTextContainerSerializer(); @@ -18,7 +18,7 @@ private RectangularTextContainerSerializer() { } @Override - public JsonElement serialize(RectangularTextContainer src, Type typeOfSrc, JsonSerializationContext context) { + 
public JsonElement serialize(RectangularTextContainer src, Type typeOfSrc, JsonSerializationContext context) { JsonObject result = new JsonObject(); result.addProperty("top", src.getTop()); result.addProperty("left", src.getLeft()); diff --git a/src/test/java/technology/tabula/TestRectangle.java b/src/test/java/technology/tabula/TestRectangle.java index 5e156f1d..2be0830f 100644 --- a/src/test/java/technology/tabula/TestRectangle.java +++ b/src/test/java/technology/tabula/TestRectangle.java @@ -9,6 +9,7 @@ import java.util.List; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; public class TestRectangle { @@ -254,7 +255,7 @@ public void testTransitiveComparison1() { assertTrue(a.compareTo(c) < 0); } - @Test + @Test @Ignore public void testTransitiveComparison2() { // +-------+ // | | From d0b93b063e8fff09e78f56c7fdf1031d11171cfe Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Sun, 3 Sep 2017 02:55:12 +0200 Subject: [PATCH 010/200] Additional test case for #116 --- .../java/technology/tabula/TestRectangle.java | 166 ++++++++++-------- 1 file changed, 90 insertions(+), 76 deletions(-) diff --git a/src/test/java/technology/tabula/TestRectangle.java b/src/test/java/technology/tabula/TestRectangle.java index 2be0830f..72a9c0b9 100644 --- a/src/test/java/technology/tabula/TestRectangle.java +++ b/src/test/java/technology/tabula/TestRectangle.java @@ -1,10 +1,10 @@ package technology.tabula; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; import java.awt.geom.Point2D; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -13,227 +13,227 @@ import org.junit.Test; public class TestRectangle { - - + + @Test public void testCompareEqualsRectangles() { Rectangle first = new Rectangle(); Rectangle second = new Rectangle(); - + assertTrue(first.equals(second)); assertTrue(second.equals(first)); } - + @Test public void 
testCompareAlignedHorizontalRectangle() { Rectangle lower = new Rectangle(0f, 10f, 10f, 10f); Rectangle upper = new Rectangle(0f,20f, 10f, 10f); - + assertTrue(lower.compareTo(upper) < 0); } - + @Test public void testCompareAlignedVerticalRectangle() { Rectangle lower = new Rectangle(10f, 0f, 10f, 10f); Rectangle upper = new Rectangle(20f,0f, 10f, 10f); - + assertTrue(lower.compareTo(upper) < 0); } - + @Test public void testCompareVerticalOverlapRectangle() { Rectangle lower = new Rectangle(5f, 0f, 10f, 10f); Rectangle upper = new Rectangle(0f, 10f, 10f, 10f); - + assertTrue(lower.compareTo(upper) < 0); } - + @Test public void testCompareVerticalOverlapLessThresholdRectangle() { Rectangle lower = new Rectangle(0f, 10f, 10f, 10f); Rectangle upper = new Rectangle(9.8f, 0f, 10f, 10f); - + assertTrue(lower.compareTo(upper) < 0); } - - - + + + @Test public void testQuickSortOneUpperThanOther() { - + Rectangle lower = new Rectangle(175.72f, 72.72f, 1.67f, 1.52f); //, (Comma after AARON) Rectangle upper = new Rectangle(169.21f, 161.16f, 4.33f, 4.31f); // R (REGIONAL PULMONARY) - + assertTrue(lower.compareTo(upper) > 0); - + } - + @Test public void testQuickSortRectangleList() { - + //Testing wrong sorting // Expected: AARON, JOSHUA, N // but was: AARON JOSHUA N , , - Rectangle first = new Rectangle(172.92999267578125f, 51.47999954223633f, 4.0f, 4.309999942779541f); //A + Rectangle first = new Rectangle(172.92999267578125f, 51.47999954223633f, 4.0f, 4.309999942779541f); //A Rectangle second = new Rectangle(175.72000122070312f, 72.72000122070312f, 1.6699999570846558f, 1.5199999809265137f); //, Rectangle third = new Rectangle(172.92999267578125f, 96.36000061035156f, 4.0f, 4.309999942779541f); //A Rectangle fourth = new Rectangle(175.72000122070312f, 100.31999969482422f, 1.6699999570846558f, 1.5199999809265137f); //, Rectangle fifth = new Rectangle(172.92999267578125f, 103.68000030517578f, 4.329999923706055f, 4.309999942779541f); //N Rectangle sixth = new 
Rectangle(169.2100067138672f, 161.16000366210938f, 4.329999923706055f, 4.309999942779541f); //R - - List expectedList = new ArrayList(); + + List expectedList = new ArrayList<>(); expectedList.add(first); expectedList.add(sixth); expectedList.add(second); expectedList.add(third); expectedList.add(fourth); expectedList.add(fifth); - List toSortList = new ArrayList(); + List toSortList = new ArrayList<>(); toSortList.add(sixth); toSortList.add(second); toSortList.add(third); toSortList.add(fifth); toSortList.add(first); toSortList.add(fourth); - + Collections.sort(toSortList); - + assertEquals(expectedList, toSortList); } - + @Test public void testGetVerticalOverlapShouldReturnZero() { - + Rectangle lower = new Rectangle(10f, 0f, 10f, 10f); Rectangle upper = new Rectangle(20f,0f, 10f, 10f); - + float overlap = lower.verticalOverlap(upper); - + assertEquals(0f, overlap, 0); assertTrue(!lower.verticallyOverlaps(upper)); assertEquals(0f, lower.verticalOverlapRatio(upper), 0); assertEquals(0f, lower.overlapRatio(upper), 0); - + } - + @Test public void testGetVerticalOverlapShouldReturnMoreThanZero() { - + Rectangle lower = new Rectangle(15f, 10f, 10f, 10f); Rectangle upper = new Rectangle(20f, 0f, 10f, 10f); - + float overlap = lower.verticalOverlap(upper); - + assertEquals(5f, overlap, 0); assertTrue(lower.verticallyOverlaps(upper)); assertEquals(0.5f, lower.verticalOverlapRatio(upper), 0); assertEquals(0f, lower.overlapRatio(upper), 0); - + } - + @Test public void testGetHorizontalOverlapShouldReturnZero() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(10f, 10f, 10f, 10f); - + assertTrue(!one.horizontallyOverlaps(two)); assertEquals(0f, one.overlapRatio(two), 0); - + } - + @Test public void testGetHorizontalOverlapShouldReturnMoreThanZero() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(10f, 5f, 10f, 10f); - + assertTrue(one.horizontallyOverlaps(two)); assertEquals(5f, 
one.horizontalOverlap(two), 0); assertEquals(0f, one.overlapRatio(two), 0); - + } - + @Test public void testGetOverlapShouldReturnMoreThanZero() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(5f, 5f, 10f, 10f); - + assertTrue(one.horizontallyOverlaps(two)); assertTrue(one.verticallyOverlaps(two)); assertEquals(5f, one.horizontalOverlap(two), 0); assertEquals(5f, one.verticalOverlap(two), 0); assertEquals((25f/175), one.overlapRatio(two), 0); - + } - + @Test public void testMergeNoOverlappingRectangles() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(0f, 10f, 10f, 10f); - + one.merge(two); - + assertEquals(20f, one.getWidth(), 0); assertEquals(10f, one.getHeight(), 0); assertEquals(0f, one.getLeft(), 0); assertEquals(0f, one.getTop(), 0); assertEquals(10f, one.getBottom(), 0); assertEquals(20f * 10f, one.getArea(), 0); - + } - + @Test public void testMergeOverlappingRectangles() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(5f, 5f, 10f, 10f); - + one.merge(two); - + assertEquals(15f, one.getWidth(), 0); assertEquals(15f, one.getHeight(), 0); assertEquals(0f, one.getLeft(), 0); assertEquals(0f, one.getTop(), 0); - + } - + @Test public void testRectangleGetPoints() { - + Rectangle one = new Rectangle(10f, 20f, 30f, 40f); - + Point2D[] points = one.getPoints(); - - Point2D[] expectedPoints = new Point2D[]{ + + Point2D[] expectedPoints = new Point2D[]{ new Point2D.Float(20f, 10f), new Point2D.Float(50f, 10f), new Point2D.Float(50f, 50f), new Point2D.Float(20f, 50f) - + }; - + Assert.assertArrayEquals(expectedPoints, points); - + } - + @Test public void testGetBoundingBox() { - - List rectangles = new ArrayList(); + + List rectangles = new ArrayList<>(); rectangles.add(new Rectangle(0f, 0f, 10f, 10f)); rectangles.add(new Rectangle(20f, 30f, 10f, 10f)); - + Rectangle boundingBoxOf = Rectangle.boundingBoxOf(rectangles); - + assertEquals(new Rectangle(0f, 0f, 
40f, 30f), boundingBoxOf); - - - - + + + + } @Test @@ -274,4 +274,18 @@ public void testTransitiveComparison2() { assertTrue(a.compareTo(c) < 0); } + @Test @Ignore + public void testWellDefinedComparison1() { + Rectangle a = new Rectangle(2,0,2,2); + Rectangle b = new Rectangle(1,1,2,2); + Rectangle c = new Rectangle(0,2,2,2); + List l1 = new ArrayList<>(Arrays.asList(b, a, c)); + List l2 = new ArrayList<>(Arrays.asList(c, b, a)); + QuickSort.sort(l1); + QuickSort.sort(l2); + assertEquals(l1.get(0), l2.get(0)); + assertEquals(l1.get(1), l2.get(1)); + assertEquals(l1.get(2), l2.get(2)); + } + } From 083925a0835a0a854030771a83310c14f2018f78 Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Sun, 3 Sep 2017 05:55:18 +0200 Subject: [PATCH 011/200] mvn eclipse:eclipse will now download sources --- pom.xml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 2145aa06..63608059 100644 --- a/pom.xml +++ b/pom.xml @@ -103,7 +103,6 @@ - org.apache.maven.plugins maven-javadoc-plugin @@ -160,6 +159,15 @@ -Xms1024m -Xmx2048m + + + org.apache.maven.plugins + maven-eclipse-plugin + + true + true + + @@ -279,7 +287,7 @@ com.github.jai-imageio jai-imageio-core 1.3.1 - + com.github.jai-imageio From 19d8c84af3165879a83dff0eecb90b302e21f097 Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Sun, 3 Sep 2017 05:56:13 +0200 Subject: [PATCH 012/200] deprecated a few seemengly unused methods in Page --- src/main/java/technology/tabula/Page.java | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index 8177921b..2f5419ad 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -116,11 +116,13 @@ public List getText(Rectangle area) { return this.spatial_index.contains(area); } - public List getText(float top, float left, float bottom, float right) { + /** @deprecated use 
{@linkplain #getText(Rectangle)} instead */ + @Deprecated public List getText(float top, float left, float bottom, float right) { return this.getText(new Rectangle(top, left, right - left, bottom - top)); } - public Integer getRotation() { + /** @deprecated with no replacement */ + @Deprecated public Integer getRotation() { return rotation; } @@ -128,7 +130,8 @@ public int getPageNumber() { return pageNumber; } - public List getTexts() { + /** @deprecated use {@linkplain #getText()} instead */ + @Deprecated public List getTexts() { return texts; } @@ -213,11 +216,13 @@ public List getUnprocessedRulings() { return this.rulings; } - public float getMinCharWidth() { + /** @deprecated with no replacement */ + @Deprecated public float getMinCharWidth() { return minCharWidth; } - public float getMinCharHeight() { + /** @deprecated with no replacement */ + @Deprecated public float getMinCharHeight() { return minCharHeight; } @@ -225,11 +230,13 @@ public PDPage getPDPage() { return pdPage; } - public RectangleSpatialIndex getSpatialIndex() { + /** @deprecated with no replacement */ + @Deprecated public RectangleSpatialIndex getSpatialIndex() { return this.spatial_index; } - public boolean hasText() { + /** @deprecated with no replacement */ + @Deprecated public boolean hasText() { return this.texts.size() > 0; } From c4fa2d892f2e9a7c97a80f95f0567157272eb2b9 Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Sun, 3 Sep 2017 06:03:15 +0200 Subject: [PATCH 013/200] Redundant type arguments removed --- src/main/java/technology/tabula/CommandLineApp.java | 2 +- .../java/technology/tabula/ProjectionProfile.java | 4 ++-- src/main/java/technology/tabula/QuickSort.java | 2 +- .../java/technology/tabula/TableWithRulingLines.java | 4 ++-- src/main/java/technology/tabula/debug/Debug.java | 4 ++-- .../tabula/detectors/NurminenDetectionAlgorithm.java | 12 ++++++------ .../tabula/extractors/BasicExtractionAlgorithm.java | 4 ++-- .../java/technology/tabula/TestTableDetection.java | 6 
+++--- 8 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 21df07b5..0feb9ea5 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -246,7 +246,7 @@ private static TableExtractor createExtractor(CommandLine line) throws ParseExce public static List parseFloatList(String option) throws ParseException { String[] f = option.split(","); - List rv = new ArrayList(); + List rv = new ArrayList<>(); try { for (int i = 0; i < f.length; i++) { rv.add(Float.parseFloat(f[i])); diff --git a/src/main/java/technology/tabula/ProjectionProfile.java b/src/main/java/technology/tabula/ProjectionProfile.java index d80f18b0..aae1801f 100644 --- a/src/main/java/technology/tabula/ProjectionProfile.java +++ b/src/main/java/technology/tabula/ProjectionProfile.java @@ -105,7 +105,7 @@ public float[] findVerticalSeparators(float minColumnWidth) { public float[] findHorizontalSeparators(float minRowHeight) { boolean foundShorter = false; - List horizontalSeparators = new ArrayList(); + List horizontalSeparators = new ArrayList<>(); for (Ruling r: area.getHorizontalRulings()) { System.out.println(r.length() / this.textBounds.getWidth()); if (r.length() / this.textBounds.getWidth() >= 0.95) { @@ -136,7 +136,7 @@ public float[] findHorizontalSeparators(float minRowHeight) { } private static List findSeparatorsFromProjection(float[] derivative) { - List separators = new ArrayList(); + List separators = new ArrayList<>(); Integer lastNeg = null; float s; boolean positiveSlope = false; diff --git a/src/main/java/technology/tabula/QuickSort.java b/src/main/java/technology/tabula/QuickSort.java index 21d26dd5..29491028 100644 --- a/src/main/java/technology/tabula/QuickSort.java +++ b/src/main/java/technology/tabula/QuickSort.java @@ -58,7 +58,7 @@ public static void sort(List list) private static void 
quicksort(List list, Comparator cmp) { - Stack stack = new Stack(); + Stack stack = new Stack<>(); stack.push(0); stack.push(list.size()); while (!stack.isEmpty()) { diff --git a/src/main/java/technology/tabula/TableWithRulingLines.java b/src/main/java/technology/tabula/TableWithRulingLines.java index 54de67e2..df1527d3 100644 --- a/src/main/java/technology/tabula/TableWithRulingLines.java +++ b/src/main/java/technology/tabula/TableWithRulingLines.java @@ -79,14 +79,14 @@ public int compare(Cell arg0, Cell arg1) { Iterator iter = cells.iterator(); c = iter.next(); lastTop = (float) c.getTop(); - lastRow = new ArrayList(); + lastRow = new ArrayList<>(); lastRow.add(c); rv.add(lastRow); while (iter.hasNext()) { c = iter.next(); if (!Utils.feq(c.getTop(), lastTop)) { - lastRow = new ArrayList(); + lastRow = new ArrayList<>(); rv.add(lastRow); } lastRow.add(c); diff --git a/src/main/java/technology/tabula/debug/Debug.java b/src/main/java/technology/tabula/debug/Debug.java index 0c62784d..6619f2ab 100644 --- a/src/main/java/technology/tabula/debug/Debug.java +++ b/src/main/java/technology/tabula/debug/Debug.java @@ -314,7 +314,7 @@ public static void main(String[] args) throws IOException { try { // parse the command line arguments CommandLine line = parser.parse(buildOptions(), args); - List pages = new ArrayList(); + List pages = new ArrayList<>(); if (line.hasOption('p')) { pages = Utils.parsePagesOption(line.getOptionValue('p')); } else { @@ -353,7 +353,7 @@ public static void main(String[] args) throws IOException { PDDocument document = PDDocument.load(pdfFile); int numPages = document.getNumberOfPages(); - pages = new ArrayList(numPages); + pages = new ArrayList<>(numPages); for (int i = 1; i <= numPages; i++) { pages.add(i); diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java index 69f50c9d..6b946333 100644 --- 
a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java @@ -517,13 +517,13 @@ private TextEdges getTextEdges(List lines) { // get all text edges (lines that align with the left, middle and right of chunks of text) that extend // uninterrupted over at least REQUIRED_TEXT_LINES_FOR_EDGE lines of text - List leftTextEdges = new ArrayList(); - List midTextEdges = new ArrayList(); - List rightTextEdges = new ArrayList(); + List leftTextEdges = new ArrayList<>(); + List midTextEdges = new ArrayList<>(); + List rightTextEdges = new ArrayList<>(); - Map> currLeftEdges = new HashMap>(); - Map> currMidEdges = new HashMap>(); - Map> currRightEdges = new HashMap>(); + Map> currLeftEdges = new HashMap<>(); + Map> currMidEdges = new HashMap<>(); + Map> currRightEdges = new HashMap<>(); for (Line textRow : lines) { for (TextChunk text : textRow.getTextElements()) { diff --git a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java index 3b22f44e..b6a7bcb5 100644 --- a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java @@ -54,7 +54,7 @@ public int compare(Ruling arg0, Ruling arg1) { return Double.compare(arg0.getLeft(), arg1.getLeft()); } }); - columns = new ArrayList(this.verticalRulings.size()); + columns = new ArrayList<>(this.verticalRulings.size()); for (Ruling vr: this.verticalRulings) { columns.add(vr.getLeft()); } @@ -150,7 +150,7 @@ public static List columnPositions(List lines) { } } - List rv = new ArrayList(); + List rv = new ArrayList<>(); for (Rectangle r: regions) { rv.add((float) r.getRight()); } diff --git a/src/test/java/technology/tabula/TestTableDetection.java b/src/test/java/technology/tabula/TestTableDetection.java index 33e8cff9..899bfcec 100644 --- 
a/src/test/java/technology/tabula/TestTableDetection.java +++ b/src/test/java/technology/tabula/TestTableDetection.java @@ -169,7 +169,7 @@ public void testDetectionOfTables() throws Exception { ObjectExtractor extractor = new ObjectExtractor(pdfDocument); // parse expected tables from the ground truth dataset - Map> expectedTables = new HashMap>(); + Map> expectedTables = new HashMap<>(); int numExpectedTables = 0; @@ -188,7 +188,7 @@ public void testDetectionOfTables() throws Exception { List pageTables = expectedTables.get(page); if (pageTables == null) { - pageTables = new ArrayList(); + pageTables = new ArrayList<>(); expectedTables.put(page, pageTables); } @@ -207,7 +207,7 @@ public void testDetectionOfTables() throws Exception { } // now find tables detected by tabula-java - Map> detectedTables = new HashMap>(); + Map> detectedTables = new HashMap<>(); // the algorithm we're going to be testing NurminenDetectionAlgorithm detectionAlgorithm = new NurminenDetectionAlgorithm(); From 8247954b1d7505f529170b7dad5bf289e015c7aa Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Sun, 3 Sep 2017 06:34:21 +0200 Subject: [PATCH 014/200] Cosmetic changes Removed several redundant casts and type parameters --- .../technology/tabula/CommandLineApp.java | 9 ++-- src/main/java/technology/tabula/Line.java | 2 +- .../technology/tabula/ObjectExtractor.java | 2 +- .../tabula/ObjectExtractorStreamEngine.java | 2 +- src/main/java/technology/tabula/Page.java | 16 +++---- .../technology/tabula/ProjectionProfile.java | 6 +-- .../java/technology/tabula/Rectangle.java | 22 ++++----- .../tabula/RectangleSpatialIndex.java | 12 ++--- src/main/java/technology/tabula/Ruling.java | 10 ++-- src/main/java/technology/tabula/Table.java | 8 ++-- .../tabula/TableWithRulingLines.java | 8 ++-- .../java/technology/tabula/TextChunk.java | 28 +++++------ .../java/technology/tabula/TextElement.java | 32 ++++++------- src/main/java/technology/tabula/Utils.java | 22 ++++----- 
.../java/technology/tabula/debug/Debug.java | 13 +++-- .../detectors/NurminenDetectionAlgorithm.java | 30 ++++++------ .../extractors/BasicExtractionAlgorithm.java | 12 ++--- .../SpreadsheetExtractionAlgorithm.java | 48 +++++++++---------- .../technology/tabula/TestBasicExtractor.java | 4 +- src/test/java/technology/tabula/TestCell.java | 2 +- src/test/java/technology/tabula/TestLine.java | 2 +- .../tabula/TestProjectionProfile.java | 12 ++--- .../tabula/TestRectangleSpatialIndex.java | 2 +- .../tabula/TestSpreadsheetExtractor.java | 31 ++++++------ .../technology/tabula/TestTableDetection.java | 7 +-- .../technology/tabula/TestTextElement.java | 27 +++++------ .../java/technology/tabula/TestUtils.java | 6 +-- .../java/technology/tabula/TestWriters.java | 2 +- 28 files changed, 187 insertions(+), 190 deletions(-) diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 0feb9ea5..66de8b05 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -151,7 +151,7 @@ private void extractFile(File pdfFile, Appendable outFile) throws ParseException try { pdfDocument = this.password == null ? PDDocument.load(pdfFile) : PDDocument.load(pdfFile, this.password); PageIterator pageIterator = getPageIterator(pdfDocument); - List
tables = new ArrayList
(); + List
tables = new ArrayList<>(); while (pageIterator.hasNext()) { Page page = pageIterator.next(); @@ -262,7 +262,6 @@ private static void printHelp() { formatter.printHelp("tabula", BANNER, buildOptions(), "", true); } - @SuppressWarnings("static-access") public static Options buildOptions() { Options o = new Options(); @@ -362,7 +361,7 @@ public List
extractTables(Page page) { case SPREADSHEET: return extractTablesSpreadsheet(page); default: - return new ArrayList
(); + return new ArrayList<>(); } } @@ -372,7 +371,7 @@ public List
extractTablesBasic(Page page) { // currently we only have a detector that uses spreadsheets to find table areas DetectionAlgorithm detector = new NurminenDetectionAlgorithm(); List guesses = detector.detect(page); - List
tables = new ArrayList
(); + List
tables = new ArrayList<>(); for (Rectangle guessRect : guesses) { Page guess = page.getArea(guessRect); @@ -389,7 +388,7 @@ public List
extractTablesBasic(Page page) { public List
extractTablesSpreadsheet(Page page) { // TODO add useLineReturns - return (List
) spreadsheetExtractor.extract(page); + return spreadsheetExtractor.extract(page); } } diff --git a/src/main/java/technology/tabula/Line.java b/src/main/java/technology/tabula/Line.java index ed2f6895..2dc8ce86 100644 --- a/src/main/java/technology/tabula/Line.java +++ b/src/main/java/technology/tabula/Line.java @@ -8,7 +8,7 @@ @SuppressWarnings("serial") public class Line extends Rectangle { - List textChunks = new ArrayList(); + List textChunks = new ArrayList<>(); public static final Character[] WHITE_SPACE_CHARS = { ' ', '\t', '\r', '\n', '\f' }; diff --git a/src/main/java/technology/tabula/ObjectExtractor.java b/src/main/java/technology/tabula/ObjectExtractor.java index 2b97a5a8..1b6e91a6 100644 --- a/src/main/java/technology/tabula/ObjectExtractor.java +++ b/src/main/java/technology/tabula/ObjectExtractor.java @@ -9,7 +9,7 @@ public class ObjectExtractor { private final PDDocument pdfDocument; - public ObjectExtractor(PDDocument pdfDocument) throws IOException { + public ObjectExtractor(PDDocument pdfDocument) { this.pdfDocument = pdfDocument; } diff --git a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java index 1538cfa6..70181454 100644 --- a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java +++ b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java @@ -41,7 +41,7 @@ protected ObjectExtractorStreamEngine(PDPage page) { this.log = LoggerFactory.getLogger(ObjectExtractorStreamEngine.class); - this.rulings = new ArrayList(); + this.rulings = new ArrayList<>(); this.pageTransform = null; // calculate page transform diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index 2f5419ad..491eaf7b 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -66,8 +66,8 @@ public int compare(TextElement te1, TextElement te2) { }}).height; } Page rv = new Page( - (float) 
area.getTop(), - (float) area.getLeft(), + area.getTop(), + area.getLeft(), (float) area.getWidth(), (float) area.getHeight(), rotation, @@ -155,14 +155,14 @@ public List getRulings() { } if (this.rulings == null || this.rulings.isEmpty()) { - this.verticalRulingLines = new ArrayList(); - this.horizontalRulingLines = new ArrayList(); - return new ArrayList(); + this.verticalRulingLines = new ArrayList<>(); + this.horizontalRulingLines = new ArrayList<>(); + return new ArrayList<>(); } Utils.snapPoints(this.rulings, this.minCharWidth, this.minCharHeight); - List vrs = new ArrayList(); + List vrs = new ArrayList<>(); for (Ruling vr: this.rulings) { if (vr.vertical()) { vrs.add(vr); @@ -170,7 +170,7 @@ public List getRulings() { } this.verticalRulingLines = Ruling.collapseOrientedRulings(vrs); - List hrs = new ArrayList(); + List hrs = new ArrayList<>(); for (Ruling hr: this.rulings) { if (hr.horizontal()) { hrs.add(hr); @@ -178,7 +178,7 @@ public List getRulings() { } this.horizontalRulingLines = Ruling.collapseOrientedRulings(hrs); - this.cleanRulings = new ArrayList(this.verticalRulingLines); + this.cleanRulings = new ArrayList<>(this.verticalRulingLines); this.cleanRulings.addAll(this.horizontalRulingLines); return this.cleanRulings; diff --git a/src/main/java/technology/tabula/ProjectionProfile.java b/src/main/java/technology/tabula/ProjectionProfile.java index aae1801f..39ab9e41 100644 --- a/src/main/java/technology/tabula/ProjectionProfile.java +++ b/src/main/java/technology/tabula/ProjectionProfile.java @@ -73,7 +73,7 @@ public float[] getHorizontalProjection() { public float[] findVerticalSeparators(float minColumnWidth) { boolean foundNarrower = false; - List verticalSeparators = new ArrayList(); + List verticalSeparators = new ArrayList<>(); for (Ruling r: area.getVerticalRulings()) { if (r.length() / this.textBounds.getHeight() >= 0.95) { verticalSeparators.add(toFixed(r.getPosition() - this.areaLeft)); @@ -167,7 +167,7 @@ public static float[] 
smooth(float[] data, int kernelSize) { + kernelSize / 2, data.length); j++) { s += data[j]; } - rv[i] = (float) Math.floor(s / (float) kernelSize); + rv[i] = (float) Math.floor(s / kernelSize); } } return rv; @@ -213,7 +213,7 @@ private static int toFixed(double value) { } private static double toDouble(int value) { - return (double) value / Math.pow(10, DECIMAL_PLACES); + return value / Math.pow(10, DECIMAL_PLACES); } } diff --git a/src/main/java/technology/tabula/Rectangle.java b/src/main/java/technology/tabula/Rectangle.java index d188944f..61899a1f 100644 --- a/src/main/java/technology/tabula/Rectangle.java +++ b/src/main/java/technology/tabula/Rectangle.java @@ -57,7 +57,7 @@ public float getArea() { } public float verticalOverlap(Rectangle other) { - return (float) Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); + return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); } public boolean verticallyOverlaps(Rectangle other) { @@ -65,7 +65,7 @@ public boolean verticallyOverlaps(Rectangle other) { } public float horizontalOverlap(Rectangle other) { - return (float) Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); + return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); } public boolean horizontallyOverlaps(Rectangle other) { @@ -74,19 +74,19 @@ public boolean horizontallyOverlaps(Rectangle other) { public float verticalOverlapRatio(Rectangle other) { float rv = 0, - delta = (float) Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop()); + delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop()); if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom() && other.getBottom() <= this.getBottom()) { - rv = (float) ((other.getBottom() - this.getTop()) / delta); + rv = (other.getBottom() - 
this.getTop()) / delta; } else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom() && this.getBottom() <= other.getBottom()) { - rv = (float) ((this.getBottom() - other.getTop()) / delta); + rv = (this.getBottom() - other.getTop()) / delta; } else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom() && other.getBottom() <= this.getBottom()) { - rv = (float) ((other.getBottom() - other.getTop()) / delta); + rv = (other.getBottom() - other.getTop()) / delta; } else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom() && this.getBottom() <= other.getBottom()) { - rv = (float) ((this.getBottom() - this.getTop()) / delta); + rv = (this.getBottom() - this.getTop()) / delta; } return rv; @@ -143,10 +143,10 @@ public void setBottom(float bottom) { public Point2D[] getPoints() { return new Point2D[] { - new Point2D.Float((float) this.getLeft(), (float) this.getTop()), - new Point2D.Float((float) this.getRight(), (float) this.getTop()), - new Point2D.Float((float) this.getRight(), (float) this.getBottom()), - new Point2D.Float((float) this.getLeft(), (float) this.getBottom()) + new Point2D.Float(this.getLeft(), this.getTop()), + new Point2D.Float(this.getRight(), this.getTop()), + new Point2D.Float(this.getRight(), this.getBottom()), + new Point2D.Float(this.getLeft(), this.getBottom()) }; } diff --git a/src/main/java/technology/tabula/RectangleSpatialIndex.java b/src/main/java/technology/tabula/RectangleSpatialIndex.java index 498106db..46e5fd46 100644 --- a/src/main/java/technology/tabula/RectangleSpatialIndex.java +++ b/src/main/java/technology/tabula/RectangleSpatialIndex.java @@ -11,14 +11,14 @@ class RectangleSpatialIndex { class SaveToListProcedure implements TIntProcedure { - private List ids = new ArrayList(); + private List ids = new ArrayList<>(); - public boolean execute(int id) { + @Override public boolean execute(int id) { ids.add(id); return true; } - private List getIds() { + List getIds() { 
return ids; } } @@ -30,7 +30,7 @@ private List getIds() { public RectangleSpatialIndex() { si = new RTree(); si.init(null); - rectangles = new ArrayList(); + rectangles = new ArrayList<>(); } public void add(T te) { @@ -48,7 +48,7 @@ public void add(T te) { public List contains(Rectangle r) { SaveToListProcedure proc = new SaveToListProcedure(); si.contains(rectangleToSpatialIndexRectangle(r), proc); - ArrayList rv = new ArrayList(); + ArrayList rv = new ArrayList<>(); for (int i : proc.getIds()) { rv.add(rectangles.get(i)); } @@ -59,7 +59,7 @@ public List contains(Rectangle r) { public List intersects(Rectangle r) { SaveToListProcedure proc = new SaveToListProcedure(); si.intersects(rectangleToSpatialIndexRectangle(r), proc); - ArrayList rv = new ArrayList(); + ArrayList rv = new ArrayList<>(); for (int i : proc.getIds()) { rv.add(rectangles.get(i)); } diff --git a/src/main/java/technology/tabula/Ruling.java b/src/main/java/technology/tabula/Ruling.java index 8eb16b5e..2ad33aaa 100644 --- a/src/main/java/technology/tabula/Ruling.java +++ b/src/main/java/technology/tabula/Ruling.java @@ -297,7 +297,7 @@ public String toString() { } public static List cropRulingsToArea(List rulings, Rectangle2D area) { - ArrayList rv = new ArrayList(); + ArrayList rv = new ArrayList<>(); for (Ruling r : rulings) { if (r.intersects(area)) { rv.add(r.intersect(area)); @@ -322,15 +322,15 @@ public SortObject(SOType type, float position, Ruling ruling) { } } - List sos = new ArrayList(); + List sos = new ArrayList<>(); - TreeMap tree = new TreeMap(new Comparator() { + TreeMap tree = new TreeMap<>(new Comparator() { @Override public int compare(Ruling o1, Ruling o2) { return java.lang.Double.compare(o1.getTop(), o2.getTop()); }}); - TreeMap rv = new TreeMap(new Comparator() { + TreeMap rv = new TreeMap<>(new Comparator() { @Override public int compare(Point2D o1, Point2D o2) { if (o1.getY() > o2.getY()) return 1; @@ -409,7 +409,7 @@ public static List collapseOrientedRulings(List lines) 
{ } public static List collapseOrientedRulings(List lines, int expandAmount) { - ArrayList rv = new ArrayList(); + ArrayList rv = new ArrayList<>(); Collections.sort(lines, new Comparator() { @Override public int compare(Ruling a, Ruling b) { diff --git a/src/main/java/technology/tabula/Table.java b/src/main/java/technology/tabula/Table.java index eda11251..525f1f3a 100644 --- a/src/main/java/technology/tabula/Table.java +++ b/src/main/java/technology/tabula/Table.java @@ -57,7 +57,7 @@ public RectangularTextContainer get(int row, int col) { } public List getRow(int row) { - return new ArrayList(this.subMap(new CellPosition(row, 0), new CellPosition(row, maxRow+1)).values()); + return new ArrayList<>(this.subMap(new CellPosition(row, 0), new CellPosition(row, maxRow+1)).values()); } @Override @@ -110,9 +110,9 @@ public List> getRows() { return this.rows; } - this.rows = new ArrayList>(); + this.rows = new ArrayList<>(); for (int i = 0; i <= this.cellContainer.maxRow; i++) { - List lastRow = new ArrayList(); + List lastRow = new ArrayList<>(); this.rows.add(lastRow); for (int j = 0; j <= this.cellContainer.maxCol; j++) { lastRow.add(this.cellContainer.containsKey(i, j) ? 
this.cellContainer.get(i, j) : TextChunk.EMPTY); @@ -138,7 +138,7 @@ public ExtractionAlgorithm getExtractionAlgorithm() { } public List getCells() { - return (List) new ArrayList(this.cellContainer.values()); + return new ArrayList<>(this.cellContainer.values()); } diff --git a/src/main/java/technology/tabula/TableWithRulingLines.java b/src/main/java/technology/tabula/TableWithRulingLines.java index df1527d3..69d0d475 100644 --- a/src/main/java/technology/tabula/TableWithRulingLines.java +++ b/src/main/java/technology/tabula/TableWithRulingLines.java @@ -10,7 +10,7 @@ public class TableWithRulingLines extends Table { List verticalRulings, horizontalRulings; - RectangleSpatialIndex si = new RectangleSpatialIndex(); + RectangleSpatialIndex si = new RectangleSpatialIndex<>(); public TableWithRulingLines() { super(); @@ -61,7 +61,7 @@ private void addCells(List cells) { private static List> rowsOfCells(List cells) { Cell c; float lastTop; - List> rv = new ArrayList>(); + List> rv = new ArrayList<>(); List lastRow; if (cells.isEmpty()) { @@ -78,7 +78,7 @@ public int compare(Cell arg0, Cell arg1) { Iterator iter = cells.iterator(); c = iter.next(); - lastTop = (float) c.getTop(); + lastTop = c.getTop(); lastRow = new ArrayList<>(); lastRow.add(c); rv.add(lastRow); @@ -90,7 +90,7 @@ public int compare(Cell arg0, Cell arg1) { rv.add(lastRow); } lastRow.add(c); - lastTop = (float) c.getTop(); + lastTop = c.getTop(); } return rv; } diff --git a/src/main/java/technology/tabula/TextChunk.java b/src/main/java/technology/tabula/TextChunk.java index ed8614db..d387a5de 100644 --- a/src/main/java/technology/tabula/TextChunk.java +++ b/src/main/java/technology/tabula/TextChunk.java @@ -10,7 +10,7 @@ @SuppressWarnings("serial") public class TextChunk extends RectangularTextContainer implements HasText { public static final TextChunk EMPTY = new TextChunk(0, 0, 0, 0); - List textElements = new ArrayList(); + List textElements = new ArrayList<>(); public TextChunk(float top, float 
left, float width, float height) { super(top, left, width, height); @@ -37,7 +37,7 @@ private enum DirectionalityOptions { private static HashMap directionalities; static { - directionalities = new HashMap(); + directionalities = new HashMap<>(); // BCT = bidirectional character type directionalities.put(java.lang.Character.DIRECTIONALITY_ARABIC_NUMBER, DirectionalityOptions.LTR); // Weak BCT "AN" in the Unicode specification. directionalities.put(java.lang.Character.DIRECTIONALITY_BOUNDARY_NEUTRAL, DirectionalityOptions.NONE); // Weak BCT "BN" in the Unicode specification. @@ -73,8 +73,8 @@ public TextChunk groupByDirectionality(Boolean isLtrDominant) { throw new IllegalArgumentException(); } - ArrayList> chunks = new ArrayList>(); - ArrayList buff = new ArrayList(); + ArrayList> chunks = new ArrayList<>(); + ArrayList buff = new ArrayList<>(); DirectionalityOptions buffDirectionality = DirectionalityOptions.NONE; // the directionality of the characters in buff; for (TextElement te : this.getTextElements()) { @@ -107,7 +107,7 @@ public TextChunk groupByDirectionality(Boolean isLtrDominant) { // and start a new one buffDirectionality = directionalities.get(Character.getDirectionality(te.getText().charAt(0))); - buff = new ArrayList(); + buff = new ArrayList<>(); buff.add(te); } } @@ -116,7 +116,7 @@ public TextChunk groupByDirectionality(Boolean isLtrDominant) { Collections.reverse(buff); } chunks.add(buff); - ArrayList everything = new ArrayList(); + ArrayList everything = new ArrayList<>(); if (!isLtrDominant) { Collections.reverse(chunks); } @@ -126,7 +126,7 @@ public TextChunk groupByDirectionality(Boolean isLtrDominant) { return new TextChunk(everything); } - public int isLtrDominant() { + @Override public int isLtrDominant() { int ltrCnt = 0; int rtlCnt = 0; for (int i = 0; i < this.getTextElements().size(); i++) { @@ -159,17 +159,17 @@ public void add(TextElement textElement) { this.merge(textElement); } - public void add(List textElements) { - for 
(TextElement te : textElements) { + public void add(List elements) { + for (TextElement te : elements) { this.add(te); } } - public List getTextElements() { + @Override public List getTextElements() { return textElements; } - public String getText() { + @Override public String getText() { if (this.textElements.size() == 0) { return ""; } @@ -231,7 +231,7 @@ public List squeeze(Character c, int minRunLength) { Character currentChar, lastChar = null; int subSequenceLength = 0, subSequenceStart = 0; TextChunk[] t; - List rv = new ArrayList(); + List rv = new ArrayList<>(); for (int i = 0; i < this.getTextElements().size(); i++) { TextElement textElement = this.getTextElements().get(i); @@ -331,7 +331,7 @@ public static boolean allSameChar(List textChunks) { } public static List groupByLines(List textChunks) { - List lines = new ArrayList(); + List lines = new ArrayList<>(); if (textChunks.size() == 0) { return lines; @@ -360,7 +360,7 @@ public static List groupByLines(List textChunks) { lines.remove(lines.size() - 1); } - List rv = new ArrayList(lines.size()); + List rv = new ArrayList<>(lines.size()); for (Line line : lines) { rv.add(Line.removeRepeatedCharacters(line, ' ', 3)); diff --git a/src/main/java/technology/tabula/TextElement.java b/src/main/java/technology/tabula/TextElement.java index 6e232854..f54c4e2f 100644 --- a/src/main/java/technology/tabula/TextElement.java +++ b/src/main/java/technology/tabula/TextElement.java @@ -30,7 +30,7 @@ public TextElement(float y, float x, float width, float height, this.dir = dir; } - public String getText() { + @Override public String getText() { return text; } @@ -50,7 +50,7 @@ public float getFontSize() { return fontSize; } - public String toString() { + @Override public String toString() { StringBuilder sb = new StringBuilder(); String s = super.toString(); sb.append(s.substring(0, s.length() - 1)); @@ -110,7 +110,7 @@ public static List mergeWords(List textElements) { */ public static List mergeWords(List 
textElements, List verticalRulings) { - List textChunks = new ArrayList(); + List textChunks = new ArrayList<>(); if (textElements.isEmpty()) { return textChunks; @@ -120,15 +120,15 @@ public static List mergeWords(List textElements, List copyOfTextElements = new ArrayList(textElements); + List copyOfTextElements = new ArrayList<>(textElements); textChunks.add(new TextChunk(copyOfTextElements.remove(0))); TextChunk firstTC = textChunks.get(0); float previousAveCharWidth = (float) firstTC.getWidth(); - float endOfLastTextX = (float) firstTC.getRight(); - float maxYForLine = (float) firstTC.getBottom(); + float endOfLastTextX = firstTC.getRight(); + float maxYForLine = firstTC.getBottom(); float maxHeightForLine = (float) firstTC.getHeight(); - float minYTopForLine = (float) firstTC.getTop(); + float minYTopForLine = firstTC.getTop(); float lastWordSpacing = -1; float wordSpacing, deltaSpace, averageCharWidth, deltaCharWidth; float expectedStartOfNextWordX, dist; @@ -202,7 +202,7 @@ public static List mergeWords(List textElements, List mergeWords(List textElements, List mergeWords(List textElements, List mergeWords(List textElements, List mergeWords(List textElements, List textChunksSeparatedByDirectionality = new ArrayList(); + List textChunksSeparatedByDirectionality = new ArrayList<>(); // count up characters by directionality for (TextChunk chunk : textChunks) { // choose the dominant direction diff --git a/src/main/java/technology/tabula/Utils.java b/src/main/java/technology/tabula/Utils.java index 1f04a4c2..d41bbe49 100644 --- a/src/main/java/technology/tabula/Utils.java +++ b/src/main/java/technology/tabula/Utils.java @@ -105,10 +105,10 @@ public static String join(String glue, String... 
s) { } public static List> transpose(List> table) { - List> ret = new ArrayList>(); + List> ret = new ArrayList<>(); final int N = table.get(0).size(); for (int i = 0; i < N; i++) { - List col = new ArrayList(); + List col = new ArrayList<>(); for (List row : table) { col.add(row.get(i)); } @@ -160,7 +160,7 @@ public static List parsePagesOption(String pagesSpec) throws ParseExcep return null; } - List rv = new ArrayList(); + List rv = new ArrayList<>(); String[] ranges = pagesSpec.split(","); for (int i = 0; i < ranges.length; i++) { @@ -188,8 +188,8 @@ public static List parsePagesOption(String pagesSpec) throws ParseExcep public static void snapPoints(List rulings, float xThreshold, float yThreshold) { // collect points and keep a Line -> p1,p2 map - Map linesToPoints = new HashMap(); - List points = new ArrayList(); + Map linesToPoints = new HashMap<>(); + List points = new ArrayList<>(); for (Line2D.Float r : rulings) { Point2D p1 = r.getP1(); Point2D p2 = r.getP2(); @@ -206,15 +206,15 @@ public int compare(Point2D arg0, Point2D arg1) { } }); - List> groupedPoints = new ArrayList>(); - groupedPoints.add(new ArrayList(Arrays.asList(new Point2D[]{points.get(0)}))); + List> groupedPoints = new ArrayList<>(); + groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{points.get(0)}))); for (Point2D p : points.subList(1, points.size() - 1)) { List last = groupedPoints.get(groupedPoints.size() - 1); if (Math.abs(p.getX() - last.get(0).getX()) < xThreshold) { groupedPoints.get(groupedPoints.size() - 1).add(p); } else { - groupedPoints.add(new ArrayList(Arrays.asList(new Point2D[]{p}))); + groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{p}))); } } @@ -238,15 +238,15 @@ public int compare(Point2D arg0, Point2D arg1) { } }); - groupedPoints = new ArrayList>(); - groupedPoints.add(new ArrayList(Arrays.asList(new Point2D[]{points.get(0)}))); + groupedPoints = new ArrayList<>(); + groupedPoints.add(new ArrayList<>(Arrays.asList(new 
Point2D[]{points.get(0)}))); for (Point2D p : points.subList(1, points.size() - 1)) { List last = groupedPoints.get(groupedPoints.size() - 1); if (Math.abs(p.getY() - last.get(0).getY()) < yThreshold) { groupedPoints.get(groupedPoints.size() - 1).add(p); } else { - groupedPoints.add(new ArrayList(Arrays.asList(new Point2D[]{p}))); + groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{p}))); } } diff --git a/src/main/java/technology/tabula/debug/Debug.java b/src/main/java/technology/tabula/debug/Debug.java index 6619f2ab..f9f923b5 100644 --- a/src/main/java/technology/tabula/debug/Debug.java +++ b/src/main/java/technology/tabula/debug/Debug.java @@ -59,7 +59,7 @@ private static void debugNonCleanRulings(Graphics2D g, Page page) { private static void debugRulings(Graphics2D g, Page page) { // draw detected lines - List rulings = new ArrayList(page.getHorizontalRulings()); + List rulings = new ArrayList<>(page.getHorizontalRulings()); rulings.addAll(page.getVerticalRulings()); drawShapes(g, rulings); } @@ -70,8 +70,8 @@ private static void debugColumns(Graphics2D g, Page page) { List columns = BasicExtractionAlgorithm.columnPositions(lines); int i = 0; for (float p : columns) { - Ruling r = new Ruling(new Point2D.Float(p, (float) page.getTop()), - new Point2D.Float(p, (float) page.getBottom())); + Ruling r = new Ruling(new Point2D.Float(p, page.getTop()), + new Point2D.Float(p, page.getBottom())); g.setColor(COLORS[(i++) % 5]); drawShape(g, r); } @@ -171,7 +171,7 @@ private static void debugProjectionProfile(Graphics2D g, Page page) { g.setStroke(new BasicStroke(1f)); float[] seps = profile.findVerticalSeparators(horizSmoothKernel * 2.5f); for (int i = 0; i < seps.length; i++) { - float x = (float) (page.getLeft() + seps[i]); + float x = page.getLeft() + seps[i]; g.draw(new Line2D.Double(x, page.getTop(), x, page.getBottom())); } @@ -199,7 +199,7 @@ private static void debugProjectionProfile(Graphics2D g, Page page) { g.setStroke(new BasicStroke(1.5f)); seps 
= profile.findHorizontalSeparators(vertSmoothKernel); for (int i = 0; i < seps.length; i++) { - float y = (float) (page.getTop() + seps[i]); + float y = page.getTop() + seps[i]; g.draw(new Line2D.Double(page.getLeft(), y, page.getRight(), y)); } @@ -225,7 +225,7 @@ public static void renderPage(String pdfPath, String outPath, int pageNumber, Re page = page.getArea(area); } - PDPage p = (PDPage) document.getPage(pageNumber); + PDPage p = document.getPage(pageNumber); BufferedImage image = Utils.pageConvertToImage(p, 72, ImageType.RGB); @@ -276,7 +276,6 @@ public static void renderPage(String pdfPath, String outPath, int pageNumber, Re ImageIO.write(image, "jpg", new File(outPath)); } - @SuppressWarnings("static-access") private static Options buildOptions() { Options o = new Options(); diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java index 6b946333..8f155ae3 100644 --- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java @@ -108,7 +108,7 @@ public List detect(Page page) { try { image = Utils.pageConvertToImage(pdfPage, 144, ImageType.GRAY); } catch (IOException e) { - return new ArrayList(); + return new ArrayList<>(); } List horizontalRulings = this.getHorizontalRulings(image); @@ -119,7 +119,7 @@ public List detect(Page page) { removeTextDocument = this.removeText(pdfPage); image = Utils.pageConvertToImage(pdfPage, 144, ImageType.GRAY); } catch (Exception e) { - return new ArrayList(); + return new ArrayList<>(); } finally { if (removeTextDocument != null) { try { @@ -133,10 +133,10 @@ public List detect(Page page) { List verticalRulings = this.getVerticalRulings(image); - List allEdges = new ArrayList(horizontalRulings); + List allEdges = new ArrayList<>(horizontalRulings); allEdges.addAll(verticalRulings); - List tableAreas = new ArrayList(); + List 
tableAreas = new ArrayList<>(); // if we found some edges, try to find some tables based on them if (allEdges.size() > 0) { @@ -289,7 +289,7 @@ public List detect(Page page) { } while (foundTable); // create a set of our current tables that will eliminate duplicate tables - Set tableSet = new TreeSet(new Comparator() { + Set tableSet = new TreeSet<>(new Comparator() { @Override public int compare(Rectangle o1, Rectangle o2) { if (o1.equals(o2)) { @@ -317,7 +317,7 @@ public int compare(Rectangle o1, Rectangle o2) { tableSet.addAll(tableAreas); - return new ArrayList(tableSet); + return new ArrayList<>(tableSet); } private Rectangle getTableFromText(List lines, @@ -534,21 +534,21 @@ private TextEdges getTextEdges(List lines) { // first put this chunk into any edge buckets it belongs to List leftEdge = currLeftEdges.get(left); if (leftEdge == null) { - leftEdge = new ArrayList(); + leftEdge = new ArrayList<>(); currLeftEdges.put(left, leftEdge); } leftEdge.add(text); List midEdge = currMidEdges.get(mid); if (midEdge == null) { - midEdge = new ArrayList(); + midEdge = new ArrayList<>(); currMidEdges.put(mid, midEdge); } midEdge.add(text); List rightEdge = currRightEdges.get(right); if (rightEdge == null) { - rightEdge = new ArrayList(); + rightEdge = new ArrayList<>(); currRightEdges.put(right, rightEdge); } rightEdge.add(text); @@ -654,7 +654,7 @@ private TextEdges getTextEdges(List lines) { } private List getTableAreasFromCells(List cells) { - List> cellGroups = new ArrayList>(); + List> cellGroups = new ArrayList<>(); for (Rectangle cell : cells) { boolean addedToGroup = false; @@ -677,14 +677,14 @@ private List getTableAreasFromCells(List cells) } if (!addedToGroup) { - ArrayList cellGroup = new ArrayList(); + ArrayList cellGroup = new ArrayList<>(); cellGroup.add(cell); cellGroups.add(cellGroup); } } // create table areas based on cell group - List tableAreas = new ArrayList(); + List tableAreas = new ArrayList<>(); for (List cellGroup : cellGroups) { // less than 
four cells should not make a table if (cellGroup.size() < REQUIRED_CELLS_FOR_TABLE) { @@ -713,7 +713,7 @@ private List getHorizontalRulings(BufferedImage image) { // get all horizontal edges, which we'll define as a change in grayscale colour // along a straight line of a certain length - ArrayList horizontalRulings = new ArrayList(); + ArrayList horizontalRulings = new ArrayList<>(); Raster r = image.getRaster(); int width = r.getWidth(); @@ -776,7 +776,7 @@ private List getVerticalRulings(BufferedImage image) { // get all vertical edges, which we'll define as a change in grayscale colour // along a straight line of a certain length - ArrayList verticalRulings = new ArrayList(); + ArrayList verticalRulings = new ArrayList<>(); Raster r = image.getRaster(); int width = r.getWidth(); @@ -842,7 +842,7 @@ private PDDocument removeText(PDPage page) throws IOException { PDFStreamParser parser = new PDFStreamParser(page); parser.parse(); List tokens = parser.getTokens(); - List newTokens = new ArrayList(); + List newTokens = new ArrayList<>(); for (Object token : tokens) { if (token instanceof Operator) { Operator op = (Operator) token; diff --git a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java index b6a7bcb5..40283f46 100644 --- a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java @@ -26,9 +26,9 @@ public BasicExtractionAlgorithm(List verticalRulings) { } public List
extract(Page page, List verticalRulingPositions) { - List verticalRulings = new ArrayList(verticalRulingPositions.size()); + List verticalRulings = new ArrayList<>(verticalRulingPositions.size()); for (Float p: verticalRulingPositions) { - verticalRulings.add(new Ruling((float) page.getTop(), (float) p, 0.0f, (float) page.getHeight())); + verticalRulings.add(new Ruling(page.getTop(), p, 0.0f, (float) page.getHeight())); } this.verticalRulings = verticalRulings; return this.extract(page); @@ -109,7 +109,7 @@ public String toString() { */ public static List columnPositions(List lines) { - List regions = new ArrayList(); + List regions = new ArrayList<>(); for (TextChunk tc: lines.get(0).getTextElements()) { if (tc.isSameChar(Line.WHITE_SPACE_CHARS)) { continue; @@ -120,7 +120,7 @@ public static List columnPositions(List lines) { } for (Line l: lines.subList(1, lines.size())) { - List lineTextElements = new ArrayList(); + List lineTextElements = new ArrayList<>(); for (TextChunk tc: l.getTextElements()) { if (!tc.isSameChar(Line.WHITE_SPACE_CHARS)) { lineTextElements.add(tc); @@ -129,7 +129,7 @@ public static List columnPositions(List lines) { for (Rectangle cr: regions) { - List overlaps = new ArrayList(); + List overlaps = new ArrayList<>(); for (TextChunk te: lineTextElements) { if (cr.horizontallyOverlaps(te)) { overlaps.add(te); @@ -152,7 +152,7 @@ public static List columnPositions(List lines) { List rv = new ArrayList<>(); for (Rectangle r: regions) { - rv.add((float) r.getRight()); + rv.add(r.getRight()); } Collections.sort(rv); diff --git a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java index f5052e69..2e663bd3 100644 --- a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java @@ -80,17 +80,17 @@ else if (arg0Y < arg1Y) { @Override - public 
List extract(Page page) { + public List
extract(Page page) { return extract(page, page.getRulings()); } /** * Extract a list of Table from page using rulings as separators */ - public List extract(Page page, List rulings) { + public List
extract(Page page, List rulings) { // split rulings into horizontal and vertical - List horizontalR = new ArrayList(), - verticalR = new ArrayList(); + List horizontalR = new ArrayList<>(), + verticalR = new ArrayList<>(); for (Ruling r: rulings) { if (r.horizontal()) { @@ -106,10 +106,10 @@ else if (r.vertical()) { List cells = findCells(horizontalR, verticalR); List spreadsheetAreas = findSpreadsheetsFromCells(cells); - List spreadsheets = new ArrayList(); + List
spreadsheets = new ArrayList<>(); for (Rectangle area: spreadsheetAreas) { - List overlappingCells = new ArrayList(); + List overlappingCells = new ArrayList<>(); for (Cell c: cells) { if (c.intersects(area)) { @@ -118,13 +118,13 @@ else if (r.vertical()) { } } - List horizontalOverlappingRulings = new ArrayList(); + List horizontalOverlappingRulings = new ArrayList<>(); for (Ruling hr: horizontalR) { if (area.intersectsLine(hr)) { horizontalOverlappingRulings.add(hr); } } - List verticalOverlappingRulings = new ArrayList(); + List verticalOverlappingRulings = new ArrayList<>(); for (Ruling vr: verticalR) { if (area.intersectsLine(vr)) { verticalOverlappingRulings.add(vr); @@ -176,9 +176,9 @@ public boolean isTabular(Page page) { } public static List findCells(List horizontalRulingLines, List verticalRulingLines) { - List cellsFound = new ArrayList(); + List cellsFound = new ArrayList<>(); Map intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines); - List intersectionPointsList = new ArrayList(intersectionPoints.keySet()); + List intersectionPointsList = new ArrayList<>(intersectionPoints.keySet()); Collections.sort(intersectionPointsList, POINT_COMPARATOR); boolean doBreak = false; @@ -188,9 +188,9 @@ public static List findCells(List horizontalRulingLines, List xPoints = new ArrayList(); + List xPoints = new ArrayList<>(); // CrossingPointsDirectlyToTheRight( topLeft ); - List yPoints = new ArrayList(); + List yPoints = new ArrayList<>(); for (Point2D p: intersectionPointsList.subList(i, intersectionPointsList.size())) { if (p.getX() == topLeft.getX() && p.getY() > topLeft.getY()) { @@ -234,13 +234,13 @@ public static List findCells(List horizontalRulingLines, List findSpreadsheetsFromCells(List cells) { // via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon - List rectangles = new ArrayList(); - Set pointSet = new HashSet(); - Map edgesH = new HashMap(); - Map edgesV = new 
HashMap(); + List rectangles = new ArrayList<>(); + Set pointSet = new HashSet<>(); + Map edgesH = new HashMap<>(); + Map edgesV = new HashMap<>(); int i = 0; - cells = new ArrayList(new HashSet(cells)); + cells = new ArrayList<>(new HashSet<>(cells)); Utils.sort(cells); @@ -256,10 +256,10 @@ public static List findSpreadsheetsFromCells(List pointsSortX = new ArrayList(pointSet); + List pointsSortX = new ArrayList<>(pointSet); Collections.sort(pointsSortX, X_FIRST_POINT_COMPARATOR); // Y first sort - List pointsSortY = new ArrayList(pointSet); + List pointsSortY = new ArrayList<>(pointSet); Collections.sort(pointsSortY, POINT_COMPARATOR); while (i < pointSet.size()) { @@ -282,10 +282,10 @@ public static List findSpreadsheetsFromCells(List> polygons = new ArrayList>(); + List> polygons = new ArrayList<>(); Point2D nextVertex; while (!edgesH.isEmpty()) { - ArrayList polygon = new ArrayList(); + ArrayList polygon = new ArrayList<>(); Point2D first = edgesH.keySet().iterator().next(); polygon.add(new PolygonVertex(first, Direction.HORIZONTAL)); edgesH.remove(first); @@ -357,7 +357,7 @@ public PolygonVertex(Point2D point, Direction direction) { this.point = point; } - public boolean equals(Object other) { + @Override public boolean equals(Object other) { if (this == other) return true; if (!(other instanceof PolygonVertex)) @@ -365,11 +365,11 @@ public boolean equals(Object other) { return this.point.equals(((PolygonVertex) other).point); } - public int hashCode() { + @Override public int hashCode() { return this.point.hashCode(); } - public String toString() { + @Override public String toString() { return String.format("%s[point=%s,direction=%s]", this.getClass().getName(), this.point.toString(), this.direction.toString()); } } diff --git a/src/test/java/technology/tabula/TestBasicExtractor.java b/src/test/java/technology/tabula/TestBasicExtractor.java index da40db78..fd43f288 100644 --- a/src/test/java/technology/tabula/TestBasicExtractor.java +++ 
b/src/test/java/technology/tabula/TestBasicExtractor.java @@ -187,7 +187,7 @@ public void testColumnRecognition() throws IOException { @Test public void testVerticalRulingsPreventMergingOfColumns() throws IOException { - List rulings = new ArrayList(); + List rulings = new ArrayList<>(); Float[] rulingsVerticalPositions = {147f, 256f, 310f, 375f, 431f, 504f}; for (int i = 0; i < 6; i++) { rulings.add(new Ruling(255.57f, rulingsVerticalPositions[i], 0, 398.76f - 255.57f)); @@ -326,7 +326,7 @@ public void testNaturalOrderOfRectanglesOneMoreTime() throws IOException { Charset.forName("utf-8"), CSVFormat.DEFAULT); - List rectangles = new ArrayList(); + List rectangles = new ArrayList<>(); for (CSVRecord record : parse) { rectangles.add(new Rectangle(Float.parseFloat(record.get(0)), diff --git a/src/test/java/technology/tabula/TestCell.java b/src/test/java/technology/tabula/TestCell.java index 92796920..de1b8cb8 100644 --- a/src/test/java/technology/tabula/TestCell.java +++ b/src/test/java/technology/tabula/TestCell.java @@ -33,7 +33,7 @@ public void testGetTextElements() { TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); TextChunk tChunk = new TextChunk(tElement); - List tList = new ArrayList(); + List tList = new ArrayList<>(); tList.add(tChunk); cell.setTextElements(tList); diff --git a/src/test/java/technology/tabula/TestLine.java b/src/test/java/technology/tabula/TestLine.java index 9748415a..90df0e31 100644 --- a/src/test/java/technology/tabula/TestLine.java +++ b/src/test/java/technology/tabula/TestLine.java @@ -16,7 +16,7 @@ public void testSetTextElements() { TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); TextChunk tChunk = new TextChunk(tElement); - List tList = new ArrayList(); + List tList = new ArrayList<>(); tList.add(tChunk); line.setTextElements(tList); diff --git a/src/test/java/technology/tabula/TestProjectionProfile.java 
b/src/test/java/technology/tabula/TestProjectionProfile.java index 4bdd83af..44d488eb 100644 --- a/src/test/java/technology/tabula/TestProjectionProfile.java +++ b/src/test/java/technology/tabula/TestProjectionProfile.java @@ -21,18 +21,18 @@ public void setUpProjectionProfile() { TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f); TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f); - List textList = new ArrayList(); + List textList = new ArrayList<>(); textList.add(textElement); textList.add(textElement2); Ruling ruling = new Ruling(0, 0, 10, 10); - List rulingList = new ArrayList(); + List rulingList = new ArrayList<>(); rulingList.add(ruling); page = new Page(0, 0, 1, 1, 0, 1, pdPage, textList, rulingList); - List rectangles = new ArrayList(); + List rectangles = new ArrayList<>(); rectangles.add(new Rectangle(0f, 0f, 500f, 5f)); pProfile = new ProjectionProfile(page, rectangles, 5, 5); @@ -65,7 +65,7 @@ public void testFindHorizontalSeparators() { @Test public void testSmooth() { float[] data = {0, 1, 2}; - float[] rv = pProfile.smooth(data, 3); + float[] rv = ProjectionProfile.smooth(data, 3); assertEquals(1f, rv[2], 1e-5); } @@ -73,7 +73,7 @@ public void testSmooth() { @Test public void testFilter() { float[] data = {0, 1, 2}; - float[] rv = pProfile.filter(data, 3); + float[] rv = ProjectionProfile.filter(data, 3); assertEquals(3f, rv[1], 1e-5); } @@ -81,7 +81,7 @@ public void testFilter() { @Test public void testGetAutocorrelation() { float[] projection = {0, 1, 2}; - float[] rv = pProfile.getAutocorrelation(projection); + float[] rv = ProjectionProfile.getAutocorrelation(projection); assertEquals(0f, rv[0], 1e-5); assertTrue(rv.length == 2); diff --git a/src/test/java/technology/tabula/TestRectangleSpatialIndex.java b/src/test/java/technology/tabula/TestRectangleSpatialIndex.java index 1c05daf1..46eb1ea3 100644 --- 
a/src/test/java/technology/tabula/TestRectangleSpatialIndex.java +++ b/src/test/java/technology/tabula/TestRectangleSpatialIndex.java @@ -11,7 +11,7 @@ public void testIntersects() { Rectangle r = new Rectangle(0, 0, 0, 0); - RectangleSpatialIndex rSpatialIndex = new RectangleSpatialIndex(); + RectangleSpatialIndex rSpatialIndex = new RectangleSpatialIndex<>(); rSpatialIndex.add(r); assertTrue(rSpatialIndex.intersects(r).size() > 0); diff --git a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java b/src/test/java/technology/tabula/TestSpreadsheetExtractor.java index d63da204..51375cb9 100644 --- a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java +++ b/src/test/java/technology/tabula/TestSpreadsheetExtractor.java @@ -163,7 +163,7 @@ public void testFindSpreadsheetsFromCells() throws IOException { Charset.forName("utf-8"), CSVFormat.DEFAULT); - List cells = new ArrayList(); + List cells = new ArrayList<>(); for (CSVRecord record : parse) { cells.add(new Cell(Float.parseFloat(record.get(0)), @@ -173,10 +173,9 @@ public void testFindSpreadsheetsFromCells() throws IOException { } - SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); List expected = Arrays.asList(EXPECTED_RECTANGLES); Collections.sort(expected); - List foundRectangles = se.findSpreadsheetsFromCells(cells); + List foundRectangles = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells); Collections.sort(foundRectangles); assertTrue(foundRectangles.equals(expected)); } @@ -198,12 +197,12 @@ public void testSpanningCells() throws IOException { .getPage("src/test/resources/technology/tabula/spanning_cells.pdf", 1); String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/spanning_cells.json"); SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); - List tables = se.extract(page); + List
tables = se.extract(page); assertEquals(2, tables.size()); StringBuilder sb = new StringBuilder(); - (new JSONWriter()).write(sb, (List
) tables); + (new JSONWriter()).write(sb, tables); assertEquals(expectedJson, sb.toString()); } @@ -214,12 +213,12 @@ public void testSpanningCellsToCsv() throws IOException { .getPage("src/test/resources/technology/tabula/spanning_cells.pdf", 1); String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spanning_cells.csv"); SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); - List tables = se.extract(page); + List
tables = se.extract(page); assertEquals(2, tables.size()); StringBuilder sb = new StringBuilder(); - (new CSVWriter()).write(sb, (List
) tables); + (new CSVWriter()).write(sb, tables); assertEquals(expectedCsv, sb.toString()); } @@ -354,7 +353,7 @@ public void testDontRaiseSortException() throws IOException { 446.0f, 97.0f, 685.0f, 520.0f); page.getText(); SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - Table table = bea.extract(page).get(0); + bea.extract(page).get(0); } @Test @@ -364,7 +363,7 @@ public void testShouldDetectASingleSpreadsheet() throws IOException { 1, 68.08f, 16.44f, 680.85f, 597.84f); SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) bea.extract(page); + List
tables = bea.extract(page); assertEquals(1, tables.size()); } @@ -373,7 +372,7 @@ public void testExtractTableWithExternallyDefinedRulings() throws IOException { Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/us-007.pdf", 1); SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) bea.extract(page, + List
tables = bea.extract(page, Arrays.asList(EXTERNALLY_DEFINED_RULINGS)); assertEquals(1, tables.size()); Table table = tables.get(0); @@ -404,7 +403,7 @@ public void testAnotherExtractTableWithExternallyDefinedRulings() throws IOExcep Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/us-024.pdf", 1); SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) bea.extract(page, + List
tables = bea.extract(page, Arrays.asList(EXTERNALLY_DEFINED_RULINGS2)); assertEquals(1, tables.size()); Table table = tables.get(0); @@ -419,7 +418,7 @@ public void testSpreadsheetsSortedByTopAndRight() throws IOException { 1); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) sea.extract(page); + List
tables = sea.extract(page); for (int i = 1; i < tables.size(); i++) { assert (tables.get(i - 1).getTop() <= tables.get(i).getTop()); } @@ -431,7 +430,7 @@ public void testDontStackOverflowQuicksort() throws IOException { 1); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) sea.extract(page); + List
tables = sea.extract(page); for (int i = 1; i < tables.size(); i++) { assert (tables.get(i - 1).getTop() <= tables.get(i).getTop()); } @@ -442,7 +441,7 @@ public void testRTL() throws IOException { Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/arabic.pdf", 1); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) sea.extract(page); + List
tables = sea.extract(page); // assertEquals(1, tables.size()); Table table = tables.get(0); @@ -473,7 +472,7 @@ public void testRealLifeRTL() throws IOException { Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/mednine.pdf", 1); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) sea.extract(page); + List
tables = sea.extract(page); // assertEquals(1, tables.size()); Table table = tables.get(0); @@ -522,7 +521,7 @@ public void testSpreadsheetExtractionIssue656() throws IOException { String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/Publication_of_award_of_Bids_for_Transport_Sector__August_2016.csv"); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) sea.extract(page); + List
tables = sea.extract(page); assertEquals(1, tables.size()); Table table = tables.get(0); diff --git a/src/test/java/technology/tabula/TestTableDetection.java b/src/test/java/technology/tabula/TestTableDetection.java index 899bfcec..1bb86c3d 100644 --- a/src/test/java/technology/tabula/TestTableDetection.java +++ b/src/test/java/technology/tabula/TestTableDetection.java @@ -80,6 +80,7 @@ public void save() { w.write(gson.toJson(this)); w.close(); } catch (Exception e) { + throw new Error(e); } } @@ -108,7 +109,7 @@ public static void enableLogging() { public static Collection data() { String[] regionCodes = {"eu", "us"}; - ArrayList data = new ArrayList(); + ArrayList data = new ArrayList<>(); for (String regionCode : regionCodes) { String directoryName = "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-" + regionCode + "/"; @@ -224,7 +225,7 @@ public void testDetectionOfTables() throws Exception { // now compare System.out.println("Testing " + this.pdf.getName()); - List errors = new ArrayList(); + List errors = new ArrayList<>(); this.status.numExpectedTables = numExpectedTables; totalExpectedTables += numExpectedTables; @@ -289,7 +290,7 @@ public void testDetectionOfTables() throws Exception { } private List comparePages(Integer page, List detected, List expected) { - ArrayList errors = new ArrayList(); + ArrayList errors = new ArrayList<>(); // go through the detected tables and try to match them with expected tables // from http://www.orsigiorgio.net/wp-content/papercite-data/pdf/gho*12.pdf (comparing regions): diff --git a/src/test/java/technology/tabula/TestTextElement.java b/src/test/java/technology/tabula/TestTextElement.java index d662d5ab..feaaa5e6 100644 --- a/src/test/java/technology/tabula/TestTextElement.java +++ b/src/test/java/technology/tabula/TestTextElement.java @@ -1,6 +1,5 @@ package technology.tabula; -import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -12,7 +11,7 @@ public class 
TestTextElement { @Test - public void createTextElement() throws IOException { + public void createTextElement() { TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f); @@ -31,7 +30,7 @@ public void createTextElement() throws IOException { } @Test - public void createTextElementWithDirection() throws IOException { + public void createTextElementWithDirection() { TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f); @@ -52,7 +51,7 @@ public void createTextElementWithDirection() throws IOException { @Test public void mergeFourElementsIntoFourWords() { - List elements = new ArrayList(); + List elements = new ArrayList<>(); elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); elements.add(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); elements.add(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); @@ -60,7 +59,7 @@ public void mergeFourElementsIntoFourWords() { List words = TextElement.mergeWords(elements); - List expectedWords = new ArrayList(); + List expectedWords = new ArrayList<>(); expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f))); expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f))); expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f))); @@ -73,7 +72,7 @@ public void mergeFourElementsIntoFourWords() { @Test public void mergeFourElementsIntoOneWord() { - List elements = new ArrayList(); + List elements = new ArrayList<>(); elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); @@ -81,7 +80,7 @@ 
public void mergeFourElementsIntoOneWord() { List words = TextElement.mergeWords(elements); - List expectedWords = new ArrayList(); + List expectedWords = new ArrayList<>(); TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); @@ -99,7 +98,7 @@ public void mergeElementsShouldBeIdempotent() { * it was called with. Discussion here: https://github.com/tabulapdf/tabula-java/issues/78 */ - List elements = new ArrayList(); + List elements = new ArrayList<>(); elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); @@ -113,7 +112,7 @@ public void mergeElementsShouldBeIdempotent() { @Test public void mergeElementsWithSkippingRules() { - List elements = new ArrayList(); + List elements = new ArrayList<>(); elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); elements.add(new TextElement(0f, 17f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); @@ -123,7 +122,7 @@ public void mergeElementsWithSkippingRules() { List words = TextElement.mergeWords(elements); - List expectedWords = new ArrayList(); + List expectedWords = new ArrayList<>(); TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); @@ -137,7 +136,7 @@ public void mergeElementsWithSkippingRules() { @Test 
public void mergeTenElementsIntoTwoWords() { - List elements = new ArrayList(); + List elements = new ArrayList<>(); elements.add(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); @@ -150,7 +149,7 @@ public void mergeTenElementsIntoTwoWords() { List words = TextElement.mergeWords(elements); - List expectedWords = new ArrayList(); + List expectedWords = new ArrayList<>(); TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); textChunk.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); @@ -172,7 +171,7 @@ public void mergeTenElementsIntoTwoWords() { @Test public void mergeTenElementsIntoTwoLines() { - List elements = new ArrayList(); + List elements = new ArrayList<>(); elements.add(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); @@ -185,7 +184,7 @@ public void mergeTenElementsIntoTwoLines() { List words = TextElement.mergeWords(elements); - List expectedWords = new ArrayList(); + List expectedWords = new ArrayList<>(); TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); textChunk.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); diff --git a/src/test/java/technology/tabula/TestUtils.java b/src/test/java/technology/tabula/TestUtils.java index 75146565..ffba82e6 100644 --- 
a/src/test/java/technology/tabula/TestUtils.java +++ b/src/test/java/technology/tabula/TestUtils.java @@ -82,7 +82,7 @@ public void testAnotherExceptionInParsePages() throws ParseException { @Test public void testQuickSortEmptyList() { - List numbers = new ArrayList(); + List numbers = new ArrayList<>(); QuickSort.sort(numbers); assertEquals(Collections.emptyList(), numbers); @@ -107,8 +107,8 @@ public void testQuickSortShortList() { @Test public void testQuickSortLongList() { - List numbers = new ArrayList(); - List expectedNumbers = new ArrayList(); + List numbers = new ArrayList<>(); + List expectedNumbers = new ArrayList<>(); for(int i = 0; i <= 12000; i++){ numbers.add(12000 - i); diff --git a/src/test/java/technology/tabula/TestWriters.java b/src/test/java/technology/tabula/TestWriters.java index 63b3dcce..961d57af 100644 --- a/src/test/java/technology/tabula/TestWriters.java +++ b/src/test/java/technology/tabula/TestWriters.java @@ -31,7 +31,7 @@ private List
getTables() throws IOException { Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/twotables.pdf", 1); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - return (List
) sea.extract(page); + return sea.extract(page); } @Test From c2eafdf7fae54d1bcc6fba256ea3afc68767482a Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Sun, 3 Sep 2017 06:54:10 +0200 Subject: [PATCH 015/200] Table.CellPosition made a static inner class --- src/main/java/technology/tabula/Table.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/technology/tabula/Table.java b/src/main/java/technology/tabula/Table.java index 525f1f3a..f0306f2c 100644 --- a/src/main/java/technology/tabula/Table.java +++ b/src/main/java/technology/tabula/Table.java @@ -9,7 +9,7 @@ @SuppressWarnings("serial") public class Table extends Rectangle { - class CellPosition implements Comparable { + static class CellPosition implements Comparable { int row, col; CellPosition(int row, int col) { this.row = row; this.col = col; From 5d3cebefe3f43e49a2323fc105a1be64ea8d937c Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Sun, 3 Sep 2017 06:55:15 +0200 Subject: [PATCH 016/200] TestCellPosition removed (superfluous tests) --- .../technology/tabula/TestCellPosition.java | 45 ------------------- 1 file changed, 45 deletions(-) delete mode 100644 src/test/java/technology/tabula/TestCellPosition.java diff --git a/src/test/java/technology/tabula/TestCellPosition.java b/src/test/java/technology/tabula/TestCellPosition.java deleted file mode 100644 index 476168dd..00000000 --- a/src/test/java/technology/tabula/TestCellPosition.java +++ /dev/null @@ -1,45 +0,0 @@ -package technology.tabula; - -import static org.junit.Assert.*; - -import org.junit.Test; - -import technology.tabula.Table.CellPosition; - -public class TestCellPosition { - - @Test - public void testHashCode() { - Table table = new Table(); - CellPosition cellPosition = table.new CellPosition(5, 5); - - assertEquals(500005, cellPosition.hashCode()); - - } - - @Test - public void testEqualsObject() { - Table table = new Table(); - CellPosition cellPosition1 = table.new CellPosition(5, 5); - - 
assertTrue(cellPosition1.equals(cellPosition1)); - } - - @Test - public void testNotEqualsObject() { - Table table = new Table(); - CellPosition cellPosition1 = table.new CellPosition(5, 5); - CellPosition cellPosition2 = table.new CellPosition(5, 6); - - assertFalse(cellPosition1.equals(cellPosition2)); - } - - @Test - public void testNotInstanceOfObject() { - Table table = new Table(); - CellPosition cellPosition = table.new CellPosition(5, 5); - - assertFalse(cellPosition.equals("test")); - } - -} From db985d2411cabe066579ce7a83ec57d6adc8c0cf Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Sun, 3 Sep 2017 08:36:21 +0200 Subject: [PATCH 017/200] Started cleaning up Table --- .../java/technology/tabula/QuickSort.java | 151 ++++++------ src/main/java/technology/tabula/Table.java | 224 +++++++----------- .../tabula/TableWithRulingLines.java | 13 +- src/main/java/technology/tabula/Utils.java | 21 +- .../extractors/BasicExtractionAlgorithm.java | 2 +- .../SpreadsheetExtractionAlgorithm.java | 14 +- .../tabula/json/TableSerializer.java | 6 +- .../technology/tabula/TestBasicExtractor.java | 6 +- .../technology/tabula/TestTableDetection.java | 8 +- .../java/technology/tabula/TestUtils.java | 2 +- 10 files changed, 190 insertions(+), 257 deletions(-) diff --git a/src/main/java/technology/tabula/QuickSort.java b/src/main/java/technology/tabula/QuickSort.java index 29491028..d6ada5b2 100644 --- a/src/main/java/technology/tabula/QuickSort.java +++ b/src/main/java/technology/tabula/QuickSort.java @@ -16,94 +16,97 @@ */ package technology.tabula; +import java.util.ArrayList; import java.util.Comparator; import java.util.List; +import java.util.RandomAccess; import java.util.Stack; /** - * see http://de.wikipedia.org/wiki/Quicksort. + * An implementation of Quicksort. 
+ * + * @see http://de.wikipedia.org/wiki/Quicksort * * @author UWe Pachler */ -public class QuickSort -{ - - private QuickSort() - { - } - - private static final Comparator objComp = new Comparator() - { - public int compare(Comparable object1, Comparable object2) - { - return object1.compareTo(object2); - } - }; +public final class QuickSort { + + private QuickSort() { + // utility + } + + /** + * Sorts the given list according to natural order. + */ + public static > void sort(List list) { + sort(list, QuickSort.naturalOrder()); // JAVA_8 replace with Comparator.naturalOrder() (and cleanup) + } + + /** + * Sorts the given list using the given comparator. + */ + public static void sort(List list, Comparator comparator) { + if (list instanceof RandomAccess) { + quicksort(list, comparator); + } else { + List copy = new ArrayList<>(list); + quicksort(copy, comparator); + list.clear(); + list.addAll(copy); + } + } - /** - * Sorts the given list using the given comparator. - */ - public static void sort(List list, Comparator cmp) - { - quicksort(list, cmp); - } + private static void quicksort(List list, Comparator cmp) { + Stack stack = new Stack<>(); + stack.push(0); + stack.push(list.size()); + while (!stack.isEmpty()) { + int right = stack.pop(); + int left = stack.pop(); + + if (right - left < 2) continue; + int p = left + ((right - left) / 2); + p = partition(list, cmp, p, left, right); - /** - * Sorts the given list using compareTo as comparator. 
- */ - public static void sort(List list) - { - sort(list, (Comparator) objComp); - } + stack.push(p + 1); + stack.push(right); - private static void quicksort(List list, Comparator cmp) - { - Stack stack = new Stack<>(); - stack.push(0); - stack.push(list.size()); - while (!stack.isEmpty()) { - int right = stack.pop(); - int left = stack.pop(); - if (right - left < 2) continue; - int p = left + ((right-left)/2); - p = partition(list, cmp, p, left, right); - - stack.push(p+1); - stack.push(right); + stack.push(left); + stack.push(p); + } + } - stack.push(left); - stack.push(p); + private static int partition(List list, Comparator cmp, int p, int start, int end) { + int l = start; + int h = end - 2; + T piv = list.get(p); + swap(list, p, end - 1); - } - } - - private static int partition(List list, Comparator cmp, int p, int start, int end) { - int l = start; - int h = end - 2; - T piv = list.get(p); - swap(list,p,end-1); + while (l < h) { + if (cmp.compare(list.get(l), piv) <= 0) l++; + else if (cmp.compare(piv, list.get(h)) <= 0) h--; + else swap(list, l, h); + } + int idx = h; + if (cmp.compare(list.get(h), piv) < 0) idx++; + swap(list, end - 1, idx); + return idx; + } - while (l < h) { - if (cmp.compare(list.get(l), piv) <= 0) { - l++; - } else if (cmp.compare(piv, list.get(h)) <= 0) { - h--; - } else { - swap(list,l,h); - } - } - int idx = h; - if (cmp.compare(list.get(h), piv) < 0) idx++; - swap(list,end-1,idx); - return idx; - } - + private static void swap(List list, int i, int j) { + T tmp = list.get(i); + list.set(i, list.get(j)); + list.set(j, tmp); + } - private static void swap(List list, int i, int j) - { - T tmp = list.get(i); - list.set(i, list.get(j)); - list.set(j, tmp); - } + @SuppressWarnings({ "rawtypes", "unchecked" }) + private static final Comparator NATURAL_ORDER = new Comparator() { + @Override public int compare(Object l, Object r) { return ((Comparable) l).compareTo(r); } + }; + + @SuppressWarnings("unchecked") + private static > 
Comparator naturalOrder() { + return NATURAL_ORDER; + } } diff --git a/src/main/java/technology/tabula/Table.java b/src/main/java/technology/tabula/Table.java index f0306f2c..e0f27d11 100644 --- a/src/main/java/technology/tabula/Table.java +++ b/src/main/java/technology/tabula/Table.java @@ -8,139 +8,95 @@ @SuppressWarnings("serial") public class Table extends Rectangle { - - static class CellPosition implements Comparable { - int row, col; - CellPosition(int row, int col) { - this.row = row; this.col = col; - } - - @Override - public boolean equals(Object other) { - if (this == other) - return true; - if (!(other instanceof CellPosition)) - return false; - return other != null && this.row == ((CellPosition) other).row && this.col == ((CellPosition) other).col; - } - - @Override - public int hashCode() { - return this.row * 100000 + this.col; - } - - @Override - public int compareTo(CellPosition other) { - int rv = 0; - if(this.row < other.row) { - rv = -1; - } - else if (this.row > other.row) { - rv = 1; - } - else if (this.col > other.col) { - rv = 1; - } - else if (this.col < other.col) { - rv = -1; - } - return rv; - } - } - - class CellContainer extends TreeMap { - - public int maxRow = 0, maxCol = 0; - - public RectangularTextContainer get(int row, int col) { - return this.get(new CellPosition(row, col)); - } - - public List getRow(int row) { - return new ArrayList<>(this.subMap(new CellPosition(row, 0), new CellPosition(row, maxRow+1)).values()); - } - - @Override - public RectangularTextContainer put(CellPosition cp, RectangularTextContainer value) { - this.maxRow = Math.max(maxRow, cp.row); - this.maxCol = Math.max(maxCol, cp.col); - if (this.containsKey(cp)) { // adding on an existing CellPosition, concatenate content and resize - value.merge(this.get(cp)); - } - super.put(cp, value); - return value; - } - - @Override - public RectangularTextContainer get(Object key) { - return this.containsKey(key) ? 
super.get(key) : TextChunk.EMPTY; - } - - public boolean containsKey(int row, int col) { - return this.containsKey(new CellPosition(row, col)); - } - - } - - public static final Table EMPTY = new Table(); - - CellContainer cellContainer = new CellContainer(); - Page page; - ExtractionAlgorithm extractionAlgorithm; - List> rows = null; - - public Table() { - super(); - } - - public Table(Page page, ExtractionAlgorithm extractionAlgorithm) { - this(); - this.page = page; - this.extractionAlgorithm = extractionAlgorithm; - } - - public void add(RectangularTextContainer tc, int i, int j) { - this.merge(tc); - this.cellContainer.put(new CellPosition(i, j), tc); - this.rows = null; // clear the memoized rows - } - - public List> getRows() { - if (this.rows != null) { - return this.rows; - } - - this.rows = new ArrayList<>(); - for (int i = 0; i <= this.cellContainer.maxRow; i++) { - List lastRow = new ArrayList<>(); - this.rows.add(lastRow); - for (int j = 0; j <= this.cellContainer.maxCol; j++) { - lastRow.add(this.cellContainer.containsKey(i, j) ? 
this.cellContainer.get(i, j) : TextChunk.EMPTY); - } - } - return this.rows; - } - - public RectangularTextContainer getCell(int i, int j) { - return this.cellContainer.get(i, j); - } - - public List> getCols() { - return Utils.transpose(this.getRows()); - } - - public void setExtractionAlgorithm(ExtractionAlgorithm extractionAlgorithm) { - this.extractionAlgorithm = extractionAlgorithm; - } - - public ExtractionAlgorithm getExtractionAlgorithm() { - return extractionAlgorithm; - } - - public List getCells() { - return new ArrayList<>(this.cellContainer.values()); - } - - + + public static final Table EMPTY = new Table(""); + + private Table(String extractionMethod) { + this.extractionMethod = extractionMethod; + } + + public Table(ExtractionAlgorithm extractionAlgorithm) { + this(extractionAlgorithm.toString()); + } + + private final String extractionMethod; + + private int rowCount = 0; + private int colCount = 0; + + /* visible for testing */ final TreeMap cells = new TreeMap<>(); + + public int getRowCount() { return rowCount; } + public int getColCount() { return colCount; } + + public String getExtractionMethod() { return extractionMethod; } + + public void add(RectangularTextContainer chunk, int row, int col) { + this.merge(chunk); + + rowCount = Math.max(rowCount, row); + colCount = Math.max(colCount, col); + + CellPosition cp = new CellPosition(row, col); + + RectangularTextContainer old = cells.get(cp); + if (old != null) chunk.merge(old); + cells.put(cp, chunk); + + this.memoizedRows = null; + } + + private List> memoizedRows = null; + + public List> getRows() { + if (this.memoizedRows == null) this.memoizedRows = computeRows(); + return this.memoizedRows; + } + + private List> computeRows() { + List> rows = new ArrayList<>(); + for (int i = 0; i <= rowCount; i++) { + List lastRow = new ArrayList<>(); + rows.add(lastRow); + for (int j = 0; j <= colCount; j++) { + RectangularTextContainer cell = cells.get(new CellPosition(i,j)); // JAVA_8 use 
getOrDefault() + lastRow.add(cell != null ? cell : TextChunk.EMPTY); + } + } + return rows; + } + + public RectangularTextContainer getCell(int i, int j) { + RectangularTextContainer cell = cells.get(new CellPosition(i,j)); // JAVA_8 use getOrDefault() + return cell != null ? cell : TextChunk.EMPTY; + } + +} + +class CellPosition implements Comparable { + + CellPosition(int row, int col) { + this.row = row; + this.col = col; + } + + final int row, col; + + @Override public int hashCode() { + return Integer.hashCode(row) + 101 * Integer.hashCode(col); + } + + @Override public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; + CellPosition other = (CellPosition) obj; + return row == other.row && col == other.col; + } + + @Override public int compareTo(CellPosition other) { + int rowdiff = row - other.row; + return rowdiff != 0 ? rowdiff : col - other.col; + } } diff --git a/src/main/java/technology/tabula/TableWithRulingLines.java b/src/main/java/technology/tabula/TableWithRulingLines.java index 69d0d475..c119f191 100644 --- a/src/main/java/technology/tabula/TableWithRulingLines.java +++ b/src/main/java/technology/tabula/TableWithRulingLines.java @@ -6,22 +6,17 @@ import java.util.Iterator; import java.util.List; +import technology.tabula.extractors.ExtractionAlgorithm; + @SuppressWarnings("serial") public class TableWithRulingLines extends Table { List verticalRulings, horizontalRulings; RectangleSpatialIndex si = new RectangleSpatialIndex<>(); - public TableWithRulingLines() { - super(); - } - - public TableWithRulingLines(Rectangle area, Page page, List cells, - List horizontalRulings, - List verticalRulings) { - this(); + public TableWithRulingLines(Rectangle area, List cells, List horizontalRulings, List verticalRulings, ExtractionAlgorithm extractionAlgorithm) { + super(extractionAlgorithm); this.setRect(area); - this.page = page; this.verticalRulings = 
verticalRulings; this.horizontalRulings = horizontalRulings; this.addCells(cells); diff --git a/src/main/java/technology/tabula/Utils.java b/src/main/java/technology/tabula/Utils.java index d41bbe49..561cd00e 100644 --- a/src/main/java/technology/tabula/Utils.java +++ b/src/main/java/technology/tabula/Utils.java @@ -269,18 +269,13 @@ public int compare(Point2D arg0, Point2D arg1) { } } - public static BufferedImage pageConvertToImage(PDPage page, int dpi, ImageType imageType) throws IOException { - // Yeah, this sucks. But PDFBox 2 wants PDFRenderers to have - // a reference to a PDDocument (unnecessarily, IMHO) - - PDDocument document = new PDDocument(); - document.addPage(page); - - PDFRenderer renderer = new PDFRenderer(document); - - document.close(); - - return renderer.renderImageWithDPI(0, dpi, imageType); - } + public static BufferedImage pageConvertToImage(PDPage page, int dpi, ImageType imageType) throws IOException { + try (PDDocument document = new PDDocument()) { + document.addPage(page); + PDFRenderer renderer = new PDFRenderer(document); + document.close(); + return renderer.renderImageWithDPI(0, dpi, imageType); + } + } } diff --git a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java index 40283f46..31a5ca89 100644 --- a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java @@ -63,7 +63,7 @@ public int compare(Ruling arg0, Ruling arg1) { columns = columnPositions(lines); } - Table table = new Table(page, this); + Table table = new Table(this); for (int i = 0; i < lines.size(); i++) { Line line = lines.get(i); diff --git a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java index 2e663bd3..c00de95f 100644 --- 
a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java @@ -131,11 +131,7 @@ else if (r.vertical()) { } } - TableWithRulingLines t = new TableWithRulingLines(area, page, overlappingCells, - horizontalOverlappingRulings, verticalOverlappingRulings); - - t.setExtractionAlgorithm(this); - + TableWithRulingLines t = new TableWithRulingLines(area, overlappingCells, horizontalOverlappingRulings, verticalOverlappingRulings, this); spreadsheets.add(t); } Utils.sort(spreadsheets); @@ -159,16 +155,16 @@ public boolean isTabular(Page page) { return false; } Table table = tables.get(0); - int rowsDefinedByLines = table.getRows().size(); - int colsDefinedByLines = table.getCols().size(); + int rowsDefinedByLines = table.getRowCount(); + int colsDefinedByLines = table.getColCount(); tables = new BasicExtractionAlgorithm().extract(minimalRegion); if (tables.size() == 0) { // TODO WHAT DO WE DO HERE? 
} table = tables.get(0); - int rowsDefinedWithoutLines = table.getRows().size(); - int colsDefinedWithoutLines = table.getCols().size(); + int rowsDefinedWithoutLines = table.getRowCount(); + int colsDefinedWithoutLines = table.getColCount(); float ratio = (((float) colsDefinedByLines / colsDefinedWithoutLines) + ((float) rowsDefinedByLines / rowsDefinedWithoutLines)) / 2.0f; diff --git a/src/main/java/technology/tabula/json/TableSerializer.java b/src/main/java/technology/tabula/json/TableSerializer.java index e3f211c9..87d091d0 100644 --- a/src/main/java/technology/tabula/json/TableSerializer.java +++ b/src/main/java/technology/tabula/json/TableSerializer.java @@ -25,11 +25,7 @@ public JsonElement serialize(Table src, Type typeOfSrc, JsonSerializationContext JsonObject result = new JsonObject(); - if (src.getExtractionAlgorithm() == null) { - result.addProperty("extraction_method", ""); - } else { - result.addProperty("extraction_method", (src.getExtractionAlgorithm()).toString()); - } + result.addProperty("extraction_method", src.getExtractionMethod()); result.addProperty("top", src.getTop()); result.addProperty("left", src.getLeft()); result.addProperty("width", src.getWidth()); diff --git a/src/test/java/technology/tabula/TestBasicExtractor.java b/src/test/java/technology/tabula/TestBasicExtractor.java index fd43f288..a9e06575 100644 --- a/src/test/java/technology/tabula/TestBasicExtractor.java +++ b/src/test/java/technology/tabula/TestBasicExtractor.java @@ -13,12 +13,8 @@ import org.apache.commons.csv.CSVRecord; import org.junit.Test; -import technology.tabula.Page; -import technology.tabula.Ruling; -import technology.tabula.Table; import technology.tabula.extractors.BasicExtractionAlgorithm; import technology.tabula.writers.CSVWriter; -import technology.tabula.UtilsForTesting; public class TestBasicExtractor { @@ -254,7 +250,7 @@ public void testNaturalOrderOfRectangles() throws IOException { page.getVerticalRulings()); Table table = 
bea.extract(page).get(0); - List cells = table.getCells(); + List cells = new ArrayList<>(table.cells.values()); for (RectangularTextContainer rectangularTextContainer : cells) { System.out.println(rectangularTextContainer.getText()); } diff --git a/src/test/java/technology/tabula/TestTableDetection.java b/src/test/java/technology/tabula/TestTableDetection.java index 1bb86c3d..6e58f6a4 100644 --- a/src/test/java/technology/tabula/TestTableDetection.java +++ b/src/test/java/technology/tabula/TestTableDetection.java @@ -47,10 +47,6 @@ private static final class TestStatus { private transient boolean firstRun; private transient String pdfFilename; - public TestStatus() { - this(null); - } - public TestStatus(String pdfFilename) { this.numExpectedTables = 0; this.numCorrectlyDetectedTables = 0; @@ -74,8 +70,7 @@ public static TestStatus load(String pdfFilename) { } public void save() { - try { - FileWriter w = new FileWriter(jsonFilename(this.pdfFilename)); + try (FileWriter w = new FileWriter(jsonFilename(this.pdfFilename))) { Gson gson = new Gson(); w.write(gson.toJson(this)); w.close(); @@ -145,6 +140,7 @@ public TestTableDetection(File pdf) { try { this.builder = factory.newDocumentBuilder(); } catch (Exception e) { + // ignored } } diff --git a/src/test/java/technology/tabula/TestUtils.java b/src/test/java/technology/tabula/TestUtils.java index ffba82e6..db634d89 100644 --- a/src/test/java/technology/tabula/TestUtils.java +++ b/src/test/java/technology/tabula/TestUtils.java @@ -48,7 +48,7 @@ public void testBoundsOfOneEmptyRectangleAndAnotherNonEmpty() { @Test public void testBoundsOfOneRectangle() { - ArrayList shapes = new ArrayList(); + ArrayList shapes = new ArrayList<>(); shapes.add(new Rectangle(0, 0, 20, 40)); Rectangle r = Utils.bounds(shapes); assertEquals(r, shapes.get(0)); From af0ce5e5e5043176512cad7561c0eef970cfc595 Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Sun, 3 Sep 2017 08:58:10 +0200 Subject: [PATCH 018/200] Rectangle no longer Comparable 
--- src/main/java/technology/tabula/Cell.java | 112 +++--- src/main/java/technology/tabula/HasText.java | 4 +- .../technology/tabula/ObjectExtractor.java | 2 +- .../java/technology/tabula/QuickSort.java | 6 +- .../java/technology/tabula/Rectangle.java | 342 +++++++++--------- .../tabula/RectangleSpatialIndex.java | 4 +- src/main/java/technology/tabula/Utils.java | 24 +- .../SpreadsheetDetectionAlgorithm.java | 2 +- .../SpreadsheetExtractionAlgorithm.java | 4 +- .../technology/tabula/TestBasicExtractor.java | 4 +- .../java/technology/tabula/TestRectangle.java | 6 +- .../tabula/TestSpreadsheetExtractor.java | 8 +- 12 files changed, 257 insertions(+), 261 deletions(-) diff --git a/src/main/java/technology/tabula/Cell.java b/src/main/java/technology/tabula/Cell.java index b7e568db..79c64fbc 100644 --- a/src/main/java/technology/tabula/Cell.java +++ b/src/main/java/technology/tabula/Cell.java @@ -7,69 +7,69 @@ @SuppressWarnings("serial") public class Cell extends RectangularTextContainer { - private boolean spanning; - private boolean placeholder; - private List textElements; - - public Cell(float top, float left, float width, float height) { - super(top, left, width, height); - this.setPlaceholder(false); - this.setSpanning(false); - this.setTextElements(new ArrayList()); - } - - public Cell(Point2D topLeft, Point2D bottomRight) { - super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY())); - this.setPlaceholder(false); - this.setSpanning(false); - this.setTextElements(new ArrayList()); - } - - @Override - public String getText(boolean useLineReturns) { - if (this.textElements.size() == 0) { - return ""; - } - StringBuilder sb = new StringBuilder(); - Collections.sort(this.textElements); - double curTop = this.textElements.get(0).getTop(); - for (TextChunk tc: this.textElements) { - if (useLineReturns && tc.getTop() > curTop) { - sb.append('\r'); - } - sb.append(tc.getText()); - 
curTop = tc.getTop(); - } - return sb.toString().trim(); - } - public String getText() { - return getText(true); - } + public Cell(float top, float left, float width, float height) { + super(top, left, width, height); + this.setPlaceholder(false); + this.setSpanning(false); + this.setTextElements(new ArrayList()); + } - public boolean isSpanning() { - return spanning; - } + public Cell(Point2D topLeft, Point2D bottomRight) { + super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY())); + this.setPlaceholder(false); + this.setSpanning(false); + this.setTextElements(new ArrayList()); + } - public void setSpanning(boolean spanning) { - this.spanning = spanning; - } + private boolean spanning; + private boolean placeholder; + private List textElements; - public boolean isPlaceholder() { - return placeholder; - } + @Override + public String getText(boolean useLineReturns) { + if (this.textElements.size() == 0) { + return ""; + } + StringBuilder sb = new StringBuilder(); + Collections.sort(this.textElements, Rectangle.ILL_DEFINED_ORDER); + double curTop = this.textElements.get(0).getTop(); + for (TextChunk tc : this.textElements) { + if (useLineReturns && tc.getTop() > curTop) { + sb.append('\r'); + } + sb.append(tc.getText()); + curTop = tc.getTop(); + } + return sb.toString().trim(); + } - public void setPlaceholder(boolean placeholder) { - this.placeholder = placeholder; - } + public String getText() { + return getText(true); + } + public boolean isSpanning() { + return spanning; + } - public List getTextElements() { - return textElements; - } + public void setSpanning(boolean spanning) { + this.spanning = spanning; + } - public void setTextElements(List textElements) { - this.textElements = textElements; - } + public boolean isPlaceholder() { + return placeholder; + } + + public void setPlaceholder(boolean placeholder) { + this.placeholder = placeholder; + } + + public List 
getTextElements() { + return textElements; + } + + public void setTextElements(List textElements) { + this.textElements = textElements; + } } diff --git a/src/main/java/technology/tabula/HasText.java b/src/main/java/technology/tabula/HasText.java index 6f375dbc..99455afb 100644 --- a/src/main/java/technology/tabula/HasText.java +++ b/src/main/java/technology/tabula/HasText.java @@ -1,7 +1,7 @@ package technology.tabula; public interface HasText { - - String getText(); + + String getText(); } diff --git a/src/main/java/technology/tabula/ObjectExtractor.java b/src/main/java/technology/tabula/ObjectExtractor.java index 1b6e91a6..87c2a2f9 100644 --- a/src/main/java/technology/tabula/ObjectExtractor.java +++ b/src/main/java/technology/tabula/ObjectExtractor.java @@ -30,7 +30,7 @@ protected Page extractPage(Integer pageNumber) throws IOException { pdfTextStripper.process(); - Utils.sort(pdfTextStripper.textElements); + Utils.sort(pdfTextStripper.textElements, Rectangle.ILL_DEFINED_ORDER); float w, h; int pageRotation = p.getRotation(); diff --git a/src/main/java/technology/tabula/QuickSort.java b/src/main/java/technology/tabula/QuickSort.java index d6ada5b2..34757ca3 100644 --- a/src/main/java/technology/tabula/QuickSort.java +++ b/src/main/java/technology/tabula/QuickSort.java @@ -45,7 +45,7 @@ public static > void sort(List list) { /** * Sorts the given list using the given comparator. 
*/ - public static void sort(List list, Comparator comparator) { + public static void sort(List list, Comparator comparator) { if (list instanceof RandomAccess) { quicksort(list, comparator); } else { @@ -56,7 +56,7 @@ public static void sort(List list, Comparator comparator) { } } - private static void quicksort(List list, Comparator cmp) { + private static void quicksort(List list, Comparator cmp) { Stack stack = new Stack<>(); stack.push(0); stack.push(list.size()); @@ -76,7 +76,7 @@ private static void quicksort(List list, Comparator cmp) { } } - private static int partition(List list, Comparator cmp, int p, int start, int end) { + private static int partition(List list, Comparator cmp, int p, int start, int end) { int l = start; int h = end - 2; T piv = list.get(p); diff --git a/src/main/java/technology/tabula/Rectangle.java b/src/main/java/technology/tabula/Rectangle.java index 61899a1f..e4522a0a 100644 --- a/src/main/java/technology/tabula/Rectangle.java +++ b/src/main/java/technology/tabula/Rectangle.java @@ -2,182 +2,176 @@ import java.awt.geom.Point2D; import java.awt.geom.Rectangle2D; +import java.util.Comparator; import java.util.List; @SuppressWarnings("serial") -public class Rectangle extends Rectangle2D.Float implements Comparable { - - protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f; - - public Rectangle() { - super(); - } - - public Rectangle(float top, float left, float width, float height) { - super(); - this.setRect(left, top, width, height); - } - - @Override - /* - We're comparing based on ordering in the logical ordering of text here. - Assuming identical Y-axis positions, if TextChunk A has a lower X-axis - than TextChunk B, then A is "before" it -- iff this is LTR text. Otherwise, - it is A is after B. 
- */ - public int compareTo(Rectangle other) { - // FIXME this needs fixing - see https://github.com/tabulapdf/tabula-java/issues/116#issuecomment-325798979 - double thisBottom = this.getBottom(); - double otherBottom = other.getBottom(); - int rv; - - if (this.equals(other)) return 0; - - if (this.verticalOverlap(other) > VERTICAL_COMPARISON_THRESHOLD) { - rv = java.lang.Double.compare(this.getX(), other.getX()); - - // reverse the ordering if both TextChunks are RTL - if (this.isLtrDominant() == -1 && other.isLtrDominant() == -1) { - rv = -1 * rv; - } - } else { - rv = java.lang.Double.compare(thisBottom, otherBottom); - } - return rv; - } - - // I'm bad at Java and need this for fancy sorting in technology.tabula.TextChunk. - public int isLtrDominant(){ - return 0; - } - - - public float getArea() { - return this.width * this.height; - } - - public float verticalOverlap(Rectangle other) { - return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); - } - - public boolean verticallyOverlaps(Rectangle other) { - return verticalOverlap(other) > 0; - } - - public float horizontalOverlap(Rectangle other) { - return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); - } - - public boolean horizontallyOverlaps(Rectangle other) { - return horizontalOverlap(other) > 0; - } - - public float verticalOverlapRatio(Rectangle other) { - float rv = 0, - delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop()); - - if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom() && other.getBottom() <= this.getBottom()) { - rv = (other.getBottom() - this.getTop()) / delta; - } - else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom() && this.getBottom() <= other.getBottom()) { - rv = (this.getBottom() - other.getTop()) / delta; - } - else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom() && 
other.getBottom() <= this.getBottom()) { - rv = (other.getBottom() - other.getTop()) / delta; - } - else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom() && this.getBottom() <= other.getBottom()) { - rv = (this.getBottom() - this.getTop()) / delta; - } - - return rv; - - } - - public float overlapRatio(Rectangle other) { - double intersectionWidth = Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); - double intersectionHeight = Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); - double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight); - double unionArea = this.getArea() + other.getArea() - intersectionArea; - - return (float) (intersectionArea / unionArea); - } - - public Rectangle merge(Rectangle other) { - this.setRect(this.createUnion(other)); - return this; - } - - public float getTop() { - return (float) this.getMinY(); - } - - public void setTop(float top) { - float deltaHeight = top - this.y; - this.setRect(this.x, top, this.width, this.height - deltaHeight); - } - - public float getRight() { - return (float) this.getMaxX(); - } - - public void setRight(float right) { - this.setRect(this.x, this.y, right - this.x, this.height); - } - - public float getLeft() { - return (float) this.getMinX(); - } - - public void setLeft(float left) { - float deltaWidth = left - this.x; - this.setRect(left, this.y, this.width - deltaWidth, this.height); - } - - public float getBottom() { - return (float) this.getMaxY(); - } - - public void setBottom(float bottom) { - this.setRect(this.x, this.y, this.width, bottom - this.y); - } - - public Point2D[] getPoints() { - return new Point2D[] { - new Point2D.Float(this.getLeft(), this.getTop()), - new Point2D.Float(this.getRight(), this.getTop()), - new Point2D.Float(this.getRight(), this.getBottom()), - new Point2D.Float(this.getLeft(), this.getBottom()) - }; - } - - @Override - 
public String toString() { - StringBuilder sb = new StringBuilder(); - String s = super.toString(); - sb.append(s.substring(0, s.length() - 1)); - sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight())); - return sb.toString(); - } - - - /** - * @param rectangles - * @return minimum bounding box that contains all the rectangles - */ - public static Rectangle boundingBoxOf(List rectangles) { - float minx = java.lang.Float.MAX_VALUE; - float miny = java.lang.Float.MAX_VALUE; - float maxx = java.lang.Float.MIN_VALUE; - float maxy = java.lang.Float.MIN_VALUE; - - for (Rectangle r: rectangles) { - minx = (float) Math.min(r.getMinX(), minx); - miny = (float) Math.min(r.getMinY(), miny); - maxx = (float) Math.max(r.getMaxX(), maxx); - maxy = (float) Math.max(r.getMaxY(), maxy); - } - return new Rectangle(miny, minx, maxx - minx, maxy - miny); - } - +public class Rectangle extends Rectangle2D.Float { + + /** + * Ill-defined comparator, from when Rectangle was Comparable. + * + * @see https://github.com/tabulapdf/tabula-java/issues/116 + * @deprecated with no replacement + */ + @Deprecated + public static final Comparator ILL_DEFINED_ORDER = new Comparator() { + @Override public int compare(Rectangle o1, Rectangle o2) { + if (o1.equals(o2)) return 0; + if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) { + return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1 + ? 
- java.lang.Double.compare(o1.getX(), o2.getX()) + : java.lang.Double.compare(o1.getX(), o2.getX()); + } else { + return java.lang.Float.compare(o1.getBottom(), o2.getBottom()); + } + } + }; + + protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f; + + public Rectangle() { + super(); + } + + public Rectangle(float top, float left, float width, float height) { + super(); + this.setRect(left, top, width, height); + } + + public int compareTo(Rectangle other) { + return ILL_DEFINED_ORDER.compare(this, other); + } + + // I'm bad at Java and need this for fancy sorting in + // technology.tabula.TextChunk. + public int isLtrDominant() { + return 0; + } + + public float getArea() { + return this.width * this.height; + } + + public float verticalOverlap(Rectangle other) { + return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); + } + + public boolean verticallyOverlaps(Rectangle other) { + return verticalOverlap(other) > 0; + } + + public float horizontalOverlap(Rectangle other) { + return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); + } + + public boolean horizontallyOverlaps(Rectangle other) { + return horizontalOverlap(other) > 0; + } + + public float verticalOverlapRatio(Rectangle other) { + float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop()); + + if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom() + && other.getBottom() <= this.getBottom()) { + rv = (other.getBottom() - this.getTop()) / delta; + } else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom() + && this.getBottom() <= other.getBottom()) { + rv = (this.getBottom() - other.getTop()) / delta; + } else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom() + && other.getBottom() <= this.getBottom()) { + rv = (other.getBottom() - other.getTop()) / delta; + } else if 
(other.getTop() <= this.getTop() && this.getTop() <= this.getBottom() + && this.getBottom() <= other.getBottom()) { + rv = (this.getBottom() - this.getTop()) / delta; + } + + return rv; + + } + + public float overlapRatio(Rectangle other) { + double intersectionWidth = Math.max(0, + Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); + double intersectionHeight = Math.max(0, + Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); + double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight); + double unionArea = this.getArea() + other.getArea() - intersectionArea; + + return (float) (intersectionArea / unionArea); + } + + public Rectangle merge(Rectangle other) { + this.setRect(this.createUnion(other)); + return this; + } + + public float getTop() { + return (float) this.getMinY(); + } + + public void setTop(float top) { + float deltaHeight = top - this.y; + this.setRect(this.x, top, this.width, this.height - deltaHeight); + } + + public float getRight() { + return (float) this.getMaxX(); + } + + public void setRight(float right) { + this.setRect(this.x, this.y, right - this.x, this.height); + } + + public float getLeft() { + return (float) this.getMinX(); + } + + public void setLeft(float left) { + float deltaWidth = left - this.x; + this.setRect(left, this.y, this.width - deltaWidth, this.height); + } + + public float getBottom() { + return (float) this.getMaxY(); + } + + public void setBottom(float bottom) { + this.setRect(this.x, this.y, this.width, bottom - this.y); + } + + public Point2D[] getPoints() { + return new Point2D[] { new Point2D.Float(this.getLeft(), this.getTop()), + new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()), + new Point2D.Float(this.getLeft(), this.getBottom()) }; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + String s = super.toString(); + 
sb.append(s.substring(0, s.length() - 1)); + sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight())); + return sb.toString(); + } + + /** + * @param rectangles + * @return minimum bounding box that contains all the rectangles + */ + public static Rectangle boundingBoxOf(List rectangles) { + float minx = java.lang.Float.MAX_VALUE; + float miny = java.lang.Float.MAX_VALUE; + float maxx = java.lang.Float.MIN_VALUE; + float maxy = java.lang.Float.MIN_VALUE; + + for (Rectangle r : rectangles) { + minx = (float) Math.min(r.getMinX(), minx); + miny = (float) Math.min(r.getMinY(), miny); + maxx = (float) Math.max(r.getMaxX(), maxx); + maxy = (float) Math.max(r.getMaxY(), maxy); + } + return new Rectangle(miny, minx, maxx - minx, maxy - miny); + } } diff --git a/src/main/java/technology/tabula/RectangleSpatialIndex.java b/src/main/java/technology/tabula/RectangleSpatialIndex.java index 46e5fd46..be252794 100644 --- a/src/main/java/technology/tabula/RectangleSpatialIndex.java +++ b/src/main/java/technology/tabula/RectangleSpatialIndex.java @@ -52,7 +52,7 @@ public List contains(Rectangle r) { for (int i : proc.getIds()) { rv.add(rectangles.get(i)); } - Utils.sort(rv); + Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER); return rv; } @@ -63,7 +63,7 @@ public List intersects(Rectangle r) { for (int i : proc.getIds()) { rv.add(rectangles.get(i)); } - Utils.sort(rv); + Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER); return rv; } diff --git a/src/main/java/technology/tabula/Utils.java b/src/main/java/technology/tabula/Utils.java index 561cd00e..35c6cc4d 100644 --- a/src/main/java/technology/tabula/Utils.java +++ b/src/main/java/technology/tabula/Utils.java @@ -117,17 +117,19 @@ public static List> transpose(List> table) { return ret; } - /** - * Wrap Collections.sort so we can fallback to a non-stable quicksort - * if we're running on JDK7+ - */ - public static > void sort(List list) { - if (useQuickSort) { - QuickSort.sort(list); - } else { - 
Collections.sort(list); - } - } + /** + * Wrap Collections.sort so we can fallback to a non-stable quicksort if we're + * running on JDK7+ + */ + public static > void sort(List list) { + if (useQuickSort) QuickSort.sort(list); + else Collections.sort(list); + } + + public static void sort(List list, Comparator comparator) { + if (useQuickSort) QuickSort.sort(list, comparator); + else Collections.sort(list, comparator); + } private static boolean useCustomQuickSort() { // taken from PDFBOX: diff --git a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java index 06f966e1..243cc3bf 100644 --- a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java @@ -25,7 +25,7 @@ public List detect(Page page) { List tables = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells); // we want tables to be returned from top to bottom on the page - Collections.sort(tables); + Collections.sort(tables, Rectangle.ILL_DEFINED_ORDER); return tables; } diff --git a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java index c00de95f..c377507c 100644 --- a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java @@ -134,7 +134,7 @@ else if (r.vertical()) { TableWithRulingLines t = new TableWithRulingLines(area, overlappingCells, horizontalOverlappingRulings, verticalOverlappingRulings, this); spreadsheets.add(t); } - Utils.sort(spreadsheets); + Utils.sort(spreadsheets, Rectangle.ILL_DEFINED_ORDER); return spreadsheets; } @@ -238,7 +238,7 @@ public static List findSpreadsheetsFromCells(List(new HashSet<>(cells)); - Utils.sort(cells); + Utils.sort(cells, 
Rectangle.ILL_DEFINED_ORDER); for (Rectangle cell: cells) { for(Point2D pt: cell.getPoints()) { diff --git a/src/test/java/technology/tabula/TestBasicExtractor.java b/src/test/java/technology/tabula/TestBasicExtractor.java index a9e06575..e3fa6470 100644 --- a/src/test/java/technology/tabula/TestBasicExtractor.java +++ b/src/test/java/technology/tabula/TestBasicExtractor.java @@ -333,12 +333,12 @@ public void testNaturalOrderOfRectanglesOneMoreTime() throws IOException { //List rectangles = Arrays.asList(RECTANGLES_TEST_NATURAL_ORDER); - Utils.sort(rectangles); + Utils.sort(rectangles, Rectangle.ILL_DEFINED_ORDER); for (int i = 0; i < (rectangles.size() - 1); i++) { Rectangle rectangle = rectangles.get(i); Rectangle nextRectangle = rectangles.get(i + 1); - + assertTrue(rectangle.compareTo(nextRectangle) < 0); } } diff --git a/src/test/java/technology/tabula/TestRectangle.java b/src/test/java/technology/tabula/TestRectangle.java index 72a9c0b9..7fa66f7a 100644 --- a/src/test/java/technology/tabula/TestRectangle.java +++ b/src/test/java/technology/tabula/TestRectangle.java @@ -97,7 +97,7 @@ public void testQuickSortRectangleList() { toSortList.add(first); toSortList.add(fourth); - Collections.sort(toSortList); + Collections.sort(toSortList, Rectangle.ILL_DEFINED_ORDER); assertEquals(expectedList, toSortList); } @@ -281,8 +281,8 @@ public void testWellDefinedComparison1() { Rectangle c = new Rectangle(0,2,2,2); List l1 = new ArrayList<>(Arrays.asList(b, a, c)); List l2 = new ArrayList<>(Arrays.asList(c, b, a)); - QuickSort.sort(l1); - QuickSort.sort(l2); + QuickSort.sort(l1, Rectangle.ILL_DEFINED_ORDER); + QuickSort.sort(l2, Rectangle.ILL_DEFINED_ORDER); assertEquals(l1.get(0), l2.get(0)); assertEquals(l1.get(1), l2.get(1)); assertEquals(l1.get(2), l2.get(2)); diff --git a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java b/src/test/java/technology/tabula/TestSpreadsheetExtractor.java index 51375cb9..0abd578b 100644 --- 
a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java +++ b/src/test/java/technology/tabula/TestSpreadsheetExtractor.java @@ -129,9 +129,9 @@ public class TestSpreadsheetExtractor { @Test public void testLinesToCells() { List cells = SpreadsheetExtractionAlgorithm.findCells(Arrays.asList(HORIZONTAL_RULING_LINES), Arrays.asList(VERTICAL_RULING_LINES)); - Collections.sort(cells); + Collections.sort(cells, Rectangle.ILL_DEFINED_ORDER); List expected = Arrays.asList(EXPECTED_CELLS); - Collections.sort(expected); + Collections.sort(expected, Rectangle.ILL_DEFINED_ORDER); assertTrue(cells.equals(expected)); } @@ -174,9 +174,9 @@ public void testFindSpreadsheetsFromCells() throws IOException { List expected = Arrays.asList(EXPECTED_RECTANGLES); - Collections.sort(expected); + Collections.sort(expected, Rectangle.ILL_DEFINED_ORDER); List foundRectangles = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells); - Collections.sort(foundRectangles); + Collections.sort(foundRectangles, Rectangle.ILL_DEFINED_ORDER); assertTrue(foundRectangles.equals(expected)); } From dd25f7b11d38ee0680db3b114fdaae73b6b3da2a Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Sun, 3 Sep 2017 23:42:02 +0200 Subject: [PATCH 019/200] Refactoring --- src/main/java/technology/tabula/Table.java | 10 +- .../extractors/BasicExtractionAlgorithm.java | 2 +- .../java/technology/tabula/TableTest.java | 45 +++ .../technology/tabula/TestBasicExtractor.java | 276 ++++++++---------- .../technology/tabula/UtilsForTesting.java | 22 +- 5 files changed, 181 insertions(+), 174 deletions(-) create mode 100644 src/test/java/technology/tabula/TableTest.java diff --git a/src/main/java/technology/tabula/Table.java b/src/main/java/technology/tabula/Table.java index e0f27d11..8ffca145 100644 --- a/src/main/java/technology/tabula/Table.java +++ b/src/main/java/technology/tabula/Table.java @@ -9,7 +9,7 @@ @SuppressWarnings("serial") public class Table extends Rectangle { - public static final Table EMPTY = 
new Table(""); + public static final Table empty() { return new Table(""); } private Table(String extractionMethod) { this.extractionMethod = extractionMethod; @@ -34,8 +34,8 @@ public Table(ExtractionAlgorithm extractionAlgorithm) { public void add(RectangularTextContainer chunk, int row, int col) { this.merge(chunk); - rowCount = Math.max(rowCount, row); - colCount = Math.max(colCount, col); + rowCount = Math.max(rowCount, row + 1); + colCount = Math.max(colCount, col + 1); CellPosition cp = new CellPosition(row, col); @@ -55,10 +55,10 @@ public List> getRows() { private List> computeRows() { List> rows = new ArrayList<>(); - for (int i = 0; i <= rowCount; i++) { + for (int i = 0; i < rowCount; i++) { List lastRow = new ArrayList<>(); rows.add(lastRow); - for (int j = 0; j <= colCount; j++) { + for (int j = 0; j < colCount; j++) { RectangularTextContainer cell = cells.get(new CellPosition(i,j)); // JAVA_8 use getOrDefault() lastRow.add(cell != null ? cell : TextChunk.EMPTY); } diff --git a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java index 31a5ca89..afaeb5c7 100644 --- a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java @@ -40,7 +40,7 @@ public List
extract(Page page) { List textElements = page.getText(); if (textElements.size() == 0) { - return Arrays.asList(new Table[] { Table.EMPTY }); + return Arrays.asList(new Table[] { Table.empty() }); } List textChunks = this.verticalRulings == null ? TextElement.mergeWords(page.getText()) : TextElement.mergeWords(page.getText(), this.verticalRulings); diff --git a/src/test/java/technology/tabula/TableTest.java b/src/test/java/technology/tabula/TableTest.java new file mode 100644 index 00000000..c574a553 --- /dev/null +++ b/src/test/java/technology/tabula/TableTest.java @@ -0,0 +1,45 @@ +package technology.tabula; + +import static org.junit.Assert.*; + +import org.junit.Test; + +public class TableTest { + + @Test public void testEmpty() { + Table empty = Table.empty(); + + assertEquals(TextChunk.EMPTY, empty.getCell(0, 0)); + assertEquals(TextChunk.EMPTY, empty.getCell(1, 1)); + + assertEquals(0, empty.getRowCount()); + assertEquals(0, empty.getColCount()); + + assertEquals("", empty.getExtractionMethod()); + + assertEquals(0, empty.getTop(), 0); + assertEquals(0, empty.getRight(), 0); + assertEquals(0, empty.getBottom(), 0); + assertEquals(0, empty.getLeft(), 0); + + assertEquals(0, empty.getArea(), 0); + } + + @Test public void testRowColCounts() { + Table table = Table.empty(); + + assertEquals(0, table.getRowCount()); + assertEquals(0, table.getColCount()); + + table.add(TextChunk.EMPTY, 0, 0); + + assertEquals(1, table.getRowCount()); + assertEquals(1, table.getColCount()); + + table.add(TextChunk.EMPTY, 9, 9); + + assertEquals(10, table.getRowCount()); + assertEquals(10, table.getColCount()); + } + +} diff --git a/src/test/java/technology/tabula/TestBasicExtractor.java b/src/test/java/technology/tabula/TestBasicExtractor.java index e3fa6470..a6803cdf 100644 --- a/src/test/java/technology/tabula/TestBasicExtractor.java +++ b/src/test/java/technology/tabula/TestBasicExtractor.java @@ -18,150 +18,117 @@ public class TestBasicExtractor { - private static final 
String[][] EXPECTED_CORRECT_COLUMNS = { - {"", "", "Involvement of pupils in", ""}, - {"", "Preperation and", "Production of", "Presentation an"}, - {"", "planing", "materials", "evaluation"}, - {"Knowledge and awareness of different cultures", "0,2885", - "0,3974", "0,3904"}, - {"Foreign language competence", "0,3057", "0,4184", "0,3899"}, - {"Social skills and abilities", "0,3416", "0,3369", "0,4303"}, - {"Acquaintance of special knowledge", "0,2569", "0,2909", - "0,3557"}, - {"Self competence", "0,3791", "0,3320", "0,4617"}}; - - private static final String[][] EXPECTED_COLUMN_RECOGNITION = { - {"ABDALA de MATARAZZO, Norma Amanda", - "Frente Cívico por Santiago", "Santiago del Estero", - "AFIRMATIVO"}, - {"ALBRIEU, Oscar Edmundo Nicolas", - "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, - {"ALONSO, María Luz", "Frente para la Victoria - PJ", - "La Pampa", "AFIRMATIVO"}, - {"ARENA, Celia Isabel", "Frente para la Victoria - PJ", - "Santa Fe", "AFIRMATIVO"}, - {"ARREGUI, Andrés Roberto", "Frente para la Victoria - PJ", - "Buenos Aires", "AFIRMATIVO"}, - {"AVOSCAN, Herman Horacio", "Frente para la Victoria - PJ", - "Rio Negro", "AFIRMATIVO"}, - {"BALCEDO, María Ester", "Frente para la Victoria - PJ", - "Buenos Aires", "AFIRMATIVO"}, - {"BARRANDEGUY, Raúl Enrique", "Frente para la Victoria - PJ", - "Entre Ríos", "AFIRMATIVO"}, - {"BASTERRA, Luis Eugenio", "Frente para la Victoria - PJ", - "Formosa", "AFIRMATIVO"}, - {"BEDANO, Nora Esther", "Frente para la Victoria - PJ", - "Córdoba", "AFIRMATIVO"}, - {"BERNAL, María Eugenia", "Frente para la Victoria - PJ", - "Jujuy", "AFIRMATIVO"}, - {"BERTONE, Rosana Andrea", "Frente para la Victoria - PJ", - "Tierra del Fuego", "AFIRMATIVO"}, - {"BIANCHI, María del Carmen", "Frente para la Victoria - PJ", - "Cdad. Aut. Bs. As.", "AFIRMATIVO"}, - {"BIDEGAIN, Gloria Mercedes", "Frente para la Victoria - PJ", - "Buenos Aires", "AFIRMATIVO"}, - {"BRAWER, Mara", "Frente para la Victoria - PJ", - "Cdad. Aut. Bs. 
As.", "AFIRMATIVO"}, - {"BRILLO, José Ricardo", "Movimiento Popular Neuquino", - "Neuquén", "AFIRMATIVO"}, - {"BROMBERG, Isaac Benjamín", "Frente para la Victoria - PJ", - "Tucumán", "AFIRMATIVO"}, - {"BRUE, Daniel Agustín", "Frente Cívico por Santiago", - "Santiago del Estero", "AFIRMATIVO"}, - {"CALCAGNO, Eric", "Frente para la Victoria - PJ", - "Buenos Aires", "AFIRMATIVO"}, - {"CARLOTTO, Remo Gerardo", "Frente para la Victoria - PJ", - "Buenos Aires", "AFIRMATIVO"}, - {"CARMONA, Guillermo Ramón", "Frente para la Victoria - PJ", - "Mendoza", "AFIRMATIVO"}, - {"CATALAN MAGNI, Julio César", "Frente para la Victoria - PJ", - "Tierra del Fuego", "AFIRMATIVO"}, - {"CEJAS, Jorge Alberto", "Frente para la Victoria - PJ", - "Rio Negro", "AFIRMATIVO"}, - {"CHIENO, María Elena", "Frente para la Victoria - PJ", - "Corrientes", "AFIRMATIVO"}, - {"CIAMPINI, José Alberto", "Frente para la Victoria - PJ", - "Neuquén", "AFIRMATIVO"}, - {"CIGOGNA, Luis Francisco Jorge", - "Frente para la Victoria - PJ", "Buenos Aires", - "AFIRMATIVO"}, - {"CLERI, Marcos", "Frente para la Victoria - PJ", "Santa Fe", - "AFIRMATIVO"}, - {"COMELLI, Alicia Marcela", "Movimiento Popular Neuquino", - "Neuquén", "AFIRMATIVO"}, - {"CONTI, Diana Beatriz", "Frente para la Victoria - PJ", - "Buenos Aires", "AFIRMATIVO"}, - {"CORDOBA, Stella Maris", "Frente para la Victoria - PJ", - "Tucumán", "AFIRMATIVO"}, - {"CURRILEN, Oscar Rubén", "Frente para la Victoria - PJ", - "Chubut", "AFIRMATIVO"}}; - - private static final String[][] EXPECTED_COLUMN_EXTRACTION2 = { - {"", "Austria", "77", "1", "78"}, - {"", "Belgium", "159", "2", "161"}, - {"", "Bulgaria", "52", "0", "52"}, - {"", "Croatia", "144", "0", "144"}, - {"", "Cyprus", "43", "2", "45"}, - {"", "Czech Republic", "78", "0", "78"}, - {"", "Denmark", "151", "2", "153"}, - {"", "Estonia", "46", "0", "46"}, - {"", "Finland", "201", "1", "202"}, - {"", "France", "428", "7", "435"}, - {"", "Germany", "646", "21", "667"}, - {"", "Greece", "113", "2", "115"}, - 
{"", "Hungary", "187", "0", "187"}, - {"", "Iceland", "18", "0", "18"}, - {"", "Ireland", "213", "4", "217"}, - {"", "Israel", "25", "0", "25"}, - {"", "Italy", "627", "12", "639"}, - {"", "Latvia", "7", "0", "7"}, - {"", "Lithuania", "94", "1", "95"}, - {"", "Luxembourg", "22", "0", "22"}, - {"", "Malta", "18", "0", "18"}, - {"", "Netherlands", "104", "1", "105"}, - {"", "Norway", "195", "0", "195"}, - {"", "Poland", "120", "1", "121"}, - {"", "Portugal", "532", "3", "535"}, - {"", "Romania", "110", "0", "110"}, - {"", "Slovakia", "176", "0", "176"}, - {"", "Slovenia", "56", "0", "56"}, - {"", "Spain", "614", "3", "617"}, - {"", "Sweden", "122", "3", "125"}, - {"", "Switzerland", "64", "0", "64"}, - {"", "Turkey", "96", "0", "96"}, - {"", "United Kingdom", "572", "14", "586"} - }; - - private static final String[][] EXPECTED_TABLE_EXTRACTION = { - {"AANONSEN, DEBORAH, A", "", "STATEN ISLAND, NY", "MEALS", "$85.00"}, - {"TOTAL", "", "", "", "$85.00"}, - {"AARON, CAREN, T", "", "RICHMOND, VA", "EDUCATIONAL ITEMS", "$78.80"}, - {"AARON, CAREN, T", "", "RICHMOND, VA", "MEALS", "$392.45"}, - {"TOTAL", "", "", "", "$471.25"}, - {"AARON, JOHN", "", "CLARKSVILLE, TN", "MEALS", "$20.39"}, - {"TOTAL", "", "", "", "$20.39"}, - {"AARON, JOSHUA, N", "", "WEST GROVE, PA", "MEALS", "$310.33"}, - {"", "REGIONAL PULMONARY & SLEEP", "", "", ""}, - {"AARON, JOSHUA, N", "", "WEST GROVE, PA", "SPEAKING FEES", "$4,700.00"}, - {"", "MEDICINE", "", "", ""}, - {"TOTAL", "", "", "", "$5,010.33"}, - {"AARON, MAUREEN, M", "", "MARTINSVILLE, VA", "MEALS", "$193.67"}, - {"TOTAL", "", "", "", "$193.67"}, - {"AARON, MICHAEL, L", "", "WEST ISLIP, NY", "MEALS", "$19.50"}, - {"TOTAL", "", "", "", "$19.50"}, - {"AARON, MICHAEL, R", "", "BROOKLYN, NY", "MEALS", "$65.92"} - }; - - private static final String[][] EXPECTED_EMPTY_TABLE = { - {""} - }; + private static final String EU_002_PDF = "src/test/resources/technology/tabula/eu-002.pdf"; + private static final String[][] EU_002_EXPECTED = { + {"", 
"", "Involvement of pupils in", ""}, + {"", "Preperation and", "Production of", "Presentation an"}, + {"", "planing", "materials", "evaluation"}, + {"Knowledge and awareness of different cultures", "0,2885", "0,3974", "0,3904"}, + {"Foreign language competence", "0,3057", "0,4184", "0,3899"}, + {"Social skills and abilities", "0,3416", "0,3369", "0,4303"}, + {"Acquaintance of special knowledge", "0,2569", "0,2909", "0,3557"}, + {"Self competence", "0,3791", "0,3320", "0,4617"} + }; + + private static final String ARGENTINA_DIPUTADOS_VOTING_RECORD_PDF = "src/test/resources/technology/tabula/argentina_diputados_voting_record.pdf"; + private static final String[][] ARGENTINA_DIPUTADOS_VOTING_RECORD_EXPECTED = { + {"ABDALA de MATARAZZO, Norma Amanda", "Frente Cívico por Santiago", "Santiago del Estero", "AFIRMATIVO"}, + {"ALBRIEU, Oscar Edmundo Nicolas", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, + {"ALONSO, María Luz", "Frente para la Victoria - PJ", "La Pampa", "AFIRMATIVO"}, + {"ARENA, Celia Isabel", "Frente para la Victoria - PJ", "Santa Fe", "AFIRMATIVO"}, + {"ARREGUI, Andrés Roberto", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"AVOSCAN, Herman Horacio", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, + {"BALCEDO, María Ester", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"BARRANDEGUY, Raúl Enrique", "Frente para la Victoria - PJ", "Entre Ríos", "AFIRMATIVO"}, + {"BASTERRA, Luis Eugenio", "Frente para la Victoria - PJ", "Formosa", "AFIRMATIVO"}, + {"BEDANO, Nora Esther", "Frente para la Victoria - PJ", "Córdoba", "AFIRMATIVO"}, + {"BERNAL, María Eugenia", "Frente para la Victoria - PJ", "Jujuy", "AFIRMATIVO"}, + {"BERTONE, Rosana Andrea", "Frente para la Victoria - PJ", "Tierra del Fuego", "AFIRMATIVO"}, + {"BIANCHI, María del Carmen", "Frente para la Victoria - PJ", "Cdad. Aut. Bs. 
As.", "AFIRMATIVO"}, + {"BIDEGAIN, Gloria Mercedes", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"BRAWER, Mara", "Frente para la Victoria - PJ", "Cdad. Aut. Bs. As.", "AFIRMATIVO"}, + {"BRILLO, José Ricardo", "Movimiento Popular Neuquino", "Neuquén", "AFIRMATIVO"}, + {"BROMBERG, Isaac Benjamín", "Frente para la Victoria - PJ", "Tucumán", "AFIRMATIVO"}, + {"BRUE, Daniel Agustín", "Frente Cívico por Santiago", "Santiago del Estero", "AFIRMATIVO"}, + {"CALCAGNO, Eric", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"CARLOTTO, Remo Gerardo", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"CARMONA, Guillermo Ramón", "Frente para la Victoria - PJ", "Mendoza", "AFIRMATIVO"}, + {"CATALAN MAGNI, Julio César", "Frente para la Victoria - PJ", "Tierra del Fuego", "AFIRMATIVO"}, + {"CEJAS, Jorge Alberto", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, + {"CHIENO, María Elena", "Frente para la Victoria - PJ", "Corrientes", "AFIRMATIVO"}, + {"CIAMPINI, José Alberto", "Frente para la Victoria - PJ", "Neuquén", "AFIRMATIVO"}, + {"CIGOGNA, Luis Francisco Jorge", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"CLERI, Marcos", "Frente para la Victoria - PJ", "Santa Fe", "AFIRMATIVO"}, + {"COMELLI, Alicia Marcela", "Movimiento Popular Neuquino", "Neuquén", "AFIRMATIVO"}, + {"CONTI, Diana Beatriz", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"CORDOBA, Stella Maris", "Frente para la Victoria - PJ", "Tucumán", "AFIRMATIVO"}, + {"CURRILEN, Oscar Rubén", "Frente para la Victoria - PJ", "Chubut", "AFIRMATIVO"} + }; + + private static final String EU_017_PDF = "src/test/resources/technology/tabula/eu-017.pdf"; + private static final String[][] EU_017_EXPECTED = { + {"", "Austria", "77", "1", "78"}, + {"", "Belgium", "159", "2", "161"}, + {"", "Bulgaria", "52", "0", "52"}, + {"", "Croatia", "144", "0", "144"}, + {"", "Cyprus", "43", "2", "45"}, + {"", "Czech Republic", "78", "0", 
"78"}, + {"", "Denmark", "151", "2", "153"}, + {"", "Estonia", "46", "0", "46"}, + {"", "Finland", "201", "1", "202"}, + {"", "France", "428", "7", "435"}, + {"", "Germany", "646", "21", "667"}, + {"", "Greece", "113", "2", "115"}, + {"", "Hungary", "187", "0", "187"}, + {"", "Iceland", "18", "0", "18"}, + {"", "Ireland", "213", "4", "217"}, + {"", "Israel", "25", "0", "25"}, + {"", "Italy", "627", "12", "639"}, + {"", "Latvia", "7", "0", "7"}, + {"", "Lithuania", "94", "1", "95"}, + {"", "Luxembourg", "22", "0", "22"}, + {"", "Malta", "18", "0", "18"}, + {"", "Netherlands", "104", "1", "105"}, + {"", "Norway", "195", "0", "195"}, + {"", "Poland", "120", "1", "121"}, + {"", "Portugal", "532", "3", "535"}, + {"", "Romania", "110", "0", "110"}, + {"", "Slovakia", "176", "0", "176"}, + {"", "Slovenia", "56", "0", "56"}, + {"", "Spain", "614", "3", "617"}, + {"", "Sweden", "122", "3", "125"}, + {"", "Switzerland", "64", "0", "64"}, + {"", "Turkey", "96", "0", "96"}, + {"", "United Kingdom", "572", "14", "586"} + }; + + private static final String FRX_2012_DISCLOSURE_PDF = "src/test/resources/technology/tabula/frx_2012_disclosure.pdf"; + private static final String[][] FRX_2012_DISCLOSURE_EXPECTED = { + {"AANONSEN, DEBORAH, A", "", "STATEN ISLAND, NY", "MEALS", "$85.00"}, + {"TOTAL", "", "", "", "$85.00"}, + {"AARON, CAREN, T", "", "RICHMOND, VA", "EDUCATIONAL ITEMS", "$78.80"}, + {"AARON, CAREN, T", "", "RICHMOND, VA", "MEALS", "$392.45"}, + {"TOTAL", "", "", "", "$471.25"}, + {"AARON, JOHN", "", "CLARKSVILLE, TN", "MEALS", "$20.39"}, + {"TOTAL", "", "", "", "$20.39"}, + {"AARON, JOSHUA, N", "", "WEST GROVE, PA", "MEALS", "$310.33"}, + {"", "REGIONAL PULMONARY & SLEEP", "", "", ""}, + {"AARON, JOSHUA, N", "", "WEST GROVE, PA", "SPEAKING FEES", "$4,700.00"}, + {"", "MEDICINE", "", "", ""}, + {"TOTAL", "", "", "", "$5,010.33"}, + {"AARON, MAUREEN, M", "", "MARTINSVILLE, VA", "MEALS", "$193.67"}, + {"TOTAL", "", "", "", "$193.67"}, + {"AARON, MICHAEL, L", "", "WEST ISLIP, 
NY", "MEALS", "$19.50"}, + {"TOTAL", "", "", "", "$19.50"}, + {"AARON, MICHAEL, R", "", "BROOKLYN, NY", "MEALS", "$65.92"} + }; + + private static final String[][] EXPECTED_EMPTY_TABLE = { /* actually empty! */ }; @Test public void testRemoveSequentialSpaces() throws IOException { - Page page = UtilsForTesting.getAreaFromFirstPage( - "src/test/resources/technology/tabula/m27.pdf", 79.2f, - 28.28f, 103.04f, 732.6f); + Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/m27.pdf", 79.2f, 28.28f, 103.04f, 732.6f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); List firstRow = table.getRows().get(0); @@ -172,13 +139,10 @@ public void testRemoveSequentialSpaces() throws IOException { @Test public void testColumnRecognition() throws IOException { - Page page = UtilsForTesting - .getAreaFromFirstPage( - "src/test/resources/technology/tabula/argentina_diputados_voting_record.pdf", - 269.875f, 12.75f, 790.5f, 561f); + Page page = UtilsForTesting.getAreaFromFirstPage(ARGENTINA_DIPUTADOS_VOTING_RECORD_PDF, 269.875f, 12.75f, 790.5f, 561f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); - assertArrayEquals(EXPECTED_COLUMN_RECOGNITION, UtilsForTesting.tableToArrayOfRows(table)); + assertArrayEquals(ARGENTINA_DIPUTADOS_VOTING_RECORD_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); } @Test @@ -202,30 +166,26 @@ public void testVerticalRulingsPreventMergingOfColumns() throws IOException { @Test public void testExtractColumnsCorrectly() throws IOException { - Page page = UtilsForTesting.getAreaFromPage( - "src/test/resources/technology/tabula/eu-002.pdf", 1, - 115.0f, 70.0f, 233.0f, 510.0f); + Page page = UtilsForTesting.getAreaFromPage(EU_002_PDF, 1, 115.0f, 70.0f, 233.0f, 510.0f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); - assertArrayEquals(EXPECTED_CORRECT_COLUMNS, 
UtilsForTesting.tableToArrayOfRows(table)); + assertArrayEquals(EU_002_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); } @Test public void testExtractColumnsCorrectly2() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/eu-017.pdf", 3); + Page page = UtilsForTesting.getPage(EU_017_PDF, 3); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(page.getVerticalRulings()); Table table = bea.extract(page.getArea(299.625f, 148.44f, 711.875f, 452.32f)).get(0); - assertArrayEquals(EXPECTED_COLUMN_EXTRACTION2, UtilsForTesting.tableToArrayOfRows(table)); + assertArrayEquals(EU_017_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); } @Test public void testExtractColumnsCorrectly3() throws IOException { - Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/frx_2012_disclosure.pdf", - 106.01f, 48.09f, 227.31f, 551.89f); + Page page = UtilsForTesting.getAreaFromFirstPage(FRX_2012_DISCLOSURE_PDF, 106.01f, 48.09f, 227.31f, 551.89f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); - - assertArrayEquals(EXPECTED_TABLE_EXTRACTION, UtilsForTesting.tableToArrayOfRows(table)); + assertArrayEquals(FRX_2012_DISCLOSURE_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); } @Test @@ -359,8 +319,7 @@ public void testRealLifeRTL2() throws IOException { @Test public void testEmptyRegion() throws IOException { - Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/indictb1h_14.pdf", 1, - 0.0f, 0.0f, 80.82f, 100.9f); // an empty area + Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/indictb1h_14.pdf", 1, 0, 0, 80.82f, 100.9f); // an empty area BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); assertArrayEquals(EXPECTED_EMPTY_TABLE, UtilsForTesting.tableToArrayOfRows(table)); @@ -370,8 +329,7 @@ public void testEmptyRegion() throws 
IOException { @Test public void testTableWithMultilineHeader() throws IOException { String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/us-020.csv"); - Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/us-020.pdf", 2, - 103.0f, 35.0f, 641.0f, 560.0f); + Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/us-020.pdf", 2, 103.0f, 35.0f, 641.0f, 560.0f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); diff --git a/src/test/java/technology/tabula/UtilsForTesting.java b/src/test/java/technology/tabula/UtilsForTesting.java index 524b497e..3ee8efde 100644 --- a/src/test/java/technology/tabula/UtilsForTesting.java +++ b/src/test/java/technology/tabula/UtilsForTesting.java @@ -8,6 +8,7 @@ import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVPrinter; import org.apache.pdfbox.pdmodel.PDDocument; +import org.junit.Assert; public class UtilsForTesting { @@ -36,7 +37,7 @@ public static Page getPage(String path, int pageNumber) throws IOException { public static String[][] tableToArrayOfRows(Table table) { List> tableRows = table.getRows(); - int maxColCount = -Integer.MAX_VALUE; + int maxColCount = 0; for (int i = 0; i < tableRows.size(); i++) { List row = tableRows.get(i); @@ -44,6 +45,9 @@ public static String[][] tableToArrayOfRows(Table table) { maxColCount = row.size(); } } + + Assert.assertEquals(maxColCount, table.getColCount()); + String[][] rv = new String[tableRows.size()][maxColCount]; for (int i = 0; i < tableRows.size(); i++) { @@ -57,14 +61,14 @@ public static String[][] tableToArrayOfRows(Table table) { } public static String loadJson(String path) throws IOException { - - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8")); - StringBuilder stringBuilder = new StringBuilder(); - String line = null; - - while ((line = reader.readLine()) != null) { 
- stringBuilder.append(line); - } + + StringBuilder stringBuilder = new StringBuilder(); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"))) { + String line = null; + while ((line = reader.readLine()) != null) { + stringBuilder.append(line); + } + } return stringBuilder.toString(); From 6e5f5c2262dc663d9121aa3d08bfe81b4fd6bafd Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Mon, 4 Sep 2017 00:08:24 +0200 Subject: [PATCH 020/200] fixed for jdk 7 --- src/main/java/technology/tabula/Table.java | 2 +- src/test/java/technology/tabula/TestBasicExtractor.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/technology/tabula/Table.java b/src/main/java/technology/tabula/Table.java index 8ffca145..c031c9ed 100644 --- a/src/main/java/technology/tabula/Table.java +++ b/src/main/java/technology/tabula/Table.java @@ -83,7 +83,7 @@ class CellPosition implements Comparable { final int row, col; @Override public int hashCode() { - return Integer.hashCode(row) + 101 * Integer.hashCode(col); + return row + 101 * col; } @Override public boolean equals(Object obj) { diff --git a/src/test/java/technology/tabula/TestBasicExtractor.java b/src/test/java/technology/tabula/TestBasicExtractor.java index a6803cdf..5d5d985c 100644 --- a/src/test/java/technology/tabula/TestBasicExtractor.java +++ b/src/test/java/technology/tabula/TestBasicExtractor.java @@ -123,7 +123,7 @@ public class TestBasicExtractor { {"AARON, MICHAEL, R", "", "BROOKLYN, NY", "MEALS", "$65.92"} }; - private static final String[][] EXPECTED_EMPTY_TABLE = { /* actually empty! */ }; + private static final String[][] EXPECTED_EMPTY_TABLE = { /* actually empty! 
*/ }; @Test From 1d0d79773773a00bee1bf7cb81c6d7d5a9776a9f Mon Sep 17 00:00:00 2001 From: Giorgio Gallo Date: Thu, 14 Sep 2017 18:47:02 +0200 Subject: [PATCH 021/200] removed deprecation from Page.getRotation() as it is used in the ruby webapp --- src/main/java/technology/tabula/Page.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index 491eaf7b..25f1baef 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -121,8 +121,7 @@ public List getText(Rectangle area) { return this.getText(new Rectangle(top, left, right - left, bottom - top)); } - /** @deprecated with no replacement */ - @Deprecated public Integer getRotation() { + public Integer getRotation() { return rotation; } From 8dcd636b5399c54a69821b0460a17b4e3f03e907 Mon Sep 17 00:00:00 2001 From: Sylwester Lachiewicz Date: Thu, 2 Nov 2017 08:59:11 +0100 Subject: [PATCH 022/200] Fix AppVeyor winows build - maven 3.5.2 --- appveyor.yml | 18 ++++++++++-------- pom.xml | 10 ++++++---- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index f60e8fd5..d16a36ce 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -2,18 +2,20 @@ version: '{build}' install: - ps: | Add-Type -AssemblyName System.IO.Compression.FileSystem - if (!(Test-Path -Path "C:\maven" )) { + if (!(Test-Path -Path "C:\maven\apache-maven-3.5.2" )) { (new-object System.Net.WebClient).DownloadFile( - 'http://www.us.apache.org/dist/maven/maven-3/3.5.0/binaries/apache-maven-3.5.0-bin.zip', + 'http://www-us.apache.org/dist/maven/maven-3/3.5.2/binaries/apache-maven-3.5.2-bin.zip', 'C:\maven-bin.zip' ) [System.IO.Compression.ZipFile]::ExtractToDirectory("C:\maven-bin.zip", "C:\maven") } - - cmd: SET PATH=C:\maven\apache-maven-3.2.5\bin;%JAVA_HOME%\bin;%PATH% - - cmd: SET MAVEN_OPTS=-XX:MaxPermSize=2g -Xmx4g - - cmd: SET JAVA_OPTS=-XX:MaxPermSize=2g -Xmx4g + - cmd: 
SET PATH=C:\maven\apache-maven-3.5.2\bin;%JAVA_HOME%\bin;%PATH% + - cmd: SET MAVEN_OPTS=-Xmx2g + - cmd: SET JAVA_OPTS=-Xmx2g +build_script: + - mvn clean package -B -DskipTests -Dmaven.javadoc.skip=true test_script: - - mvn clean install --batch-mode + - mvn install -B -Dmaven.javadoc.skip=true -Dgpg.skip cache: - - C:\maven\ - - C:\Users\appveyor\.m2 + - C:\maven -> appveyor.yml + - C:\Users\appveyor\.m2 -> appveyor.yml diff --git a/pom.xml b/pom.xml index 63608059..a71f567c 100644 --- a/pom.xml +++ b/pom.xml @@ -154,15 +154,17 @@ org.apache.maven.plugins maven-surefire-plugin - - - -Xms1024m -Xmx2048m - + 2.20.1 + + + -Xms1024m -Xmx2048m + org.apache.maven.plugins maven-eclipse-plugin + 2.10 true true From 4e15a5034e27099d20b2d509d0e5b5a1cda2e557 Mon Sep 17 00:00:00 2001 From: Manuel Aristaran Date: Fri, 3 Nov 2017 12:12:45 -0300 Subject: [PATCH 023/200] PDFBox 2.0.8 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a71f567c..bb876ccb 100644 --- a/pom.xml +++ b/pom.xml @@ -245,7 +245,7 @@ org.apache.pdfbox pdfbox - 2.0.7 + 2.0.8 From ec405abb882c55196ff3f59409c4f06986e375b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Wed, 17 Jan 2018 10:57:23 -0300 Subject: [PATCH 024/200] Include JDK9 in Travis --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 46e41e94..cb01b361 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ script: mvn test -Dgpg.skip=true jdk: - openjdk7 - oraclejdk8 + - oraclejdk9 sudo: false From 3d98ddf19316ad418fc62df41d9f0406dc8bf120 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 27 Feb 2018 11:55:51 -0300 Subject: [PATCH 025/200] Update copyright notice --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cd9717c6..81392fca 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ tabula-java [![Build 
Status](https://travis-ci.org/tabulapdf/tabula-java.svg?bra (This is the new version of the extraction engine; the previous code can be found at [`tabula-extractor`](http://github.com/tabulapdf/tabula-extractor).) -© 2014-2016 Manuel Aristarán. Available under MIT License. See [`LICENSE`](LICENSE). +© 2014-2018 Manuel Aristarán. Available under MIT License. See [`LICENSE`](LICENSE). ## Download From 1c121b662b6bd3d65eaf5414d144d09837163da5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Thu, 1 Mar 2018 15:32:32 -0300 Subject: [PATCH 026/200] fixes #206 --- .../tabula/ObjectExtractorStreamEngine.java | 7 +++++++ .../technology/tabula/TestObjectExtractor.java | 12 ++++++++++++ .../technology/tabula/npe_issue_206.pdf | Bin 0 -> 12736 bytes 3 files changed, 19 insertions(+) create mode 100644 src/test/resources/technology/tabula/npe_issue_206.pdf diff --git a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java index 70181454..700d1fe7 100644 --- a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java +++ b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java @@ -195,6 +195,10 @@ private void strokeOrFillPath(boolean isFill) { case PathIterator.SEG_LINETO: end_pos = new Point2D.Float(c[0], c[1]); + if (start_pos == null || end_pos == null) { + break; + } + line = pc.compare(start_pos, end_pos) == -1 ? new Line2D.Float(start_pos, end_pos) : new Line2D.Float(end_pos, start_pos); @@ -216,6 +220,9 @@ private void strokeOrFillPath(boolean isFill) { // segment // back to the point corresponding to the most recent // SEG_MOVETO." + if (start_pos == null || end_pos == null) { + break; + } line = pc.compare(end_pos, last_move) == -1 ? 
new Line2D.Float(end_pos, last_move) : new Line2D.Float(last_move, end_pos); diff --git a/src/test/java/technology/tabula/TestObjectExtractor.java b/src/test/java/technology/tabula/TestObjectExtractor.java index d7ac5a69..fe458b87 100644 --- a/src/test/java/technology/tabula/TestObjectExtractor.java +++ b/src/test/java/technology/tabula/TestObjectExtractor.java @@ -126,6 +126,18 @@ public void testTextElementsContainedInPage() throws IOException { assertTrue(page.contains(te)); } } + + @Test public void testDoNotNPEInPointComparator() throws IOException { + PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/npe_issue_206.pdf")); + ObjectExtractor oe = new ObjectExtractor(pdf_document); + + try { + Page p = oe.extractPage(1); + assertNotNull(p); + } catch (NullPointerException e) { + fail("NPE in ObjectExtractor " + e.toString()); + } + } /* @Test diff --git a/src/test/resources/technology/tabula/npe_issue_206.pdf b/src/test/resources/technology/tabula/npe_issue_206.pdf new file mode 100644 index 0000000000000000000000000000000000000000..352e77ba72bd14925cdbb9aa2b06b09e4dc9f3cc GIT binary patch literal 12736 zcmb_j2|SeD_ea?#L{UhZY*8`$W-0s5*oBJ0U>Jj$u|_FbD-|hAiy}*eY$2qmC`pp- zEg~r?OZKS#&ok&%@B99?&z~O8^W1yxx%Zy)z2`aS+?%YKwyq*V2@8>(8C>jxAfRxl zkFzI4RTToW3ic&KVOk_6iR$AHf!UDh42lm83PUKNAuuzNJDCAR0^}BuJ$oQznk(20 zwW_%molIi-(4q3WbdopOgyco0LlFoixB}3a?&Iq30=&y>xX^u^Nld6Vg~6mysmmnL z{qnXHfMXzIzk&+X+sBpSMj^XG{TURRJCtHY4q`%)N(dz!)HxW6utM##0(e?cm{c-U zUY{I9atB6fgGEAtZrp*sy~*?)45$U!jZ7!gT*y!p63w4P1)3Ow@YAS(nCK|r7pcqCF0j^6=??*K?N{Fxr0Sj>SzxcGQ`L#@az9yA|7#uPGR zH`IXUq6AQ}qz8!mcW!V33W31bE36VasDxj2#!?P2KMF862Q;Xo0{}-Ke;N~tSk(}$ zS!t+6_wm*A34%JnmEchD0>%#f#SwtNKw~o!AUiiPV!vrM=Uo}vW^~ykv$uxJS2NZ>e11Qv` zP?><%V7gSG37|wDS2Ea)!3419%?}B>_|)Ex@rUk)f*!r^alvaXIC&UOT;pld+sq#q zi1on|6-<~Sl_d13e?H~}EF5G-?GN-XnURw3}((%`&rVI9p3zvs# z^Gn`$l_~hoXcd@ryFs3YYsL4~)HI&R;_IGFy!|nZpk$poKNG2J5CJ2<3>FVLd=}R= 
zHMmuIKc#gdN~Nvb?|XDf;CnxA|5s0w%HHQH?&@nTl{;h*U7H{~Mn6%~LPQiwam+q5 zZYIb_^?bA#Lp^%ga4~K!VHkUNF6lh3xcXh0_l~=7dWx~uTQ2l8+L#=ZEu33b-p ze6wV)T<4df{W)Vb=I(j#rah$&+z##^=Pthgi6e0TH(Ydh8r9@g@pb8P?46ndx4gO1 zWM}I}H%hO953=866Oyb_*|g4*d!#im0p+qul~iK?_?!`qMlO7+I-`_)2nN%oJMQ0{ zJZVqp%V~=&_ip#ZRvFg&Tc^s!i`!c3>gueQQue>}MuAvWOm?!dNmnUOIAcJxk!#uK+o@Hf)D)*MGwDeftueeKtM zj&n$wc<$>oIAAPYTAjMaZO@(F0=lwu7W1sih6_U_0fwIpUx#m1f3dEoX-xQ8ld0!7 zKZAss?j2{+ZZcX2QDyV;mr(vH^CFsvZloR~Qtv`;-e=9IY{9f=SXK5;S;%=m4|;|N z3=TP&KJipJV0YoU{5gGxmYj`;4x?N`+bs6%q-p7&BHq0zb^V2%45Q}cT9d)RuLw!m z%!Zo_+O+vies5BEhLY5%6(5V`a)a0!WOA9Q^MyIdvrXvIYgfO$lQHazEk1SLC*Y2Z zLV5QU!tU7A@dy3^yyM3Pv3KO`qNz1!hafkO_fIE!1PVmGo1}~>?ig>s6wIr+z$@>p z$n77r*8R5G{+#-Y4n?Su?y>fWQ7V_w(KX^&IM-~@V@_j7+3#ODL!QLWdq;ow*yi~C zo*De4P}jCsQfa8i`^%j~XuNphV8FW{n=C?}q&!p>hi;3N9b5BSzK2RTIC8pUZk;x@ z!smfx3LjeOT6ud}`d%}m{ciOcN)BU$5$x;Z+Y)5kdXuc*pSf|SE9(GqZ;`5C2CsmP z{YEjRU0+i9uE`NU)XnmVD@rH6I&(|vc4BAB1yilBs!8v=Q_+I2hh3y>8>u)OTDy+uds6(B0Qs1@JB#$d4 zwcrnq^CF+MJ8jCR$!{A1LvO2C4|SM0Ic|Nw=}uGV$39t_U`ZK|0F^oa#p6w>r^)4l z(Tmn%^$H1A%=H4uy5R0@1a4XN1=u8iiI99}@6Kes*I3=^V;y&5BADVvcDdVorrna> z>Kj2$X31l_=lpjFpV`@B_*OTQt6<`K#FopwU+O+L3TEuI9*2dN&M+Os7I@Z^g-K%p znMSRSb%W@SZZaj5H(6l9~=b7^X|9VgdKCvwVZzYOtgA2 z^y|adnqU7g`QEifpXndL>8#PX@l7C;lPFa67BVxNtvaFCiU`*> zx_9LUhE&MSi#*?eKs?7eb!A+8;ek`S(b~@Si#7CW`tx`%w z3jP9DoyykHySmASLLQ^yPFr_B@vJaRQz|Hs3VJ5@;zu)V%#)H`|G+iPHod-25f$Bx})p(%q9t|dkPG;N`59kn>^$zeW@a9a~rX;@s(Y@Xsdg}2;*h^j&8>u#n zuk0FiTnW$|13Tzx(lqFt_)pyw&cYO2uq>*5~`Y?wE+MO?MaaqpQ%i z&J8KFiedRduP&UWCL45j;N*w86NO#QJF@C2#M3uF-A3kJi%yxqaPoW3&hTpzPc5ve_EAk2~es-B{-=p5v?K9WrKC##B!B4Q) z6%U6W@cOpBE+Eik`rNHB-3dX}*WA;Gk0u+6ufK=7boBTX>WzWt2XV+QWrxHVO=p-k zdv;W&z{QWLH|)(s)pZ#i`Z2ufCu#UFV$iL|AmfO`^(l9<7B6pRj)d@hJn0Y<+{e%@ z*U{YoeL8EgSo8`mL%PEIaILx!pGI9PkErI_2%=@%(XHy_%%lhWib^@9Q8GyVIHIL! zws&f!wEdk1*)5gK&=%ucp@e6g?A|Vq4KGMF<;ryp^+? z*D@QG56ACc|ConIa-yswM4-mxz@){k67M_f+V-W$7>``)Qe;RRVeare9zuMvH9|1! 
zBge2@WvARH3(uH^NMZJ_W7F^_Jhf|j>bOpJPOGcL@AMNou;}hHO&k`3Z>(~*g5XRY zA>WC8em94W?eWL`*4&KkTbUj z%ZtY*j|ubJ#|0@AB}(rvs3;!wHPo?AWn1gQbL4$Pd@JXk;@i^;vX>EKFI@MY?7md5 zepz?6srS{Fn#Kbqxa`ljU&$PK{8sr;hzHre>5pq`H1$L9#mcH<>@TzHAl+?8>vmRV zQ@-pQZo^!NSa<#*YFf9hg8#!H0sYQirPCu>ITPI(h@!nVKapkM?xaw(aHHc;L)Hdjz^&g1GO5 zV9AAqj_WPK43wyg3VE^tLDVp=T%1XFblgV6`?l1XBVMB{4NT zSy@S&ZEq1IM>ppdmk-Fi(YWy`!}VU#n-6>JqLACS8%g>@_o{pE)SKyv=0(v82Nr^l z6CB4Tj7$qh)w=Gz#Bi@4`Z$rneP~yi<;ZMNDT;%btG-2Eh*vI(jlcd>MC9u`m%ceq zg?WwgZb84iTzP)DsyK8n6@FlN(}lRCV`PDMT3x4e;uH9#3ki7mL3*Ew5^ux&$+S-7 z5v3ZhmZ||{is0tLZSpRGt`;}5A3c9K_A*wWJu~;bb{3BXkzIZ48@Fxo7+0NmNz>@8 zt=~>jN0pH&aosS~aKcf03ocq%>J)u#@TBort98rvi8Or0{_j~WgcxdZ_q1~D@zNG9 zfrAmVh@hJm0&iXJ$-$7kqCB*+sN|I$PuO>S?s;8s zb9ANuQ>{HG^5#x#sc{et+-Afq&f+~SktW13qnz7ZT+$R0x6*fo-U z>ssIPRIM%9cfG4ue>O8>I0z*sj8Q-GN(-%B0-?BtUC`f<`~qiU6*PQxzVN}slwlWM zWlDHfdsk}j{`KMt_QQIQvNqq;9~nN|-Y@$7#MKD{p`_De9A0XC?6cKxG%`My#ui@I zx%PRE%`qw@Hc@=TB!+PJl-sW3a}bfJ+^2&#`IF`kbcEqvESO%)B+AVuI6i8L z*rRiOy-k%-Q5ewAMg%zVQz7T1Hh!-w8*AAeTgS*Ba-oO8 z@B7A_Y}{70i7P;mVP{(JdSakoE!Dm^&tcwk>Ibvzp|6C6bWVP>?Hbas=RH+z+b6rT z?+ize1_UP7n|WS>J3NV2O!%>Gcf}mc=3C5!nN2NiruIiLM^x>WZ|gZm%N_Z_oXb+6 zyOMnY1MZbDStp38XUPhr!Bmc(B?5|Ph2ubU45nXPEXho$0}M$1Kw)4$g_R$m`p_+X zNkE1KY@)@$0t5zHTACyVkb5C4Mdx61T_ECTkyu&bp$H&b0Hy?VQ9yEM>Cwsx2}P`A z3aqeD1eg^9vj$dZD1w!Z0AqCE-*VCj4E$Gyy_9oer34@_Z88H$!RTR>5C|NQLjf~K znn1n=2>2bqUctNskf>p@(p1J2nir5e0FyQj2sBEGKp;S|c%%{*kANcZ1SK2-cqgF2 z_XDf?28FWHH_^-lNRPmLe4%b6Dg#JuEDh$TB!S*ER;q|*D^HdtV7dgkiYF+l6`sr4 z3M7Vr12R8gvVsOCH-M=HP+Z6sdYTZJDHQc@6B!=87taS;S#@rTepVA0a`h>^O)Ml) z2_8PB1l_vUT-1$A^f6mKp9wW4M_pS;h+XGQdN_N=RsH-J=k4-2PV4mr&79$JIXSMn z9fqO7!DH{<%|5E>>v11r%yu8TQl`WvL)@kW<2=s+P1*5=I|Tp2Hd3$k);Bh;pm6pq zHn`nzZmq`XJ#`rvZgWUgBk$sgUOsmH#ai-*VPbZ~4`N%sRV^D^qhrGm)l}K2VP4Ag zv2bSGXOvXdLzy@+-PjbEp;-gnIo@IL`jxOv-R8}oTqPYE1}<85H&GF@F+qo-&PY2q z=rwSP?KjPe@j0;%Wsuw~GU4DyMMkFxu*J!gufyjD38lp(H>?+63Qf@2d^0Ox_@Q;1 ze2do(XjNurx=mL4O>k5x8J(tidOuA+L`Np#IgVc4o0!x%STd}n(3ER@5X*ML{rhe) 
zKkwmdQVyy14))@yVkw8sbAD9K;bccYOVvQ;UZG+-W}`nBE?RtKb83$BEE!qv(0rIp zxts$U_+x$A&~`Q%oiMMv6Aywt27+TZ^H_MFJcUA`PE|;pKU>S^4QJY?#M9`mfVD2H)=qzb44OA=vEIbl<{I#Cpzw zw9Q${xA3!9IKM3v_{PqiZ8^0d@qUA-u{-a)*a@!zFW%O0HW_x2#x)}8?98Lv;cIN_ zHsLvNMI3WSodg9sI5x#_H-)c9YAAK_LDYr1IA@L-nyvY$A??AwL!BGXuN3~|jdBI= zXgIgi+LICQow%ymCk5A%wj;HV4sN&BUxy5DhH|;CuhiIM1{D_2(ee{qzd^IIbyE({ z&d8lt?Q?{_Zc^qv5MH3Urd7>DIQ7`!$liJ$<72r+$b9}S^?Y5tGf@%!7+;BtT<`1V z$4vnzF5>jP&*PkP%AwqnN3YbE@TkY|@SAJ#32_Y^o7*m=zx%!(78^^2P(nCVftGJxcoL&1Xl|m zT;~%pTVFEKJ+~oLt#;j!HGAW)?9kuH258hFEHuUUb@}kW# zDUwXT?7j0{^5(M7l@Rh1nj2ubvcoc!Hb}kV{a-Q%`I{&T)#u93oe;M4jIc^-SI8yh z+EL<4b4#xupUUPSBV z)e!TIIXdk=Ip5mF?8Lgs#aD~%imi*?UG+S8OwxvQjIJfMnzlxHj9rk2`PtvMD|Rt> zl5kI!L64)y(@qKwH?oQeNdYc!^mQYn5*_T?eF$fSH*|Kgj^2E`zp{KU89R}|NFA}b44^i-;ylx|DC0tD;MUhJSbA+BY zK9YZyQ~x3IR+iuK3EJC(Z;5i7MQUX%GduI7OMCKvyc$;DaaeW@W zl<)MEd&rjJh$o}=X1hZb%k7vBp{1q8gAV2UOzkN4Iu6zj{HPqhBf5yUahRT-?vRXW z%19vgJeDJEBld2Wc#m8Uw%b&x)2_wt;N8$>)7SA7TQ8h5@%HKRy{>x)(ueApIgFrz zoc_iDs_#YLHin_E^3c@%?uL}HS3|1ro!=JszN_9f@Uh?iL(iDp+f4&D!!un6UaPg# zbca2xzwdJCQ82MI`}TXzHon#R`c=mLix+&6s>N)ir72#&ljm^seKKdKKv+p{qjtLM3^xZN?6$Nm7LD1PP_O z?j{|nd0rwNayQjB%8ETWsVsIXUf={R?t9&oO9#T;rP}$1i`zb>{ILA@RJ{PPLvb#T z6y6y(^gKBK(DY&VgU_*Pae*-{cpq9LP!?ZSpkk{S$Qb<)l099vC7mC!r3&XsZSKR@Ho3fb z{`C0M*t8>!sHR;{57o$0jcjY_-?J8P-IR1lTPFjV?uQZEp0Ms|ALKIgO{k zy?0*xUFx;e+31Ss(Tb!9_dx!*I`e5JC4MgcY|D7dQOjMX{(;>AWnn&3-_E}9%+70RysSVl!lxYx=-=N` z**%&2J{#6>Ky)Pj<6&RFmk<6>V^{ljp&E9e^3&mY=R(V|%3T4rrjBKmAs@xx+CTDt zGV(cWQ<(SOeT%IfKHYtX>p$wgHyKGgu;vMOi-0oU!HRbwBVW7=|L7dcuSUUzk<36m zJ!nvAVRW!2^AK#z{neEtMZ?>_lKQ1{_1>LMlI;AJGwU)E^R6+)yz_bO4h^E@$y*OE z|2X&kMfAjC_F(qAjW;%)N_0!i&VH3m+vD(!x)}U%a(?*OmFq_e4qsguxZ@~sWH`+Z zgZiHEgWlgcVEd&ozWU%y)uE0fGZSfpxJgE&&$~X!ut!0i3wu6&YX}|U{uCW|I%M0O zJ;6spD$Z0Fji<4#VVgZZx$rj;0n0o8>5GyWOf3%*unUXE0-iMB*0Vf$m9?eB3PTYe;x8ttO;z- zt_=B?5rabg%ZL$J!s;=zK>1Ij{BN-cY7~XRtY{C4#{$|N`T}vWf<+L+I2`^bib1@9bRgdq zECcndvK6}D@k~IYfAR+T0sRBm0!M+y0y6>E)z7i9@VJEZpXdfDKvaVlSpRnj2WkEt 
z#=*8I6d*V3(tTy#EW`sNLcjrn-+osuf@Se95B5JP14kaJ~hykiy4L=Yjph|&q7W7!M2Vl>l z1M0wK0X~D*3f&4US&&_>1EIG<2bP1gSennu9MJ#;IPO(<2-w^5Tv+p4nIm`yW%Bzx zS4Ou&2iCJjwj=`x$QupF5efDK+*kds!q?BzoUekx_`u~D#KwqmTk3<0m7LY1% zvBUuC415RgEd4DRpB01jk46-11M&q8C}0u+Y%S@01#RH{ry*QzK$lR>YVp4s&=p04 z$OlFM{9Rcu%Xb!n!DsMVJu~o`^+{Q(gRIc7J}Cena8}DamXHpV0sL7jdbJc;&%nGv zEdC7+meZ^MgoD+Gq?R(5zd91Ee5wn0ic1lR06y}TOtQBP6pvBDW5EO@;B?rMDefLj zz+AzB^e@R*f9aLAGFbJRtW#Wy7~rd~h(cn3#4d&aB@pn-W3>cMT`^repfFP(x;Kdm zf$jU11C`{?fD!=L6YNWu=MzZmYA{$sBftTZ4Az?hX9l5vaZQ(8R1KOt zaMVu`u-<+*lcB*j_)Fooew~%M?(00BxcDi1mRec3 z*Q=O)@a!cTT-~b)EA81%c?gjrjs; z{xj@fqWO!j3)sA0{IVt1ZR&fJnngI@4G3Sk=IGsXKWx;F85bWJRSiDwJUaQHOH87= ztuapdIC9qwZfMGEx22!=^cClcuHEH#@;CSek`i}b`T|MU8%p-B4U9k2tk^#y+dDs) zWcZ3xlr8Fv$HBjgBUb0hLG>&{0RdD22y6%Z zX*U0UNGtvRvRv>>egB6`1&hTjEfwG+#ah?c|7NWKF3f+sR{nXytk1{_$p7txfmOO} zAC|}PA5Iv9#RC(DP6OZ30!9D!aQ*0Kz6|HeLIk)RB4fP1|p1H^x2ApZxY1F`$khy5QX{!7HQbbuC# z2Uf?=?K2P%2rWRS|N20!N3XN16Uo`h)m4~xe8PW{(GhRBTc6R?Y(zBNt-uwVw|IuM zxV`0z)_J}Bk=tJio3HHHM{bh}y8G@n)kI*v>|n;cf$y=ay!^ z%}wTPX=eFm=j<=qTX6|b^ZD7ZRT{Am!6TcUBel~*L*!adUTYVQOt{u?uKiWRHEQG{ zIz3kXUUfr4^I~;_MC6&r$J$F92CX>Vb1>>8HDN~%c4-%tHHn&%UDBWEeYO+q&!6>< zKHYnm>b8IO#ie=o^IgQ#cSnz?bcwiJW@+7^LC|1M|XVEteDA#mtbb7Rs;z( Date: Mon, 12 Mar 2018 12:54:55 -0700 Subject: [PATCH 027/200] 1942-update-tabula-java-for-multi-column-pdf (PolicyReporter/requests#1942) - Allow multiple occurrences of -a parameter - Allow -a parameter to accept % values as well as absolute values - Add test cases - Add test files --- .../technology/tabula/CommandLineApp.java | 46 +++++++++++++----- src/main/java/technology/tabula/Page.java | 18 +++++++ src/main/java/technology/tabula/Pair.java | 19 ++++++++ .../technology/tabula/TestCommandLineApp.java | 40 +++++++++++++++ .../technology/tabula/MultiColumn.pdf | Bin 0 -> 8336 bytes .../technology/tabula/csv/MultiColumn.csv | 44 +++++++++++++++++ 6 files changed, 154 insertions(+), 13 deletions(-) create mode 100644 src/main/java/technology/tabula/Pair.java 
create mode 100644 src/test/resources/technology/tabula/MultiColumn.pdf create mode 100644 src/test/resources/technology/tabula/csv/MultiColumn.csv diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 66de8b05..c0d321f2 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -6,7 +6,10 @@ import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; import java.util.List; +import java.util.Map; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -34,7 +37,8 @@ public class CommandLineApp { private static String BANNER = "\nTabula helps you extract tables from PDFs\n\n"; private Appendable defaultOutput; - private Rectangle pageArea; + + private List> pageAreas; private List pages; private OutputFormat outputFormat; private String password; @@ -42,7 +46,7 @@ public class CommandLineApp { public CommandLineApp(Appendable defaultOutput, CommandLine line) throws ParseException { this.defaultOutput = defaultOutput; - this.pageArea = CommandLineApp.whichArea(line); + this.pageAreas = CommandLineApp.whichAreas(line); this.pages = CommandLineApp.whichPages(line); this.outputFormat = CommandLineApp.whichOutputFormat(line); this.tableExtractor = CommandLineApp.createExtractor(line); @@ -156,11 +160,13 @@ private void extractFile(File pdfFile, Appendable outFile) throws ParseException while (pageIterator.hasNext()) { Page page = pageIterator.next(); - if (pageArea != null) { - page = page.getArea(pageArea); + if (pageAreas != null) { + for (Pair areaPair : pageAreas) { + tables.addAll(tableExtractor.extractTables(page.getArea(areaPair.getRight(), areaPair.getLeft()))); + } + } else { + tables.addAll(tableExtractor.extractTables(page)); } - - tables.addAll(tableExtractor.extractTables(page)); } writeTables(tables, 
outFile); } catch (IOException e) { @@ -200,16 +206,28 @@ private static OutputFormat whichOutputFormat(CommandLine line) throws ParseExce } } - private static Rectangle whichArea(CommandLine line) throws ParseException { + private static List> whichAreas(CommandLine line) throws ParseException { if (!line.hasOption('a')) { return null; } - - List f = parseFloatList(line.getOptionValue('a')); - if (f.size() != 4) { - throw new ParseException("area parameters must be top,left,bottom,right"); + + String[] optionValues = line.getOptionValues('a'); + + List> areaList = new ArrayList>(); + for (String optionValue: optionValues) { + int areaCalculationMode = Page.ABSOLUTE_AREA_CALCULATION_MODE; + int startIndex = 0; + if (optionValue.startsWith("%")) { + startIndex = 1; + areaCalculationMode = Page.RELATIVE_AREA_CALCULATION_MODE; + } + List f = parseFloatList(optionValue.substring(startIndex)); + if (f.size() != 4) { + throw new ParseException("area parameters must be top,left,bottom,right optionally preceded by %"); + } + areaList.add(new Pair(areaCalculationMode, new Rectangle(f.get(0), f.get(1), f.get(3) - f.get(1), f.get(2) - f.get(0)))); } - return new Rectangle(f.get(0), f.get(1), f.get(3) - f.get(1), f.get(2) - f.get(0)); + return areaList; } private static List whichPages(CommandLine line) throws ParseException { @@ -307,7 +325,9 @@ public static Options buildOptions() { .build()); o.addOption(Option.builder("a") .longOpt("area") - .desc("Portion of the page to analyze (top,left,bottom,right). Example: --area 269.875,12.75,790.5,561. Default is entire page") + .desc("-a/--area = Portion of the page to analyze. Accepts top,left,bottom,right . Example: --area 269.875,12.75,790.5,561. " + + "If all values are between 0-100 (inclusive) and preceded by '%', input will be taken as % of actual height or width of the page. " + + "Example: --area %0,0,100,50. To specify multiple areas, -a option should be repeated. 
Default is entire page") .hasArg() .argName("AREA") .build()); diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index 25f1baef..959844ca 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -21,6 +21,9 @@ public class Page extends Rectangle { private RectangleSpatialIndex spatial_index; private PDPage pdPage; + public static final int RELATIVE_AREA_CALCULATION_MODE = 0; + public static final int ABSOLUTE_AREA_CALCULATION_MODE = 1; + public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage) { super(top, left, width, height); this.rotation = rotation; @@ -48,6 +51,21 @@ public Page(float top, float left, float width, float height, int rotation, int } + public Page getArea(float top, float left, float bottom, float right, int mode) { + Rectangle area = new Rectangle(top, left, right - left, bottom - top); + return getArea(area, mode); + } + + public Page getArea(Rectangle area, int mode) { + Rectangle newArea = area; + if (mode == RELATIVE_AREA_CALCULATION_MODE) { + newArea = new Rectangle((float) (area.getTop() / 100 * getHeight()), + (float) (area.getLeft() / 100 * getWidth()), (float) (area.getWidth() / 100 * getWidth()), + (float) (area.getHeight() / 100 * getHeight())); + } + return getArea(newArea); + } + public Page getArea(Rectangle area) { List t = getText(area); float min_char_width = 7; diff --git a/src/main/java/technology/tabula/Pair.java b/src/main/java/technology/tabula/Pair.java new file mode 100644 index 00000000..d54cbbe5 --- /dev/null +++ b/src/main/java/technology/tabula/Pair.java @@ -0,0 +1,19 @@ +package technology.tabula; + +public class Pair { + private final L left; + private final R right; + + public Pair(L left, R right) { + this.left = left; + this.right = right; + } + + public L getLeft() { + return this.left; + } + + public R getRight() { + return this.right; + } +} diff --git 
a/src/test/java/technology/tabula/TestCommandLineApp.java b/src/test/java/technology/tabula/TestCommandLineApp.java index fd79e9c2..81b30d4b 100644 --- a/src/test/java/technology/tabula/TestCommandLineApp.java +++ b/src/test/java/technology/tabula/TestCommandLineApp.java @@ -143,6 +143,46 @@ public void testEncryptedWrongPassword() throws ParseException { }); } + @Test + public void testExtractWithMultiplePercentArea() throws ParseException, IOException { + + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/MultiColumn.csv"); + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/MultiColumn.pdf", + "-p", "1", "-a", + "%0,0,100,50", "-a", + "%0,50,100,100", "-f", + "CSV" + })); + } + + @Test + public void testExtractWithMultipleAbsoluteArea() throws ParseException, IOException { + + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/MultiColumn.csv"); + + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/MultiColumn.pdf", + "-p", "1", "-a", + "0,0,451,212", "-a", + "0,212,451,425", "-f", + "CSV" + })); + } + + @Test + public void testExtractWithPercentAndAbsoluteArea() throws ParseException, IOException { + + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/MultiColumn.csv"); + + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/MultiColumn.pdf", + "-p", "1", "-a", + "%0,0,100,50", "-a", + "0,212,451,425", "-f", + "CSV" + })); + } } diff --git a/src/test/resources/technology/tabula/MultiColumn.pdf b/src/test/resources/technology/tabula/MultiColumn.pdf new file mode 100644 index 0000000000000000000000000000000000000000..197df402af0f4c7a4a2b025385ffe09147de9dc2 GIT binary patch literal 8336 zcma)h2UJwcvbG{1jASGT2qRG>Ookyz9-@GNFyuI77{Y*LC1(&&vXX-!ISUd6lqg9+ 
zBu9}bAS&q}yyxC?&OPs~_wTj#?C$ES>guYmde*Aq(vU+5Km_3cuFky9=FYm#OaK%J z24cQ#3&+TL&D*6@+py!(rqw7FbKn-MaucoGZr65#X5-lB^O% z2e}@6bjs(wm2=NE!T&J{$9l(%RM0h-wJU}sgWat=TV;xjDFxM{#(d53}AYhiZ^90Aywy;W6?Bo!gHSckD#@0I$b15vH zq3AVrFCkIAgK(Z=_3BbsGDCqkBAS#~$2NV0JT0H$G6f8zir{!k3UI1~Pq@W95h6b~ zOO3~9;MImq8V)^pO+D|tPw+rkCN)c~zD981@bgpVD-^;Ys?Us=z`$u&$qgRckG?0n z-#WC*Lm};Lb`+bMDzMlFmty8@RgH?i{jEU%yB`Cl_Ky3`h954Qswu8zjn|k_YMpDC z+C><;Hr~`<&diNPOWeRp(AT+eO&D)S(KlukxvRcM()L{G(O{Bo6y9#@0zEuQyU!iF zR1;;uX+qE_slRNFuDQi*y(l%`z;DU>PGW?ep{H?4u{#xc9XW_<5q8b2dDrFg1)1Q& zO+ggv#W6?UsH?xc*xxk1ejo&y}qC;!!+5_yq&nZvF zGx5{3$Bdb_w?&VQZnh>m=gOlw18)j+T&T@&tcEetKDhH^_!l4EjI(qxv7m=m@c{o| zsC;{z_J_|?H`G+NH_41Wh=^JX0#^qIO;7RC%)GAyMF!A^{m-AbPgg49KJ6u3h=0-( z6$=u!z>C++0|5Scu1SM~^QCC~AqDF3yS+@$uZDSY(BmzRD$Sao7#kRwy7B5-l$5El#lKIVCZ_d@CJZ!viu|K@S}@F1zhCk#Q-UcxLEy7L;NF) zh5nZ;{v*pFKoN)#@~YQ11??mtrVCt4T@ol zAXNHL%W1$~m#5qjoTv2JK{1>pZ0l##szSy_ zpH2>?AdsS~X$i;%Y}q0huDRNR?~ z-X3wNar8P@)ZDj`JwZx+SL9DqBp{CD?@dCBGk*1~IwvQ@Ndq zT;4vib4k#t^zC4wQ|}6?=yEr~J;MI5+)G+)(9Mms?UY{!=TLyw>K>u^RZyF>C#BOU zqtkjIcoWsT`88};C9kUFT3zSd0}~dp^t&?+DCz#Tj|2JwXcl?2JSJhbd!Ibn5pp?*kINzH8fs(Rr=Dnt|rGf zqNbt$zNTrl{x{d^0X^2g{kf^luGyUBNww9*aY4HHImwC)*kWhbTCB`<$}shxf2 zE{;yqfSE@rQrS0Qst9UQv)8gDGmyH6PYY-e4uoH@q7iA8nnUp1+#aSisahFhyd;ktdjFzGOK? 
z+;Q0~)qbntQi0(rJa9CJj@8Y) zCjm7J@i%#(CIz@%@2!1bdgkf6az3l3;{&_=u^gS*!!VkD@gf}+0m0fxJ;$9Fd!zp2 zT=x%GbRr7Q2vTBkd!3Ef<#QQ(OCMb(E|#fQel3C7tZJ(K`0}Yi&h}GKzN%`>ca^a_ z`Q2`=nh=I0=GWpj=H?qKS3Jf~!*y$a$yk=^s?5yHlnqYTb>;$AoR~D91fIuePTBdS zI-&}`Mom7(v@e>5^!1`|P^iB95Em}`dLXf#_h4E*VDr_t;=nkUK{YqPF{3pu4VJr| zSW&+v@#1MQ-`w>lhUmR3k)ogCMr)%GsH$SG%*nny?UtA_7w<;?t|B3#4()OLt*l{R zM=(gPrKBg4aHO>AG8{{B$KfX~CfF+FX7chk+jEX=8n{&D2ge7g((8vsuGa{va@_k7 z#%prqU18+c>UXV*)p!f&74N}jFlLP%60Bu8FJBT3-1%{mmb8J}^OHB?HBsRvv{L!B z>P5;_z}=&(ar}iCeR-Ci+i)yeOJ_mZi{MIer#0n#3v(8$PwE|?%1-0=vE}=}pXRRsRDm z0bsga?ETKSk)MPHIxGM;xHE=Uh1ASz`ORe2B{Vg@Dc>sYHDXO-=A$gH(jhZ;E7g7D zR-xC`0hPW{RYeZZ&)XYypv{gU)Vo_Fh>Ne)Ab(K*2x5zde19!&U6OBA~8j!q5g z;>O+zvLq)^2{B9R7{c3#S4w6zde<=N!wSjjIgNH{eR{p)-Fu_LlzY9iiTOh3ZoKvTC3GT( zO}3BDny_AbJZzVTf~dfIt*)%g`*rXoy%QmHeP>B=qwiJ=OW?Yrr{4P76@apY}i9Zd&vAb6PCBx)aBv!1klqc~hC0^oem*$n7uIqdZEq zdMRhf)6;{9gUs+?M0>sN-jkLq^nvOv!%lScOxHp>GA_gi6gtKCbY=QVNsj%`)~iDa@YPci#6h3zP8aYAZ&O)3ZN?TcVkTzR;S4x z(~q2bnxs4u7_1A8oFc{cBkiv9qs-xv`zv<7%p8g1mao!&6qDwR>LHNWCV|n%C0-)f zPLDR6cioA3OIiEib;i6dm9Ef5%@K`OfSY?&-uv-9;U~m)d|*bEh@!K@Al=L%r(!*C zn@(6Qo!+i8wS@0UKv+z{Hh<^(Ddo(7 zePvtHUAK8LqL4XEc~b@1G4dt0{bz5G(^P5+j9QkJdXK6Seeahz1Y6G_7jMZ-J}J$2 znJ={_0m8SZhqt*=NXXQQoPRV_)v1g8)7vRdgDK@e=UefLsh*t?d_&j5Xsk!(MhbZw zrIHHfI8=@z)P3nCIX{{#n3R!hE}ZI7=2ImbMJX#+Ipo+3(cG+QpaReuUqed0YsZd; z{@iDX^;}~nYq(*)dcWEfLR&orHYp&yotkGEz39UueXuXlF1TAzkpi)t4Oe~Z(6B0$ zo1_>OlcHdj>PXkP+e3M`ad*Q4eZJA8`{*m=XMFt4`X9kdx@zPRA9YR6zxce`8K*qUi4FPh+CZ)r#r12g@~V)HXypkkEmh&`{zbqoEjuAHgp z{hm7N0oo}PlvzGQBBK^SKxbJM82y0Cgh(3q9ff_D2ji&>En$vW>!zV~;QQusjgCQC zMbA9&-FMmr*)UuE{jP~ehpRiFnd5c~1y;v|UUP)P3xtG^!^sNnd^Wp%vQxd_-6Tj7 zc(>}OQPua9gB{AX=~0G}WUT*w@6HRay~+4r2V!65s#q86rT54;vq|gTtG#KLie{&- z?*Bz|)1N*S)f_gtbMps?(GqqUmI_aV!1hu}bKG8c(b|oDYfHR5n$qLl-u;E=-eM@< z(4%`ct*-i4El-nL_1t@;GDKierjW6YEZ@-_<@DgajE+0Eyym*4tkZC2+T_ij5wpHn}N%ke2(DE?KhR$l2D*1VVXrSGRB5< zsk192&p3M@j8`wu`tk53OHli5_eW*Ek?{Dju;ahee#P8qMDnZDA${fgN7O9pFh9)x#*h#%54sA 
zqk%RCM+@9);UXM9-}aq?@*IjMxwH%b8vJyu>eSD~SsDRVlidj|!*}4PXKM^+h*P#l zejewj2#Oh`2m99u>+U)B!n<$wXUk7JjrS(i=hQQm;_}rCGWAb-I+JLinUlZ#xvm$) z-INIIA#vHwZuec_Jq_7a>wDcw65Ef3OCERU)RoM&RWcaA96odq|0KV9f%|2iSC(C!TN)dfpkQP#{pvvI(Ri!&;Z zV9u2FP3oIt>rlBtveKVeVtR61xNgLv>;?uVBF9Ia;KK!+C0`1{TkAMJv-Q6ld0{Fp z{^2IeXNkPG!rUkAia^Ff#DRW<{a$*|9--`p{nrh*cmoiK}~u6Ukx;ztD4~l5yw6b8`i>fU9PmIn#;?d}PcXg>T=dfm5jlU*&kW zIj6F=p|N}?MTL{*rgS1lyIr&okX1FfBVIbql-K?f{Dx)%|Ca?*DC>7VSy`V->rTkn zMi2KEDUa;BCo(0=Ajrq&Ar2iOZjw$)WDk2RpTDqx78mWXc%}4Xsoj+DIq&6sN%8Ay zIkit{bWhL_euK%(dAfF)4=FS+UAAHsETp3kl0L?k1TvZyq(!WMAn~>ui8lqeiJMPy zwBOQ}>5&~?zsa8&V8e2SoKqH2&CV8%N+@E=R|YsK6e|}?XmgO zBZm`-vjG<6e&-EHndnJL*Lr#3@ezO6F#Srcnkz+=drHrvt0Or@M;N;7+YHO6DnOoh zm6z&!9gec)D3do9(_YalZ3}eddzUX&(n?TdwPYT->W>ULR;Rv0$Fj4G6u-X4$GS#M z*f5K8B0V_y$!hpW)Y}us@p+A2&Dt|?KaEP=#tUQju`f5A{n@xVC92~j6!~h`o47$J zy(+w5D$Y%zb&PTxM^_4>-nNj7O8BB}f5Ba2kv%ry>#h>(u;Q78v^|T{td>$vByzE zMALiHr3Ll#O2aNjQK^8z7xz9U-eSd?CvB0$34s$W3O=dYDegag?-`z{_^|)L`tguK zLC$5T($#BrciZd}$C^=Ia+go~aN;n7`(rZ$$MI9@skUnVWv+m7toCTso*VDWE@=Oe zLb$pq*~i7EeA--#N435b);mnCb=G#ZDbLUCCFpgdb)CPTg%7=jMngPonk1psUpQke3C}+$2z7X~FKMBSY44^}TMy}1v7+ufNsI^sTPVH= zFD!g%tEJviYo=T%?G)pmlV_HG8jZm*+%;lq zA$9Q$jAPuBZ;mf7r+)66rfPg^$MDtxpm7JA_Z4e-klV>d`l!Y53)1UnZJ*3q{_|ut zW16pm)O}p$fl(i$j2QeFEEswOnfj9`l_*XkChw*gZvB|~wePo6E;(^^4N|@DzScAt z_d<8*>kw__P6m3p;zhpo*TL?=*cYc=Q}3&jhtg0c5+}vGaplOHNxCfMB(bB<3wOi> ze#O%Ex%9YXf90Li>R2d@Sh4LH9W3nPiTfxTTbCr-^7Dsg&+<|-8v}R2;0u58iB2X@i7s7kHq$gsB)a=9;?O2#V5cixxv83lfR z_H3yRCo1~<`QqELJQfF#tbMvgx`J?2`sZh{bsxtmR7!Hkr+%q@-O&ftGI`V`V3%5t z^Ntbqx10E$J53qsuNqHU?eN*%-mIop$*gT3;h}QCT{#dD(4ucQpR#QnV%N{24jyLD z;h|mL^~9<_cyJu-Qru*OaNp8+?_b*-R{E`OtJtg+;5Yryvzzqi>xXmgK_6<0j7oR* zidy7nJrIu{_dmW*_$sP9D(ii^B4^1D`Ql=#*%&_UtumR#tq3wYs~;NAfTZHNG=>q{ zkN`3@=TdWVPOwDar+S{p0?~TU?wfxi9bnNNk zD<-E)-5$S01eMx%%$hq<_~)lbRY{0beP5*WbVR-W+28@$reA9@C7`{{b5E9zY)?|X zLn4Y4z458;PJuhRzH+5}-p^gC?>d9mbz8cAHO1YIXeqlvX8k!Q^2@nrD}WaUzoh*E zY${RcUatr6VT_Q1?i81IvQhTYl-R*7pSZhLY#$0YIeumM$(U3P*;X7ABH3pbhBmYX 
zXM1W#v>fM;1Tx$wpA8ConFQ+n4o%$8|ES+ z|2r&fUEOf9HfF9D;Z`;KV+_GhbZsqhHf{#O!oolV82tbB8vQH$f5H-Yfm%%fNE`dW z$@Zev2fV<*-#`HY|9KD;zIpJEb;w`V@h1WP7QpM&aK&0^VQ>Z@eCrXUjq$`8{S|9} zk^1cj4Kr&DNW;wa0>SW$JwTcmH>|s>1;z~sxnR}=-NRVgnq4Hug+*{ELJ;4_K!9N) zFkvtl34|jcf=CD)A`F8+X#2ZffD(ZRX_We4*UJ3nZ(BPZbYa3yh|M z3`hxx!-wMTU63r^bo}0Kz>Bu1=3kQH!kCL>$1D1ep>BYGSO@siJiNef`z}lZ;$5y{ zYw2bH{C$RtsVvt0!b^YI0`iwu?C(8f&G2cCwZ{9y8h`xX?f`()oiR?*78f^$!M}8Y z(0cknCwB*jfBk^*x>e1btZ!qS1Z1Uo|K{v}caHFfki|E0@fVUfN*;v*gAsV&2}8kP zArbr+j{oAJBn-X#J9?Jx7MOqY!Dau&BZTMu&5!4UF1nfxwV(5dp9;4;akD`xk$Bx?-#VU?2nzz-#*B z0SX}yFa*#F_y>ky{O>fd2=X6%LU4R; z{TH7QTm+vYf0q>!68VQM!eA)=Z{qKK!qESThWLj*VFVH%;s25q!v6*QhlX=C!(ZX9 z7ogO#^~T^`1=7S~@u>QZUI0kR$qEa+aL`3^E6D*3fa2292qa8S4law5m6bz6<>cg% z@-S&UNf-iyBBA)0|KBVZ@Rh?_$l72m?A_cQfgpLXFa(K$$%92uFu1HVSV#^j0~Ha% g=cNc30+$xL0HGVs%oX=Lq{2u948X-DuZ{xzKbSPa;{X5v literal 0 HcmV?d00001 diff --git a/src/test/resources/technology/tabula/csv/MultiColumn.csv b/src/test/resources/technology/tabula/csv/MultiColumn.csv new file mode 100644 index 00000000..f4f2e726 --- /dev/null +++ b/src/test/resources/technology/tabula/csv/MultiColumn.csv @@ -0,0 +1,44 @@ +1,100,200 +2,101,201 +3,102,202 +4,103,203 +5,104,204 +6,105,205 +7,106,206 +8,107,207 +9,108,208 +10,109,209 +11,110,210 +12,111,211 +13,112,212 +14,113,213 +15,114,214 +16,115,215 +17,116,216 +18,117,217 +19,118,218 +20,119,219 +21,120,220 +22,121,221 +23,122,222 +24,123,223 +25,124,224 +26,125,225 +27,126,226 +28,127,227 +29,128,228 +30,129,229 +31,130,230 +32,131,231 +33,132,232 +34,133,233 +35,134,234 +36,135,235 +37,136,236 +38,137,237 +39,138,238 +40,139,239 +41,140,240 +42,141,241 +43,142,242 +44,143,243 \ No newline at end of file From d4adc8134a46e36b751a668dba53f171e06f1699 Mon Sep 17 00:00:00 2001 From: Asheesh Rana Date: Mon, 12 Mar 2018 13:40:16 -0700 Subject: [PATCH 028/200] Updated Readme --- README.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 81392fca..1f1eca39 
100644 --- a/README.md +++ b/README.md @@ -23,10 +23,15 @@ usage: tabula [-a ] [-b ] [-c ] [-d] [-f Tabula helps you extract tables from PDFs - -a,--area Portion of the page to analyze - (top,left,bottom,right). Example: --area - 269.875,12.75,790.5,561. Default is entire - page + -a,--area Portion of the page to analyze. Accepts top, + left,bottom,right. + Example: --area 269.875,12.75,790.5,561. + If all values are between 0-100 (inclusive) + and preceded by '%', input will be taken as + % of actual height or width of the page. + Example: --area %0,0,100,50. + To specify multiple areas, -a option should + be repeated. Default is entire page -b,--batch Convert all .pdfs in the provided directory. -c,--columns X coordinates of column boundaries. Example --columns 10.1,20.2,30.3 From 460ae29325966c72ec77e86a79af3d42e3a01fe4 Mon Sep 17 00:00:00 2001 From: Asheesh Rana Date: Thu, 15 Mar 2018 08:31:29 -0700 Subject: [PATCH 029/200] -Moved area calculation in commandlineapp class - Moved the constants out of page class --- .../technology/tabula/CommandLineApp.java | 16 +++++++++++++--- src/main/java/technology/tabula/Page.java | 19 ------------------- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index c0d321f2..b7b9dfe9 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -35,6 +35,10 @@ public class CommandLineApp { private static String VERSION = "1.0.2"; private static String VERSION_STRING = String.format("tabula %s (c) 2012-2017 Manuel Aristarán", VERSION); private static String BANNER = "\nTabula helps you extract tables from PDFs\n\n"; + + private static final int RELATIVE_AREA_CALCULATION_MODE = 0; + private static final int ABSOLUTE_AREA_CALCULATION_MODE = 1; + private Appendable defaultOutput; @@ -162,7 +166,13 @@ private void extractFile(File pdfFile, Appendable 
outFile) throws ParseException if (pageAreas != null) { for (Pair areaPair : pageAreas) { - tables.addAll(tableExtractor.extractTables(page.getArea(areaPair.getRight(), areaPair.getLeft()))); + Rectangle area = areaPair.getRight(); + if (areaPair.getLeft() == RELATIVE_AREA_CALCULATION_MODE) { + area = new Rectangle((float) (area.getTop() / 100 * page.getHeight()), + (float) (area.getLeft() / 100 * page.getWidth()), (float) (area.getWidth() / 100 * page.getWidth()), + (float) (area.getHeight() / 100 * page.getHeight())); + } + tables.addAll(tableExtractor.extractTables(page.getArea(area))); } } else { tables.addAll(tableExtractor.extractTables(page)); @@ -215,11 +225,11 @@ private static List> whichAreas(CommandLine line) throw List> areaList = new ArrayList>(); for (String optionValue: optionValues) { - int areaCalculationMode = Page.ABSOLUTE_AREA_CALCULATION_MODE; + int areaCalculationMode = ABSOLUTE_AREA_CALCULATION_MODE; int startIndex = 0; if (optionValue.startsWith("%")) { startIndex = 1; - areaCalculationMode = Page.RELATIVE_AREA_CALCULATION_MODE; + areaCalculationMode = RELATIVE_AREA_CALCULATION_MODE; } List f = parseFloatList(optionValue.substring(startIndex)); if (f.size() != 4) { diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index 959844ca..e8c1d636 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -21,9 +21,6 @@ public class Page extends Rectangle { private RectangleSpatialIndex spatial_index; private PDPage pdPage; - public static final int RELATIVE_AREA_CALCULATION_MODE = 0; - public static final int ABSOLUTE_AREA_CALCULATION_MODE = 1; - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage) { super(top, left, width, height); this.rotation = rotation; @@ -50,22 +47,6 @@ public Page(float top, float left, float width, float height, int rotation, int this.spatial_index = index; } - - public Page 
getArea(float top, float left, float bottom, float right, int mode) { - Rectangle area = new Rectangle(top, left, right - left, bottom - top); - return getArea(area, mode); - } - - public Page getArea(Rectangle area, int mode) { - Rectangle newArea = area; - if (mode == RELATIVE_AREA_CALCULATION_MODE) { - newArea = new Rectangle((float) (area.getTop() / 100 * getHeight()), - (float) (area.getLeft() / 100 * getWidth()), (float) (area.getWidth() / 100 * getWidth()), - (float) (area.getHeight() / 100 * getHeight())); - } - return getArea(newArea); - } - public Page getArea(Rectangle area) { List t = getText(area); float min_char_width = 7; From 30bca6da6bf00dd2b4a92f4f49c7fed067bbfe2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Thu, 15 Mar 2018 14:12:43 -0300 Subject: [PATCH 030/200] Add AppVeyor badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1f1eca39..369444b4 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -tabula-java [![Build Status](https://travis-ci.org/tabulapdf/tabula-java.svg?branch=master)](https://travis-ci.org/tabulapdf/tabula-java) [![Join the chat at https://gitter.im/tabulapdf/tabula-java](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/tabulapdf/tabula-java?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +tabula-java [![Build Status](https://travis-ci.org/tabulapdf/tabula-java.svg?branch=master)](https://travis-ci.org/tabulapdf/tabula-java) [![Build status](https://ci.appveyor.com/api/projects/status/l5gym1mjhrd2v8yn?svg=true)](https://ci.appveyor.com/project/jazzido/tabula-java) [![Join the chat at https://gitter.im/tabulapdf/tabula-java](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/tabulapdf/tabula-java?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) =========== `tabula-java` is a library for extracting tables from PDF files — it is the table extraction engine that 
powers [Tabula](http://tabula.technology/) ([repo](http://github.com/tabulapdf/tabula)). You can use `tabula-java` as a command-line tool to programmatically extract tables from PDFs. From 7ea070e55b70930dabf7dda354190066a1e09a8e Mon Sep 17 00:00:00 2001 From: Asheesh Rana Date: Fri, 16 Mar 2018 10:03:42 -0700 Subject: [PATCH 031/200] 1966-tabula-java-column-option-with-lattice (PolicyReporter/requests#1966) - Removed unused variable outFile in CommandLineApp.java - Removed Unused imports - Moved VerticalRulingPositions out of extractor - Added tests --- .../technology/tabula/CommandLineApp.java | 25 +++++++--------- .../technology/tabula/TestCommandLineApp.java | 27 ++++++++++++++++++ .../technology/tabula/AnimalSounds.pdf | Bin 0 -> 11998 bytes .../technology/tabula/AnimalSounds1.pdf | Bin 0 -> 13784 bytes .../technology/tabula/csv/AnimalSounds.csv | 8 ++++++ .../technology/tabula/json/AnimalSounds1.json | 1 + 6 files changed, 47 insertions(+), 14 deletions(-) create mode 100644 src/test/resources/technology/tabula/AnimalSounds.pdf create mode 100644 src/test/resources/technology/tabula/AnimalSounds1.pdf create mode 100644 src/test/resources/technology/tabula/csv/AnimalSounds.csv create mode 100644 src/test/resources/technology/tabula/json/AnimalSounds1.json diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index b7b9dfe9..6d3c34a6 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -6,10 +6,7 @@ import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; import java.util.List; -import java.util.Map; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -47,6 +44,7 @@ public class CommandLineApp { private OutputFormat outputFormat; private String password; private TableExtractor tableExtractor; + private 
List verticalRulingPositions; public CommandLineApp(Appendable defaultOutput, CommandLine line) throws ParseException { this.defaultOutput = defaultOutput; @@ -58,6 +56,10 @@ public CommandLineApp(Appendable defaultOutput, CommandLine line) throws ParseEx if (line.hasOption('s')) { this.password = line.getOptionValue('s'); } + if (line.hasOption('c')) { + this.verticalRulingPositions = parseFloatList(line.getOptionValue('c')); + } + } public static void main(String[] args) { @@ -123,7 +125,6 @@ public boolean accept(File dir, String name) { } public void extractFileTables(CommandLine line, File pdfFile) throws ParseException { - Appendable outFile = this.defaultOutput; if (!line.hasOption('o')) { extractFile(pdfFile, this.defaultOutput); return; @@ -164,6 +165,12 @@ private void extractFile(File pdfFile, Appendable outFile) throws ParseException while (pageIterator.hasNext()) { Page page = pageIterator.next(); + if (verticalRulingPositions != null) { + for (Float verticalRulingPosition: verticalRulingPositions) { + page.addRuling(new Ruling(0, verticalRulingPosition, 0.0f, (float) page.getHeight())); + } + } + if (pageAreas != null) { for (Pair areaPair : pageAreas) { Rectangle area = areaPair.getRight(); @@ -264,9 +271,6 @@ private static TableExtractor createExtractor(CommandLine line) throws ParseExce extractor.setMethod(CommandLineApp.whichExtractionMethod(line)); extractor.setUseLineReturns(line.hasOption('u')); - if (line.hasOption('c')) { - extractor.setVerticalRulingPositions(parseFloatList(line.getOptionValue('c'))); - } return extractor; } @@ -362,10 +366,6 @@ private static class TableExtractor { public TableExtractor() { } - public void setVerticalRulingPositions(List positions) { - this.verticalRulingPositions = positions; - } - public void setGuess(boolean guess) { this.guess = guess; } @@ -410,9 +410,6 @@ public List
extractTablesBasic(Page page) { return tables; } - if (verticalRulingPositions != null) { - return basicExtractor.extract(page, verticalRulingPositions); - } return basicExtractor.extract(page); } diff --git a/src/test/java/technology/tabula/TestCommandLineApp.java b/src/test/java/technology/tabula/TestCommandLineApp.java index 81b30d4b..f2eb2b2b 100644 --- a/src/test/java/technology/tabula/TestCommandLineApp.java +++ b/src/test/java/technology/tabula/TestCommandLineApp.java @@ -185,4 +185,31 @@ public void testExtractWithPercentAndAbsoluteArea() throws ParseException, IOExc })); } + @Test + public void testLatticeModeWithColumnOption() throws ParseException, IOException { + + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/AnimalSounds.csv"); + + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/AnimalSounds.pdf", + "-p", "1", "-c", + "59,218,331,551", "-l", + "-f", "CSV" + })); + } + + @Test + public void testLatticeModeWithColumnAndMultipleAreasOption() throws ParseException, IOException { + + String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/AnimalSounds1.json"); + String resultJson = this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/AnimalSounds1.pdf", + "-p", "1", "-c", "57,136,197,296,314,391,457,553", + "-a", "%0,0,100,50", "-a", "%0,50,100,100", + "-l", "-f", "JSON" + }); + System.out.println("Returned Json: \n" + resultJson); + assertEquals(expectedJson, resultJson); + } + } diff --git a/src/test/resources/technology/tabula/AnimalSounds.pdf b/src/test/resources/technology/tabula/AnimalSounds.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ad6c78cfad8469b5282c482d0274d3628b67e89a GIT binary patch literal 11998 zcmaia1z1#F*S1K>(2`0G-OLQb(4B$^NT=k`-QAJ`64EItjdUp~4bt5pQqmy(55Dz0 z-}`;nf37)m_FjAKwbx$jUgunM=B8JYlxBx;Z~^E$^Ez8P>pHUloIo(p&d3rVC1EC0sGDzCO+8OQ$lD0Ndc`z 
zxx)?)6vS-Ihn*^U?jpcic1e}*^VDdtwO zaTMZ9keRu(+zl_-ri{c!y%MW2Onh#aoUJcuHMEg)m4|C0FlI{+wec!0n9JY)Txj`G z&uV1ilaNN?x%2)~Ukf!_(dJX6A?OmUEn7BbYq@a`1m40k4?qX-F$fW zgi0;W(>!2W)ILJhW!2hjzWQ2LU%gGK+UC|g!-y>tZm<6F-!*SpbB zs8>bn)Y5M2bS9H<15~`mB>7NsX9u?%=U0PoGTyO@c-u@ZD@XZ)`J3o{4Dkki=bpL? zs>>}a5FSlBjiigE*K*V%d*CUAdX7@OOUIKYr&7pqLFUD_N4l{r?qixhGPU4@p^m$~ z%)8%7WD}D-M*1Ch>^`IA?;W!^BW}&AKxd&}$V-Ul`1bU>zVPDh#PC`*Fo^fZzNuy< zridUgNqE20=`6@i8V;kHJ%4&0OXW*w=v>X_Krk(6^3zNmHP7GPtVoMZQZA|;TqFUG8N zjq!HFbYoNYn4gVCL`)+*4wpJ~LHbRHmWbrcq(*AQK#vTMm8+T9GTrhDPMiz**>&j8 zCtI7R8&bz$>+Y0wrmdWJU97^;Z3+`yVI5iA*|#>X%<6BvgNh38Bb4{4cY8H@Eyt3i zj@Se{wm#m@_0krNZ#^4Kqf~_*O*<(%EKK;!G=^ID2%sj!*26SF!&vjNFt(8MWF}`H@H8auU5hP|J0=!biLnlcIiC&ch4A_Sc5odLo zO`pd+6(zvbAzdh6f*Ugn7|@P`pBgF24|Z!XVUo+g2%GhJR*=w+tXFlH0mvmpa?fEUWZ{ z^gic?i#Ial7^y_GCb&2%k+V`zHSX4AT4N@fcV^oEoQp=G5}(8W;%l-I9h^f~)uxi9 zX+Q2kCPyio;JLFyK<_+tmd3eT`n86Rj?n3p(qY36rTeJmeo6ktix8%*pZC({(DpLf zwgMcUR&wm3-Pl;Pm>KX>a*T{j%B3IpXPL2(GFLkFm>q^YobJ6VH@libPYg?XNob&^V_}lo^%qFr`a~s^{(&eg88gEGd(AMw<{UBlX*^` zO@JwVXzpo4=b=ViVUtUbI*DRSYFFPta*`wE#EY#TpOVROel*LtiBYb&plE2*4SHRA zzQX04>8a?WbXQ-}6G9MH^s^u?fwsnlE|1^(1@)}anK99uyeFum2CXiG$ru%kiC_;) z=v2P-{m5og;;1wam4Hh1~lQ{@D{evdYy@dI2NBmI^pao~pb-$~|Rz z)UN(Pc~!AdU5~4Tl^NGZ@7~E0O=L`Q<(sLDaST@%^sVAFsAy<0hjxB!0V{7P<#XqU zf?utQ;#_T|GN65Is(D>VA|Qyb*fvhfK<9`4A*wjkLQ5TOywSR(?pZ4vJSR1bEOeZr zcu-OzoK~kXTzYE6IgNwtO&TS!jHBSLj?B*uN)PeC^omz_I~`-3K|!03pMR$uZ2d?n zk&1fSn@l%q@T)OGVNHvS@~FnO)LDjSZ!B& zg#lnqh39B_yIe_u+bWnta7@|YPJC(@O@0+>@MfMtC)T%eh`1WVCQBZQ<+(-Q@GKN0$B9FwS|`Tg&aY zzl?UpjMwXwK_Q=V7oPrnhJj{YiI16zb}#l=u;jaZrdn^9a$4`1q$R;~MfSJTO75=! 
z<$3l*O-!1P`a(y~?@*L*8wxt3sF{iHx*#9@vb0(JW#N+dp`0ee!D>SmQUp`k(mxY~ zxi)s410jkPlf|l@0U+?^CfRWiQf~Vr&xNd77BTduVNniJQ1VI(#&F7t6hS&?JA{Co zyWN+d>lGfiSQ^HTeB}&E^(zC;0E0k3!mn4DPd-R}FSlD$VUQx0AN!)ek1?L5J~MD?>d6zK%O>6B`uvhs!HT zYHDgYtLutR(sCKR;2k;)h^mybZPS|+-45&(epbFL4s)Hc%L7wD$jfJ)=4 zOXNo4YJ7;tZNZ}oksrY)3htvA42(|{ioOjSGInL)xr(O|?4(!DU{q(U7<6JV+UF{e zx{42Ch%vrU;N1;&;I`+&%q)u>rbx2DR1=38Wj~iKDL{dBIJe&stF^Hls%a0R72tHP zHD(%pSjg1a3~4I)>NokS9*eop z?J=t0(}=RGmHRQp{LuMKQJH7d@&j7XWJie(axl8kNE&_hC>40O1}&qV>U+v7b-dD_ z7V4z6k4dV|lZ*!Gzhd-9+m!-8pW__ne zJFc#tSFb94J~#04>iHtkH`MGVfkegDXzAc0Yjdo){gXE~K8=x0D+41@bTmO|UGkLo zMQXwFy4&Fh0eXtWRsM5{;dH*hqRBZ_${4fm#%^|`v{{4Xx3Na{&v@DG!0*#_Bjpa! zRZWe~(ZhV*DV>LrEhr7v`Be#7f@Wr3jT`iD%feI!>4lW&dmTh@JKHMVi6%seK4_z7 z7Sm_m zJ0(_W)9L{OEs!)#HJ=bEjxjUU;n3-Fr-Debvpu`11@ZlKBA5@;Kb@i1V&i|1I<= zrF?_2r?oHBNwMkE(yQ?U2Y81~uT?G=h(|&_*O&7kkoQ}E_i|#5NyTIkO{`#;u59ewE zoaB4+_;pBx^t&-DfghBMSTrtn+|!5rTWc5uhSzYb`OT2(f5Ufx*sVC#K9}}MX0qxVc&MJ ziad~+P~m26bfjlA=|t2Ffpq<7FY^U9;!&H2ytEpspN1XzG8Y>DPD@}(y_=|Xx5Xe; zLUTXe{4`C;*Iw4h>v3RX-zn4w?GpfA?27zen(0Gt_>m`SMZKE#RxL)z>l$}U;kG$9 z!YV`vcai(76Iqo*WUd}*-|Acc&-XRmIN7X&kljHk1%Y-HQ@2@2!0f(u?&umW>6y0F zOHr#nLfGtC^fE>lM#)X-@hAJhpD3*a4CCF3b0#{~LA-E!7H`j7#r;!xYiOb#S4K|t zT7hDa99J^ZtF1SJB*7oN>Op+Eua7c@?7rMoL%*kaXEOpW-e6Xc$xt4X!3K0U4+7S5 zapM=$R#s(VGQ7jSsgr%10==HPqyw_MmdL;L(EJ9{Oy2rYrd}wrQn}-S{lk2Tvjxux zlez)#22)J;Nen1>_-&;Fb)M7HS_)YyVXVEQYG2*k-?$?_RiGbeP56cvbci0CV#qEjbB|HoD`QMQF+@gq|EiY=g-4t|7 z{c0)?(fQ@tI(Be2t!Bqm%BEkPGFgLDvp9c)@ouoFOuDk3RD9uZ=9H=UJapZa8_xygIsDtw7E0U2moYE1t{i^2}SO3Q;g(AC2Es* zAJ13)wO4mEmz^6reOVmr_se}rmP|4FQUbc7lmkWjT24Rs!VNRqb$$&(*maA3VzqTV zj?aY}Kr!OqBp-HYf!=u;#tE^u06kM2KfU?dRsrx za$?e>cVF{)cM7@QwJhCxe}1Iw+tU2$6guzSxj~x^VgB?My>j}8mA-xz>pL56g7;kzp6!}(5_GS{~p4KW9ne;GV z+2s>>3OjaGq#p(nWZFN@Z20BU;Ty6zgrkLx)jWeC?b_S466|OE+nq&VGkD$NMvT$g zsiWXD$GV(=&Ef`&s?c=U@0o8z8k`EUhs6^*Uj0(P|A!g(dgu>?EU~_MYe9 zco^&&HKZ;7vnd1I);VxETZi|ZVC9W^EFN>9$X~zc-BI&RLWq(4#0WB*c zwtPcNbv|WK@0M>%&g`x$yWT>TtHtpPJOv5`UuP=o#l#jf7jULE_zQVW`OF!Rsxx%} 
zLbUm-v;J}ow){*liQj8|>C+9xuMBR-G?T%4?I_XX8o{jic(V=6eUHLIk;KHJlhk7_ zxiqn|*KTOXjpTby&zCGWoD%p*7ap1BJAA|&Chl;jrUR^+Q%?|zd(!5tnS7y3$V+YT zGR>mdNZ&X&u_?3^J=);-IvLNECC4G;@c37#${{gU7B2+mAlI-zt}!m~Q!3!ArrPgq z?kW9*xTrK=#Ut`>Vc|Dwjz3tO8Q%6;uO2=b?b_T-ac%?<5f7q$;Ly*_as1VZgpDg% zS)-Lb{J5GuyWZa*0F#&(9*|4SYaMV)99BIF2(f>M)JZJ*)Tn)I?(vqE%;!g;Of+f8 zG9m1EHU3%Hc`Ih@_8~(N5)e#h&K05(t64+SCIU4!b|xQ-!uCP_+yymvt`%8Af|MW-x|%_|TgBOxQt- z^*Po9t)uYSjT+jXiTQxCyBVoN#Ib{PUD9X+DIG;DGrM&LzRhR#jZT>Y+qA0f{9Vp& zdXY{Q|FKRxzT=6y%mZK6#?{(SNmP7DS#4hTmomaf85OT6cpa&=(p*1p3}@ZS3Hu3! z)kcKZynv&90Muw#OIfrD4YqBvVhNg!?Vj8qwJPKfb+e^gNzXSWZ@QT{yy(p6Zlw01 zd!eSD>CxR^n#(Dq=7gK5t9CirZ%1!+q-m{M;SNW=S9WYOOeuav z!?s}bZ(1Thk(f)OY89ndRj-4f5${yHd2@#inseDu%3}Bwj|5-7-XTEB`+{Mh>~)CN zyGl%{?X{Dy|FtDo$AgV||3`<{`L6}MhC7;=?r&qW`B^v1xN%vUsl;?PZz`ge!FR|P zgqaK5Dev&^aNUVHItac;b(wtL+T1GHw=66v`cB0}!?35huW6_?gi2nhM9Pgyc$LeM zY~S91dCCFgV5`#w?*t}e267xWi#m$E#grszm;vKp7s@#NWXqLrSTQFS6tKP*(8qpN zoU)7!y4L(?iTD*i;numXbFu#_#BCY%!;Os4CZ@WHFuAP|wI7ekmSuElXVaG6#7iVg zQ_VT^r(9jO)0m!xC87togvl;HlQqc)JB(Cgq@MR^=^QqMt#O-L`GXBZpV3V|;cQ^r zGdQ=HfA1RbtfzUR5Ao7(NpnnVs~n~A2a4+qYz2~jy%~)j=e5i`O`_&Yu(Q?|6JNf1 zD^Q1m7EeM}*LHA++_ocZ->!b%H-66PzS&4MxV{7f>ocC-L~>dtzhjsFP#mTiS*oBx zJvH%`IDgcf^N1ZOUYNeL3nd$k3SN-)LdTJdefC5CAYIzvEbP{1@AJZ1PQc5Ua(U@^ zOWC=tt>N{!&U#P`N!>nqBYbB?qrC)6g!iuh;LZHa4qCFHPk(8~=N&)Vg!%rsaQa-b zyWdNy$0A!zGn91+cju2UDjy#g5w3G-`;$k#2B&f9a)j0mOa>%oJ4+jVna7HICJDuZ zro$vUunv+9C$J#U-{XfwZq?H$WR&oI*(nOM#Z|6zY3>B4PfCq!s|Ct7n}eNi)t217 z)!b8+#tz=tLK{@zH_8g`URp7Io12-bO;1oK>xr*g&N-KKvcIm!OOfb1CBKmqPAiuQ zMr~Nu>_gpdKxH>?3u+n5TixeI&Rh||evPbQ`Y4lLr?$gDsd)10G+fvvo8(>Lp8X(B z=swsww1J|-6>kr05qjdv+q3cnYc6L9$HkS`DwJgZJ@srBL;LGK93oe|eFGz?s=k3r z-eIMPEjou`=;=M7ZcgbyKYG%EeDd$A()3Gqmv0v+o*`|E{jC$B?Cz%-pAgT8_WMTl zcNty;I8Ey1&JH>S!u@;lW^YEat}aJSe$N+4Tz#cG_&CDb%g6k2g7l;&0`Z*Iv*+D2 z{UU0pv}xgJMnoi=V;`?%!m?&c!p>353Lx1B@vEO2jW&Kfk+KA2MTr9V-?p7dkwiF_ zpa#FL>3pI#Y&zpC`r6sk^Ts5$Is8lhTYf77$_rj))z@90JevEG3p*E;a@g7MCWteJ 
za|pRC1qYlgozCwe&&no0af62v624QI_5fL!u63XT45|{T6AT|SUt^a9`0+G)jO_N! z$1NX(7H^mkQqG@j%+4FA=pP#wv9PjMcTDy{oU~FXDTBSt+v9KWPHSdlg%jBdMJ{*S zoF;bzg9d3M!%>zK3SPFpDUsD`C^mnVl9eR$!;>)jqj00YR$H~JVv4iNPjobDlUATg z3V(ZlF(Rp87^mH!gt_(XI1(D$nUzw_Pj;sHoY~7}75XHFo<4+j>T#jjfWnkqcfvsR zy8#pei-d5(lD7uzs^E>3H_MlN=vgBeF(WjlK3*NXM+Jj^mwx=ZSD-=<7D4ao3cEcW zLdv$JM2Sh|%gB!|pVZ*15T53368FUPZ85#Og-*AxhZ6-M&%an;Xi~P=ZGH8%x?z0P zzJtAd^8+_4g}|+KpRd7GOv3p1u%lQfem@<%#LF&eeN#kTYU_a>xEc^+j8B`|b5tc_$s{!U!rK$a5C| zQw$>jsdpU7y-pmdn#e%eKieM@z+2((1mMm0HwMg9_%}S>A+*;->IAG90iLk5RXmbK zdcnZr&y?+{jI<6&R%+9ldcm~9BoPB?X^q&!t=8ZjE2t(|9IDjq6t(;zZ+|bbec%`& zKsY4Xwc`i+7Ldpk-mK|N$j6T?CDnp>z178fI5qr8zp{k1DKaOA^apXLO!r}=k^hkq zuNqcQ#Hu}H4}|6inIP(1GfLLO5{Ia+`{a-glXOO69oSD*ej#lLHwq(?s3iT4*}ayK zrRP{-9kU}(m0K;WV>C629RVo%#n5)X$N}W0QH)HOemCBPj(2Z|Y?%gF>LEklr zu(^Pq!(-u@X|Njv#SnXGy6lAlxxVTLZ`ft-#%`a+)<~ex+n0%ipO;aL^**0Q(N4SW zsNY06<@?55jbBHka@@{=XZ;y%uULoDv7T(J{xT7|ROUAMwyCU6@bNbUf)VWAXsH}{ z`}7XaqT~Frw_O_##eDXRe4_F42uUpWo(R}pTsAGCi=bhs($md6fwk^zy z`~i1U$9P(DJ(V_H&BzX^_+O(O4#|Fo@HEg-t^3g@03240r8JL1KC64k<*Q=Up8aY8hNYEyq+@6mawpjVCfyN)RSo0NKWmZ}Q&c6icCz}su;d)na9*Nj*u zCQIqDRCrF+3qQgloCjovd$X~3+l0Hz449$p~-Em^^Wz^LT&=h_5BqzK6eL>~!l1*1(<_(S4zfi9RCJ4Mk7tm+`feSxgd;#m& zo|uFyG1sGZZO6_7IA1oX*s_MJ@G{h^Z3G?$E(}v6Ub*zyz*6#mmn{r$E-8D{)Wb}) zhATGe5_L<4ljc1(JkQP?1+v)Gvr2Xsm&guUnV%AO=D5Gui}+?szPGqUK(gh=j>pPM ztQHnE`t$rtSOWt;X(Ud_TI*=Q@d7LtY(s zyxaL(G!0xADYTAQu`2l%^7Z4pAH6Gh`>Kf=stKSbE>AwkgZHFQ*MoBG^F+AZgGl!? 
z`@(ohXO{#BW3Xr{-9)WHcI+g#IqoJvju5mCBb^X?V>dTDjbNl^nxG>*8ku#W-kmc6 zbg$5kRz2t1oa1*WT07=ZGOkixB@##OCQSTFWY0Tl-&fZy1^iU{)X-d5Um~+W@%(9L zY|hd?GH=7av;#v_ST1z}+yK~wS-3#NEcK-somN7q6?wj!n&q=R`!_Ff2Nq{PI^mgl zqMhAle48mMiDDb`yv5cdO5Fw#PCDDLJz^fIYYF6-4~3dyi_W>q-Ai{f2F@hwRn|pT z;HS^py2V$&*DZZn;vJqMc#h?g^G(U7RZl{mw&)3j_&HO_G9PJ_)cbJ|j9Xt^5=~`} zJmP%6;{)@#DBaTnR+{-{8lM-zm={vHJW3u-xk0_=yh|#H-5zev!05c!5JH^Xm7+59 z>0w_>MpY-lAzj3h5yn+tl*rAtsaOuedyF+aTASStIB@$4w#5e$)(4kB&m)&fBYV$D z^QuoTV7Zs*Mcb~N*UHq|)7+6{#zk3ZCJG7oa@{pn94z{Dpa9FgsP54IP|xfB+@SeZ zGO|=zif#J&qv&-Xg7Ej?U#W&qJA9w$=+Vg;ZAf$WR(j&G#9cno^FNSN79(C|kCX0- zSaYWui!{NmLywIRmatx8dIsF2ON#P7vi>kLN9Q!!RrIo|mUz3_KEyO{J|uVcBqHyF zm?vaJxqsh;@fcsc*OD_>6~7AuiWQr%dVBHXx7T`_mK76JIow-qMW4^nkAChej}vH^ zo>Q-9_Q_^#){!4_$Hx~JQqS@6K^=QM!L;~OnAKX_3;SWC{gpTG;&y0;)yWJrZ8S6; zXv=4!PW1*8Ee1KbdCtNo247oW^lGxtR)(b&1~%92j=jFeIu(kLn}2R zUEeILMCb-SjA@ar(?lp0M=tha=+%eTO2}s!O22Bg7A;r^@n!T~<^L#87PrsYHZ?=< zBx@lD_i?M!MnkC}BuSI5K?04XbETa3wd2$_F-D16iWI=b3-Le8?x#*uE{ z)M1wP<-*7hqCPb51JgJ7a91*W5-6Gem|VYI6I_w#nGM$bYX&Lp82BK!YTT3cggSAy z^LlD2Djk7t9o&;>ylNZCI+t|W_fs00i3yGr#_u_mM{WY8y(Oo%lfWum`VQX`(5ijn z=kj8X1QK8ukJh=aA@y@U{sYsd@PQKPXEKI2 zFSP_WbzbK!_U*++6ejTWh~zfnjoYafL{_yBCV6a|9gTJhf~Ctl=EfoyM!!1#2%UXB zPTwQnI9Pr0^_9$J1O(f(r^g^9$#-&MehW=2b;yc7p}cON2hFTmLPso0EdqNO`|tCBLuVicN!(jb0c3cr6e( z`zRxiLOuQia8;+HO53}lUDzk7Q;zeUJ1hPdz%?u07xMT*%q*C@UDvg33u=oBDN)#J zcfH21B*U+S%x}6GE7QcS@Z^?vE0=>0q%Rl!4?Vr72X1R;dU!Ab0hmY6&jpbKou4uk ztV}ZF6&f5AuG|xbh>+jDzaH3lJiQ)o#?Tl=D7XWhlq?w@nV*lxjQkpI8-Z~f5AR3%T?&h9F|6|Cck&n z;rUuC{sGgH+j`U%rKH>H{##RvXUmr^-xqcp>Nl3s4usq*Xk4!k_jBt{I?<(Z?PCu3 zt0T#dLH1JUa=OS-n0l^RWqOmrho@hoS74l3(-Jmii_L1>#)PP zpg?vg7z%`N^6&!rcscbzvd)Iq7RF+>X4Y^Z7yuG;GDf^~1M>1g0HBA(9y;ut5N-fS z!qEN&+``P<88HtrsNxK_Q3vuOraYKO@bKWAzz1%YG62D}`WIP?_di?s7i|y5`Trs9 z>BYUW4FcmyU!->oIO7`Ygas-2-0HK)~1u1YxTkYDQ~fkb!)y~9tcjJH&O!gCh9D2T}41T zRF_un`fOogQxR&ZRd4&Iup8zUfW3KxQtF8wj|y(<>*L8AU5Cu}y-SYEl)tfDkmDDc zZBU#b@tF!!GY(EgDY 
zO`j&I774Gdq6Y86B~AUJwph-mRrPN#dP_w3S*4jiE%-s)Jb>WIhyZ5CIx%J&r3Ank*a|I1$=z}Nz%V@22yb` za()1vs-p}1k5t^y3I34j{~K-=j!w=J=7x?BFp@X?XB&#>Xjqsyn>*?7@bCb6I1wlT zgMpk6ZN$li0OUjaU)P5*Zf@>BeJC&AAKiyO48{$37~@6A{@8$V1EEmppYaFzzs?6c z2;G18A9Nn{_5K0V|CA5#p}Ld-AXPgxTZ;$YDDVNRe*-TB{GY%`Lx2xq`ujNKFXM>F z0RK!t*i>?~Ggg5+>wpkER*)*(-C6IiHSw2Oe*&vyXa)x<89F{x8rQ=bAZ56dor|L} z+zAMIP*w)Lf}2F39v}%7L`l0^7{iri#6hw^XGCg+g+USs*AZ(w0UtQF%6}=|2WK8`N(e*$cGL;* zU)}-!;~rwdAO9X)dIY#18rTbt^e@_M%b1&v^8Ub+pwq$#Og#rJ6l?V7r+8Ilj!;P(+ zTx@_KF)>N7B#$^Rw>U&vl2?LC6BC literal 0 HcmV?d00001 diff --git a/src/test/resources/technology/tabula/AnimalSounds1.pdf b/src/test/resources/technology/tabula/AnimalSounds1.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8883d101899cb2e6690e3bc5e051b1e932bf79bd GIT binary patch literal 13784 zcmaib1yEc~vo;bWc!1!V5G1hdF7EE`8rN?Ulrpk5aWo}k zgh&*CB4(D3M)p7vOFc&;AtM7DLn9s@BnL-(BRwl5*R+yE>F`AmM$qZ=dk)+qbCFeD zlmSYbBdbL|^t?K6zI~j=#HR;hapw&+Qkh!v==ijW9Jf{cMU53`536Se8B)~#2*K;~ zmb=GqPiC(5_hM`jedEolaXxKcpy;&9wa(waWsUL46TX>;9=Jk>4xTEA)=mLL}*W?uOdb=7@XknDt-@NyU?IM)nWsTaIZ!()J1Jt3q*Y-?=+aij z27ez!8&IbpUr}{b;44kAqWv~hSECml3lpDFz-o4&kwy@|APSzj+~x9jd_j;xMWY0q z&FNmP^egSuuJp&x_+5nW>XfvmCDu?=DQ-~aZs}?!#%g-RrqStQ9s2x%YKq@ZpaQ_| zG&h63_?o9Xk5;oOC2O47TMJ-G&#L^9$aT4Sc61q=D>?`~>|UPEk)X8~y@E39S;^Jr zh>hECo0g=VB7Cqq2rqMoZiPl+W7uermymzAN%tc2?QG#rbQGJ^WZHGaR^M8!O1KEkekg%lo~Q@qV@Zb=SN*B^A5tNu zd2g_*9WW4@#0|M}m1-jE(^v*vs1e4wB?Lx|X?eusu^m6di7K5X08)vG2l=e8sk`{R zc)9|)oHyh7BDM7hsw-ol?nkYe^bdn3cws~Difmpc3WgU_y#_(!C?Zi2#6P!nO$)1J zVn1uKPY>iZ(gnj`8C)S&M<-l)@0-W|wjXHtELnr?)kpM6mT*TyGINf%>VOX#$;jI9 z?|g@}UPA=wH8NfstW4n7(Z6KO|08}_{*U(U~5o7Nrt87#-Pw5Ec!wyigdEPH$i?ls5oCrh{F;$0KE$7U0QZwZt`; zPt)}7?Z9`YR5pNk9Z*`ZHdi{uaMOC<_#uVKT4J8AW7g)RK~;c~!fW})yM=buV|O^& zVKj}M(ZTpzvEj=%wt&MP4QA+?!Kze!CzcfHT~~gX8$&hiW5#xGu`j z6sO1c*}84wN0tPezH>1b9S_H=G3_TBa%9Q!=4>i=uOBIkow|3%bm1)%8TCy;X5Hr> z1NK}f@8Q3{!T)3!s9q|2*@q>KZsP?v#74;(!PXw-xlR5vW(Z~IjKJy}y=rEnjxI(- 
zzCcxvc@<|^+SC{8s6D{7bSBsV=kDct1056DY%38HR(yOnE}UC}Xp3BclJ~nLZl=Q2 z*4<0umo4ftgn@VVX+#vK(H6hM;4wdF86xOcmL(3PhQya)%u8jE3F=H9%iF^ozHdZq zkjxk`D#LK|;ga@dFp9(2Cub!GjzzoZlJFA;@SM|ZMQirc&K(wnG@aXPf#GiVvUEjp zuC>kqJ5cH)X&s${_NbdCZ|uKw!DyE6QjA{c8uv=Jj_+F;vhuKgm#M_kb3c;=KBnwZ zzC=_lNUOx!L~rME7WE+ip-v4yoRuOGPHvU5dv?sLjcG}6OVFb{>FHcHBwUeiZ(u~2 z-`4q%-FBz1DaJEwY=W`8ZJsQlA3e%=K{Zx6B~EXaOeDKWIV<&?94XQAg^)E;XRzg3 z*}(K<$Gx06`7zxw7JGB{T-3IPgo>j6{MX3(Ou|ki;Y{#MNlT^K$boVsE#bkT%5Qq~ z89L2I!?X9(;W+d^Q&iW1z*J2Ji=Bx5R{gR*YZjy+KToexdeNeF_5^4KRVn^lo}N*R zNK|>97T|&!i)wM5%S`-8$qloRg1(Z}dU)!-g=H?a4$~)*?%z_L!d2rjN*FYp^w}t% z<2q12A#Pw+3XybDdPLNi^T&?H7*wd*-Wszhng#ztRBod(ElD^;5egjib>5$-KE>_6a1zzc4114lqUC+FH$8S`tgx zF{<=#fqVVi)uB!hemn);ke==0p zdc}^JM8v5=!;y&=ErD4Y8(KmuH)K46Uen8=kP*TdJzVWDyx`R3rFy zgv3E%8&Ofq%v!5zqR#kfXGZw*M~T2CV%D7J@7A6rGI@kb?MkB&L>2LNm3kD|800Ku zYeiaqV{y(RXe%&CY5GPzC0ZY9{bF+qVM^#{|hWJpj{^qCYqRHtExo9;C9L7s$~iE`$bR88rug9gd{ z1Zq=XG+A)9G>^fgORdjp7}l%x4Oi^Yo1KfPnwk1O>hB6sNmIW$m5hbfo5|6AG%(8j zg0bzlGzCw$CFj2Dz%KxA1`vH$z2psp*t4B$$G0F0LXwe#PZ38z!i2YA%cD%n-@!=C zS%atfrrj4pSE)-+I{fy)&rOPFlM9~VfTo)(%B~o#f66Tyebivi99B5fr^BEzOEDWy zC@iaU{TP57Y(2kp??`eoDs$iMYrc&<-Ln!J9=Rydf*1RKd&hQHj)IPmuwr9!jWoBt z4kdA*4h4T=E6?8RY$0w%m3Gs@&@<9+w%---i&_mj6@6nNSb@xJzzb!m=%okf*z7E!G zr%09_Cw|GLlryLBc`Po?Pz85wlxQGAmk-#?48a@* z&gR9VcgpGnxMU{X?w>tjZPs=8eht=9QdVYf7~>hU+aSD_XlfV@CAR+qXU{e89OM-! 
z^)aVoIk$Ygv@fsHLhHoro1AM(HU_ea2X&f7O;Yst_bdVG>*a)__3OEW!`5p{a*=_8 z)?$t1J3+>*zZ_GdTSO6xlQcqANQP0bwi{mVA~V z8L(K2G`sRAL+EOGd}^3>_1F(he=cePv*Y`Ou!Fxx2z1Vde+|M>LtSsA*&rp1pu+Da zz&W(hvWr{`(%kqepm}=>8)uM>DQ@x+Cn3nfQAN6?m`ye3=H?a;r}n|`jM1V8?&3={ zPer&H$!r0P7nZVd`K$XF- z*+QM5x6eTFQA0UF|IBjSfwG0(Ba+1;LFZ$D>VQ(3G%w~P^l^a&5%^#q8|XmkL)Rb& ziipLgBef4eJ(59n#T_+5(V_Vykr>1ojVA=dfI_+uB&!b6tB>&q zp<;i$dCS4|%^Z>Mn|Z&LBHgXM3QtcX@Cw#SF@*lJyU4n7r0iicAm;+NW`&ZMIO|ty zS-sDMci|_dVmB_ewl1e_LZ?kuF@VcA?CIHAwFrc~1y}%}!~G7e!`x$ibE_-=wFZ z=djB;e1Z)X(6a|-a4Vh=v$rqNpvRuk2>FCf*||n%nhMvU?$gdpO^pp-F{SD3A2u0l z{y8b!J~}XK;fan3)?l0Ar|)OC8TdcS04OVHwPD{oWSq`a^wk!@???|(PiE8o{s^k2 z->m4gFY%&V>S~vjXL&Nc?haRz=1!>>C(*9~tN@b+j~vwpm*$h$C8D-G*6oU+XL}YI zr8RVYkw-O9he1Z++dweqjyeyvF`(eeRsp^5%2o|M__ACUU7Wl75~PK0%x&v_i5jP` z7Ingp{*F7~661H*PZ9-pyli29bRqQ3>JZ0yLq%+1Csf5tx=jkhRUg9c9^uni?sh~b zrhds@M(6pH*$wv+;ikIpVz!|%vzl&2BN+yWffx2~ijtYj(5qDRgN^t(mFD=X{dI%w zIMXV-LWtpyeQv3a$Bhxhr6{9=0P1m^ z%HomH?fnb6XN?xPz8^9a2&Du(OTksV?J+}G~aZJx#HF19fKqnhg444FL;;) zpPqt01~gD=gl0`4#2e;_9({-BKs~G`uS?wm<_!4xh`k;ue;27hFCW&W6#bS{=qFn|WjJZx zPYhur3fcTx)dW7XpfQWq!6)j&Sa3nH6Ld%F3Tt&`k11Ux5l*?Rcx1t<_d|=$^Zo6t z&2p2$EZ_ltU2A~)X+H}0`C`oGW$&(H^?6FXX9qXu#AYjT18twOrNan^-*$%+nrX(f zYQk`M)|Nz`FBs!3XF7+>ZpfN`O`ucpJEpXJfaQ*qA7X)!SO^C8G@RlR@ws^9@=06w zk{P0qTif2$_~Qsc$tKz zXWJRWQ5P)=mN=;^#)Ls#P%k#RuMB-xosnPKxf#nI)~>4(BhvHp5?Ld^ZI>&@tvGN# z!SbVuig<>ndycBJJBJl=kX9w9piz&s4$J2WQ|K=oHrrBb2`BTlYjVOpH!Lj=3u8Kd z^!vV}JoUTgu6<0Ga{akFTQo0-w+@*S#Dr@xedxST(UzJcrkZW-4GGb<<&#CsxZ4JJ zydE;Gi8b>}+2vD|GN6XVg z<&2f#EPv`K$9q$o{9Kr)tK`gbH*u&M27Y_;>bjvF+$m{z7DW%-R2_SH{!?pnx2tqj zX47DQ+>&bH8SSuJK6JUh()f{8rLo!R7?pK(+pVRK^jDF#CR3Yd4<*4Ahu!gtNz@Oc z6W@@NJ`3{7gcYe&W!L6eV0wv~5R)*6C>dvA+vyG1!<7+wL zJ}!_iHdp#9|A7B>KufC>Z{%AlqN3SoG*Gk$H7}|fwY2mkjd=9^IjQRWF6Hd71JzBG zdpP)8B{DkaV2ZJ_fAW4-DnN_z#04*vW1uo?w*P>r*dRqxaU6%TAhv+WTlbh@Rt(S1^Xe8{+->i+ZU}wrU_W zVNKAlL3`=xo+n`yYStB{eKfk85!d{yQ5&Gu%l&g%9gz6zVFhmD1IJ0*m2cj$$Mvb1 z&V66SiB28gEaP);Q|n5-k(>J64{485e79Pq`=|A3?wIY=gwZQttGUgVsLjZw!aQd# 
z{59k6GP6IL?Ar9zyYtIvy>IT)#7`@62`-Rq6)UmvLU3H<7*Mft&{!=ynfg&PCx7o( z6KXqkRaw{j=Szph{&ec{Okv4(vsfB;8%1q%?220Y6GsTzZ0tjQpmsu#t|Xv#svwkN z8sIn#`dVp;Ei@yBBR zQL5mv{~7IR5l{LOI|wb$BWj}Ib4zC4n}`i&MtX;ILX;CJOD}QTmh>R5uafl3FY%XQ zvniQi)nCe{k~P)yhOn_x_|a2 zLMt|x0GVwuEBrv)qrhK3#_A@|xs0rbw?rSqO+Ol+?`Cl(vL?Az^$hAFP4sW=rlK}9 zo{%*bWU4qYsTzjwz$G_k4`NPnNN$zKj*5?H0eW*zvBmZ-9mhnB+33hon|R1BC_ z7fZiqd;!VCl-v{!UR-DaVFpEmTt5(_MD9;P@gL0N_eGd0raApXBD23brIe}E%kBB_ z#b{C8XK2>xPpABWC3MirI&8ZOFSpC)`@vp+CO?us*50kIxZ`nfj7Z&dlV6McB7aD} z?S>r&R3_va(2j=R$M_R45D-z|@UYJ6g0a~h2_kYF?a;0hm&q^eP|zIW3@_(en$uL1Ju(8wX{f`O9Tz z1I`ysbgjE%b)vx?nd7A-nYVHMgRDhrvGJdW)R`}I_upTfCcS7C;!NqsS`Qpb&Z*{V z7dZW3Nq;;^`dWjD?2$*w64*Q+l6Zt(>O#mCJ77FkvyJm$Nm)gM&BA%nBgeLSU~is& z*p;TD3r;XZ_jl&!OPnYLHAP!_445R4o#kBVj5Dv+l*uLKlye*v2~kt z_I|9gG43Ob+~}L}LT*>$Esz6RDaZ3*nfBsJHai`h3%Fw0!?_q$$HTWr*I@m!VfSU0 zYSPVO({kYCW^ukIhxbxOcZYu0Frd7x>&sjq{y1v4rSldp_8t*OU7s>GQd0;%G7Ex; z8jB=(*PmuP!6aM}F3^szJUSu?83{@)7wE`00O11(q8r(sFA{nP$)9oaVgjjI{b`y7 zhHc*%I?vkqXBQdNk%hwDphThsQ^J=D5q4ItN~2Q}n%YB|q_K=cy37^|2iyAXpYGOT z7v@35z4g0-agyW`#-RGq;r-Aqa$CPeMe2agKpfc5l<){sAiVb=F$26scPU)EM%t=R ze0zonG$PO+u*+m>c{vwZ@|ATZ+zSj>2-wehqeodBXnumJ#tmmf%-Al%kI}_EyuBKm zWG{$+N=jskXLNWivIaloA)}_Qv%b95y*S~u0CK*h<)CPPJqxZ%LXT9M4NZI5Ai8q= zb||e;54Z(M(-YL*AtNrg=_j z*}3?op+!mpfT#WN5Jj2$lCq!eF(+{M6=;i2OA&>D6~Ti}CTg=mXUrWtw6?HKKo+%j zB-L%728sj1-BKp~_`+ z|CGHe)kkbQ2OUfV+VjbU)kj++0-g9UqOnPzg+#Dr%|kn(v2FXrqV+S)CtCZ}@FOU( z@j3b=U(0zVIA@RRx&4Ng8id}rL88Z+F}m}YbfI*7#8Z*smQ1(LAlt>L*L1^$$MEap zO>(`P$T=dl=;_{fS@$v7S3s|F+xTK^;=Z5m7s@J{J7+*Gc&GUtUEU;Z%MnyMk$Qe{ z`$M40-+_Y|r#70^{Nk!%>Px9E(@cNEFuieO(WLTjSi^M#T9ZtpOw(w+BZD``2czG1 zLK$}HPTMLDA+ZB!Q9??d@a|GaJgOH-mo;|hT`8b>Q<>)cm z&s&Zv{vCzummI^}`01Y>I4NUGG<ajkhrbg*kqoS{m%@C3A2^T+2tkGOo=X| zmv)x)sk8(=J=x{dc)JEl?8mNvh~J&r8hFKlhJbGK>;{|z^Xy8T{8_&SoO%tsqQI#d zkOs1tXA4ioZoOXzr@EGn2+OneHQ*b z42cEQi8LH}dK`H|03q{iKTO%_`&yv5XKOcCt5}lLb%67RYQoDTkeJ?kn@KW!fR)=T z$3#BK2S^wEEASxnhgG(;h0z47$2OMhLE5&E7WauBr}yPiP!K__9kd#0vrAu_m&;{A 
zWXwU$>fLBmW@gM~^C@S!zH^tv{&?E2RD_ok{QdomqFP61t4NRsDX0FNh2yEDGxua= zRe`yt*ZQ=wY>D^odLL?oS6-I|y`w5Z!Hu$|h67`j!A-`~;=|2xllHZhv@I`|-s4Xr zbc81LP6xgWLjCoY*T_axrlc3NYTYf4qVx~yU5U`{DKkD$w~aU zbQxn1It8!T&~vyV&TPYAXh=k;-TWqx*Q?Ntfw7bMEwRn1di6m;zq^yxpMxiN?H{c? zvEG62SN)x%MVY!a={U4TYs-gv4je@sn~5wekgEbY!?Sbn2*UJ?!D+NXUuod+0708Q zw)J>u@4H&{5#I}Hft7H&F7?;KvI$PBdTjIYWE2_&)u#o(AcLN0>L4+27EV}CScaBT znPsH8WWj`bUxIqYUeCqXtE>UG?X1vy&G3ZX@4LbKW{TJvqtDKPRnOaplmmSxykom z=OcH{!6TD6wRSP(k?)7C^!N$iB2#Yoq>wFZ_Y#3Nd;tu>*M`t`omMV2?&aKa#egaF)Rx5=Z^~wjr#d9pNP8GaUFlZKJNQB z+d*&s8;yv}B=c72s|q$#ecxR?v`rL}ga|q2#a6AIT#;$yRCJgMyhrn*8{fA~HyzWdBj%KC;p}`>t~gHTTsJf$>(Yjl zZoIB#rAbno$VhZJW7YblhBGKAS;k3Ro+O#LZwXz;tGH}{)tZZ)&PwW>njwRlrUW#; z>zH_SzCv$>B*@fY3_ks#?$HGcf%V^d%E9DdEAofuzA1I}S3&*+l4&BV-Y&$%%l4uH zFm~>>SwgJB&iDN2D>s4*^V2a*M+|y?{lff*Y+I(?CVjh`NjZ?`@y4e4VNUQ2cLk5s zeG5VMOL`^_#!}_Ts0%PmC;&FC_J*}$ zWbZvcFM6>?vK>E##IUxL2xOV2wOr(HO=}o=?FY>2R^EJPsuL{6VP64}eXS{4a*4e< zX9x=udblVisLK0(`*D7{5iBQuVB9qOju2#ct!P_l5$hkwZ*X%><+B_WFRPF37arl9 z#(p)p+RMNsW5H-H(Bl3eOg#rgAa9ikm^t30_K`fSc&W~e($bdxEadw7%pA(uUOSqq z>|tK~=PH#hG?>(^)pk@pRY-VguMY-^37UPDY zNE%<-hfea{pAN%?>Yv#ZWkhUM$UbavZ4FpZfSh5_^)AOi?=`FYd^JnvH9+RDSrg{dwkSj6G6a(1Iq zuhJe&vXm0V8)m{+cLFrC&4L|qG&pm)Qz`cIld4T-p_35G+H+)3?DBYJ9w$^(c6=VJ zV|hl1-cmQb{|fcw6U&8vG*Y7x$r0=iW+*k|;9O`a6{(ZyJFt_aYR($dbE^fUja8u) z#gI#B&Phq(PshX`QcUMZRA$>}WD5r()`ZFbMvAA-HI>txxkFS&XZA<=!bJd1$@gAP zsy*O~ZxkkMInay=_WWLs2bi^sn|F%iC(O8w%E59WQkJUuWx637!S7Am*C{pKZ87@U zo&J&Vn3x)Yu^2fF-htj`rvzvF3mXvV#ju3KVy=E=cx!lXSPwD|lvXL%;5P_d)m+!2 zZ8i`5s8YX*L|px=0|~hp6pQJYUe(#Axt^%vI*PPf?&mp7n1j$=Q1Sl9Mv=VTA8&gY ze*qGoS7jH17F)}eX*#BjWYU1#4m1459!NhWhHVPJqe-s*`hMUH+Is)fx0Iot;PXlV zuCwc$D1G;>r&}a_u>omQc_24s*JYhD3VHtC748Cx`a5qK2<^0+x1EdF?^tOO1_T<=^srV+#HIo+M_hru z&uD&Ld zrLyJCeUtV~cwgm4?DmqvtG33%g#Qh3C_u1Nv3D`DE)^$f6nDi8XZR&wB0GStfY&TG z7$+q2OJsZ0yy`ou*~V4bTeK3VK1N;=xE&dxGvtNrEYh%1KmkTm0{5Fksn# zbZXdtbQ1}57(aRZT-9t*AU9ZQDDylRY=!t4g2~ z9_LdL(k{d4*{-shEY9|$Lf}RJbR*3BQL5R4Qss3Rg+flP*_smCc_#xd%HAIXKj&5w 
z>$BS|G))xf%rXI^Wl5^!rALR0GsL*6mseEx_c;Tltt;nC?-Rd|;IiuCMow&fu~WA3 za8sO}sTZKNj<|7gvJA0VoFG2d;Tb16jqG{oVK`F=lN?Ijc!IUp^L7&!zH1JzNyKzP z4Uq@WlScM<{CJuT^&(cpfA_Gt*a-jq%v;F$Dh#kqIMgG@gnud>HnEa4mu6O9nv+w+ zEfaHm_H2k9`tUuy|DcBRz6oAm>-gLhb~_p0Fx0?+p_P$d8(*4o6V|JxW&)ZBncd># z;FvIp_T=Em{1n?o7@=FuGs{8~sAAp1pSB>SHkz+peN%7vt>oiPLT^&F*8 zIHM_Ff%i@Bm``4uG*L#d$4_UjKs204;W}q0mroUSqo!Z0i9ZGDADWY9Ve<2bgqqr@ zKYe>{6(VAPS{Td^59rVOFyh;KlFn8swM-b*xAiXOIQ`7pG62@)=1PldtRSb16hHD4 z=TIfv>E5BV{uB?GAbh=BUc>8{(|yZH08BMCEWG&eP)FrLK z*>svJk1R1v&s@A1S{%BgQ{v5)%|)4o=?ACWaLX>v(3?mK8GWH{&aiE2f#cjLihk7G zOa-S8DnglaGRB)8JcVQze{cpQau2%htu2Ay-y_}_(B9$aJzd9WE6CWJji}zRkY=NP z#*M7L`Vyd`_uzb#{dUo+XW__OxH3k+aeQnLP!~hoFNzHw8i1+|`zRfu5R!{>+BVat zQ?x3S?JG6nxm5(iyWRh^z3^O<(NqqM8RQVRFx2*-ewigNMPKxa7ZS=1pSmJ!PhkV} zn3mbE0>mm}6aq~p)p5i*G=O>Lt-~RQw%ur!t1`0;((^J82~Gn04e0AK%D-lRbU)A_ zfEVbw>!6A5!Yypwj_1T07RCFTzi(`xLzNWuocnM9g^fqz!g;y}E9U~!`Zt{w*d(^` zb4rW=hsv7P!Yx>B_C)elgvzsdiQg&|w-u}zRpVU{ywJw&JnT?n&I^jqe^&^9E})KY zteoUlOUQI()&XfMb(np-gOM!vl9ZjHl7MWktWS7dP+6 zo^W=3V)(>`pjgEY5oD;&=F2m2?FT zb3s>!wgHp^zg-_C%M2RF&r2|W68ZkNZQ zd{|cK26mleb^}Fr_d85bR}TNQBHN(Z1pa`bdAz+Y+Gd~|>%p;E!`cHFSYXh$sTwrA zTcw?Lf!#Pf(<)0))x%$+pPvZ~%rv)p!s|dB*+%pI7M0zH|Cc5|5_6jE|Ce9|(I&gz^44?Pm+WP+Z z?ZWk5Ts9WmaE*wNAareJ^d*nieOT2GB8Beym?8QYRgTW9bmC^3w|LS1RK;a*6SX?l zkHCGiOGv2g<1{Xm@B?L*XGF$51>-HC zc%YnvarNL!+PmK3@ys+=R6CBmJ>CjpE=(I&rm5#`LKQfEK-8u zf%rZYN>&s>R%DhErQD#;m_h^EJA2Od8BO?*$7ou~coF+U=o-!iH8;8T*6pvbJKveG z|CjvI*UZt^q*5@5m5K468K$iN$}m-Ovo!*W*jPIX895l(o7pq9GLrf+1g=V)eQt!QL#W{mWj!foO}#sq0VkOBfWu9^U5CPp#GgfA_E}-`5g?5tR2bN*uhA^*Tr5t00uA%5>QajR?NuE#MBWo4>G9e zXk?{A#s-=4sveRxZp1+LniH#l1j%syN0Kbtzh2=VN#^Vf|DP0dt?$y-{vZsIo3zd$ z$9KNO?U4b|n3xnePx5Bye7a&WVHHG`Yu8u9<0=OqCN5lgE(~SWlnMq>9BvQtYlGA; z{r8EM@|!I9zpQa2@qL~^J#H@DYa=Ek4B*iO*5UUKUMeQQt*5Rwjaa`+6=c;~BS+Nv zf?y3&t+=B$Z!0~W@$Nu0#*vTS{o%iodGUL0*Ek?rVe!|+Vpp@J*H+oOR|Oe%$)}=f zMFiHx>NNs~K(g>3Ukr_s5p)~BdNgs1(tAD3v#oyivcT?PxG4^qVm$xm7DdFE8I5Ip 
zAe)SAIMOWWO)&#L&p7;a?#m|MFIq~~4|k(@-7R&+*d*VwMOPV7kFdOT5M}-SsYb;3 zg+jpiYvgMDQCVT@J@1FI=RLZ4-aPN?Sp>uDC`H5T**K$@xYnWPFi(}QzpB1Q?9yJV zx>AKbF=rIc>13zszu*!7c7OR@wes>V`N_ah?kLmL{qoA`?9PtoYp+DL%kHDt1jZEI zIyNf2_wJGmj^gD5e3@3S<^RG`_J3{WKlun$bkcWxC8v_TlhI#N0X+w!S4RBbv^2AK za1=Dvvwx+Vl-}RljO<82RWn0JQwL2}R#q}r1_+x#AToy6Hl#2?i2K_9NBug+!ou=b zpOKCIFYIfdnVALYb&L%n`%4Ei3mGFL<6q;i@_&?9JrL|a`>&W+JftAKuJejxVE)@0 z|Cs+e&JLNU^*74@D-_9IBUJ$jsAQvTZT5O&g6x&|{}Bja(7&7~0w#NP<$sQY|7{%N zJ+i+hKy=F4+ZZSsIcfqSH#mSwMy`%p{~lleHtR3v%juaI0p;}UU!#`kbq%0`k%Nts zy@8Pf8Tb{f0F*W|G}C)M3|=i_0fXt;S-~vKjF6Lo5d`ry`+o|wfTH#`PPULsB8reL z*0Xl7eN}GY1{73;91qTB21W{^0zh#xN67Z`@BjrNrbE_tAbY)CqVR7A!K*Q^Cmcl4 zKMi$2`XAOI{mVSagum>2HR;vml4gbunq+^y!)sU2#_82d|F#79-)^-0wT7S`nRe z*8m_dAqcq-1384bM1(~|Kpk$iPfU5KVu7$XJ*mTTNz6_BR>iE5HJA^Xo%q{dXCp z4TjM3zhrET5O4j549vjJ@W0@g{|64t!usDhFqjqeKXGjAkniAst-;6)iLL*Vv9Llm z;=g39Ajr4pKV^&{21pqGmkiACZ#YMLJ;-Ov{*{!9X6{B1R{<4lY#^-qi@iudacg56 zGVov5q97e1GEFirMj>_qMn+*~CN@D9eiksJh%h4)8;dX_ScFZGogK{1gY^H0ywX?5 z#z4^2$iTwE$%+gJVr6C*0r7)b*qPWFMFb%XVg-pX2r)uv0wI Date: Fri, 16 Mar 2018 10:08:52 -0700 Subject: [PATCH 032/200] - Removed unused variable (functionality moved) from TableExtractorClass --- src/main/java/technology/tabula/CommandLineApp.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 6d3c34a6..e03cb705 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -360,7 +360,6 @@ private static class TableExtractor { private boolean useLineReturns = false; private BasicExtractionAlgorithm basicExtractor = new BasicExtractionAlgorithm(); private SpreadsheetExtractionAlgorithm spreadsheetExtractor = new SpreadsheetExtractionAlgorithm(); - private List verticalRulingPositions = null; private ExtractionMethod method = ExtractionMethod.BASIC; public TableExtractor() { From 
e37c6669d03ed32bab121ea0d8b33524cba5f6e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Thu, 5 Apr 2018 14:12:57 -0300 Subject: [PATCH 033/200] use pdfbox 2.0.9 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index bb876ccb..586e94a0 100644 --- a/pom.xml +++ b/pom.xml @@ -245,7 +245,7 @@ org.apache.pdfbox pdfbox - 2.0.8 + 2.0.9 From 0705bc621cc943fff30bafb1ead29024af1814e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Sun, 13 May 2018 10:45:17 -0300 Subject: [PATCH 034/200] Replaces JSI in favor of JTS Topology Suite (#226) Closes #224 --- pom.xml | 6 +- .../tabula/RectangleSpatialIndex.java | 69 ++++--------------- .../csv/spreadsheet_no_bounding_frame.csv | 59 ++++++++-------- 3 files changed, 46 insertions(+), 88 deletions(-) diff --git a/pom.xml b/pom.xml index 586e94a0..7107ea53 100644 --- a/pom.xml +++ b/pom.xml @@ -225,9 +225,9 @@ - net.sf.jsi - jsi - 1.1.0-SNAPSHOT + org.locationtech.jts + jts-core + 1.15.0 diff --git a/src/main/java/technology/tabula/RectangleSpatialIndex.java b/src/main/java/technology/tabula/RectangleSpatialIndex.java index be252794..2c8533df 100644 --- a/src/main/java/technology/tabula/RectangleSpatialIndex.java +++ b/src/main/java/technology/tabula/RectangleSpatialIndex.java @@ -1,87 +1,48 @@ package technology.tabula; -import gnu.trove.procedure.TIntProcedure; - import java.util.ArrayList; import java.util.List; -import net.sf.jsi.SpatialIndex; -import net.sf.jsi.rtree.RTree; +import org.locationtech.jts.geom.Envelope; +import org.locationtech.jts.index.strtree.STRtree; class RectangleSpatialIndex { - class SaveToListProcedure implements TIntProcedure { - private List ids = new ArrayList<>(); - - @Override public boolean execute(int id) { - ids.add(id); - return true; - } - List getIds() { - return ids; - } - } + private final STRtree si = new STRtree(); + private final List rectangles = new ArrayList<>(); - private final SpatialIndex 
si; - private final List rectangles; - private Rectangle bounds = null; - - public RectangleSpatialIndex() { - si = new RTree(); - si.init(null); - rectangles = new ArrayList<>(); - } - public void add(T te) { rectangles.add(te); - if (bounds == null) { - bounds = new Rectangle(); - bounds.setRect(te); - } - else { - bounds.merge(te); - } - si.add(rectangleToSpatialIndexRectangle(te), rectangles.size() - 1); + si.insert(new Envelope(te.getLeft(), te.getRight(), te.getBottom(), te.getTop()), te); } public List contains(Rectangle r) { - SaveToListProcedure proc = new SaveToListProcedure(); - si.contains(rectangleToSpatialIndexRectangle(r), proc); - ArrayList rv = new ArrayList<>(); - for (int i : proc.getIds()) { - rv.add(rectangles.get(i)); + List intersection = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); + List rv = new ArrayList(); + + for (T ir: intersection) { + if (r.contains(ir)) { + rv.add(ir); + } } + Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER); return rv; } public List intersects(Rectangle r) { - SaveToListProcedure proc = new SaveToListProcedure(); - si.intersects(rectangleToSpatialIndexRectangle(r), proc); - ArrayList rv = new ArrayList<>(); - for (int i : proc.getIds()) { - rv.add(rectangles.get(i)); - } - Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER); + List rv = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); return rv; } - private net.sf.jsi.Rectangle rectangleToSpatialIndexRectangle(Rectangle r) { - return new net.sf.jsi.Rectangle((float) r.getX(), - (float) r.getY(), - (float) (r.getX() + r.getWidth()), - (float) (r.getY() + r.getHeight())); - } - - /** * Minimum bounding box of all the Rectangles contained on this RectangleSpatialIndex * * @return a Rectangle */ public Rectangle getBounds() { - return bounds; + return Rectangle.boundingBoxOf(rectangles); } } diff --git a/src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv 
b/src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv index b5e1fd0b..513e26e7 100644 --- a/src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv +++ b/src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv @@ -1,31 +1,28 @@ -"",HARVEST,VARIATION,,, -"","11/12 -(a)",12/13,Percentage,Absolute, -"",,"May/2013 -(b)","Jun/2013 -(c)",(c/a),(c-a) -COTTON,"1.393,4","886,7","894,9","(35,8)","( 498,5)" -TOTAL PEANUT,"93,9","100,6","100,2","6,7","6,3" -PEANUT 1ST CROP,"82,1","86,3","86,2","5,0","4,1" -PEANUT 2ND CROP,"11,8","14,3","14,0","18,6","2,2" -RICE,"2.426,7","2.389,7","2.396,0","(1,3)","( 30,7)" -TOTAL BEANS,"3.262,1","2.952,7","3.026,9","(7,2)","( 235,2)" -BEANS 1ST CROP,"1.241,4","1.122,6","1.122,9","(9,5)","( 118,5)" -BEANS 2ND CROP,"1.394,6","1.275,4","1.271,7","(8,8)","( 122,9)" -BEANS 3RD CROP,"626,1","554,7","632,3","1,0","6,3" -SUNFLOWER,"74,5","60,4","68,9","(7,5)","( 5,6)" -CASTOR BEAN,"128,2","87,5","87,4","(31,8)","( 40,8)" -TOTAL CORN,"15.178,1","15.686,2","15.817,4","4,2","639,3" -CORN 1ST CROP,"7.558,5","6.879,2","6.864,7","(9,2)","( 693,8)" -CORN 2ND CROP,"7.619,6","8.807,0","8.952,7","17,5","1.333,1" -SOYBEAN,"25.042,2","27.715,2","27.715,5","10,7","2.673,3" -SORGHUM,"786,9","836,4","836,4","6,3","49,5" -SUBTOTAL,"48.386,0","50.715,4","50.943,6","5,3","2.557,7" -OAT,"153,0","168,7","168,7","10,3","15,7" -CANOLA,"42,4","43,8","43,8","3,3","1,4" -RYE,"2,3","2,3","2,3",-,- -BARLEY,"88,4","102,8","102,8","16,3","14,4" -WHEAT,"2.166,2","1.895,4","1.895,4","(12,5)","( 270,8)" -TRITICALE,"46,9","48,0","48,0","2,3","1,1" -SUBTOTAL,"2.499,2","2.261,0","2.261,0","(9,5)","( 238,2)" -BRAZIL,"50.885,2","5 2.976,4","5 3.204,6","4,6","2.319,5" \ No newline at end of file +"",HARVEST,VARIATION,,, +"","11/12 (a)",12/13,Percentage,Absolute, +"","May/2013 (b)","Jun/2013 (c)",(c/a),(c-a), +COTTON,"1.393,4","886,7","894,9","(35,8)","( 498,5)" +TOTAL PEANUT,"93,9","100,6","100,2","6,7","6,3" +PEANUT 1ST 
CROP,"82,1","86,3","86,2","5,0","4,1" +PEANUT 2ND CROP,"11,8","14,3","14,0","18,6","2,2" +RICE,"2.426,7","2.389,7","2.396,0","(1,3)","( 30,7)" +TOTAL BEANS,"3.262,1","2.952,7","3.026,9","(7,2)","( 235,2)" +BEANS 1ST CROP,"1.241,4","1.122,6","1.122,9","(9,5)","( 118,5)" +BEANS 2ND CROP,"1.394,6","1.275,4","1.271,7","(8,8)","( 122,9)" +BEANS 3RD CROP,"626,1","554,7","632,3","1,0","6,3" +SUNFLOWER,"74,5","60,4","68,9","(7,5)","( 5,6)" +CASTOR BEAN,"128,2","87,5","87,4","(31,8)","( 40,8)" +TOTAL CORN,"15.178,1","15.686,2","15.817,4","4,2","639,3" +CORN 1ST CROP,"7.558,5","6.879,2","6.864,7","(9,2)","( 693,8)" +CORN 2ND CROP,"7.619,6","8.807,0","8.952,7","17,5","1.333,1" +SOYBEAN,"25.042,2","27.715,2","27.715,5","10,7","2.673,3" +SORGHUM,"786,9","836,4","836,4","6,3","49,5" +SUBTOTAL,"48.386,0","50.715,4","50.943,6","5,3","2.557,7" +OAT,"153,0","168,7","168,7","10,3","15,7" +CANOLA,"42,4","43,8","43,8","3,3","1,4" +RYE,"2,3","2,3","2,3",-,- +BARLEY,"88,4","102,8","102,8","16,3","14,4" +WHEAT,"2.166,2","1.895,4","1.895,4","(12,5)","( 270,8)" +TRITICALE,"46,9","48,0","48,0","2,3","1,1" +SUBTOTAL,"2.499,2","2.261,0","2.261,0","(9,5)","( 238,2)" +BRAZIL,"50.885,2","5 2.976,4","5 3.204,6","4,6","2.319,5" From 2bc505e22dfa39f9a8f5e32f020c27d40716cf81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Mon, 14 May 2018 12:03:34 -0300 Subject: [PATCH 035/200] some housekeeping --- src/main/java/technology/tabula/Line.java | 2 +- .../tabula/ObjectExtractorStreamEngine.java | 28 +++++++++---------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/main/java/technology/tabula/Line.java b/src/main/java/technology/tabula/Line.java index 2dc8ce86..31d10529 100644 --- a/src/main/java/technology/tabula/Line.java +++ b/src/main/java/technology/tabula/Line.java @@ -52,7 +52,7 @@ public void addTextChunk(TextChunk textChunk) { public String toString() { StringBuilder sb = new StringBuilder(); String s = super.toString(); - sb.append(s.substring(0, 
s.length() - 1)); + sb.append(s, 0, s.length() - 1); sb.append(",chunks="); for (TextChunk te: this.textChunks) { sb.append("'" + te.getText() + "', "); diff --git a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java index 700d1fe7..ff58dfc6 100644 --- a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java +++ b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java @@ -34,8 +34,6 @@ class ObjectExtractorStreamEngine extends PDFGraphicsStreamEngine { private GeneralPath currentPath = new GeneralPath(); public List clippingPaths; - private Matrix translateMatrix; - protected ObjectExtractorStreamEngine(PDPage page) { super(page); @@ -62,7 +60,7 @@ protected ObjectExtractorStreamEngine(PDPage page) { } @Override - public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException { + public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) { currentPath.moveTo((float) p0.getX(), (float) p0.getY()); currentPath.lineTo((float) p1.getX(), (float) p1.getY()); currentPath.lineTo((float) p2.getX(), (float) p2.getY()); @@ -72,30 +70,30 @@ public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) thro } @Override - public void clip(int windingRule) throws IOException { + public void clip(int windingRule) { // the clipping path will not be updated until the succeeding painting // operator is called clipWindingRule = windingRule; } @Override - public void closePath() throws IOException { + public void closePath() { currentPath.closePath(); } @Override - public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException { + public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) { currentPath.curveTo(x1, y1, x2, y2, x3, y3); } @Override - public void drawImage(PDImage arg0) throws IOException { + public void drawImage(PDImage arg0) { // TODO Auto-generated 
method stub } @Override - public void endPath() throws IOException { + public void endPath() { if (clipWindingRule != -1) { currentPath.setWindingRule(clipWindingRule); getGraphicsState().intersectClippingPath(currentPath); @@ -105,38 +103,38 @@ public void endPath() throws IOException { } @Override - public void fillAndStrokePath(int arg0) throws IOException { + public void fillAndStrokePath(int arg0) { strokeOrFillPath(true); } @Override - public void fillPath(int arg0) throws IOException { + public void fillPath(int arg0) { strokeOrFillPath(true); } @Override - public Point2D getCurrentPoint() throws IOException { + public Point2D getCurrentPoint() { return currentPath.getCurrentPoint(); } @Override - public void lineTo(float x, float y) throws IOException { + public void lineTo(float x, float y) { currentPath.lineTo(x, y); } @Override - public void moveTo(float x, float y) throws IOException { + public void moveTo(float x, float y) { currentPath.moveTo(x, y); } @Override - public void shadingFill(COSName arg0) throws IOException { + public void shadingFill(COSName arg0) { // TODO Auto-generated method stub } @Override - public void strokePath() throws IOException { + public void strokePath() { strokeOrFillPath(false); } From 489105bf657ddcd82588e61f7c5e0bef615f65bb Mon Sep 17 00:00:00 2001 From: Manuel Aristaran Date: Tue, 22 May 2018 12:32:09 -0500 Subject: [PATCH 036/200] prepare release 1.0.2 --- README.md | 4 ++-- pom.xml | 12 ++---------- src/main/java/technology/tabula/CommandLineApp.java | 2 +- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 369444b4..b230c5ac 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Download a version of the tabula-java's jar, with all dependencies included, tha `tabula-java` provides a command line application: ``` -$ java -jar target/tabula-1.0.1-jar-with-dependencies.jar --help +$ java -jar target/tabula-1.0.2-jar-with-dependencies.jar --help usage: tabula [-a ] [-b ] [-c ] 
[-d] [-f ] [-g] [-h] [-i] [-l] [-n] [-o ] [-p ] [-r] [-s ] [-t] [-u] [-v] @@ -69,7 +69,7 @@ Tabula helps you extract tables from PDFs -v,--version Print version and exit. ``` -It also includes a debugging tool, run `java -cp ./target/tabula-1.0.1-jar-with-dependencies.jar technology.tabula.debug.Debug -h` for the available options. +It also includes a debugging tool, run `java -cp ./target/tabula-1.0.2-jar-with-dependencies.jar technology.tabula.debug.Debug -h` for the available options. You can also integrate `tabula-java` with any JVM language. For Java examples, see the [`tests`](src/test/java/technology/tabula/) folder. diff --git a/pom.xml b/pom.xml index 7107ea53..27e77f04 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 technology.tabula tabula - 1.0.2-SNAPSHOT + 1.0.2 Tabula Extract tables from PDF files http://github.com/tabulapdf/tabula-java @@ -36,17 +36,9 @@ scm:git:git@github.com:tabulapdf/tabula-java.git scm:git:git@github.com:tabulapdf/tabula-java.git git@github.com:tabulapdf/tabula-java.git - tabula-1.0.0-SNAPSHOT + v1.0.2 - - - sonatype - Sonatype repository - https://oss.sonatype.org/content/repositories/snapshots/ - - - UTF-8 UTF-8 diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index b7b9dfe9..61de9f12 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -33,7 +33,7 @@ public class CommandLineApp { private static String VERSION = "1.0.2"; - private static String VERSION_STRING = String.format("tabula %s (c) 2012-2017 Manuel Aristarán", VERSION); + private static String VERSION_STRING = String.format("tabula %s (c) 2012-2018 Manuel Aristarán", VERSION); private static String BANNER = "\nTabula helps you extract tables from PDFs\n\n"; private static final int RELATIVE_AREA_CALCULATION_MODE = 0; From 4f0e9f0af1a4079ae59b923873b8e798bf6bbe14 Mon Sep 17 00:00:00 2001 From: "Bruno P. 
Kinoshita" Date: Fri, 15 Jun 2018 16:07:02 +1200 Subject: [PATCH 037/200] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b230c5ac..c5bb42f4 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ You can help by: ### Backers -You can also support our continued work on `tabula-java` with a one-time or monthly donation [on OpenCollective](https://opencollective.com/tabulapdf#support). Organizations who use `tabula-java` can also [sponsor the project](https://opencollective.com/tabulapdf#support) for acknolwedgement on [our official site](http://tabula.technology/) and this README. +You can also support our continued work on `tabula-java` with a one-time or monthly donation [on OpenCollective](https://opencollective.com/tabulapdf#support). Organizations who use `tabula-java` can also [sponsor the project](https://opencollective.com/tabulapdf#support) for acknowledgement on [our official site](http://tabula.technology/) and this README. Special thanks to the following users and organizations for generously supporting Tabula with donations and grants: From 709f2cba16cf8d9dd07cb4401577af0f0ac7da50 Mon Sep 17 00:00:00 2001 From: "Bruno P. 
Kinoshita" Date: Sun, 17 Jun 2018 19:43:20 +1200 Subject: [PATCH 038/200] Prevent file creation in sources directory --- src/test/java/technology/tabula/TestCommandLineApp.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/test/java/technology/tabula/TestCommandLineApp.java b/src/test/java/technology/tabula/TestCommandLineApp.java index 81b30d4b..4c04dccf 100644 --- a/src/test/java/technology/tabula/TestCommandLineApp.java +++ b/src/test/java/technology/tabula/TestCommandLineApp.java @@ -12,10 +12,15 @@ import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.ParseException; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; public class TestCommandLineApp { + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + private String csvFromCommandLineArgs(String[] args) throws ParseException { CommandLineParser parser = new DefaultParser(); CommandLine cmd = parser.parse(CommandLineApp.buildOptions(), args); @@ -72,7 +77,7 @@ public void testExtractSpreadsheetWithAreaAndNewFile() throws ParseException, IO "src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf", "-p", "1", "-a", "150.56,58.9,654.7,536.12", "-f", - "CSV", "-o", "outputFile" + "CSV", "-o", folder.newFile().getAbsolutePath() }); //assertEquals(expectedCsv,); } From 8d32d33008e0efd9cc38e361ecc92822a956a025 Mon Sep 17 00:00:00 2001 From: "Bruno P. 
Kinoshita" Date: Fri, 15 Jun 2018 01:08:41 +1200 Subject: [PATCH 039/200] Update dependencies --- pom.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pom.xml b/pom.xml index 27e77f04..93ea5b63 100644 --- a/pom.xml +++ b/pom.xml @@ -243,19 +243,19 @@ org.bouncycastle bcprov-jdk15on - 1.56 + 1.59 org.bouncycastle bcmail-jdk15on - 1.56 + 1.59 junit junit - 4.11 + 4.12 test @@ -268,19 +268,19 @@ org.apache.commons commons-csv - 1.4 + 1.5 com.google.code.gson gson - 2.8.0 + 2.8.5 com.github.jai-imageio jai-imageio-core - 1.3.1 + 1.4.0 From c8eed5f9beaffb14ea1c85f1229897b63324f8de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Sun, 24 Jun 2018 12:10:16 -0300 Subject: [PATCH 040/200] Add assertion to incomplete test (see #234) --- .../java/technology/tabula/TestCommandLineApp.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/java/technology/tabula/TestCommandLineApp.java b/src/test/java/technology/tabula/TestCommandLineApp.java index 4c04dccf..3e4dafa7 100644 --- a/src/test/java/technology/tabula/TestCommandLineApp.java +++ b/src/test/java/technology/tabula/TestCommandLineApp.java @@ -2,11 +2,9 @@ import static org.junit.Assert.*; +import java.io.File; import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.FileSystem; -import java.nio.file.FileSystems; +import java.nio.file.*; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -73,13 +71,15 @@ public void testExtractSpreadsheetWithAreaAndNewFile() throws ParseException, IO String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv"); + File newFile = folder.newFile(); this.csvFromCommandLineArgs(new String[]{ "src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf", "-p", "1", "-a", "150.56,58.9,654.7,536.12", "-f", - "CSV", "-o", 
folder.newFile().getAbsolutePath() + "CSV", "-o", newFile.getAbsolutePath() }); - //assertEquals(expectedCsv,); + + assertArrayEquals(expectedCsv.getBytes(), Files.readAllBytes(Paths.get(newFile.getAbsolutePath()))); } From 246dabb9fc7ff9e044c47b0ee21a2fa8a19058ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Wed, 4 Jul 2018 09:50:51 -0300 Subject: [PATCH 041/200] upgrade pdfbox dependencies --- pom.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 93ea5b63..24203397 100644 --- a/pom.xml +++ b/pom.xml @@ -237,7 +237,7 @@ org.apache.pdfbox pdfbox - 2.0.9 + 2.0.11 @@ -290,9 +290,9 @@ - com.levigo.jbig2 - levigo-jbig2-imageio - 2.0 + org.apache.pdfbox + jbig2-imageio + 3.0.1 From ae46324b1753818270dd2e64663ebd8b3e8a8b42 Mon Sep 17 00:00:00 2001 From: Eric Schoen Date: Tue, 3 Jul 2018 17:22:09 -0500 Subject: [PATCH 042/200] Make RectangleSpatialIndex public --- pom.xml | 2 +- src/main/java/technology/tabula/RectangleSpatialIndex.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 24203397..e9ea20a5 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 technology.tabula tabula - 1.0.2 + 1.0.3 Tabula Extract tables from PDF files http://github.com/tabulapdf/tabula-java diff --git a/src/main/java/technology/tabula/RectangleSpatialIndex.java b/src/main/java/technology/tabula/RectangleSpatialIndex.java index 2c8533df..a39114a2 100644 --- a/src/main/java/technology/tabula/RectangleSpatialIndex.java +++ b/src/main/java/technology/tabula/RectangleSpatialIndex.java @@ -6,7 +6,7 @@ import org.locationtech.jts.geom.Envelope; import org.locationtech.jts.index.strtree.STRtree; -class RectangleSpatialIndex { +public class RectangleSpatialIndex { private final STRtree si = new STRtree(); From fde2527605acf6fb7f9923e97a17644dfe2fb96c Mon Sep 17 00:00:00 2001 From: coder111 Date: Tue, 21 Aug 2018 21:42:40 +0100 Subject: [PATCH 043/200] Add relative coordinate 
support to column option --- .../technology/tabula/CommandLineApp.java | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 61de9f12..5e1c1cf0 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -265,7 +265,12 @@ private static TableExtractor createExtractor(CommandLine line) throws ParseExce extractor.setUseLineReturns(line.hasOption('u')); if (line.hasOption('c')) { - extractor.setVerticalRulingPositions(parseFloatList(line.getOptionValue('c'))); + String optionString = line.getOptionValue('c'); + if (optionString.startsWith("%")) { + extractor.setVerticalRulingPositionsRelative(true); + optionString = optionString.substring(1); + } + extractor.setVerticalRulingPositions(parseFloatList(optionString)); } return extractor; } @@ -329,7 +334,9 @@ public static Options buildOptions() { .build()); o.addOption(Option.builder("c") .longOpt("columns") - .desc("X coordinates of column boundaries. Example --columns 10.1,20.2,30.3") + .desc("X coordinates of column boundaries. Example --columns 10.1,20.2,30.3. " + + "If all values are between 0-100 (inclusive) and preceded by '%', input will be taken as % of actual width of the page. 
" + + "Example: --columns %25,50,80.6") .hasArg() .argName("COLUMNS") .build()); @@ -356,6 +363,7 @@ private static class TableExtractor { private boolean useLineReturns = false; private BasicExtractionAlgorithm basicExtractor = new BasicExtractionAlgorithm(); private SpreadsheetExtractionAlgorithm spreadsheetExtractor = new SpreadsheetExtractionAlgorithm(); + private boolean verticalRulingPositionsRelative = false; private List verticalRulingPositions = null; private ExtractionMethod method = ExtractionMethod.BASIC; @@ -365,6 +373,9 @@ public TableExtractor() { public void setVerticalRulingPositions(List positions) { this.verticalRulingPositions = positions; } + public void setVerticalRulingPositionsRelative(boolean relative) { + this.verticalRulingPositionsRelative = relative; + } public void setGuess(boolean guess) { this.guess = guess; @@ -411,7 +422,19 @@ public List
extractTablesBasic(Page page) { } if (verticalRulingPositions != null) { - return basicExtractor.extract(page, verticalRulingPositions); + List absoluteRulingPositions; + + if (this.verticalRulingPositionsRelative) { + // convert relative to absolute + absoluteRulingPositions = new ArrayList<>(verticalRulingPositions.size()); + for (float relative: this.verticalRulingPositions) { + float absolute = (float)(relative / 100.0 * page.getWidth()); + absoluteRulingPositions.add(absolute); + } + } else { + absoluteRulingPositions = this.verticalRulingPositions; + } + return basicExtractor.extract(page, absoluteRulingPositions); } return basicExtractor.extract(page); } From 1e9a6129f61ba7031f49fe06276a0cc69ff135ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 11 Sep 2018 12:29:58 -0300 Subject: [PATCH 044/200] Fix AppVeyor windows build - maven 3.5.4 --- appveyor.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index d16a36ce..b2c4a0ae 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -2,14 +2,14 @@ version: '{build}' install: - ps: | Add-Type -AssemblyName System.IO.Compression.FileSystem - if (!(Test-Path -Path "C:\maven\apache-maven-3.5.2" )) { + if (!(Test-Path -Path "C:\maven\apache-maven-3.5.4" )) { (new-object System.Net.WebClient).DownloadFile( - 'http://www-us.apache.org/dist/maven/maven-3/3.5.2/binaries/apache-maven-3.5.2-bin.zip', + 'http://www-us.apache.org/dist/maven/maven-3/3.5.4/binaries/apache-maven-3.5.4-bin.zip', 'C:\maven-bin.zip' ) [System.IO.Compression.ZipFile]::ExtractToDirectory("C:\maven-bin.zip", "C:\maven") } - - cmd: SET PATH=C:\maven\apache-maven-3.5.2\bin;%JAVA_HOME%\bin;%PATH% + - cmd: SET PATH=C:\maven\apache-maven-3.5.4\bin;%JAVA_HOME%\bin;%PATH% - cmd: SET MAVEN_OPTS=-Xmx2g - cmd: SET JAVA_OPTS=-Xmx2g build_script: From 6fd5d32bd61fe98beb8ecfe6cc25ec08aad92783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 30 Oct 2018 10:56:36 
-0300 Subject: [PATCH 045/200] Update pdfbox to 2.0.12 (CVE-2018-11797) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 93ea5b63..21e9677b 100644 --- a/pom.xml +++ b/pom.xml @@ -237,7 +237,7 @@ org.apache.pdfbox pdfbox - 2.0.9 + 2.0.12 From 545db99cb432306d5a06c56bfe68c3a3d6380052 Mon Sep 17 00:00:00 2001 From: laigor Date: Fri, 7 Dec 2018 19:24:12 +0300 Subject: [PATCH 046/200] Add right and bottom of area to JSON output --- src/main/java/technology/tabula/json/TableSerializer.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/technology/tabula/json/TableSerializer.java b/src/main/java/technology/tabula/json/TableSerializer.java index 87d091d0..ba04a7cf 100644 --- a/src/main/java/technology/tabula/json/TableSerializer.java +++ b/src/main/java/technology/tabula/json/TableSerializer.java @@ -30,6 +30,8 @@ public JsonElement serialize(Table src, Type typeOfSrc, JsonSerializationContext result.addProperty("left", src.getLeft()); result.addProperty("width", src.getWidth()); result.addProperty("height", src.getHeight()); + result.addProperty("right", src.getRight()); + result.addProperty("bottom", src.getBottom()); JsonArray data; result.add("data", data = new JsonArray()); From 883284cf8755eb88ff77749ea96e31f5b741fe64 Mon Sep 17 00:00:00 2001 From: "Jeremy B. 
Merrill" Date: Sun, 25 Nov 2018 16:12:47 -0500 Subject: [PATCH 047/200] -g method translates to ExtractionMethod.DECIDE --- src/main/java/technology/tabula/CommandLineApp.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 61de9f12..0de4e4a5 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -252,7 +252,7 @@ private static ExtractionMethod whichExtractionMethod(CommandLine line) { } // -n/--no-spreadsheet [deprecated; use -t] or -c/--columns or -g/--guess or -t/--stream - if (line.hasOption('n') || line.hasOption('c') || line.hasOption('g') || line.hasOption('t')) { + if (line.hasOption('n') || line.hasOption('c') || line.hasOption('t')) { return ExtractionMethod.BASIC; } return ExtractionMethod.DECIDE; From c400625316e941610e47cdb5076434c7522323cc Mon Sep 17 00:00:00 2001 From: "Jeremy B. 
Merrill" Date: Sun, 25 Nov 2018 16:13:31 -0500 Subject: [PATCH 048/200] correct table coordinates in guess mode --- .../technology/tabula/extractors/BasicExtractionAlgorithm.java | 3 ++- .../tabula/json/argentina_diputados_voting_record.json | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java index afaeb5c7..dcd01695 100644 --- a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java @@ -64,7 +64,8 @@ public int compare(Ruling arg0, Ruling arg1) { } Table table = new Table(this); - + table.setRect(page.getLeft(), page.getTop(), page.getWidth(), page.getHeight()); + for (int i = 0; i < lines.size(); i++) { Line line = lines.get(i); List elements = line.getTextElements(); diff --git a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json index 3a2eced9..cc7b1735 100644 --- a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json +++ b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json @@ -1 +1 @@ -{"extraction_method":"stream","top":0.0,"left":0.0,"width":549.0399780273438,"height":782.0400390625,"data":[[{"top":279.87,"left":28.56,"width":175.21029663085938,"height":6.449999809265137,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":279.87,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":280.59,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del 
Estero"},{"top":279.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":296.31,"left":28.56,"width":141.71029663085938,"height":6.449999809265137,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":296.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":297.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":296.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":312.87,"left":28.56,"width":83.75028228759766,"height":6.449999809265137,"text":"ALONSO, María Luz"},{"top":312.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":313.59,"left":397.56,"width":42.250274658203125,"height":6.449999809265137,"text":"La Pampa"},{"top":312.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":329.31,"left":28.56,"width":84.25028228759766,"height":6.449999809265137,"text":"ARENA, Celia Isabel"},{"top":329.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":330.15,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":329.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":345.87,"left":28.56,"width":110.29029846191406,"height":6.449999809265137,"text":"ARREGUI, Andrés Roberto"},{"top":345.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":346.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":345.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":362.31,"left":28.56,"width":115.69029235839844,"height":6.449999809265137,"text":"AVOSCAN, Herman 
Horacio"},{"top":362.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":363.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":362.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":378.87,"left":28.56,"width":95.6902847290039,"height":6.449999809265137,"text":"BALCEDO, María Ester"},{"top":378.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":379.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":378.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":395.31,"left":28.56,"width":127.69029235839844,"height":6.449999809265137,"text":"BARRANDEGUY, Raúl Enrique"},{"top":395.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":396.15,"left":397.56,"width":43.6702880859375,"height":6.449999809265137,"text":"Entre Ríos"},{"top":395.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":411.87,"left":28.56,"width":106.69029235839844,"height":6.449999809265137,"text":"BASTERRA, Luis Eugenio"},{"top":411.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":412.59,"left":397.56,"width":36.73028564453125,"height":6.449999809265137,"text":"Formosa"},{"top":411.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":428.31,"left":28.56,"width":92.21028137207031,"height":6.449999809265137,"text":"BEDANO, Nora Esther"},{"top":428.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - 
PJ"},{"top":429.15,"left":397.56,"width":35.6602783203125,"height":6.449999809265137,"text":"Córdoba"},{"top":428.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":444.87,"left":28.56,"width":100.69029235839844,"height":6.449999809265137,"text":"BERNAL, María Eugenia"},{"top":444.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":445.59,"left":397.56,"width":22.200286865234375,"height":6.449999809265137,"text":"Jujuy"},{"top":444.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":461.31,"left":28.56,"width":112.21029663085938,"height":6.449999809265137,"text":"BERTONE, Rosana Andrea"},{"top":461.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":462.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra del Fuego"},{"top":461.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":477.87,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"BIANCHI, María del Carmen"},{"top":477.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":478.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. 
As."},{"top":477.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":494.31,"left":28.56,"width":115.19029235839844,"height":6.449999809265137,"text":"BIDEGAIN, Gloria Mercedes"},{"top":494.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":495.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":494.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":510.87,"left":28.56,"width":66.25028228759766,"height":6.449999809265137,"text":"BRAWER, Mara"},{"top":510.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":511.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. As."},{"top":510.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":527.31,"left":28.56,"width":90.73028564453125,"height":6.449999809265137,"text":"BRILLO, José Ricardo"},{"top":527.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular Neuquino"},{"top":528.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":527.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":543.87,"left":28.56,"width":120.73028564453125,"height":6.449999809265137,"text":"BROMBERG, Isaac Benjamín"},{"top":543.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":544.59,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":543.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":560.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"BRUE, Daniel 
Agustín"},{"top":560.31,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":561.15,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":560.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":576.87,"left":28.56,"width":72.23028564453125,"height":6.449999809265137,"text":"CALCAGNO, Eric"},{"top":576.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":577.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":576.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":593.31,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"CARLOTTO, Remo Gerardo"},{"top":593.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":594.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":593.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":609.87,"left":28.56,"width":122.1702880859375,"height":6.449999809265137,"text":"CARMONA, Guillermo Ramón"},{"top":609.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":610.59,"left":397.56,"width":38.1602783203125,"height":6.449999809265137,"text":"Mendoza"},{"top":609.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":626.31,"left":28.56,"width":124.73028564453125,"height":6.449999809265137,"text":"CATALAN MAGNI, Julio César"},{"top":626.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":627.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra 
del Fuego"},{"top":626.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":642.87,"left":28.56,"width":88.6902847290039,"height":6.449999809265137,"text":"CEJAS, Jorge Alberto"},{"top":642.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":643.59,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":642.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":659.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"CHIENO, María Elena"},{"top":659.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":660.15,"left":397.56,"width":42.72027587890625,"height":6.449999809265137,"text":"Corrientes"},{"top":659.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":675.87,"left":28.56,"width":96.25028228759766,"height":6.449999809265137,"text":"CIAMPINI, José Alberto"},{"top":675.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":676.59,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":675.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":692.31,"left":28.56,"width":131.77029418945312,"height":6.449999809265137,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":692.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":693.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":692.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":708.87,"left":28.56,"width":62.15028381347656,"height":6.449999809265137,"text":"CLERI, 
Marcos"},{"top":708.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":709.59,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":708.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":725.31,"left":28.56,"width":101.77029418945312,"height":6.449999809265137,"text":"COMELLI, Alicia Marcela"},{"top":725.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular Neuquino"},{"top":726.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":725.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":741.87,"left":28.56,"width":88.1902847290039,"height":6.449999809265137,"text":"CONTI, Diana Beatriz"},{"top":741.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":742.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":741.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":758.31,"left":28.56,"width":98.75028228759766,"height":6.449999809265137,"text":"CORDOBA, Stella Maris"},{"top":758.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":759.15,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":758.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":774.87,"left":28.56,"width":105.25028991699219,"height":6.449999809265137,"text":"CURRILEN, Oscar Rubén"},{"top":774.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - 
PJ"},{"top":775.59,"left":397.56,"width":30.2802734375,"height":6.449999809265137,"text":"Chubut"},{"top":774.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}]]} \ No newline at end of file +{"extraction_method":"stream","top":269.875,"left":12.75,"width":548.25,"height":520.625,"data":[[{"top":279.87,"left":28.56,"width":175.21029663085938,"height":6.449999809265137,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":279.87,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":280.59,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":279.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":296.31,"left":28.56,"width":141.71029663085938,"height":6.449999809265137,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":296.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":297.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":296.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":312.87,"left":28.56,"width":83.75028228759766,"height":6.449999809265137,"text":"ALONSO, María Luz"},{"top":312.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":313.59,"left":397.56,"width":42.250274658203125,"height":6.449999809265137,"text":"La Pampa"},{"top":312.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":329.31,"left":28.56,"width":84.25028228759766,"height":6.449999809265137,"text":"ARENA, Celia Isabel"},{"top":329.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - 
PJ"},{"top":330.15,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":329.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":345.87,"left":28.56,"width":110.29029846191406,"height":6.449999809265137,"text":"ARREGUI, Andrés Roberto"},{"top":345.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":346.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":345.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":362.31,"left":28.56,"width":115.69029235839844,"height":6.449999809265137,"text":"AVOSCAN, Herman Horacio"},{"top":362.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":363.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":362.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":378.87,"left":28.56,"width":95.6902847290039,"height":6.449999809265137,"text":"BALCEDO, María Ester"},{"top":378.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":379.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":378.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":395.31,"left":28.56,"width":127.69029235839844,"height":6.449999809265137,"text":"BARRANDEGUY, Raúl Enrique"},{"top":395.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":396.15,"left":397.56,"width":43.6702880859375,"height":6.449999809265137,"text":"Entre 
Ríos"},{"top":395.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":411.87,"left":28.56,"width":106.69029235839844,"height":6.449999809265137,"text":"BASTERRA, Luis Eugenio"},{"top":411.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":412.59,"left":397.56,"width":36.73028564453125,"height":6.449999809265137,"text":"Formosa"},{"top":411.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":428.31,"left":28.56,"width":92.21028137207031,"height":6.449999809265137,"text":"BEDANO, Nora Esther"},{"top":428.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":429.15,"left":397.56,"width":35.6602783203125,"height":6.449999809265137,"text":"Córdoba"},{"top":428.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":444.87,"left":28.56,"width":100.69029235839844,"height":6.449999809265137,"text":"BERNAL, María Eugenia"},{"top":444.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":445.59,"left":397.56,"width":22.200286865234375,"height":6.449999809265137,"text":"Jujuy"},{"top":444.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":461.31,"left":28.56,"width":112.21029663085938,"height":6.449999809265137,"text":"BERTONE, Rosana Andrea"},{"top":461.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":462.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra del Fuego"},{"top":461.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":477.87,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"BIANCHI, María del 
Carmen"},{"top":477.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":478.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. As."},{"top":477.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":494.31,"left":28.56,"width":115.19029235839844,"height":6.449999809265137,"text":"BIDEGAIN, Gloria Mercedes"},{"top":494.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":495.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":494.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":510.87,"left":28.56,"width":66.25028228759766,"height":6.449999809265137,"text":"BRAWER, Mara"},{"top":510.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":511.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. 
As."},{"top":510.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":527.31,"left":28.56,"width":90.73028564453125,"height":6.449999809265137,"text":"BRILLO, José Ricardo"},{"top":527.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular Neuquino"},{"top":528.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":527.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":543.87,"left":28.56,"width":120.73028564453125,"height":6.449999809265137,"text":"BROMBERG, Isaac Benjamín"},{"top":543.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":544.59,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":543.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":560.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"BRUE, Daniel Agustín"},{"top":560.31,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":561.15,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":560.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":576.87,"left":28.56,"width":72.23028564453125,"height":6.449999809265137,"text":"CALCAGNO, Eric"},{"top":576.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":577.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":576.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":593.31,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"CARLOTTO, Remo 
Gerardo"},{"top":593.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":594.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":593.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":609.87,"left":28.56,"width":122.1702880859375,"height":6.449999809265137,"text":"CARMONA, Guillermo Ramón"},{"top":609.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":610.59,"left":397.56,"width":38.1602783203125,"height":6.449999809265137,"text":"Mendoza"},{"top":609.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":626.31,"left":28.56,"width":124.73028564453125,"height":6.449999809265137,"text":"CATALAN MAGNI, Julio César"},{"top":626.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":627.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra del Fuego"},{"top":626.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":642.87,"left":28.56,"width":88.6902847290039,"height":6.449999809265137,"text":"CEJAS, Jorge Alberto"},{"top":642.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":643.59,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":642.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":659.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"CHIENO, María Elena"},{"top":659.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - 
PJ"},{"top":660.15,"left":397.56,"width":42.72027587890625,"height":6.449999809265137,"text":"Corrientes"},{"top":659.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":675.87,"left":28.56,"width":96.25028228759766,"height":6.449999809265137,"text":"CIAMPINI, José Alberto"},{"top":675.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":676.59,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":675.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":692.31,"left":28.56,"width":131.77029418945312,"height":6.449999809265137,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":692.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":693.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":692.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":708.87,"left":28.56,"width":62.15028381347656,"height":6.449999809265137,"text":"CLERI, Marcos"},{"top":708.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":709.59,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":708.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":725.31,"left":28.56,"width":101.77029418945312,"height":6.449999809265137,"text":"COMELLI, Alicia Marcela"},{"top":725.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular 
Neuquino"},{"top":726.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":725.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":741.87,"left":28.56,"width":88.1902847290039,"height":6.449999809265137,"text":"CONTI, Diana Beatriz"},{"top":741.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":742.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":741.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":758.31,"left":28.56,"width":98.75028228759766,"height":6.449999809265137,"text":"CORDOBA, Stella Maris"},{"top":758.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":759.15,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":758.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":774.87,"left":28.56,"width":105.25028991699219,"height":6.449999809265137,"text":"CURRILEN, Oscar Rubén"},{"top":774.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":775.59,"left":397.56,"width":30.2802734375,"height":6.449999809265137,"text":"Chubut"},{"top":774.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}]]} \ No newline at end of file From 209eea214e0a48f238a8d17b95d0d6e310c1233e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 11 Dec 2018 09:46:21 -0300 Subject: [PATCH 049/200] Update JSON test fixtures to include `right` and `bottom` keys (#265) --- .../tabula/json/argentina_diputados_voting_record.json | 2 +- src/test/resources/technology/tabula/json/schools.json | 3 +-- src/test/resources/technology/tabula/json/spanning_cells.json | 2 +- 
.../resources/technology/tabula/json/spanning_cells_basic.json | 2 +- src/test/resources/technology/tabula/json/twotables.json | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json index 3a2eced9..fb4400f8 100644 --- a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json +++ b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json @@ -1 +1 @@ -{"extraction_method":"stream","top":0.0,"left":0.0,"width":549.0399780273438,"height":782.0400390625,"data":[[{"top":279.87,"left":28.56,"width":175.21029663085938,"height":6.449999809265137,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":279.87,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":280.59,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":279.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":296.31,"left":28.56,"width":141.71029663085938,"height":6.449999809265137,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":296.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":297.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":296.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":312.87,"left":28.56,"width":83.75028228759766,"height":6.449999809265137,"text":"ALONSO, María Luz"},{"top":312.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":313.59,"left":397.56,"width":42.250274658203125,"height":6.449999809265137,"text":"La 
Pampa"},{"top":312.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":329.31,"left":28.56,"width":84.25028228759766,"height":6.449999809265137,"text":"ARENA, Celia Isabel"},{"top":329.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":330.15,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":329.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":345.87,"left":28.56,"width":110.29029846191406,"height":6.449999809265137,"text":"ARREGUI, Andrés Roberto"},{"top":345.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":346.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":345.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":362.31,"left":28.56,"width":115.69029235839844,"height":6.449999809265137,"text":"AVOSCAN, Herman Horacio"},{"top":362.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":363.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":362.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":378.87,"left":28.56,"width":95.6902847290039,"height":6.449999809265137,"text":"BALCEDO, María Ester"},{"top":378.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":379.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":378.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":395.31,"left":28.56,"width":127.69029235839844,"height":6.449999809265137,"text":"BARRANDEGUY, Raúl 
Enrique"},{"top":395.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":396.15,"left":397.56,"width":43.6702880859375,"height":6.449999809265137,"text":"Entre Ríos"},{"top":395.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":411.87,"left":28.56,"width":106.69029235839844,"height":6.449999809265137,"text":"BASTERRA, Luis Eugenio"},{"top":411.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":412.59,"left":397.56,"width":36.73028564453125,"height":6.449999809265137,"text":"Formosa"},{"top":411.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":428.31,"left":28.56,"width":92.21028137207031,"height":6.449999809265137,"text":"BEDANO, Nora Esther"},{"top":428.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":429.15,"left":397.56,"width":35.6602783203125,"height":6.449999809265137,"text":"Córdoba"},{"top":428.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":444.87,"left":28.56,"width":100.69029235839844,"height":6.449999809265137,"text":"BERNAL, María Eugenia"},{"top":444.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":445.59,"left":397.56,"width":22.200286865234375,"height":6.449999809265137,"text":"Jujuy"},{"top":444.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":461.31,"left":28.56,"width":112.21029663085938,"height":6.449999809265137,"text":"BERTONE, Rosana Andrea"},{"top":461.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":462.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra del 
Fuego"},{"top":461.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":477.87,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"BIANCHI, María del Carmen"},{"top":477.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":478.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. As."},{"top":477.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":494.31,"left":28.56,"width":115.19029235839844,"height":6.449999809265137,"text":"BIDEGAIN, Gloria Mercedes"},{"top":494.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":495.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":494.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":510.87,"left":28.56,"width":66.25028228759766,"height":6.449999809265137,"text":"BRAWER, Mara"},{"top":510.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":511.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. 
As."},{"top":510.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":527.31,"left":28.56,"width":90.73028564453125,"height":6.449999809265137,"text":"BRILLO, José Ricardo"},{"top":527.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular Neuquino"},{"top":528.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":527.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":543.87,"left":28.56,"width":120.73028564453125,"height":6.449999809265137,"text":"BROMBERG, Isaac Benjamín"},{"top":543.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":544.59,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":543.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":560.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"BRUE, Daniel Agustín"},{"top":560.31,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":561.15,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":560.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":576.87,"left":28.56,"width":72.23028564453125,"height":6.449999809265137,"text":"CALCAGNO, Eric"},{"top":576.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":577.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":576.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":593.31,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"CARLOTTO, Remo 
Gerardo"},{"top":593.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":594.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":593.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":609.87,"left":28.56,"width":122.1702880859375,"height":6.449999809265137,"text":"CARMONA, Guillermo Ramón"},{"top":609.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":610.59,"left":397.56,"width":38.1602783203125,"height":6.449999809265137,"text":"Mendoza"},{"top":609.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":626.31,"left":28.56,"width":124.73028564453125,"height":6.449999809265137,"text":"CATALAN MAGNI, Julio César"},{"top":626.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":627.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra del Fuego"},{"top":626.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":642.87,"left":28.56,"width":88.6902847290039,"height":6.449999809265137,"text":"CEJAS, Jorge Alberto"},{"top":642.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":643.59,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":642.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":659.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"CHIENO, María Elena"},{"top":659.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - 
PJ"},{"top":660.15,"left":397.56,"width":42.72027587890625,"height":6.449999809265137,"text":"Corrientes"},{"top":659.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":675.87,"left":28.56,"width":96.25028228759766,"height":6.449999809265137,"text":"CIAMPINI, José Alberto"},{"top":675.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":676.59,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":675.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":692.31,"left":28.56,"width":131.77029418945312,"height":6.449999809265137,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":692.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":693.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":692.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":708.87,"left":28.56,"width":62.15028381347656,"height":6.449999809265137,"text":"CLERI, Marcos"},{"top":708.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":709.59,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":708.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":725.31,"left":28.56,"width":101.77029418945312,"height":6.449999809265137,"text":"COMELLI, Alicia Marcela"},{"top":725.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular 
Neuquino"},{"top":726.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":725.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":741.87,"left":28.56,"width":88.1902847290039,"height":6.449999809265137,"text":"CONTI, Diana Beatriz"},{"top":741.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":742.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":741.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":758.31,"left":28.56,"width":98.75028228759766,"height":6.449999809265137,"text":"CORDOBA, Stella Maris"},{"top":758.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":759.15,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":758.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":774.87,"left":28.56,"width":105.25028991699219,"height":6.449999809265137,"text":"CURRILEN, Oscar Rubén"},{"top":774.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":775.59,"left":397.56,"width":30.2802734375,"height":6.449999809265137,"text":"Chubut"},{"top":774.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}]]} \ No newline at end of file +{"extraction_method":"stream","top":0.0,"left":0.0,"width":549.0399780273438,"height":782.0400390625,"right":549.04,"bottom":782.04004,"data":[[{"top":279.87,"left":28.56,"width":175.21029663085938,"height":6.449999809265137,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":279.87,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por 
Santiago"},{"top":280.59,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":279.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":296.31,"left":28.56,"width":141.71029663085938,"height":6.449999809265137,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":296.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":297.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":296.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":312.87,"left":28.56,"width":83.75028228759766,"height":6.449999809265137,"text":"ALONSO, María Luz"},{"top":312.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":313.59,"left":397.56,"width":42.250274658203125,"height":6.449999809265137,"text":"La Pampa"},{"top":312.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":329.31,"left":28.56,"width":84.25028228759766,"height":6.449999809265137,"text":"ARENA, Celia Isabel"},{"top":329.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":330.15,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":329.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":345.87,"left":28.56,"width":110.29029846191406,"height":6.449999809265137,"text":"ARREGUI, Andrés Roberto"},{"top":345.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":346.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos 
Aires"},{"top":345.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":362.31,"left":28.56,"width":115.69029235839844,"height":6.449999809265137,"text":"AVOSCAN, Herman Horacio"},{"top":362.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":363.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":362.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":378.87,"left":28.56,"width":95.6902847290039,"height":6.449999809265137,"text":"BALCEDO, María Ester"},{"top":378.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":379.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":378.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":395.31,"left":28.56,"width":127.69029235839844,"height":6.449999809265137,"text":"BARRANDEGUY, Raúl Enrique"},{"top":395.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":396.15,"left":397.56,"width":43.6702880859375,"height":6.449999809265137,"text":"Entre Ríos"},{"top":395.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":411.87,"left":28.56,"width":106.69029235839844,"height":6.449999809265137,"text":"BASTERRA, Luis Eugenio"},{"top":411.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":412.59,"left":397.56,"width":36.73028564453125,"height":6.449999809265137,"text":"Formosa"},{"top":411.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":428.31,"left":28.56,"width":92.21028137207031,"height":6.449999809265137,"text":"BEDANO, Nora 
Esther"},{"top":428.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":429.15,"left":397.56,"width":35.6602783203125,"height":6.449999809265137,"text":"Córdoba"},{"top":428.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":444.87,"left":28.56,"width":100.69029235839844,"height":6.449999809265137,"text":"BERNAL, María Eugenia"},{"top":444.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":445.59,"left":397.56,"width":22.200286865234375,"height":6.449999809265137,"text":"Jujuy"},{"top":444.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":461.31,"left":28.56,"width":112.21029663085938,"height":6.449999809265137,"text":"BERTONE, Rosana Andrea"},{"top":461.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":462.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra del Fuego"},{"top":461.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":477.87,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"BIANCHI, María del Carmen"},{"top":477.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":478.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. 
As."},{"top":477.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":494.31,"left":28.56,"width":115.19029235839844,"height":6.449999809265137,"text":"BIDEGAIN, Gloria Mercedes"},{"top":494.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":495.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":494.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":510.87,"left":28.56,"width":66.25028228759766,"height":6.449999809265137,"text":"BRAWER, Mara"},{"top":510.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":511.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. As."},{"top":510.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":527.31,"left":28.56,"width":90.73028564453125,"height":6.449999809265137,"text":"BRILLO, José Ricardo"},{"top":527.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular Neuquino"},{"top":528.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":527.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":543.87,"left":28.56,"width":120.73028564453125,"height":6.449999809265137,"text":"BROMBERG, Isaac Benjamín"},{"top":543.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":544.59,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":543.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":560.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"BRUE, Daniel 
Agustín"},{"top":560.31,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":561.15,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":560.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":576.87,"left":28.56,"width":72.23028564453125,"height":6.449999809265137,"text":"CALCAGNO, Eric"},{"top":576.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":577.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":576.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":593.31,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"CARLOTTO, Remo Gerardo"},{"top":593.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":594.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":593.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":609.87,"left":28.56,"width":122.1702880859375,"height":6.449999809265137,"text":"CARMONA, Guillermo Ramón"},{"top":609.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":610.59,"left":397.56,"width":38.1602783203125,"height":6.449999809265137,"text":"Mendoza"},{"top":609.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":626.31,"left":28.56,"width":124.73028564453125,"height":6.449999809265137,"text":"CATALAN MAGNI, Julio César"},{"top":626.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":627.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra 
del Fuego"},{"top":626.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":642.87,"left":28.56,"width":88.6902847290039,"height":6.449999809265137,"text":"CEJAS, Jorge Alberto"},{"top":642.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":643.59,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":642.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":659.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"CHIENO, María Elena"},{"top":659.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":660.15,"left":397.56,"width":42.72027587890625,"height":6.449999809265137,"text":"Corrientes"},{"top":659.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":675.87,"left":28.56,"width":96.25028228759766,"height":6.449999809265137,"text":"CIAMPINI, José Alberto"},{"top":675.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":676.59,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":675.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":692.31,"left":28.56,"width":131.77029418945312,"height":6.449999809265137,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":692.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":693.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":692.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":708.87,"left":28.56,"width":62.15028381347656,"height":6.449999809265137,"text":"CLERI, 
Marcos"},{"top":708.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":709.59,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":708.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":725.31,"left":28.56,"width":101.77029418945312,"height":6.449999809265137,"text":"COMELLI, Alicia Marcela"},{"top":725.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular Neuquino"},{"top":726.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":725.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":741.87,"left":28.56,"width":88.1902847290039,"height":6.449999809265137,"text":"CONTI, Diana Beatriz"},{"top":741.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":742.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":741.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":758.31,"left":28.56,"width":98.75028228759766,"height":6.449999809265137,"text":"CORDOBA, Stella Maris"},{"top":758.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":759.15,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":758.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":774.87,"left":28.56,"width":105.25028991699219,"height":6.449999809265137,"text":"CURRILEN, Oscar Rubén"},{"top":774.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - 
PJ"},{"top":775.59,"left":397.56,"width":30.2802734375,"height":6.449999809265137,"text":"Chubut"},{"top":774.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}]]} diff --git a/src/test/resources/technology/tabula/json/schools.json b/src/test/resources/technology/tabula/json/schools.json index 1c9ed032..57544ff0 100644 --- a/src/test/resources/technology/tabula/json/schools.json +++ b/src/test/resources/technology/tabula/json/schools.json @@ -1,2 +1 @@ - -{"extraction_method":"lattice","top":54.315777,"left":16.97,"width":745.3303833007812,"height":483.9442443847656,"data":[[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":54.315777,"left":18.588728,"width":153.91128540039062,"height":8.744216918945312,"text":"Last Name"},{"top":54.315777,"left":172.50002,"width":69.71998596191406,"height":8.744216918945312,"text":"First Name"},{"top":54.315777,"left":242.22,"width":104.760009765625,"height":8.744216918945312,"text":"Address"},{"top":54.315777,"left":346.98,"width":70.80001831054688,"height":8.744216918945312,"text":"City"},{"top":54.315777,"left":417.78003,"width":20.87994384765625,"height":8.744216918945312,"text":"State"},{"top":54.315777,"left":438.65997,"width":49.91998291015625,"height":8.744216918945312,"text":"Zip"},{"top":54.315777,"left":488.57996,"width":78.47998046875,"height":8.744216918945312,"text":"Occupation"},{"top":54.315777,"left":567.05994,"width":106.4400634765625,"height":8.744216918945312,"text":"Employer"},{"top":54.315777,"left":673.5,"width":45.52001953125,"height":8.744216918945312,"text":"Date"},{"top":54.315777,"left":719.02,"width":43.28033447265625,"height":8.744216918945312,"text":"Amount"}],[{"top":63.059994,"left":16.97,"width":1.6187286376953125,"height":10.799991607666016,"text":""},{"top":63.059994,"left":18.588728,"width":153.91128540039062,"height":10.799991607666016,"text":"Lidstad"},{"top":63.059994,"left":172.50002,"width":69.71998596191406,"height":10.799991607666016
,"text":"Dick \u0026 Peg"},{"top":63.059994,"left":242.22,"width":104.760009765625,"height":10.799991607666016,"text":"62 Mississippi River Blvd N"},{"top":63.059994,"left":346.98,"width":70.80001831054688,"height":10.799991607666016,"text":"Saint Paul"},{"top":63.059994,"left":417.78003,"width":20.87994384765625,"height":10.799991607666016,"text":"MN"},{"top":63.059994,"left":438.65997,"width":49.91998291015625,"height":10.799991607666016,"text":"55104"},{"top":63.059994,"left":488.57996,"width":78.47998046875,"height":10.799991607666016,"text":"retired"},{"top":63.059994,"left":567.05994,"width":106.4400634765625,"height":10.799991607666016,"text":""},{"top":63.059994,"left":673.5,"width":45.52001953125,"height":10.799991607666016,"text":"10/12/2012"},{"top":63.059994,"left":719.02,"width":43.28033447265625,"height":10.799991607666016,"text":"60.00"}],[{"top":73.859985,"left":16.97,"width":1.6187286376953125,"height":10.800025939941406,"text":""},{"top":73.859985,"left":18.588728,"width":153.91128540039062,"height":10.800025939941406,"text":"Strom"},{"top":73.859985,"left":172.50002,"width":69.71998596191406,"height":10.800025939941406,"text":"Pam"},{"top":73.859985,"left":242.22,"width":104.760009765625,"height":10.800025939941406,"text":"1229 Hague Ave"},{"top":73.859985,"left":346.98,"width":70.80001831054688,"height":10.800025939941406,"text":"St. 
Paul"},{"top":73.859985,"left":417.78003,"width":20.87994384765625,"height":10.800025939941406,"text":"MN"},{"top":73.859985,"left":438.65997,"width":49.91998291015625,"height":10.800025939941406,"text":"55104"},{"top":73.859985,"left":488.57996,"width":78.47998046875,"height":10.800025939941406,"text":""},{"top":73.859985,"left":567.05994,"width":106.4400634765625,"height":10.800025939941406,"text":""},{"top":73.859985,"left":673.5,"width":45.52001953125,"height":10.800025939941406,"text":"9/12/2012"},{"top":73.859985,"left":719.02,"width":43.28033447265625,"height":10.800025939941406,"text":"60.00"}],[{"top":84.66001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":84.66001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Seeba"},{"top":84.66001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Louise \u0026 Paul"},{"top":84.66001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"1399 Sheldon St"},{"top":84.66001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":84.66001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":84.66001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55108"},{"top":84.66001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"BOE"},{"top":84.66001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"City of Saint Paul"},{"top":84.66001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/12/2012"},{"top":84.66001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":95.46001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":95.46001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Schumacher / 
Bales"},{"top":95.46001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Douglas L. / Patricia"},{"top":95.46001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"948 County Rd. D W"},{"top":95.46001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":95.46001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":95.46001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55126"},{"top":95.46001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":""},{"top":95.46001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":""},{"top":95.46001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/13/2012"},{"top":95.46001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":106.26,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":106.26,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Abrams"},{"top":106.26,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Marjorie"},{"top":106.26,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"238 8th St east"},{"top":106.26,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"St 
Paul"},{"top":106.26,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":106.26,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55101"},{"top":106.26,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"8/8/2012"},{"top":106.26,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"75.00"}],[{"top":117.06,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":117.06,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Crouse / Schroeder"},{"top":117.06,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Abigail / Jonathan"},{"top":117.06,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1545 Branston St."},{"top":117.06,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":117.06,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":117.06,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55108"},{"top":117.06,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":""},{"top":117.06,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":117.06,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":117.06,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"75.00"}],[{"top":127.859985,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":127.859985,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"O\u0027Connell"},{"top":127.859985,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Jean"},{"top":127.859985,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"888 Ivy Ave W."},{"top":127.859985,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Saint 
Paul"},{"top":127.859985,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":127.859985,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55117"},{"top":127.859985,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":""},{"top":127.859985,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":127.859985,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/30/2012"},{"top":127.859985,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":138.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":138.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Reese"},{"top":138.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheri A"},{"top":138.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"981 Davern St."},{"top":138.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":138.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":138.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55116"},{"top":138.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Public Relations"},{"top":138.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Far North 
Spirits"},{"top":138.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/3/2012"},{"top":138.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"75.00"}],[{"top":149.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":149.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Serrano"},{"top":149.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Luz Maria"},{"top":149.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"5559 Park Place Drive"},{"top":149.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Shoreview"},{"top":149.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":149.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55126"},{"top":149.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"retired"},{"top":149.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":149.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":149.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":160.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":160.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Alsiddiqui"},{"top":160.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Jaber"},{"top":160.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"13056 Euclid Ave"},{"top":160.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Apple 
Valley"},{"top":160.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":160.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55124"},{"top":160.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"SPPS budget analyst"},{"top":160.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":160.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":160.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":171.06,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":171.06,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Andrastek"},{"top":171.06,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"John"},{"top":171.06,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"774 Ashland Ave"},{"top":171.06,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"St Paul"},{"top":171.06,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":171.06,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55104"},{"top":171.06,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"asst 
principal"},{"top":171.06,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":171.06,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"8/22/2012"},{"top":171.06,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":181.86,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":181.86,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Anfang"},{"top":181.86,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Heather L. \u0026 Matt"},{"top":181.86,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1635 Bayard Ave"},{"top":181.86,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":181.86,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":181.86,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55116"},{"top":181.86,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"Ex Director"},{"top":181.86,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"BOMA"},{"top":181.86,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":181.86,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":192.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":192.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Aronson"},{"top":192.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Roger J."},{"top":192.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4852 Emerson Ave. 
S."},{"top":192.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Minneapolis"},{"top":192.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":192.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55419"},{"top":192.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Attorney at Law"},{"top":192.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":""},{"top":192.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/20/2012"},{"top":192.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":203.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":203.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Aronson"},{"top":203.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Roger J."},{"top":203.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"4852 Emerson Ave. 
S."},{"top":203.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Minneapolis"},{"top":203.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":203.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55419"},{"top":203.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"Attorney at Law"},{"top":203.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":203.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":203.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":214.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":214.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Banks"},{"top":214.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Michael or Patricia"},{"top":214.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1664 Van Buren Ave."},{"top":214.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":214.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":214.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":214.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"retired social 
worker"},{"top":214.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":214.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":214.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":225.06,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":225.06,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Bennett"},{"top":225.06,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"David"},{"top":225.06,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"25 Birchwood Rd."},{"top":225.06,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Mahtomedi"},{"top":225.06,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":225.06,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":225.06,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/3/2012"},{"top":225.06,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":235.86002,"left":16.97,"width":1.6187286376953125,"height":10.799972534179688,"text":""},{"top":235.86002,"left":18.588728,"width":153.91128540039062,"height":10.799972534179688,"text":"Berry"},{"top":235.86002,"left":172.50002,"width":69.71998596191406,"height":10.799972534179688,"text":"Margaret"},{"top":235.86002,"left":242.22,"width":104.760009765625,"height":10.799972534179688,"text":"1267 Pike Lake Dr"},{"top":235.86002,"left":346.98,"width":70.80001831054688,"height":10.799972534179688,"text":"New 
Brighton"},{"top":235.86002,"left":417.78003,"width":20.87994384765625,"height":10.799972534179688,"text":"MN"},{"top":235.86002,"left":438.65997,"width":49.91998291015625,"height":10.799972534179688,"text":"55112"},{"top":235.86002,"left":488.57996,"width":78.47998046875,"height":10.799972534179688,"text":"Asst Principal"},{"top":235.86002,"left":567.05994,"width":106.4400634765625,"height":10.799972534179688,"text":"Saint Paul Public Schools"},{"top":235.86002,"left":673.5,"width":45.52001953125,"height":10.799972534179688,"text":"8/8/2012"},{"top":235.86002,"left":719.02,"width":43.28033447265625,"height":10.799972534179688,"text":"100.00"}],[{"top":246.65999,"left":16.97,"width":1.6187286376953125,"height":10.800033569335938,"text":""},{"top":246.65999,"left":18.588728,"width":153.91128540039062,"height":10.800033569335938,"text":"Boyle"},{"top":246.65999,"left":172.50002,"width":69.71998596191406,"height":10.800033569335938,"text":"Matthew C. \u0026 Eliza"},{"top":246.65999,"left":242.22,"width":104.760009765625,"height":10.800033569335938,"text":"2165 Princeton Ave"},{"top":246.65999,"left":346.98,"width":70.80001831054688,"height":10.800033569335938,"text":"Saint 
Paul"},{"top":246.65999,"left":417.78003,"width":20.87994384765625,"height":10.800033569335938,"text":"MN"},{"top":246.65999,"left":438.65997,"width":49.91998291015625,"height":10.800033569335938,"text":"55105"},{"top":246.65999,"left":488.57996,"width":78.47998046875,"height":10.800033569335938,"text":""},{"top":246.65999,"left":567.05994,"width":106.4400634765625,"height":10.800033569335938,"text":""},{"top":246.65999,"left":673.5,"width":45.52001953125,"height":10.800033569335938,"text":"10/6/2012"},{"top":246.65999,"left":719.02,"width":43.28033447265625,"height":10.800033569335938,"text":"100.00"}],[{"top":257.46002,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":257.46002,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":257.46002,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John"},{"top":257.46002,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles"},{"top":257.46002,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"St. 
Paul"},{"top":257.46002,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":257.46002,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":257.46002,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BOE"},{"top":257.46002,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"retired"},{"top":257.46002,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"8/26/2012"},{"top":257.46002,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":268.26,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":268.26,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":268.26,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John F."},{"top":268.26,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles Ave"},{"top":268.26,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":268.26,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":268.26,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":268.26,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BoE"},{"top":268.26,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":268.26,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/4/2012"},{"top":268.26,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":279.06,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":279.06,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Brown"},{"top":279.06,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Mike"},{"top":279.06,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1440 Goodrich Ave"},{"top":279.06,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":279.06,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":279.06,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":279.06,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":279.06,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":279.06,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"9/23/2012"},{"top":279.06,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":289.84,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":289.84,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Cacy"},{"top":289.84,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Jill"},{"top":289.84,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"1409 Smith Ave So"},{"top":289.84,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"West St. 
Paul"},{"top":289.84,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":289.84,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55118"},{"top":289.84,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Administrator"},{"top":289.84,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"SPPS"},{"top":289.84,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"9/12/2012"},{"top":289.84,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":300.65997,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":300.65997,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cardwell"},{"top":300.65997,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Eileen"},{"top":300.65997,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4172 Bridgewater Ct"},{"top":300.65997,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Vadnais 
Height"},{"top":300.65997,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":300.65997,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55127"},{"top":300.65997,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/3/2012"},{"top":300.65997,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":311.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":311.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carlstrom"},{"top":311.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheryl"},{"top":311.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4244 Oakmede Ln"},{"top":311.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"St Paul"},{"top":311.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":311.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":311.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Satff"},{"top":311.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public 
Schools"},{"top":311.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/8/2012"},{"top":311.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":322.26,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":322.26,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carter"},{"top":322.26,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Melvin W. \u0026 Willet"},{"top":322.26,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"405 Western Ave N"},{"top":322.26,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":322.26,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":322.26,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55103"},{"top":322.26,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Cty Commissioner"},{"top":322.26,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Ramsey Cty"},{"top":322.26,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/8/2012"},{"top":322.26,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":333.06003,"left":16.97,"width":1.6187286376953125,"height":10.8199462890625,"text":""},{"top":333.06003,"left":18.588728,"width":153.91128540039062,"height":10.8199462890625,"text":"Caruso"},{"top":333.06003,"left":172.50002,"width":69.71998596191406,"height":10.8199462890625,"text":"Sarah"},{"top":333.06003,"left":242.22,"width":104.760009765625,"height":10.8199462890625,"text":"2615 Newton Ave 
S"},{"top":333.06003,"left":346.98,"width":70.80001831054688,"height":10.8199462890625,"text":"Minneapolis"},{"top":333.06003,"left":417.78003,"width":20.87994384765625,"height":10.8199462890625,"text":"MN"},{"top":333.06003,"left":438.65997,"width":49.91998291015625,"height":10.8199462890625,"text":"55405"},{"top":333.06003,"left":488.57996,"width":78.47998046875,"height":10.8199462890625,"text":"CEO"},{"top":333.06003,"left":567.05994,"width":106.4400634765625,"height":10.8199462890625,"text":"United Way"},{"top":333.06003,"left":673.5,"width":45.52001953125,"height":10.8199462890625,"text":"9/12/2012"},{"top":333.06003,"left":719.02,"width":43.28033447265625,"height":10.8199462890625,"text":"100.00"}],[{"top":343.87997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":343.87997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Casey /Trewartha"},{"top":343.87997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Kerry F. 
/ Kelly A."},{"top":343.87997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"2266 Goodrich Ave"},{"top":343.87997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":343.87997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":343.87997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":343.87997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":343.87997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":343.87997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/4/2012"},{"top":343.87997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":354.65997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":354.65997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cassidy"},{"top":354.65997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Paul D."},{"top":354.65997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1990 Dayton Ave"},{"top":354.65997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":354.65997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":354.65997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55104"},{"top":354.65997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":354.65997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":354.65997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/6/2012"},{"top":354.65997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":365.43997,"left":16.97,"width":1.6187286376953125,"height":10.820037841796875,"text":""},{"top":365.43997,"left":18.588728,"width":153.91128540039062,"height":10.820037841796875,"text":"Causey"},{"top":365.43997,"left":172.50002,"width":69.71998596191406,"height":10.820037841796875,"text":"Christopher"},{"top":365.43997,"left":242.22,"width":104.760009765625,"height":10.820037841796875,"text":"2181 Doswell Avenue"},{"top":365.43997,"left":346.98,"width":70.80001831054688,"height":10.820037841796875,"text":"Saint 
Paul"},{"top":365.43997,"left":417.78003,"width":20.87994384765625,"height":10.820037841796875,"text":"MN"},{"top":365.43997,"left":438.65997,"width":49.91998291015625,"height":10.820037841796875,"text":"55108"},{"top":365.43997,"left":488.57996,"width":78.47998046875,"height":10.820037841796875,"text":"finance"},{"top":365.43997,"left":567.05994,"width":106.4400634765625,"height":10.820037841796875,"text":""},{"top":365.43997,"left":673.5,"width":45.52001953125,"height":10.820037841796875,"text":"9/3/2012"},{"top":365.43997,"left":719.02,"width":43.28033447265625,"height":10.820037841796875,"text":"100.00"}],[{"top":376.26,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":376.26,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Christiansen"},{"top":376.26,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Lena"},{"top":376.26,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"2453 Como Ave"},{"top":376.26,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"St Paul"},{"top":376.26,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":376.26,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55108"},{"top":376.26,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Principal"},{"top":376.26,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"Saint Paul Public 
Schools"},{"top":376.26,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"8/8/2012"},{"top":376.26,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":387.08,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":387.08,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Clapp"},{"top":387.08,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Agustus (Bill)"},{"top":387.08,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"757 Osceola Ave #1"},{"top":387.08,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":387.08,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":387.08,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":387.08,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"retired"},{"top":387.08,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":387.08,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/2/2012"},{"top":387.08,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":397.86,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":397.86,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cohen"},{"top":397.86,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Brad A."},{"top":397.86,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1460 Raymond Ave"},{"top":397.86,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":397.86,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":397.86,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55108"},{"top":397.86,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"academic technology"},{"top":397.86,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":"UMN"},{"top":397.86,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/17/2012"},{"top":397.86,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":408.63998,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":408.63998,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Commers"},{"top":408.63998,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Beth"},{"top":408.63998,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2294 Commonwealth Ave"},{"top":408.63998,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St Paul"},{"top":408.63998,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":408.63998,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":408.63998,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":"Self 
Employed"},{"top":408.63998,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":"Homemaker"},{"top":408.63998,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"8/1/2012"},{"top":408.63998,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":419.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":419.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Couture"},{"top":419.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Steven"},{"top":419.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"7950 Victoria Way"},{"top":419.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Louise Park"},{"top":419.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":419.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55426"},{"top":419.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Principal"},{"top":419.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public Schools"},{"top":419.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/10/2012"},{"top":419.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":430.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":430.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Crawford"},{"top":430.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Lydia P."},{"top":430.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"1140 Edgcumbe Rd"},{"top":430.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint 
Paul"},{"top":430.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":430.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":430.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":430.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":430.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"9/15/2012"},{"top":430.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":441.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":441.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Cudahy / Ricker"},{"top":441.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Robert \u0026 Mary C"},{"top":441.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"616 Cherokee Ave."},{"top":441.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint Paul"},{"top":441.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":441.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55107"},{"top":441.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"Teacher/Union Presid"},{"top":441.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"SPPS/SPFT"},{"top":441.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"9/18/2012"},{"top":441.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":451.85995,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":451.85995,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cudahy / 
Ricker"},{"top":451.85995,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Robert \u0026 Mary C"},{"top":451.85995,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"616 Cherokee Ave."},{"top":451.85995,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":451.85995,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":451.85995,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55107"},{"top":451.85995,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Teacher/Union Presid"},{"top":451.85995,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"SPPS/SPFT"},{"top":451.85995,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/6/2012"},{"top":451.85995,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":462.65997,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":462.65997,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Currie"},{"top":462.65997,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Elisabeth"},{"top":462.65997,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2274 Hillside Ave"},{"top":462.65997,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St. 
Paul"},{"top":462.65997,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":462.65997,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":462.65997,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":""},{"top":462.65997,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":""},{"top":462.65997,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"9/12/2012"},{"top":462.65997,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":473.47998,"left":16.97,"width":1.6187286376953125,"height":10.780029296875,"text":""},{"top":473.47998,"left":18.588728,"width":153.91128540039062,"height":10.780029296875,"text":"Doane"},{"top":473.47998,"left":172.50002,"width":69.71998596191406,"height":10.780029296875,"text":"Paul V \u0026 Helen R"},{"top":473.47998,"left":242.22,"width":104.760009765625,"height":10.780029296875,"text":"444 Portland Ave"},{"top":473.47998,"left":346.98,"width":70.80001831054688,"height":10.780029296875,"text":"Sant Paul"},{"top":473.47998,"left":417.78003,"width":20.87994384765625,"height":10.780029296875,"text":"MN"},{"top":473.47998,"left":438.65997,"width":49.91998291015625,"height":10.780029296875,"text":"55102"},{"top":473.47998,"left":488.57996,"width":78.47998046875,"height":10.780029296875,"text":"Ex director"},{"top":473.47998,"left":567.05994,"width":106.4400634765625,"height":10.780029296875,"text":"St Paul Teachers\u0027 Retirement 
A"},{"top":473.47998,"left":673.5,"width":45.52001953125,"height":10.780029296875,"text":"10/3/2012"},{"top":473.47998,"left":719.02,"width":43.28033447265625,"height":10.780029296875,"text":"100.00"}],[{"top":484.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":484.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Dougherty"},{"top":484.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Richards S \u0026 Patrici"},{"top":484.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"812 Goodrich Ave"},{"top":484.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint Paul"},{"top":484.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":484.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":484.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":484.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":484.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"10/4/2012"},{"top":484.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":495.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":495.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Driscoll"},{"top":495.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Joe \u0026 Jill"},{"top":495.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"2383 Bourne Ave"},{"top":495.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint 
Paul"},{"top":495.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":495.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55108"},{"top":495.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"HR Manager"},{"top":495.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"LexisNexis"},{"top":495.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"10/18/2012"},{"top":495.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":505.85995,"left":16.97,"width":1.6187286376953125,"height":10.780059814453125,"text":""},{"top":505.85995,"left":18.588728,"width":153.91128540039062,"height":10.780059814453125,"text":"Dubaille / Haugee"},{"top":505.85995,"left":172.50002,"width":69.71998596191406,"height":10.780059814453125,"text":"Florence M. /Eric"},{"top":505.85995,"left":242.22,"width":104.760009765625,"height":10.780059814453125,"text":"1009 Edmund Ave"},{"top":505.85995,"left":346.98,"width":70.80001831054688,"height":10.780059814453125,"text":"Saint 
Paul"},{"top":505.85995,"left":417.78003,"width":20.87994384765625,"height":10.780059814453125,"text":"MN"},{"top":505.85995,"left":438.65997,"width":49.91998291015625,"height":10.780059814453125,"text":"55104"},{"top":505.85995,"left":488.57996,"width":78.47998046875,"height":10.780059814453125,"text":"Florence‐teacher"},{"top":505.85995,"left":567.05994,"width":106.4400634765625,"height":10.780059814453125,"text":"SPPS"},{"top":505.85995,"left":673.5,"width":45.52001953125,"height":10.780059814453125,"text":"10/3/2012"},{"top":505.85995,"left":719.02,"width":43.28033447265625,"height":10.780059814453125,"text":"100.00"}],[{"top":516.64,"left":16.97,"width":1.6187286376953125,"height":10.83990478515625,"text":""},{"top":516.64,"left":18.588728,"width":153.91128540039062,"height":10.83990478515625,"text":"Eaton"},{"top":516.64,"left":172.50002,"width":69.71998596191406,"height":10.83990478515625,"text":"Jim"},{"top":516.64,"left":242.22,"width":104.760009765625,"height":10.83990478515625,"text":"2133 Berkeley Ave"},{"top":516.64,"left":346.98,"width":70.80001831054688,"height":10.83990478515625,"text":"St Paul"},{"top":516.64,"left":417.78003,"width":20.87994384765625,"height":10.83990478515625,"text":"MN"},{"top":516.64,"left":438.65997,"width":49.91998291015625,"height":10.83990478515625,"text":"55105"},{"top":516.64,"left":488.57996,"width":78.47998046875,"height":10.83990478515625,"text":"Principal"},{"top":516.64,"left":567.05994,"width":106.4400634765625,"height":10.83990478515625,"text":"Saint Paul Public Schools"},{"top":516.64,"left":673.5,"width":45.52001953125,"height":10.83990478515625,"text":"8/23/2012"},{"top":516.64,"left":719.02,"width":43.28033447265625,"height":10.83990478515625,"text":"100.00"}],[{"top":527.4799,"left":16.97,"width":1.6187286376953125,"height":10.78009033203125,"text":""},{"top":527.4799,"left":18.588728,"width":153.91128540039062,"height":10.78009033203125,"text":"Eaves 
/Alger"},{"top":527.4799,"left":172.50002,"width":69.71998596191406,"height":10.78009033203125,"text":"Patricia / Stuart"},{"top":527.4799,"left":242.22,"width":104.760009765625,"height":10.78009033203125,"text":"1143 Portladn Ave."},{"top":527.4799,"left":346.98,"width":70.80001831054688,"height":10.78009033203125,"text":"Saint Paul"},{"top":527.4799,"left":417.78003,"width":20.87994384765625,"height":10.78009033203125,"text":"MN"},{"top":527.4799,"left":438.65997,"width":49.91998291015625,"height":10.78009033203125,"text":"55104"},{"top":527.4799,"left":488.57996,"width":78.47998046875,"height":10.78009033203125,"text":""},{"top":527.4799,"left":567.05994,"width":106.4400634765625,"height":10.78009033203125,"text":""},{"top":527.4799,"left":673.5,"width":45.52001953125,"height":10.78009033203125,"text":"10/3/2012"},{"top":527.4799,"left":719.02,"width":43.28033447265625,"height":10.78009033203125,"text":"100.00"}]]} \ No newline at end of file +{"extraction_method":"lattice","top":54.315777,"left":16.97,"width":745.3303833007812,"height":483.9442443847656,"right":762.30035,"bottom":538.26,"data":[[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":54.315777,"left":18.588728,"width":153.91128540039062,"height":8.744216918945312,"text":"Last Name"},{"top":54.315777,"left":172.50002,"width":69.71998596191406,"height":8.744216918945312,"text":"First 
Name"},{"top":54.315777,"left":242.22,"width":104.760009765625,"height":8.744216918945312,"text":"Address"},{"top":54.315777,"left":346.98,"width":70.80001831054688,"height":8.744216918945312,"text":"City"},{"top":54.315777,"left":417.78003,"width":20.87994384765625,"height":8.744216918945312,"text":"State"},{"top":54.315777,"left":438.65997,"width":49.91998291015625,"height":8.744216918945312,"text":"Zip"},{"top":54.315777,"left":488.57996,"width":78.47998046875,"height":8.744216918945312,"text":"Occupation"},{"top":54.315777,"left":567.05994,"width":106.4400634765625,"height":8.744216918945312,"text":"Employer"},{"top":54.315777,"left":673.5,"width":45.52001953125,"height":8.744216918945312,"text":"Date"},{"top":54.315777,"left":719.02,"width":43.28033447265625,"height":8.744216918945312,"text":"Amount"}],[{"top":63.059994,"left":16.97,"width":1.6187286376953125,"height":10.799991607666016,"text":""},{"top":63.059994,"left":18.588728,"width":153.91128540039062,"height":10.799991607666016,"text":"Lidstad"},{"top":63.059994,"left":172.50002,"width":69.71998596191406,"height":10.799991607666016,"text":"Dick \u0026 Peg"},{"top":63.059994,"left":242.22,"width":104.760009765625,"height":10.799991607666016,"text":"62 Mississippi River Blvd N"},{"top":63.059994,"left":346.98,"width":70.80001831054688,"height":10.799991607666016,"text":"Saint 
Paul"},{"top":63.059994,"left":417.78003,"width":20.87994384765625,"height":10.799991607666016,"text":"MN"},{"top":63.059994,"left":438.65997,"width":49.91998291015625,"height":10.799991607666016,"text":"55104"},{"top":63.059994,"left":488.57996,"width":78.47998046875,"height":10.799991607666016,"text":"retired"},{"top":63.059994,"left":567.05994,"width":106.4400634765625,"height":10.799991607666016,"text":""},{"top":63.059994,"left":673.5,"width":45.52001953125,"height":10.799991607666016,"text":"10/12/2012"},{"top":63.059994,"left":719.02,"width":43.28033447265625,"height":10.799991607666016,"text":"60.00"}],[{"top":73.859985,"left":16.97,"width":1.6187286376953125,"height":10.800025939941406,"text":""},{"top":73.859985,"left":18.588728,"width":153.91128540039062,"height":10.800025939941406,"text":"Strom"},{"top":73.859985,"left":172.50002,"width":69.71998596191406,"height":10.800025939941406,"text":"Pam"},{"top":73.859985,"left":242.22,"width":104.760009765625,"height":10.800025939941406,"text":"1229 Hague Ave"},{"top":73.859985,"left":346.98,"width":70.80001831054688,"height":10.800025939941406,"text":"St. 
Paul"},{"top":73.859985,"left":417.78003,"width":20.87994384765625,"height":10.800025939941406,"text":"MN"},{"top":73.859985,"left":438.65997,"width":49.91998291015625,"height":10.800025939941406,"text":"55104"},{"top":73.859985,"left":488.57996,"width":78.47998046875,"height":10.800025939941406,"text":""},{"top":73.859985,"left":567.05994,"width":106.4400634765625,"height":10.800025939941406,"text":""},{"top":73.859985,"left":673.5,"width":45.52001953125,"height":10.800025939941406,"text":"9/12/2012"},{"top":73.859985,"left":719.02,"width":43.28033447265625,"height":10.800025939941406,"text":"60.00"}],[{"top":84.66001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":84.66001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Seeba"},{"top":84.66001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Louise \u0026 Paul"},{"top":84.66001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"1399 Sheldon St"},{"top":84.66001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":84.66001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":84.66001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55108"},{"top":84.66001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"BOE"},{"top":84.66001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"City of Saint Paul"},{"top":84.66001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/12/2012"},{"top":84.66001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":95.46001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":95.46001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Schumacher / 
Bales"},{"top":95.46001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Douglas L. / Patricia"},{"top":95.46001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"948 County Rd. D W"},{"top":95.46001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":95.46001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":95.46001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55126"},{"top":95.46001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":""},{"top":95.46001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":""},{"top":95.46001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/13/2012"},{"top":95.46001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":106.26,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":106.26,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Abrams"},{"top":106.26,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Marjorie"},{"top":106.26,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"238 8th St east"},{"top":106.26,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"St 
Paul"},{"top":106.26,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":106.26,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55101"},{"top":106.26,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"8/8/2012"},{"top":106.26,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"75.00"}],[{"top":117.06,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":117.06,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Crouse / Schroeder"},{"top":117.06,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Abigail / Jonathan"},{"top":117.06,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1545 Branston St."},{"top":117.06,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":117.06,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":117.06,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55108"},{"top":117.06,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":""},{"top":117.06,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":117.06,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":117.06,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"75.00"}],[{"top":127.859985,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":127.859985,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"O\u0027Connell"},{"top":127.859985,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Jean"},{"top":127.859985,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"888 Ivy Ave W."},{"top":127.859985,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Saint 
Paul"},{"top":127.859985,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":127.859985,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55117"},{"top":127.859985,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":""},{"top":127.859985,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":127.859985,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/30/2012"},{"top":127.859985,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":138.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":138.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Reese"},{"top":138.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheri A"},{"top":138.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"981 Davern St."},{"top":138.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":138.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":138.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55116"},{"top":138.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Public Relations"},{"top":138.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Far North 
Spirits"},{"top":138.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/3/2012"},{"top":138.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"75.00"}],[{"top":149.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":149.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Serrano"},{"top":149.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Luz Maria"},{"top":149.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"5559 Park Place Drive"},{"top":149.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Shoreview"},{"top":149.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":149.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55126"},{"top":149.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"retired"},{"top":149.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":149.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":149.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":160.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":160.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Alsiddiqui"},{"top":160.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Jaber"},{"top":160.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"13056 Euclid Ave"},{"top":160.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Apple 
Valley"},{"top":160.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":160.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55124"},{"top":160.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"SPPS budget analyst"},{"top":160.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":160.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":160.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":171.06,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":171.06,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Andrastek"},{"top":171.06,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"John"},{"top":171.06,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"774 Ashland Ave"},{"top":171.06,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"St Paul"},{"top":171.06,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":171.06,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55104"},{"top":171.06,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"asst 
principal"},{"top":171.06,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":171.06,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"8/22/2012"},{"top":171.06,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":181.86,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":181.86,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Anfang"},{"top":181.86,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Heather L. \u0026 Matt"},{"top":181.86,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1635 Bayard Ave"},{"top":181.86,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":181.86,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":181.86,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55116"},{"top":181.86,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"Ex Director"},{"top":181.86,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"BOMA"},{"top":181.86,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":181.86,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":192.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":192.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Aronson"},{"top":192.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Roger J."},{"top":192.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4852 Emerson Ave. 
S."},{"top":192.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Minneapolis"},{"top":192.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":192.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55419"},{"top":192.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Attorney at Law"},{"top":192.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":""},{"top":192.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/20/2012"},{"top":192.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":203.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":203.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Aronson"},{"top":203.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Roger J."},{"top":203.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"4852 Emerson Ave. 
S."},{"top":203.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Minneapolis"},{"top":203.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":203.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55419"},{"top":203.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"Attorney at Law"},{"top":203.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":203.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":203.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":214.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":214.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Banks"},{"top":214.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Michael or Patricia"},{"top":214.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1664 Van Buren Ave."},{"top":214.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":214.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":214.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":214.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"retired social 
worker"},{"top":214.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":214.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":214.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":225.06,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":225.06,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Bennett"},{"top":225.06,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"David"},{"top":225.06,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"25 Birchwood Rd."},{"top":225.06,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Mahtomedi"},{"top":225.06,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":225.06,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":225.06,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/3/2012"},{"top":225.06,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":235.86002,"left":16.97,"width":1.6187286376953125,"height":10.799972534179688,"text":""},{"top":235.86002,"left":18.588728,"width":153.91128540039062,"height":10.799972534179688,"text":"Berry"},{"top":235.86002,"left":172.50002,"width":69.71998596191406,"height":10.799972534179688,"text":"Margaret"},{"top":235.86002,"left":242.22,"width":104.760009765625,"height":10.799972534179688,"text":"1267 Pike Lake Dr"},{"top":235.86002,"left":346.98,"width":70.80001831054688,"height":10.799972534179688,"text":"New 
Brighton"},{"top":235.86002,"left":417.78003,"width":20.87994384765625,"height":10.799972534179688,"text":"MN"},{"top":235.86002,"left":438.65997,"width":49.91998291015625,"height":10.799972534179688,"text":"55112"},{"top":235.86002,"left":488.57996,"width":78.47998046875,"height":10.799972534179688,"text":"Asst Principal"},{"top":235.86002,"left":567.05994,"width":106.4400634765625,"height":10.799972534179688,"text":"Saint Paul Public Schools"},{"top":235.86002,"left":673.5,"width":45.52001953125,"height":10.799972534179688,"text":"8/8/2012"},{"top":235.86002,"left":719.02,"width":43.28033447265625,"height":10.799972534179688,"text":"100.00"}],[{"top":246.65999,"left":16.97,"width":1.6187286376953125,"height":10.800033569335938,"text":""},{"top":246.65999,"left":18.588728,"width":153.91128540039062,"height":10.800033569335938,"text":"Boyle"},{"top":246.65999,"left":172.50002,"width":69.71998596191406,"height":10.800033569335938,"text":"Matthew C. \u0026 Eliza"},{"top":246.65999,"left":242.22,"width":104.760009765625,"height":10.800033569335938,"text":"2165 Princeton Ave"},{"top":246.65999,"left":346.98,"width":70.80001831054688,"height":10.800033569335938,"text":"Saint 
Paul"},{"top":246.65999,"left":417.78003,"width":20.87994384765625,"height":10.800033569335938,"text":"MN"},{"top":246.65999,"left":438.65997,"width":49.91998291015625,"height":10.800033569335938,"text":"55105"},{"top":246.65999,"left":488.57996,"width":78.47998046875,"height":10.800033569335938,"text":""},{"top":246.65999,"left":567.05994,"width":106.4400634765625,"height":10.800033569335938,"text":""},{"top":246.65999,"left":673.5,"width":45.52001953125,"height":10.800033569335938,"text":"10/6/2012"},{"top":246.65999,"left":719.02,"width":43.28033447265625,"height":10.800033569335938,"text":"100.00"}],[{"top":257.46002,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":257.46002,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":257.46002,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John"},{"top":257.46002,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles"},{"top":257.46002,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"St. 
Paul"},{"top":257.46002,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":257.46002,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":257.46002,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BOE"},{"top":257.46002,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"retired"},{"top":257.46002,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"8/26/2012"},{"top":257.46002,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":268.26,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":268.26,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":268.26,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John F."},{"top":268.26,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles Ave"},{"top":268.26,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":268.26,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":268.26,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":268.26,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BoE"},{"top":268.26,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":268.26,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/4/2012"},{"top":268.26,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":279.06,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":279.06,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Brown"},{"top":279.06,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Mike"},{"top":279.06,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1440 Goodrich Ave"},{"top":279.06,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":279.06,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":279.06,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":279.06,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":279.06,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":279.06,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"9/23/2012"},{"top":279.06,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":289.84,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":289.84,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Cacy"},{"top":289.84,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Jill"},{"top":289.84,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"1409 Smith Ave So"},{"top":289.84,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"West St. 
Paul"},{"top":289.84,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":289.84,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55118"},{"top":289.84,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Administrator"},{"top":289.84,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"SPPS"},{"top":289.84,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"9/12/2012"},{"top":289.84,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":300.65997,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":300.65997,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cardwell"},{"top":300.65997,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Eileen"},{"top":300.65997,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4172 Bridgewater Ct"},{"top":300.65997,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Vadnais 
Height"},{"top":300.65997,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":300.65997,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55127"},{"top":300.65997,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/3/2012"},{"top":300.65997,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":311.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":311.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carlstrom"},{"top":311.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheryl"},{"top":311.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4244 Oakmede Ln"},{"top":311.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"St Paul"},{"top":311.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":311.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":311.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Satff"},{"top":311.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public 
Schools"},{"top":311.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/8/2012"},{"top":311.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":322.26,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":322.26,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carter"},{"top":322.26,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Melvin W. \u0026 Willet"},{"top":322.26,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"405 Western Ave N"},{"top":322.26,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":322.26,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":322.26,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55103"},{"top":322.26,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Cty Commissioner"},{"top":322.26,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Ramsey Cty"},{"top":322.26,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/8/2012"},{"top":322.26,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":333.06003,"left":16.97,"width":1.6187286376953125,"height":10.8199462890625,"text":""},{"top":333.06003,"left":18.588728,"width":153.91128540039062,"height":10.8199462890625,"text":"Caruso"},{"top":333.06003,"left":172.50002,"width":69.71998596191406,"height":10.8199462890625,"text":"Sarah"},{"top":333.06003,"left":242.22,"width":104.760009765625,"height":10.8199462890625,"text":"2615 Newton Ave 
S"},{"top":333.06003,"left":346.98,"width":70.80001831054688,"height":10.8199462890625,"text":"Minneapolis"},{"top":333.06003,"left":417.78003,"width":20.87994384765625,"height":10.8199462890625,"text":"MN"},{"top":333.06003,"left":438.65997,"width":49.91998291015625,"height":10.8199462890625,"text":"55405"},{"top":333.06003,"left":488.57996,"width":78.47998046875,"height":10.8199462890625,"text":"CEO"},{"top":333.06003,"left":567.05994,"width":106.4400634765625,"height":10.8199462890625,"text":"United Way"},{"top":333.06003,"left":673.5,"width":45.52001953125,"height":10.8199462890625,"text":"9/12/2012"},{"top":333.06003,"left":719.02,"width":43.28033447265625,"height":10.8199462890625,"text":"100.00"}],[{"top":343.87997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":343.87997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Casey /Trewartha"},{"top":343.87997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Kerry F. 
/ Kelly A."},{"top":343.87997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"2266 Goodrich Ave"},{"top":343.87997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":343.87997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":343.87997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":343.87997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":343.87997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":343.87997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/4/2012"},{"top":343.87997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":354.65997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":354.65997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cassidy"},{"top":354.65997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Paul D."},{"top":354.65997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1990 Dayton Ave"},{"top":354.65997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":354.65997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":354.65997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55104"},{"top":354.65997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":354.65997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":354.65997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/6/2012"},{"top":354.65997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":365.43997,"left":16.97,"width":1.6187286376953125,"height":10.820037841796875,"text":""},{"top":365.43997,"left":18.588728,"width":153.91128540039062,"height":10.820037841796875,"text":"Causey"},{"top":365.43997,"left":172.50002,"width":69.71998596191406,"height":10.820037841796875,"text":"Christopher"},{"top":365.43997,"left":242.22,"width":104.760009765625,"height":10.820037841796875,"text":"2181 Doswell Avenue"},{"top":365.43997,"left":346.98,"width":70.80001831054688,"height":10.820037841796875,"text":"Saint 
Paul"},{"top":365.43997,"left":417.78003,"width":20.87994384765625,"height":10.820037841796875,"text":"MN"},{"top":365.43997,"left":438.65997,"width":49.91998291015625,"height":10.820037841796875,"text":"55108"},{"top":365.43997,"left":488.57996,"width":78.47998046875,"height":10.820037841796875,"text":"finance"},{"top":365.43997,"left":567.05994,"width":106.4400634765625,"height":10.820037841796875,"text":""},{"top":365.43997,"left":673.5,"width":45.52001953125,"height":10.820037841796875,"text":"9/3/2012"},{"top":365.43997,"left":719.02,"width":43.28033447265625,"height":10.820037841796875,"text":"100.00"}],[{"top":376.26,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":376.26,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Christiansen"},{"top":376.26,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Lena"},{"top":376.26,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"2453 Como Ave"},{"top":376.26,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"St Paul"},{"top":376.26,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":376.26,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55108"},{"top":376.26,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Principal"},{"top":376.26,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"Saint Paul Public 
Schools"},{"top":376.26,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"8/8/2012"},{"top":376.26,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":387.08,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":387.08,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Clapp"},{"top":387.08,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Agustus (Bill)"},{"top":387.08,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"757 Osceola Ave #1"},{"top":387.08,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":387.08,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":387.08,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":387.08,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"retired"},{"top":387.08,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":387.08,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/2/2012"},{"top":387.08,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":397.86,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":397.86,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cohen"},{"top":397.86,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Brad A."},{"top":397.86,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1460 Raymond Ave"},{"top":397.86,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":397.86,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":397.86,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55108"},{"top":397.86,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"academic technology"},{"top":397.86,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":"UMN"},{"top":397.86,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/17/2012"},{"top":397.86,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":408.63998,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":408.63998,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Commers"},{"top":408.63998,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Beth"},{"top":408.63998,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2294 Commonwealth Ave"},{"top":408.63998,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St Paul"},{"top":408.63998,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":408.63998,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":408.63998,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":"Self 
Employed"},{"top":408.63998,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":"Homemaker"},{"top":408.63998,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"8/1/2012"},{"top":408.63998,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":419.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":419.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Couture"},{"top":419.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Steven"},{"top":419.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"7950 Victoria Way"},{"top":419.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Louise Park"},{"top":419.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":419.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55426"},{"top":419.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Principal"},{"top":419.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public Schools"},{"top":419.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/10/2012"},{"top":419.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":430.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":430.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Crawford"},{"top":430.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Lydia P."},{"top":430.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"1140 Edgcumbe Rd"},{"top":430.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint 
Paul"},{"top":430.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":430.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":430.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":430.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":430.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"9/15/2012"},{"top":430.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":441.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":441.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Cudahy / Ricker"},{"top":441.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Robert \u0026 Mary C"},{"top":441.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"616 Cherokee Ave."},{"top":441.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint Paul"},{"top":441.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":441.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55107"},{"top":441.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"Teacher/Union Presid"},{"top":441.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"SPPS/SPFT"},{"top":441.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"9/18/2012"},{"top":441.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":451.85995,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":451.85995,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cudahy / 
Ricker"},{"top":451.85995,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Robert \u0026 Mary C"},{"top":451.85995,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"616 Cherokee Ave."},{"top":451.85995,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":451.85995,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":451.85995,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55107"},{"top":451.85995,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Teacher/Union Presid"},{"top":451.85995,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"SPPS/SPFT"},{"top":451.85995,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/6/2012"},{"top":451.85995,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":462.65997,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":462.65997,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Currie"},{"top":462.65997,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Elisabeth"},{"top":462.65997,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2274 Hillside Ave"},{"top":462.65997,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St. 
Paul"},{"top":462.65997,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":462.65997,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":462.65997,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":""},{"top":462.65997,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":""},{"top":462.65997,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"9/12/2012"},{"top":462.65997,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":473.47998,"left":16.97,"width":1.6187286376953125,"height":10.780029296875,"text":""},{"top":473.47998,"left":18.588728,"width":153.91128540039062,"height":10.780029296875,"text":"Doane"},{"top":473.47998,"left":172.50002,"width":69.71998596191406,"height":10.780029296875,"text":"Paul V \u0026 Helen R"},{"top":473.47998,"left":242.22,"width":104.760009765625,"height":10.780029296875,"text":"444 Portland Ave"},{"top":473.47998,"left":346.98,"width":70.80001831054688,"height":10.780029296875,"text":"Sant Paul"},{"top":473.47998,"left":417.78003,"width":20.87994384765625,"height":10.780029296875,"text":"MN"},{"top":473.47998,"left":438.65997,"width":49.91998291015625,"height":10.780029296875,"text":"55102"},{"top":473.47998,"left":488.57996,"width":78.47998046875,"height":10.780029296875,"text":"Ex director"},{"top":473.47998,"left":567.05994,"width":106.4400634765625,"height":10.780029296875,"text":"St Paul Teachers\u0027 Retirement 
A"},{"top":473.47998,"left":673.5,"width":45.52001953125,"height":10.780029296875,"text":"10/3/2012"},{"top":473.47998,"left":719.02,"width":43.28033447265625,"height":10.780029296875,"text":"100.00"}],[{"top":484.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":484.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Dougherty"},{"top":484.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Richards S \u0026 Patrici"},{"top":484.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"812 Goodrich Ave"},{"top":484.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint Paul"},{"top":484.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":484.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":484.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":484.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":484.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"10/4/2012"},{"top":484.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":495.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":495.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Driscoll"},{"top":495.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Joe \u0026 Jill"},{"top":495.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"2383 Bourne Ave"},{"top":495.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint 
Paul"},{"top":495.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":495.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55108"},{"top":495.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"HR Manager"},{"top":495.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"LexisNexis"},{"top":495.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"10/18/2012"},{"top":495.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":505.85995,"left":16.97,"width":1.6187286376953125,"height":10.780059814453125,"text":""},{"top":505.85995,"left":18.588728,"width":153.91128540039062,"height":10.780059814453125,"text":"Dubaille / Haugee"},{"top":505.85995,"left":172.50002,"width":69.71998596191406,"height":10.780059814453125,"text":"Florence M. /Eric"},{"top":505.85995,"left":242.22,"width":104.760009765625,"height":10.780059814453125,"text":"1009 Edmund Ave"},{"top":505.85995,"left":346.98,"width":70.80001831054688,"height":10.780059814453125,"text":"Saint 
Paul"},{"top":505.85995,"left":417.78003,"width":20.87994384765625,"height":10.780059814453125,"text":"MN"},{"top":505.85995,"left":438.65997,"width":49.91998291015625,"height":10.780059814453125,"text":"55104"},{"top":505.85995,"left":488.57996,"width":78.47998046875,"height":10.780059814453125,"text":"Florence‐teacher"},{"top":505.85995,"left":567.05994,"width":106.4400634765625,"height":10.780059814453125,"text":"SPPS"},{"top":505.85995,"left":673.5,"width":45.52001953125,"height":10.780059814453125,"text":"10/3/2012"},{"top":505.85995,"left":719.02,"width":43.28033447265625,"height":10.780059814453125,"text":"100.00"}],[{"top":516.64,"left":16.97,"width":1.6187286376953125,"height":10.83990478515625,"text":""},{"top":516.64,"left":18.588728,"width":153.91128540039062,"height":10.83990478515625,"text":"Eaton"},{"top":516.64,"left":172.50002,"width":69.71998596191406,"height":10.83990478515625,"text":"Jim"},{"top":516.64,"left":242.22,"width":104.760009765625,"height":10.83990478515625,"text":"2133 Berkeley Ave"},{"top":516.64,"left":346.98,"width":70.80001831054688,"height":10.83990478515625,"text":"St Paul"},{"top":516.64,"left":417.78003,"width":20.87994384765625,"height":10.83990478515625,"text":"MN"},{"top":516.64,"left":438.65997,"width":49.91998291015625,"height":10.83990478515625,"text":"55105"},{"top":516.64,"left":488.57996,"width":78.47998046875,"height":10.83990478515625,"text":"Principal"},{"top":516.64,"left":567.05994,"width":106.4400634765625,"height":10.83990478515625,"text":"Saint Paul Public Schools"},{"top":516.64,"left":673.5,"width":45.52001953125,"height":10.83990478515625,"text":"8/23/2012"},{"top":516.64,"left":719.02,"width":43.28033447265625,"height":10.83990478515625,"text":"100.00"}],[{"top":527.4799,"left":16.97,"width":1.6187286376953125,"height":10.78009033203125,"text":""},{"top":527.4799,"left":18.588728,"width":153.91128540039062,"height":10.78009033203125,"text":"Eaves 
/Alger"},{"top":527.4799,"left":172.50002,"width":69.71998596191406,"height":10.78009033203125,"text":"Patricia / Stuart"},{"top":527.4799,"left":242.22,"width":104.760009765625,"height":10.78009033203125,"text":"1143 Portladn Ave."},{"top":527.4799,"left":346.98,"width":70.80001831054688,"height":10.78009033203125,"text":"Saint Paul"},{"top":527.4799,"left":417.78003,"width":20.87994384765625,"height":10.78009033203125,"text":"MN"},{"top":527.4799,"left":438.65997,"width":49.91998291015625,"height":10.78009033203125,"text":"55104"},{"top":527.4799,"left":488.57996,"width":78.47998046875,"height":10.78009033203125,"text":""},{"top":527.4799,"left":567.05994,"width":106.4400634765625,"height":10.78009033203125,"text":""},{"top":527.4799,"left":673.5,"width":45.52001953125,"height":10.78009033203125,"text":"10/3/2012"},{"top":527.4799,"left":719.02,"width":43.28033447265625,"height":10.78009033203125,"text":"100.00"}]]} diff --git a/src/test/resources/technology/tabula/json/spanning_cells.json b/src/test/resources/technology/tabula/json/spanning_cells.json index 97f3d147..89bb9707 100644 --- a/src/test/resources/technology/tabula/json/spanning_cells.json +++ b/src/test/resources/technology/tabula/json/spanning_cells.json @@ -1 +1 @@ -[{"extraction_method":"lattice","top":146.51932,"left":119.78943,"width":355.312255859375,"height":270.5516052246094,"data":[[{"top":146.51932,"left":119.78943,"width":355.312255859375,"height":12.938491821289062,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.78943,"width":121.92680358886719,"height":12.785446166992188,"text":"Volume servers 
in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663360595703125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37115,"width":46.73052978515625,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.78943,"width":121.92680358886719,"height":12.832427978515625,"text":"Server closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663360595703125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37115,"width":46.73052978515625,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.78943,"width":121.92680358886719,"height":12.891036987304688,"text":"Server rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663360595703125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37115,"width":46.73052978515625,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.78943,"width":121.92680358886719,"height":12.890411376953125,"text":"Localized data 
centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663360595703125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37115,"width":46.73052978515625,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.78943,"width":121.92680358886719,"height":12.890716552734375,"text":"Mid-tier data centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663360595703125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37115,"width":46.73052978515625,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.78943,"width":121.92680358886719,"height":12.785018920898438,"text":"Enterprise-class data centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785018920898438,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785018920898438,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785018920898438,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663360595703125,"height":12.785018920898438,"text":"1,677"},{"top":223.74785,"left":428.37115,"width":46.73052978515625,"height":12.785018920898438,"text":"1,693"}],[{"top":236.53287,"left":119.78943,"width":355.312255859375,"height":13.1025390625,"text":"Best practice 
scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.6354,"left":119.78943,"width":121.92680358886719,"height":12.72686767578125,"text":"Volume servers in:"},{"top":249.6354,"left":241.71623,"width":46.66426086425781,"height":12.72686767578125,"text":"2007"},{"top":249.6354,"left":288.3805,"width":46.663482666015625,"height":12.72686767578125,"text":"2008"},{"top":249.6354,"left":335.04398,"width":46.663818359375,"height":12.72686767578125,"text":"2009"},{"top":249.6354,"left":381.7078,"width":46.663360595703125,"height":12.72686767578125,"text":"2010"},{"top":249.6354,"left":428.37115,"width":46.73052978515625,"height":12.72686767578125,"text":"2011"}],[{"top":262.36227,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Server closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.78943,"width":121.92680358886719,"height":12.8914794921875,"text":"Server 
rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663360595703125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37115,"width":46.73052978515625,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.78943,"width":121.92680358886719,"height":12.890167236328125,"text":"Localized data centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663360595703125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37115,"width":46.73052978515625,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Mid-tier data centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.78943,"width":121.92680358886719,"height":12.7855224609375,"text":"Enterprise-class data 
centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.7855224609375,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.7855224609375,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.7855224609375,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663360595703125,"height":12.7855224609375,"text":"1,334"},{"top":313.92487,"left":428.37115,"width":46.73052978515625,"height":12.7855224609375,"text":"1,371"}],[{"top":326.7104,"left":119.78943,"width":355.312255859375,"height":13.0440673828125,"text":"State-of-the-art scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75446,"left":119.78943,"width":121.92680358886719,"height":12.784912109375,"text":"Volume servers in:"},{"top":339.75446,"left":241.71623,"width":46.66426086425781,"height":12.784912109375,"text":"2007"},{"top":339.75446,"left":288.3805,"width":46.663482666015625,"height":12.784912109375,"text":"2008"},{"top":339.75446,"left":335.04398,"width":46.663818359375,"height":12.784912109375,"text":"2009"},{"top":339.75446,"left":381.7078,"width":46.663360595703125,"height":12.784912109375,"text":"2010"},{"top":339.75446,"left":428.37115,"width":46.73052978515625,"height":12.784912109375,"text":"2011"}],[{"top":352.53937,"left":119.78943,"width":121.92680358886719,"height":12.89111328125,"text":"Server 
closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663360595703125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37115,"width":46.73052978515625,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.78943,"width":121.92680358886719,"height":12.8902587890625,"text":"Server rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663360595703125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37115,"width":46.73052978515625,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.78943,"width":121.92680358886719,"height":12.891143798828125,"text":"Localized data centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663360595703125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37115,"width":46.73052978515625,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.78943,"width":121.92680358886719,"height":12.89068603515625,"text":"Mid-tier data 
centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663360595703125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37115,"width":46.73052978515625,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.78943,"width":121.92680358886719,"height":12.968353271484375,"text":"Enterprise-class data centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968353271484375,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968353271484375,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968353271484375,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663360595703125,"height":12.968353271484375,"text":"1,424"},{"top":404.10257,"left":428.37115,"width":46.73052978515625,"height":12.968353271484375,"text":"1,485"}]]},{"extraction_method":"lattice","top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.52508544921875,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.880615234375,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.14026,"left":130.8136,"width":99.93739318847656,"height":12.78485107421875,"text":"Server 
class"},{"top":551.14026,"left":230.75099,"width":46.66316223144531,"height":12.78485107421875,"text":"2007"},{"top":551.14026,"left":277.41415,"width":46.663726806640625,"height":12.78485107421875,"text":"2008"},{"top":551.14026,"left":324.07788,"width":46.663970947265625,"height":12.78485107421875,"text":"2009"},{"top":551.14026,"left":370.74185,"width":46.663177490234375,"height":12.78485107421875,"text":"2010"},{"top":551.14026,"left":417.40503,"width":46.712371826171875,"height":12.78485107421875,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.9691162109375,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.9691162109375,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.9691162109375,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.9691162109375,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.9691162109375,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.9691162109375,"text":"98,864"}]]}] \ No newline at end of file 
+[{"extraction_method":"lattice","top":146.51932,"left":119.78943,"width":355.312255859375,"height":270.5516052246094,"right":475.10168,"bottom":417.07092,"data":[[{"top":146.51932,"left":119.78943,"width":355.312255859375,"height":12.938491821289062,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.78943,"width":121.92680358886719,"height":12.785446166992188,"text":"Volume servers in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663360595703125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37115,"width":46.73052978515625,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.78943,"width":121.92680358886719,"height":12.832427978515625,"text":"Server closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663360595703125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37115,"width":46.73052978515625,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.78943,"width":121.92680358886719,"height":12.891036987304688,"text":"Server 
rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663360595703125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37115,"width":46.73052978515625,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.78943,"width":121.92680358886719,"height":12.890411376953125,"text":"Localized data centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663360595703125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37115,"width":46.73052978515625,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.78943,"width":121.92680358886719,"height":12.890716552734375,"text":"Mid-tier data centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663360595703125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37115,"width":46.73052978515625,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.78943,"width":121.92680358886719,"height":12.785018920898438,"text":"Enterprise-class data 
centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785018920898438,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785018920898438,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785018920898438,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663360595703125,"height":12.785018920898438,"text":"1,677"},{"top":223.74785,"left":428.37115,"width":46.73052978515625,"height":12.785018920898438,"text":"1,693"}],[{"top":236.53287,"left":119.78943,"width":355.312255859375,"height":13.1025390625,"text":"Best practice scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.6354,"left":119.78943,"width":121.92680358886719,"height":12.72686767578125,"text":"Volume servers in:"},{"top":249.6354,"left":241.71623,"width":46.66426086425781,"height":12.72686767578125,"text":"2007"},{"top":249.6354,"left":288.3805,"width":46.663482666015625,"height":12.72686767578125,"text":"2008"},{"top":249.6354,"left":335.04398,"width":46.663818359375,"height":12.72686767578125,"text":"2009"},{"top":249.6354,"left":381.7078,"width":46.663360595703125,"height":12.72686767578125,"text":"2010"},{"top":249.6354,"left":428.37115,"width":46.73052978515625,"height":12.72686767578125,"text":"2011"}],[{"top":262.36227,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Server 
closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.78943,"width":121.92680358886719,"height":12.8914794921875,"text":"Server rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663360595703125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37115,"width":46.73052978515625,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.78943,"width":121.92680358886719,"height":12.890167236328125,"text":"Localized data centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663360595703125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37115,"width":46.73052978515625,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Mid-tier data 
centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.78943,"width":121.92680358886719,"height":12.7855224609375,"text":"Enterprise-class data centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.7855224609375,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.7855224609375,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.7855224609375,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663360595703125,"height":12.7855224609375,"text":"1,334"},{"top":313.92487,"left":428.37115,"width":46.73052978515625,"height":12.7855224609375,"text":"1,371"}],[{"top":326.7104,"left":119.78943,"width":355.312255859375,"height":13.0440673828125,"text":"State-of-the-art scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75446,"left":119.78943,"width":121.92680358886719,"height":12.784912109375,"text":"Volume servers 
in:"},{"top":339.75446,"left":241.71623,"width":46.66426086425781,"height":12.784912109375,"text":"2007"},{"top":339.75446,"left":288.3805,"width":46.663482666015625,"height":12.784912109375,"text":"2008"},{"top":339.75446,"left":335.04398,"width":46.663818359375,"height":12.784912109375,"text":"2009"},{"top":339.75446,"left":381.7078,"width":46.663360595703125,"height":12.784912109375,"text":"2010"},{"top":339.75446,"left":428.37115,"width":46.73052978515625,"height":12.784912109375,"text":"2011"}],[{"top":352.53937,"left":119.78943,"width":121.92680358886719,"height":12.89111328125,"text":"Server closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663360595703125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37115,"width":46.73052978515625,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.78943,"width":121.92680358886719,"height":12.8902587890625,"text":"Server rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663360595703125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37115,"width":46.73052978515625,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.78943,"width":121.92680358886719,"height":12.891143798828125,"text":"Localized data 
centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663360595703125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37115,"width":46.73052978515625,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.78943,"width":121.92680358886719,"height":12.89068603515625,"text":"Mid-tier data centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663360595703125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37115,"width":46.73052978515625,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.78943,"width":121.92680358886719,"height":12.968353271484375,"text":"Enterprise-class data 
centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968353271484375,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968353271484375,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968353271484375,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663360595703125,"height":12.968353271484375,"text":"1,424"},{"top":404.10257,"left":428.37115,"width":46.73052978515625,"height":12.968353271484375,"text":"1,485"}]]},{"extraction_method":"lattice","top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.52508544921875,"right":464.1174,"bottom":589.7847,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.880615234375,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.14026,"left":130.8136,"width":99.93739318847656,"height":12.78485107421875,"text":"Server 
class"},{"top":551.14026,"left":230.75099,"width":46.66316223144531,"height":12.78485107421875,"text":"2007"},{"top":551.14026,"left":277.41415,"width":46.663726806640625,"height":12.78485107421875,"text":"2008"},{"top":551.14026,"left":324.07788,"width":46.663970947265625,"height":12.78485107421875,"text":"2009"},{"top":551.14026,"left":370.74185,"width":46.663177490234375,"height":12.78485107421875,"text":"2010"},{"top":551.14026,"left":417.40503,"width":46.712371826171875,"height":12.78485107421875,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.9691162109375,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.9691162109375,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.9691162109375,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.9691162109375,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.9691162109375,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.9691162109375,"text":"98,864"}]]}] diff --git a/src/test/resources/technology/tabula/json/spanning_cells_basic.json b/src/test/resources/technology/tabula/json/spanning_cells_basic.json index da8c71e0..d0164280 100644 --- 
a/src/test/resources/technology/tabula/json/spanning_cells_basic.json +++ b/src/test/resources/technology/tabula/json/spanning_cells_basic.json @@ -1 +1 @@ -[{"extraction_method":"lattice","top":150.56,"left":119.789444,"width":355.31939697265625,"height":266.5108642578125,"data":[[{"top":150.56,"left":119.789444,"width":355.31939697265625,"height":8.897811889648438,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.789444,"width":121.92678833007812,"height":12.785446166992188,"text":"Volume servers in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663330078125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37112,"width":46.73773193359375,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.789444,"width":121.92678833007812,"height":12.832427978515625,"text":"Server 
closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663330078125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37112,"width":46.73773193359375,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.789444,"width":121.92678833007812,"height":12.891036987304688,"text":"Server rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663330078125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37112,"width":46.73773193359375,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.789444,"width":121.92678833007812,"height":12.890411376953125,"text":"Localized data centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663330078125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37112,"width":46.73773193359375,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.789444,"width":121.92678833007812,"height":12.890716552734375,"text":"Mid-tier data 
centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663330078125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37112,"width":46.73773193359375,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.789444,"width":121.92678833007812,"height":12.785003662109375,"text":"Enterprise-class data centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785003662109375,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785003662109375,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785003662109375,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663330078125,"height":12.785003662109375,"text":"1,677"},{"top":223.74785,"left":428.37112,"width":46.73773193359375,"height":12.785003662109375,"text":"1,693"}],[{"top":236.53285,"left":119.789444,"width":355.31939697265625,"height":13.1025390625,"text":"Best practice scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.63539,"left":119.789444,"width":121.92678833007812,"height":12.726882934570312,"text":"Volume servers 
in:"},{"top":249.63539,"left":241.71623,"width":46.66426086425781,"height":12.726882934570312,"text":"2007"},{"top":249.63539,"left":288.3805,"width":46.663482666015625,"height":12.726882934570312,"text":"2008"},{"top":249.63539,"left":335.04398,"width":46.663818359375,"height":12.726882934570312,"text":"2009"},{"top":249.63539,"left":381.7078,"width":46.663330078125,"height":12.726882934570312,"text":"2010"},{"top":249.63539,"left":428.37112,"width":46.73773193359375,"height":12.726882934570312,"text":"2011"}],[{"top":262.36227,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Server closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.789444,"width":121.92678833007812,"height":12.8914794921875,"text":"Server rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663330078125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37112,"width":46.73773193359375,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.789444,"width":121.92678833007812,"height":12.890167236328125,"text":"Localized data 
centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663330078125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37112,"width":46.73773193359375,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Mid-tier data centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.789444,"width":121.92678833007812,"height":12.785552978515625,"text":"Enterprise-class data centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.785552978515625,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.785552978515625,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.785552978515625,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663330078125,"height":12.785552978515625,"text":"1,334"},{"top":313.92487,"left":428.37112,"width":46.73773193359375,"height":12.785552978515625,"text":"1,371"}],[{"top":326.71042,"left":119.789444,"width":355.31939697265625,"height":13.04400634765625,"text":"State-of-the-art 
scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75443,"left":119.789444,"width":121.92678833007812,"height":12.784942626953125,"text":"Volume servers in:"},{"top":339.75443,"left":241.71623,"width":46.66426086425781,"height":12.784942626953125,"text":"2007"},{"top":339.75443,"left":288.3805,"width":46.663482666015625,"height":12.784942626953125,"text":"2008"},{"top":339.75443,"left":335.04398,"width":46.663818359375,"height":12.784942626953125,"text":"2009"},{"top":339.75443,"left":381.7078,"width":46.663330078125,"height":12.784942626953125,"text":"2010"},{"top":339.75443,"left":428.37112,"width":46.73773193359375,"height":12.784942626953125,"text":"2011"}],[{"top":352.53937,"left":119.789444,"width":121.92678833007812,"height":12.89111328125,"text":"Server closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663330078125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37112,"width":46.73773193359375,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.789444,"width":121.92678833007812,"height":12.8902587890625,"text":"Server 
rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663330078125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37112,"width":46.73773193359375,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.789444,"width":121.92678833007812,"height":12.891143798828125,"text":"Localized data centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663330078125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37112,"width":46.73773193359375,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.789444,"width":121.92678833007812,"height":12.89068603515625,"text":"Mid-tier data centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663330078125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37112,"width":46.73773193359375,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.789444,"width":121.92678833007812,"height":12.968292236328125,"text":"Enterprise-class data 
centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968292236328125,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968292236328125,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968292236328125,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663330078125,"height":12.968292236328125,"text":"1,424"},{"top":404.10257,"left":428.37112,"width":46.73773193359375,"height":12.968292236328125,"text":"1,485"}]]},{"extraction_method":"lattice","top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.5423583984375,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.88055419921875,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.1402,"left":130.8136,"width":99.93739318847656,"height":12.784912109375,"text":"Server 
class"},{"top":551.1402,"left":230.75099,"width":46.66316223144531,"height":12.784912109375,"text":"2007"},{"top":551.1402,"left":277.41415,"width":46.663726806640625,"height":12.784912109375,"text":"2008"},{"top":551.1402,"left":324.07788,"width":46.663970947265625,"height":12.784912109375,"text":"2009"},{"top":551.1402,"left":370.74185,"width":46.663177490234375,"height":12.784912109375,"text":"2010"},{"top":551.1402,"left":417.40503,"width":46.712371826171875,"height":12.784912109375,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.98638916015625,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.98638916015625,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.98638916015625,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.98638916015625,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.98638916015625,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.98638916015625,"text":"98,864"}]]}] \ No newline at end of file 
+[{"extraction_method":"lattice","top":150.56,"left":119.789444,"width":355.31939697265625,"height":266.5108642578125,"right":475.10883,"bottom":417.07086,"data":[[{"top":150.56,"left":119.789444,"width":355.31939697265625,"height":8.897811889648438,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.789444,"width":121.92678833007812,"height":12.785446166992188,"text":"Volume servers in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663330078125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37112,"width":46.73773193359375,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.789444,"width":121.92678833007812,"height":12.832427978515625,"text":"Server closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663330078125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37112,"width":46.73773193359375,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.789444,"width":121.92678833007812,"height":12.891036987304688,"text":"Server 
rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663330078125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37112,"width":46.73773193359375,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.789444,"width":121.92678833007812,"height":12.890411376953125,"text":"Localized data centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663330078125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37112,"width":46.73773193359375,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.789444,"width":121.92678833007812,"height":12.890716552734375,"text":"Mid-tier data centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663330078125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37112,"width":46.73773193359375,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.789444,"width":121.92678833007812,"height":12.785003662109375,"text":"Enterprise-class data 
centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785003662109375,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785003662109375,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785003662109375,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663330078125,"height":12.785003662109375,"text":"1,677"},{"top":223.74785,"left":428.37112,"width":46.73773193359375,"height":12.785003662109375,"text":"1,693"}],[{"top":236.53285,"left":119.789444,"width":355.31939697265625,"height":13.1025390625,"text":"Best practice scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.63539,"left":119.789444,"width":121.92678833007812,"height":12.726882934570312,"text":"Volume servers in:"},{"top":249.63539,"left":241.71623,"width":46.66426086425781,"height":12.726882934570312,"text":"2007"},{"top":249.63539,"left":288.3805,"width":46.663482666015625,"height":12.726882934570312,"text":"2008"},{"top":249.63539,"left":335.04398,"width":46.663818359375,"height":12.726882934570312,"text":"2009"},{"top":249.63539,"left":381.7078,"width":46.663330078125,"height":12.726882934570312,"text":"2010"},{"top":249.63539,"left":428.37112,"width":46.73773193359375,"height":12.726882934570312,"text":"2011"}],[{"top":262.36227,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Server 
closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.789444,"width":121.92678833007812,"height":12.8914794921875,"text":"Server rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663330078125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37112,"width":46.73773193359375,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.789444,"width":121.92678833007812,"height":12.890167236328125,"text":"Localized data centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663330078125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37112,"width":46.73773193359375,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Mid-tier data 
centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.789444,"width":121.92678833007812,"height":12.785552978515625,"text":"Enterprise-class data centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.785552978515625,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.785552978515625,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.785552978515625,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663330078125,"height":12.785552978515625,"text":"1,334"},{"top":313.92487,"left":428.37112,"width":46.73773193359375,"height":12.785552978515625,"text":"1,371"}],[{"top":326.71042,"left":119.789444,"width":355.31939697265625,"height":13.04400634765625,"text":"State-of-the-art scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75443,"left":119.789444,"width":121.92678833007812,"height":12.784942626953125,"text":"Volume servers 
in:"},{"top":339.75443,"left":241.71623,"width":46.66426086425781,"height":12.784942626953125,"text":"2007"},{"top":339.75443,"left":288.3805,"width":46.663482666015625,"height":12.784942626953125,"text":"2008"},{"top":339.75443,"left":335.04398,"width":46.663818359375,"height":12.784942626953125,"text":"2009"},{"top":339.75443,"left":381.7078,"width":46.663330078125,"height":12.784942626953125,"text":"2010"},{"top":339.75443,"left":428.37112,"width":46.73773193359375,"height":12.784942626953125,"text":"2011"}],[{"top":352.53937,"left":119.789444,"width":121.92678833007812,"height":12.89111328125,"text":"Server closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663330078125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37112,"width":46.73773193359375,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.789444,"width":121.92678833007812,"height":12.8902587890625,"text":"Server rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663330078125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37112,"width":46.73773193359375,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.789444,"width":121.92678833007812,"height":12.891143798828125,"text":"Localized data 
centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663330078125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37112,"width":46.73773193359375,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.789444,"width":121.92678833007812,"height":12.89068603515625,"text":"Mid-tier data centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663330078125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37112,"width":46.73773193359375,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.789444,"width":121.92678833007812,"height":12.968292236328125,"text":"Enterprise-class data 
centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968292236328125,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968292236328125,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968292236328125,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663330078125,"height":12.968292236328125,"text":"1,424"},{"top":404.10257,"left":428.37112,"width":46.73773193359375,"height":12.968292236328125,"text":"1,485"}]]},{"extraction_method":"lattice","top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.5423583984375,"right":464.1174,"bottom":589.802,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.88055419921875,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.1402,"left":130.8136,"width":99.93739318847656,"height":12.784912109375,"text":"Server 
class"},{"top":551.1402,"left":230.75099,"width":46.66316223144531,"height":12.784912109375,"text":"2007"},{"top":551.1402,"left":277.41415,"width":46.663726806640625,"height":12.784912109375,"text":"2008"},{"top":551.1402,"left":324.07788,"width":46.663970947265625,"height":12.784912109375,"text":"2009"},{"top":551.1402,"left":370.74185,"width":46.663177490234375,"height":12.784912109375,"text":"2010"},{"top":551.1402,"left":417.40503,"width":46.712371826171875,"height":12.784912109375,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.98638916015625,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.98638916015625,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.98638916015625,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.98638916015625,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.98638916015625,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.98638916015625,"text":"98,864"}]]}] diff --git a/src/test/resources/technology/tabula/json/twotables.json b/src/test/resources/technology/tabula/json/twotables.json index 5c70c52e..f6bd4f36 100644 --- a/src/test/resources/technology/tabula/json/twotables.json +++ 
b/src/test/resources/technology/tabula/json/twotables.json @@ -1 +1 @@ -[{"extraction_method":"lattice","top":111.79087,"left":67.41156,"width":342.44476318359375,"height":174.0704345703125,"data":[[{"top":111.79087,"left":67.41156,"width":85.61141967773438,"height":28.536293029785156,"text":""},{"top":111.79087,"left":153.02298,"width":256.8333435058594,"height":14.267105102539062,"text":"株主資本"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":126.057976,"left":153.02298,"width":51.367401123046875,"height":14.269187927246094,"text":"資本金"},{"top":126.057976,"left":204.39038,"width":51.36669921875,"height":14.269187927246094,"text":"資本剰余金"},{"top":126.057976,"left":255.75708,"width":51.3656005859375,"height":14.269187927246094,"text":"利益剰余金"},{"top":126.057976,"left":307.12268,"width":51.366668701171875,"height":14.269187927246094,"text":"自己株式"},{"top":126.057976,"left":358.48935,"width":51.366973876953125,"height":14.269187927246094,"text":"株主資本合計"}],[{"top":140.32716,"left":67.41156,"width":85.61141967773438,"height":14.265777587890625,"text":"当期首残高"},{"top":140.32716,"left":153.02298,"width":51.367401123046875,"height":14.265777587890625,"text":"5,664"},{"top":140.32716,"left":204.39038,"width":51.36669921875,"height":14.265777587890625,"text":"749"},{"top":140.32716,"left":255.75708,"width":51.3656005859375,"height":14.265777587890625,"text":"12,017"},{"top":140.32716,"left":307.12268,"width":51.366668701171875,"height":14.265777587890625,"text":"△747"},{"top":140.32716,"left":358.48935,"width":51.366973876953125,"height":14.265777587890625,"text":"17,683"}],[{"top":154.59294,"left":67.41156,"width":85.61141967773438,"height":14.26910400390625,"text":"当期変動額"},{"top":154.59294,"left":153.02298,"width":51.36740112304687
5,"height":14.26910400390625,"text":""},{"top":154.59294,"left":204.39038,"width":51.36669921875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":255.75708,"width":51.3656005859375,"height":14.26910400390625,"text":""},{"top":154.59294,"left":307.12268,"width":51.366668701171875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":358.48935,"width":51.366973876953125,"height":14.26910400390625,"text":""}],[{"top":168.86205,"left":67.41156,"width":85.61141967773438,"height":14.268997192382812,"text":"剰余金の配当"},{"top":168.86205,"left":153.02298,"width":51.367401123046875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":204.39038,"width":51.36669921875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":255.75708,"width":51.3656005859375,"height":14.268997192382812,"text":"△525"},{"top":168.86205,"left":307.12268,"width":51.366668701171875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":358.48935,"width":51.366973876953125,"height":14.268997192382812,"text":"△525"}],[{"top":183.13104,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期純利益"},{"top":183.13104,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":"1,269"},{"top":183.13104,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":"1,269"}],[{"top":197.4002,"left":67.41156,"width":85.61141967773438,"height":14.268936157226562,"text":"自己株式の取得"},{"top":197.4002,"left":153.02298,"width":51.367401123046875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":204.39038,"width":51.36669921875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":255.75708,"width":51.3656005859375,"height
":14.268936157226562,"text":""},{"top":197.4002,"left":307.12268,"width":51.366668701171875,"height":14.268936157226562,"text":"△0"},{"top":197.4002,"left":358.48935,"width":51.366973876953125,"height":14.268936157226562,"text":"△0"}],[{"top":211.66914,"left":67.41156,"width":85.61141967773438,"height":22.82952880859375,"text":"持分法の適用範囲\rの変動"},{"top":211.66914,"left":153.02298,"width":51.367401123046875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":204.39038,"width":51.36669921875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":255.75708,"width":51.3656005859375,"height":22.82952880859375,"text":"85"},{"top":211.66914,"left":307.12268,"width":51.366668701171875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":358.48935,"width":51.366973876953125,"height":22.82952880859375,"text":"85"}],[{"top":234.49867,"left":67.41156,"width":85.61141967773438,"height":22.829910278320312,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":234.49867,"left":153.02298,"width":51.367401123046875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":204.39038,"width":51.36669921875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":255.75708,"width":51.3656005859375,"height":22.829910278320312,"text":""},{"top":234.49867,"left":307.12268,"width":51.366668701171875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":358.48935,"width":51.366973876953125,"height":22.829910278320312,"text":""}],[{"top":257.32858,"left":67.41156,"width":85.61141967773438,"height":14.2686767578125,"text":"当期変動額合計"},{"top":257.32858,"left":153.02298,"width":51.367401123046875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":204.39038,"width":51.36669921875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":255.75708,"width":51.3656005859375,"height":14.2686767578125,"text":"829"},{"top":257.32858,"left":307.12268,"width":51.366668701171875,"height":14.2686767578125,"text":"△0"},{"top":257.32858,"left":358.48935,"width":51.366
973876953125,"height":14.2686767578125,"text":"829"}],[{"top":271.59726,"left":67.41156,"width":85.61141967773438,"height":14.2640380859375,"text":"当期末残高"},{"top":271.59726,"left":153.02298,"width":51.367401123046875,"height":14.2640380859375,"text":"5,664"},{"top":271.59726,"left":204.39038,"width":51.36669921875,"height":14.2640380859375,"text":"749"},{"top":271.59726,"left":255.75708,"width":51.3656005859375,"height":14.2640380859375,"text":"12,846"},{"top":271.59726,"left":307.12268,"width":51.366668701171875,"height":14.2640380859375,"text":"△747"},{"top":271.59726,"left":358.48935,"width":51.366973876953125,"height":14.2640380859375,"text":"18,512"}]]},{"extraction_method":"lattice","top":312.25272,"left":67.41156,"width":445.17803955078125,"height":191.19696044921875,"data":[[{"top":312.25272,"left":67.41156,"width":85.61141967773438,"height":45.658233642578125,"text":""},{"top":312.25272,"left":153.02298,"width":256.8333435058594,"height":14.267730712890625,"text":"その他の包括利益累計額"},{"top":312.25272,"left":409.85632,"width":51.365631103515625,"height":45.658233642578125,"text":"少数株主持分"},{"top":312.25272,"left":461.22195,"width":51.367645263671875,"height":45.658233642578125,"text":"純資産合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":326.52045,"left":153.02298,"width":51.367401123046875,"height":31.3905029296875,"text":"その他有価証券\r評価差額金"},{"top":326.52045,"left":204.39038,"width":51.36669921875,"height":31.3905029296875,"text":"繰延ヘッジ\r損益"},{"top":326.52045,"left":255.75708,"width":51.3656005859375,"height":31.3905029296875,"text":"為替換算\r調整勘定"},{"top":326.52045,"left":307.12268,"width":51.366668701171875,"height":31.3905029296875,"text":"退職給付に係る\r調整累計額"},{"top":326.52045,"left":358.48935,"width":51.366973876953125,"
height":31.3905029296875,"text":"その他の\r包括利益\r累計額合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":357.91095,"left":67.41156,"width":85.61141967773438,"height":14.268768310546875,"text":"当期首残高"},{"top":357.91095,"left":153.02298,"width":51.367401123046875,"height":14.268768310546875,"text":"△669"},{"top":357.91095,"left":204.39038,"width":51.36669921875,"height":14.268768310546875,"text":"61"},{"top":357.91095,"left":255.75708,"width":51.3656005859375,"height":14.268768310546875,"text":"△109"},{"top":357.91095,"left":307.12268,"width":51.366668701171875,"height":14.268768310546875,"text":"―"},{"top":357.91095,"left":358.48935,"width":51.366973876953125,"height":14.268768310546875,"text":"△717"},{"top":357.91095,"left":409.85632,"width":51.365631103515625,"height":14.268768310546875,"text":"246"},{"top":357.91095,"left":461.22195,"width":51.367645263671875,"height":14.268768310546875,"text":"17,212"}],[{"top":372.17972,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期変動額"},{"top":372.17972,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":""},{"top":372.17972,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":""},{"top":372.17972,"left":409.85632,"width":51.365631103515625,"height":14.2691650390625,"text":""},{"top":372.17972,"left":461.22195,"width":51.367645263671875,"height":14.2691650390625,"text":""}],[{"top":386.44888,"left":67.41156,"width":85.61141967773438,"height":14.268646240234375,"text":"剰余金の配当"},{"top":386.44888,"left":153.02298,"width":51.367401123046875,"height":14.268646240234375,"text":""},{"top":386.44888,"l
eft":204.39038,"width":51.36669921875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":255.75708,"width":51.3656005859375,"height":14.268646240234375,"text":""},{"top":386.44888,"left":307.12268,"width":51.366668701171875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":358.48935,"width":51.366973876953125,"height":14.268646240234375,"text":""},{"top":386.44888,"left":409.85632,"width":51.365631103515625,"height":14.268646240234375,"text":""},{"top":386.44888,"left":461.22195,"width":51.367645263671875,"height":14.268646240234375,"text":"△525"}],[{"top":400.71753,"left":67.41156,"width":85.61141967773438,"height":14.26812744140625,"text":"当期純利益"},{"top":400.71753,"left":153.02298,"width":51.367401123046875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":204.39038,"width":51.36669921875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":255.75708,"width":51.3656005859375,"height":14.26812744140625,"text":""},{"top":400.71753,"left":307.12268,"width":51.366668701171875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":358.48935,"width":51.366973876953125,"height":14.26812744140625,"text":""},{"top":400.71753,"left":409.85632,"width":51.365631103515625,"height":14.26812744140625,"text":""},{"top":400.71753,"left":461.22195,"width":51.367645263671875,"height":14.26812744140625,"text":"1,269"}],[{"top":414.98566,"left":67.41156,"width":85.61141967773438,"height":14.26678466796875,"text":"自己株式の取得"},{"top":414.98566,"left":153.02298,"width":51.367401123046875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":204.39038,"width":51.36669921875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":255.75708,"width":51.3656005859375,"height":14.26678466796875,"text":""},{"top":414.98566,"left":307.12268,"width":51.366668701171875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":358.48935,"width":51.366973876953125,"height":14.26678466796875,"text":""},{"top":414.98566,"left":409.85
632,"width":51.365631103515625,"height":14.26678466796875,"text":""},{"top":414.98566,"left":461.22195,"width":51.367645263671875,"height":14.26678466796875,"text":"△0"}],[{"top":429.25244,"left":67.41156,"width":85.61141967773438,"height":22.8292236328125,"text":"持分法の適用範囲\rの変動"},{"top":429.25244,"left":153.02298,"width":51.367401123046875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":204.39038,"width":51.36669921875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":255.75708,"width":51.3656005859375,"height":22.8292236328125,"text":""},{"top":429.25244,"left":307.12268,"width":51.366668701171875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":358.48935,"width":51.366973876953125,"height":22.8292236328125,"text":""},{"top":429.25244,"left":409.85632,"width":51.365631103515625,"height":22.8292236328125,"text":""},{"top":429.25244,"left":461.22195,"width":51.367645263671875,"height":22.8292236328125,"text":"85"}],[{"top":452.08167,"left":67.41156,"width":85.61141967773438,"height":22.830596923828125,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":452.08167,"left":153.02298,"width":51.367401123046875,"height":22.830596923828125,"text":"556"},{"top":452.08167,"left":204.39038,"width":51.36669921875,"height":22.830596923828125,"text":"80"},{"top":452.08167,"left":255.75708,"width":51.3656005859375,"height":22.830596923828125,"text":"5"},{"top":452.08167,"left":307.12268,"width":51.366668701171875,"height":22.830596923828125,"text":"―"},{"top":452.08167,"left":358.48935,"width":51.366973876953125,"height":22.830596923828125,"text":"642"},{"top":452.08167,"left":409.85632,"width":51.365631103515625,"height":22.830596923828125,"text":"△0"},{"top":452.08167,"left":461.22195,"width":51.367645263671875,"height":22.830596923828125,"text":"642"}],[{"top":474.91226,"left":67.41156,"width":85.61141967773438,"height":14.26776123046875,"text":"当期変動額合計"},{"top":474.91226,"left":153.02298,"width":51.367401123046875,"height":14.26776123046875,"text":"556"}
,{"top":474.91226,"left":204.39038,"width":51.36669921875,"height":14.26776123046875,"text":"80"},{"top":474.91226,"left":255.75708,"width":51.3656005859375,"height":14.26776123046875,"text":"5"},{"top":474.91226,"left":307.12268,"width":51.366668701171875,"height":14.26776123046875,"text":"―"},{"top":474.91226,"left":358.48935,"width":51.366973876953125,"height":14.26776123046875,"text":"642"},{"top":474.91226,"left":409.85632,"width":51.365631103515625,"height":14.26776123046875,"text":"△0"},{"top":474.91226,"left":461.22195,"width":51.367645263671875,"height":14.26776123046875,"text":"1,471"}],[{"top":489.18002,"left":67.41156,"width":85.61141967773438,"height":14.2696533203125,"text":"当期末残高"},{"top":489.18002,"left":153.02298,"width":51.367401123046875,"height":14.2696533203125,"text":"△113"},{"top":489.18002,"left":204.39038,"width":51.36669921875,"height":14.2696533203125,"text":"142"},{"top":489.18002,"left":255.75708,"width":51.3656005859375,"height":14.2696533203125,"text":"△104"},{"top":489.18002,"left":307.12268,"width":51.366668701171875,"height":14.2696533203125,"text":"―"},{"top":489.18002,"left":358.48935,"width":51.366973876953125,"height":14.2696533203125,"text":"△75"},{"top":489.18002,"left":409.85632,"width":51.365631103515625,"height":14.2696533203125,"text":"245"},{"top":489.18002,"left":461.22195,"width":51.367645263671875,"height":14.2696533203125,"text":"18,683"}]]}] \ No newline at end of file 
+[{"extraction_method":"lattice","top":111.79087,"left":67.41156,"width":342.44476318359375,"height":174.0704345703125,"right":409.85632,"bottom":285.8613,"data":[[{"top":111.79087,"left":67.41156,"width":85.61141967773438,"height":28.536293029785156,"text":""},{"top":111.79087,"left":153.02298,"width":256.8333435058594,"height":14.267105102539062,"text":"株主資本"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":126.057976,"left":153.02298,"width":51.367401123046875,"height":14.269187927246094,"text":"資本金"},{"top":126.057976,"left":204.39038,"width":51.36669921875,"height":14.269187927246094,"text":"資本剰余金"},{"top":126.057976,"left":255.75708,"width":51.3656005859375,"height":14.269187927246094,"text":"利益剰余金"},{"top":126.057976,"left":307.12268,"width":51.366668701171875,"height":14.269187927246094,"text":"自己株式"},{"top":126.057976,"left":358.48935,"width":51.366973876953125,"height":14.269187927246094,"text":"株主資本合計"}],[{"top":140.32716,"left":67.41156,"width":85.61141967773438,"height":14.265777587890625,"text":"当期首残高"},{"top":140.32716,"left":153.02298,"width":51.367401123046875,"height":14.265777587890625,"text":"5,664"},{"top":140.32716,"left":204.39038,"width":51.36669921875,"height":14.265777587890625,"text":"749"},{"top":140.32716,"left":255.75708,"width":51.3656005859375,"height":14.265777587890625,"text":"12,017"},{"top":140.32716,"left":307.12268,"width":51.366668701171875,"height":14.265777587890625,"text":"△747"},{"top":140.32716,"left":358.48935,"width":51.366973876953125,"height":14.265777587890625,"text":"17,683"}],[{"top":154.59294,"left":67.41156,"width":85.61141967773438,"height":14.26910400390625,"text":"当期変動額"},{"top":154.59294,"left":153.02298,"width":51.367401123046875,"height":14.26910400390625,"text"
:""},{"top":154.59294,"left":204.39038,"width":51.36669921875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":255.75708,"width":51.3656005859375,"height":14.26910400390625,"text":""},{"top":154.59294,"left":307.12268,"width":51.366668701171875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":358.48935,"width":51.366973876953125,"height":14.26910400390625,"text":""}],[{"top":168.86205,"left":67.41156,"width":85.61141967773438,"height":14.268997192382812,"text":"剰余金の配当"},{"top":168.86205,"left":153.02298,"width":51.367401123046875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":204.39038,"width":51.36669921875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":255.75708,"width":51.3656005859375,"height":14.268997192382812,"text":"△525"},{"top":168.86205,"left":307.12268,"width":51.366668701171875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":358.48935,"width":51.366973876953125,"height":14.268997192382812,"text":"△525"}],[{"top":183.13104,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期純利益"},{"top":183.13104,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":"1,269"},{"top":183.13104,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":"1,269"}],[{"top":197.4002,"left":67.41156,"width":85.61141967773438,"height":14.268936157226562,"text":"自己株式の取得"},{"top":197.4002,"left":153.02298,"width":51.367401123046875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":204.39038,"width":51.36669921875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":255.75708,"width":51.3656005859375,"height":14.268936157226562,"text":""},{"t
op":197.4002,"left":307.12268,"width":51.366668701171875,"height":14.268936157226562,"text":"△0"},{"top":197.4002,"left":358.48935,"width":51.366973876953125,"height":14.268936157226562,"text":"△0"}],[{"top":211.66914,"left":67.41156,"width":85.61141967773438,"height":22.82952880859375,"text":"持分法の適用範囲\rの変動"},{"top":211.66914,"left":153.02298,"width":51.367401123046875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":204.39038,"width":51.36669921875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":255.75708,"width":51.3656005859375,"height":22.82952880859375,"text":"85"},{"top":211.66914,"left":307.12268,"width":51.366668701171875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":358.48935,"width":51.366973876953125,"height":22.82952880859375,"text":"85"}],[{"top":234.49867,"left":67.41156,"width":85.61141967773438,"height":22.829910278320312,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":234.49867,"left":153.02298,"width":51.367401123046875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":204.39038,"width":51.36669921875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":255.75708,"width":51.3656005859375,"height":22.829910278320312,"text":""},{"top":234.49867,"left":307.12268,"width":51.366668701171875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":358.48935,"width":51.366973876953125,"height":22.829910278320312,"text":""}],[{"top":257.32858,"left":67.41156,"width":85.61141967773438,"height":14.2686767578125,"text":"当期変動額合計"},{"top":257.32858,"left":153.02298,"width":51.367401123046875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":204.39038,"width":51.36669921875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":255.75708,"width":51.3656005859375,"height":14.2686767578125,"text":"829"},{"top":257.32858,"left":307.12268,"width":51.366668701171875,"height":14.2686767578125,"text":"△0"},{"top":257.32858,"left":358.48935,"width":51.366973876953125,"height":14.2686767578
125,"text":"829"}],[{"top":271.59726,"left":67.41156,"width":85.61141967773438,"height":14.2640380859375,"text":"当期末残高"},{"top":271.59726,"left":153.02298,"width":51.367401123046875,"height":14.2640380859375,"text":"5,664"},{"top":271.59726,"left":204.39038,"width":51.36669921875,"height":14.2640380859375,"text":"749"},{"top":271.59726,"left":255.75708,"width":51.3656005859375,"height":14.2640380859375,"text":"12,846"},{"top":271.59726,"left":307.12268,"width":51.366668701171875,"height":14.2640380859375,"text":"△747"},{"top":271.59726,"left":358.48935,"width":51.366973876953125,"height":14.2640380859375,"text":"18,512"}]]},{"extraction_method":"lattice","top":312.25272,"left":67.41156,"width":445.17803955078125,"height":191.19696044921875,"right":512.5896,"bottom":503.44968,"data":[[{"top":312.25272,"left":67.41156,"width":85.61141967773438,"height":45.658233642578125,"text":""},{"top":312.25272,"left":153.02298,"width":256.8333435058594,"height":14.267730712890625,"text":"その他の包括利益累計額"},{"top":312.25272,"left":409.85632,"width":51.365631103515625,"height":45.658233642578125,"text":"少数株主持分"},{"top":312.25272,"left":461.22195,"width":51.367645263671875,"height":45.658233642578125,"text":"純資産合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":326.52045,"left":153.02298,"width":51.367401123046875,"height":31.3905029296875,"text":"その他有価証券\r評価差額金"},{"top":326.52045,"left":204.39038,"width":51.36669921875,"height":31.3905029296875,"text":"繰延ヘッジ\r損益"},{"top":326.52045,"left":255.75708,"width":51.3656005859375,"height":31.3905029296875,"text":"為替換算\r調整勘定"},{"top":326.52045,"left":307.12268,"width":51.366668701171875,"height":31.3905029296875,"text":"退職給付に係る\r調整累計額"},{"top":326.52045,"left":358.48935,"width":51.366973876953125,
"height":31.3905029296875,"text":"その他の\r包括利益\r累計額合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":357.91095,"left":67.41156,"width":85.61141967773438,"height":14.268768310546875,"text":"当期首残高"},{"top":357.91095,"left":153.02298,"width":51.367401123046875,"height":14.268768310546875,"text":"△669"},{"top":357.91095,"left":204.39038,"width":51.36669921875,"height":14.268768310546875,"text":"61"},{"top":357.91095,"left":255.75708,"width":51.3656005859375,"height":14.268768310546875,"text":"△109"},{"top":357.91095,"left":307.12268,"width":51.366668701171875,"height":14.268768310546875,"text":"―"},{"top":357.91095,"left":358.48935,"width":51.366973876953125,"height":14.268768310546875,"text":"△717"},{"top":357.91095,"left":409.85632,"width":51.365631103515625,"height":14.268768310546875,"text":"246"},{"top":357.91095,"left":461.22195,"width":51.367645263671875,"height":14.268768310546875,"text":"17,212"}],[{"top":372.17972,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期変動額"},{"top":372.17972,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":""},{"top":372.17972,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":""},{"top":372.17972,"left":409.85632,"width":51.365631103515625,"height":14.2691650390625,"text":""},{"top":372.17972,"left":461.22195,"width":51.367645263671875,"height":14.2691650390625,"text":""}],[{"top":386.44888,"left":67.41156,"width":85.61141967773438,"height":14.268646240234375,"text":"剰余金の配当"},{"top":386.44888,"left":153.02298,"width":51.367401123046875,"height":14.268646240234375,"text":""},{"top":386.44888,"
left":204.39038,"width":51.36669921875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":255.75708,"width":51.3656005859375,"height":14.268646240234375,"text":""},{"top":386.44888,"left":307.12268,"width":51.366668701171875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":358.48935,"width":51.366973876953125,"height":14.268646240234375,"text":""},{"top":386.44888,"left":409.85632,"width":51.365631103515625,"height":14.268646240234375,"text":""},{"top":386.44888,"left":461.22195,"width":51.367645263671875,"height":14.268646240234375,"text":"△525"}],[{"top":400.71753,"left":67.41156,"width":85.61141967773438,"height":14.26812744140625,"text":"当期純利益"},{"top":400.71753,"left":153.02298,"width":51.367401123046875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":204.39038,"width":51.36669921875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":255.75708,"width":51.3656005859375,"height":14.26812744140625,"text":""},{"top":400.71753,"left":307.12268,"width":51.366668701171875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":358.48935,"width":51.366973876953125,"height":14.26812744140625,"text":""},{"top":400.71753,"left":409.85632,"width":51.365631103515625,"height":14.26812744140625,"text":""},{"top":400.71753,"left":461.22195,"width":51.367645263671875,"height":14.26812744140625,"text":"1,269"}],[{"top":414.98566,"left":67.41156,"width":85.61141967773438,"height":14.26678466796875,"text":"自己株式の取得"},{"top":414.98566,"left":153.02298,"width":51.367401123046875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":204.39038,"width":51.36669921875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":255.75708,"width":51.3656005859375,"height":14.26678466796875,"text":""},{"top":414.98566,"left":307.12268,"width":51.366668701171875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":358.48935,"width":51.366973876953125,"height":14.26678466796875,"text":""},{"top":414.98566,"left":409.8
5632,"width":51.365631103515625,"height":14.26678466796875,"text":""},{"top":414.98566,"left":461.22195,"width":51.367645263671875,"height":14.26678466796875,"text":"△0"}],[{"top":429.25244,"left":67.41156,"width":85.61141967773438,"height":22.8292236328125,"text":"持分法の適用範囲\rの変動"},{"top":429.25244,"left":153.02298,"width":51.367401123046875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":204.39038,"width":51.36669921875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":255.75708,"width":51.3656005859375,"height":22.8292236328125,"text":""},{"top":429.25244,"left":307.12268,"width":51.366668701171875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":358.48935,"width":51.366973876953125,"height":22.8292236328125,"text":""},{"top":429.25244,"left":409.85632,"width":51.365631103515625,"height":22.8292236328125,"text":""},{"top":429.25244,"left":461.22195,"width":51.367645263671875,"height":22.8292236328125,"text":"85"}],[{"top":452.08167,"left":67.41156,"width":85.61141967773438,"height":22.830596923828125,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":452.08167,"left":153.02298,"width":51.367401123046875,"height":22.830596923828125,"text":"556"},{"top":452.08167,"left":204.39038,"width":51.36669921875,"height":22.830596923828125,"text":"80"},{"top":452.08167,"left":255.75708,"width":51.3656005859375,"height":22.830596923828125,"text":"5"},{"top":452.08167,"left":307.12268,"width":51.366668701171875,"height":22.830596923828125,"text":"―"},{"top":452.08167,"left":358.48935,"width":51.366973876953125,"height":22.830596923828125,"text":"642"},{"top":452.08167,"left":409.85632,"width":51.365631103515625,"height":22.830596923828125,"text":"△0"},{"top":452.08167,"left":461.22195,"width":51.367645263671875,"height":22.830596923828125,"text":"642"}],[{"top":474.91226,"left":67.41156,"width":85.61141967773438,"height":14.26776123046875,"text":"当期変動額合計"},{"top":474.91226,"left":153.02298,"width":51.367401123046875,"height":14.26776123046875,"text":"556"
},{"top":474.91226,"left":204.39038,"width":51.36669921875,"height":14.26776123046875,"text":"80"},{"top":474.91226,"left":255.75708,"width":51.3656005859375,"height":14.26776123046875,"text":"5"},{"top":474.91226,"left":307.12268,"width":51.366668701171875,"height":14.26776123046875,"text":"―"},{"top":474.91226,"left":358.48935,"width":51.366973876953125,"height":14.26776123046875,"text":"642"},{"top":474.91226,"left":409.85632,"width":51.365631103515625,"height":14.26776123046875,"text":"△0"},{"top":474.91226,"left":461.22195,"width":51.367645263671875,"height":14.26776123046875,"text":"1,471"}],[{"top":489.18002,"left":67.41156,"width":85.61141967773438,"height":14.2696533203125,"text":"当期末残高"},{"top":489.18002,"left":153.02298,"width":51.367401123046875,"height":14.2696533203125,"text":"△113"},{"top":489.18002,"left":204.39038,"width":51.36669921875,"height":14.2696533203125,"text":"142"},{"top":489.18002,"left":255.75708,"width":51.3656005859375,"height":14.2696533203125,"text":"△104"},{"top":489.18002,"left":307.12268,"width":51.366668701171875,"height":14.2696533203125,"text":"―"},{"top":489.18002,"left":358.48935,"width":51.366973876953125,"height":14.2696533203125,"text":"△75"},{"top":489.18002,"left":409.85632,"width":51.365631103515625,"height":14.2696533203125,"text":"245"},{"top":489.18002,"left":461.22195,"width":51.367645263671875,"height":14.2696533203125,"text":"18,683"}]]}] From a86b72aa81c0bcb4b8b4402dc94ca1211f152930 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 11 Dec 2018 10:03:32 -0300 Subject: [PATCH 050/200] fix test --- .../tabula/json/argentina_diputados_voting_record.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json index f8846c62..6b60f8a7 100644 --- 
a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json +++ b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json @@ -1,2 +1 @@ -{"extraction_method":"stream","top":0.0,"left":0.0,"width":549.0399780273438,"height":782.0400390625,"right":549.04,"bottom":782.04004,"data":[[{"top":279.87,"left":28.56,"width":175.21029663085938,"height":6.449999809265137,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":279.87,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":280.59,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":279.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":296.31,"left":28.56,"width":141.71029663085938,"height":6.449999809265137,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":296.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":297.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":296.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":312.87,"left":28.56,"width":83.75028228759766,"height":6.449999809265137,"text":"ALONSO, María Luz"},{"top":312.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":313.59,"left":397.56,"width":42.250274658203125,"height":6.449999809265137,"text":"La Pampa"},{"top":312.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":329.31,"left":28.56,"width":84.25028228759766,"height":6.449999809265137,"text":"ARENA, Celia Isabel"},{"top":329.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":330.15,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa 
Fe"},{"top":329.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":345.87,"left":28.56,"width":110.29029846191406,"height":6.449999809265137,"text":"ARREGUI, Andrés Roberto"},{"top":345.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":346.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":345.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":362.31,"left":28.56,"width":115.69029235839844,"height":6.449999809265137,"text":"AVOSCAN, Herman Horacio"},{"top":362.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":363.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":362.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":378.87,"left":28.56,"width":95.6902847290039,"height":6.449999809265137,"text":"BALCEDO, María Ester"},{"top":378.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":379.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":378.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":395.31,"left":28.56,"width":127.69029235839844,"height":6.449999809265137,"text":"BARRANDEGUY, Raúl Enrique"},{"top":395.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":396.15,"left":397.56,"width":43.6702880859375,"height":6.449999809265137,"text":"Entre Ríos"},{"top":395.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":411.87,"left":28.56,"width":106.69029235839844,"height":6.449999809265137,"text":"BASTERRA, Luis 
Eugenio"},{"top":411.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":412.59,"left":397.56,"width":36.73028564453125,"height":6.449999809265137,"text":"Formosa"},{"top":411.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":428.31,"left":28.56,"width":92.21028137207031,"height":6.449999809265137,"text":"BEDANO, Nora Esther"},{"top":428.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":429.15,"left":397.56,"width":35.6602783203125,"height":6.449999809265137,"text":"Córdoba"},{"top":428.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":444.87,"left":28.56,"width":100.69029235839844,"height":6.449999809265137,"text":"BERNAL, María Eugenia"},{"top":444.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":445.59,"left":397.56,"width":22.200286865234375,"height":6.449999809265137,"text":"Jujuy"},{"top":444.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":461.31,"left":28.56,"width":112.21029663085938,"height":6.449999809265137,"text":"BERTONE, Rosana Andrea"},{"top":461.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":462.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra del Fuego"},{"top":461.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":477.87,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"BIANCHI, María del Carmen"},{"top":477.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":478.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. 
As."},{"top":477.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":494.31,"left":28.56,"width":115.19029235839844,"height":6.449999809265137,"text":"BIDEGAIN, Gloria Mercedes"},{"top":494.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":495.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":494.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":510.87,"left":28.56,"width":66.25028228759766,"height":6.449999809265137,"text":"BRAWER, Mara"},{"top":510.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":511.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. As."},{"top":510.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":527.31,"left":28.56,"width":90.73028564453125,"height":6.449999809265137,"text":"BRILLO, José Ricardo"},{"top":527.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular Neuquino"},{"top":528.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":527.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":543.87,"left":28.56,"width":120.73028564453125,"height":6.449999809265137,"text":"BROMBERG, Isaac Benjamín"},{"top":543.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":544.59,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":543.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":560.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"BRUE, Daniel 
Agustín"},{"top":560.31,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":561.15,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":560.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":576.87,"left":28.56,"width":72.23028564453125,"height":6.449999809265137,"text":"CALCAGNO, Eric"},{"top":576.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":577.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":576.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":593.31,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"CARLOTTO, Remo Gerardo"},{"top":593.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":594.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":593.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":609.87,"left":28.56,"width":122.1702880859375,"height":6.449999809265137,"text":"CARMONA, Guillermo Ramón"},{"top":609.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":610.59,"left":397.56,"width":38.1602783203125,"height":6.449999809265137,"text":"Mendoza"},{"top":609.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":626.31,"left":28.56,"width":124.73028564453125,"height":6.449999809265137,"text":"CATALAN MAGNI, Julio César"},{"top":626.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":627.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra 
del Fuego"},{"top":626.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":642.87,"left":28.56,"width":88.6902847290039,"height":6.449999809265137,"text":"CEJAS, Jorge Alberto"},{"top":642.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":643.59,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":642.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":659.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"CHIENO, María Elena"},{"top":659.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":660.15,"left":397.56,"width":42.72027587890625,"height":6.449999809265137,"text":"Corrientes"},{"top":659.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":675.87,"left":28.56,"width":96.25028228759766,"height":6.449999809265137,"text":"CIAMPINI, José Alberto"},{"top":675.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":676.59,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":675.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":692.31,"left":28.56,"width":131.77029418945312,"height":6.449999809265137,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":692.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":693.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":692.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":708.87,"left":28.56,"width":62.15028381347656,"height":6.449999809265137,"text":"CLERI, 
Marcos"},{"top":708.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":709.59,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":708.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":725.31,"left":28.56,"width":101.77029418945312,"height":6.449999809265137,"text":"COMELLI, Alicia Marcela"},{"top":725.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular Neuquino"},{"top":726.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":725.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":741.87,"left":28.56,"width":88.1902847290039,"height":6.449999809265137,"text":"CONTI, Diana Beatriz"},{"top":741.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":742.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":741.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":758.31,"left":28.56,"width":98.75028228759766,"height":6.449999809265137,"text":"CORDOBA, Stella Maris"},{"top":758.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":759.15,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":758.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":774.87,"left":28.56,"width":105.25028991699219,"height":6.449999809265137,"text":"CURRILEN, Oscar Rubén"},{"top":774.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - 
PJ"},{"top":775.59,"left":397.56,"width":30.2802734375,"height":6.449999809265137,"text":"Chubut"},{"top":774.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}]]} - +{"extraction_method":"stream","top":269.875,"left":12.75,"width":548.25,"height":520.625,"right":561.0,"bottom":790.5,"data":[[{"top":279.87,"left":28.56,"width":175.21029663085938,"height":6.449999809265137,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":279.87,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":280.59,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":279.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":296.31,"left":28.56,"width":141.71029663085938,"height":6.449999809265137,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":296.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":297.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":296.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":312.87,"left":28.56,"width":83.75028228759766,"height":6.449999809265137,"text":"ALONSO, María Luz"},{"top":312.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":313.59,"left":397.56,"width":42.250274658203125,"height":6.449999809265137,"text":"La Pampa"},{"top":312.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":329.31,"left":28.56,"width":84.25028228759766,"height":6.449999809265137,"text":"ARENA, Celia Isabel"},{"top":329.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - 
PJ"},{"top":330.15,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":329.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":345.87,"left":28.56,"width":110.29029846191406,"height":6.449999809265137,"text":"ARREGUI, Andrés Roberto"},{"top":345.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":346.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":345.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":362.31,"left":28.56,"width":115.69029235839844,"height":6.449999809265137,"text":"AVOSCAN, Herman Horacio"},{"top":362.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":363.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":362.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":378.87,"left":28.56,"width":95.6902847290039,"height":6.449999809265137,"text":"BALCEDO, María Ester"},{"top":378.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":379.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":378.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":395.31,"left":28.56,"width":127.69029235839844,"height":6.449999809265137,"text":"BARRANDEGUY, Raúl Enrique"},{"top":395.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":396.15,"left":397.56,"width":43.6702880859375,"height":6.449999809265137,"text":"Entre 
Ríos"},{"top":395.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":411.87,"left":28.56,"width":106.69029235839844,"height":6.449999809265137,"text":"BASTERRA, Luis Eugenio"},{"top":411.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":412.59,"left":397.56,"width":36.73028564453125,"height":6.449999809265137,"text":"Formosa"},{"top":411.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":428.31,"left":28.56,"width":92.21028137207031,"height":6.449999809265137,"text":"BEDANO, Nora Esther"},{"top":428.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":429.15,"left":397.56,"width":35.6602783203125,"height":6.449999809265137,"text":"Córdoba"},{"top":428.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":444.87,"left":28.56,"width":100.69029235839844,"height":6.449999809265137,"text":"BERNAL, María Eugenia"},{"top":444.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":445.59,"left":397.56,"width":22.200286865234375,"height":6.449999809265137,"text":"Jujuy"},{"top":444.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":461.31,"left":28.56,"width":112.21029663085938,"height":6.449999809265137,"text":"BERTONE, Rosana Andrea"},{"top":461.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":462.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra del Fuego"},{"top":461.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":477.87,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"BIANCHI, María del 
Carmen"},{"top":477.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":478.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. As."},{"top":477.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":494.31,"left":28.56,"width":115.19029235839844,"height":6.449999809265137,"text":"BIDEGAIN, Gloria Mercedes"},{"top":494.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":495.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":494.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":510.87,"left":28.56,"width":66.25028228759766,"height":6.449999809265137,"text":"BRAWER, Mara"},{"top":510.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":511.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. 
As."},{"top":510.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":527.31,"left":28.56,"width":90.73028564453125,"height":6.449999809265137,"text":"BRILLO, José Ricardo"},{"top":527.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular Neuquino"},{"top":528.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":527.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":543.87,"left":28.56,"width":120.73028564453125,"height":6.449999809265137,"text":"BROMBERG, Isaac Benjamín"},{"top":543.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":544.59,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":543.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":560.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"BRUE, Daniel Agustín"},{"top":560.31,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":561.15,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":560.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":576.87,"left":28.56,"width":72.23028564453125,"height":6.449999809265137,"text":"CALCAGNO, Eric"},{"top":576.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":577.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":576.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":593.31,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"CARLOTTO, Remo 
Gerardo"},{"top":593.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":594.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":593.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":609.87,"left":28.56,"width":122.1702880859375,"height":6.449999809265137,"text":"CARMONA, Guillermo Ramón"},{"top":609.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":610.59,"left":397.56,"width":38.1602783203125,"height":6.449999809265137,"text":"Mendoza"},{"top":609.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":626.31,"left":28.56,"width":124.73028564453125,"height":6.449999809265137,"text":"CATALAN MAGNI, Julio César"},{"top":626.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":627.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra del Fuego"},{"top":626.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":642.87,"left":28.56,"width":88.6902847290039,"height":6.449999809265137,"text":"CEJAS, Jorge Alberto"},{"top":642.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":643.59,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":642.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":659.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"CHIENO, María Elena"},{"top":659.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - 
PJ"},{"top":660.15,"left":397.56,"width":42.72027587890625,"height":6.449999809265137,"text":"Corrientes"},{"top":659.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":675.87,"left":28.56,"width":96.25028228759766,"height":6.449999809265137,"text":"CIAMPINI, José Alberto"},{"top":675.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":676.59,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":675.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":692.31,"left":28.56,"width":131.77029418945312,"height":6.449999809265137,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":692.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":693.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":692.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":708.87,"left":28.56,"width":62.15028381347656,"height":6.449999809265137,"text":"CLERI, Marcos"},{"top":708.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":709.59,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":708.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":725.31,"left":28.56,"width":101.77029418945312,"height":6.449999809265137,"text":"COMELLI, Alicia Marcela"},{"top":725.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular 
Neuquino"},{"top":726.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":725.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":741.87,"left":28.56,"width":88.1902847290039,"height":6.449999809265137,"text":"CONTI, Diana Beatriz"},{"top":741.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":742.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":741.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":758.31,"left":28.56,"width":98.75028228759766,"height":6.449999809265137,"text":"CORDOBA, Stella Maris"},{"top":758.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":759.15,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":758.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":774.87,"left":28.56,"width":105.25028991699219,"height":6.449999809265137,"text":"CURRILEN, Oscar Rubén"},{"top":774.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":775.59,"left":397.56,"width":30.2802734375,"height":6.449999809265137,"text":"Chubut"},{"top":774.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}]]} From 7eda6d793aa696f7e509ffb07677f2bd3fa4cdda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 4 Jun 2019 12:21:57 -0300 Subject: [PATCH 051/200] Remove Gitter badge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …we're not really checking that channel anymore. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c5bb42f4..a13ea913 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -tabula-java [![Build Status](https://travis-ci.org/tabulapdf/tabula-java.svg?branch=master)](https://travis-ci.org/tabulapdf/tabula-java) [![Build status](https://ci.appveyor.com/api/projects/status/l5gym1mjhrd2v8yn?svg=true)](https://ci.appveyor.com/project/jazzido/tabula-java) [![Join the chat at https://gitter.im/tabulapdf/tabula-java](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/tabulapdf/tabula-java?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +tabula-java [![Build Status](https://travis-ci.org/tabulapdf/tabula-java.svg?branch=master)](https://travis-ci.org/tabulapdf/tabula-java) [![Build status](https://ci.appveyor.com/api/projects/status/l5gym1mjhrd2v8yn?svg=true)](https://ci.appveyor.com/project/jazzido/tabula-java) =========== `tabula-java` is a library for extracting tables from PDF files — it is the table extraction engine that powers [Tabula](http://tabula.technology/) ([repo](http://github.com/tabulapdf/tabula)). You can use `tabula-java` as a command-line tool to programmatically extract tables from PDFs. From fcb5e495443bfb782d90fd5f72503e7d1e3413fe Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Tue, 4 Jun 2019 15:21:23 +0000 Subject: [PATCH 052/200] Bump maven-compiler-plugin from 3.1 to 3.8.1 Bumps [maven-compiler-plugin](https://github.com/apache/maven-compiler-plugin) from 3.1 to 3.8.1. 
- [Release notes](https://github.com/apache/maven-compiler-plugin/releases) - [Commits](https://github.com/apache/maven-compiler-plugin/compare/maven-compiler-plugin-3.1...maven-compiler-plugin-3.8.1) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d79a18a5..fcdf17b0 100644 --- a/pom.xml +++ b/pom.xml @@ -124,7 +124,7 @@ maven-compiler-plugin - 3.1 + 3.8.1 1.7 1.7 From 41b4d725c44618c5568dfdf676012b54788f735c Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Tue, 4 Jun 2019 15:21:38 +0000 Subject: [PATCH 053/200] Bump commons-csv from 1.5 to 1.6 Bumps commons-csv from 1.5 to 1.6. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index fcdf17b0..bb8673cf 100644 --- a/pom.xml +++ b/pom.xml @@ -268,7 +268,7 @@ org.apache.commons commons-csv - 1.5 + 1.6 From f107ee81b80700f5561c3097bd97bc4a36318cb4 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Tue, 4 Jun 2019 15:22:18 +0000 Subject: [PATCH 054/200] Bump jbig2-imageio from 3.0.1 to 3.0.2 Bumps jbig2-imageio from 3.0.1 to 3.0.2. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index bb8673cf..a0488f19 100644 --- a/pom.xml +++ b/pom.xml @@ -292,7 +292,7 @@ org.apache.pdfbox jbig2-imageio - 3.0.1 + 3.0.2 From f790d9e5a9492b81a430125fb766769f17664192 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Tue, 4 Jun 2019 15:22:44 +0000 Subject: [PATCH 055/200] Bump bcprov-jdk15on from 1.59 to 1.61 Bumps [bcprov-jdk15on](https://github.com/bcgit/bc-java) from 1.59 to 1.61. 
- [Release notes](https://github.com/bcgit/bc-java/releases) - [Changelog](https://github.com/bcgit/bc-java/blob/master/docs/releasenotes.html) - [Commits](https://github.com/bcgit/bc-java/commits) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a0488f19..a0113bc9 100644 --- a/pom.xml +++ b/pom.xml @@ -243,7 +243,7 @@ org.bouncycastle bcprov-jdk15on - 1.59 + 1.61 From 4302c8d3663954c035c4a46f12234a0f657235c3 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Wed, 5 Jun 2019 12:11:39 +0000 Subject: [PATCH 056/200] Bump slf4j-simple from 1.7.25 to 1.7.26 Bumps [slf4j-simple](https://github.com/qos-ch/slf4j) from 1.7.25 to 1.7.26. - [Release notes](https://github.com/qos-ch/slf4j/releases) - [Commits](https://github.com/qos-ch/slf4j/compare/v_1.7.25...v_1.7.26) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a0113bc9..d7cbaff7 100644 --- a/pom.xml +++ b/pom.xml @@ -231,7 +231,7 @@ org.slf4j slf4j-simple - 1.7.25 + 1.7.26 From 1cda37c6aeb0ef1b7be4767e623c2ed496d15d22 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Tue, 4 Jun 2019 15:23:29 +0000 Subject: [PATCH 057/200] Bump bcmail-jdk15on from 1.59 to 1.61 Bumps [bcmail-jdk15on](https://github.com/bcgit/bc-java) from 1.59 to 1.61. 
- [Release notes](https://github.com/bcgit/bc-java/releases) - [Changelog](https://github.com/bcgit/bc-java/blob/master/docs/releasenotes.html) - [Commits](https://github.com/bcgit/bc-java/commits) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d7cbaff7..48b253ea 100644 --- a/pom.xml +++ b/pom.xml @@ -249,7 +249,7 @@ org.bouncycastle bcmail-jdk15on - 1.59 + 1.61 From 88ca8c549060a25473b2d721627813f37e4e9e92 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Tue, 4 Jun 2019 15:24:43 +0000 Subject: [PATCH 058/200] Bump slf4j-api from 1.7.25 to 1.7.26 Bumps [slf4j-api](https://github.com/qos-ch/slf4j) from 1.7.25 to 1.7.26. - [Release notes](https://github.com/qos-ch/slf4j/releases) - [Commits](https://github.com/qos-ch/slf4j/compare/v_1.7.25...v_1.7.26) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 48b253ea..fe51d1e2 100644 --- a/pom.xml +++ b/pom.xml @@ -225,7 +225,7 @@ org.slf4j slf4j-api - 1.7.25 + 1.7.26 From 31b4def9d37dde4cae08326c4c2cc45aa01a9382 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Tue, 4 Jun 2019 15:23:59 +0000 Subject: [PATCH 059/200] Bump maven-surefire-plugin from 2.20.1 to 2.22.2 Bumps [maven-surefire-plugin](https://github.com/apache/maven-surefire) from 2.20.1 to 2.22.2. 
- [Release notes](https://github.com/apache/maven-surefire/releases) - [Commits](https://github.com/apache/maven-surefire/compare/surefire-2.20.1...surefire-2.22.2) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index fe51d1e2..15ea0874 100644 --- a/pom.xml +++ b/pom.xml @@ -146,7 +146,7 @@ org.apache.maven.plugins maven-surefire-plugin - 2.20.1 + 2.22.2 -Xms1024m -Xmx2048m From 9b99be1e2708f845e0f16439a78a3eb4c9d6f529 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Tue, 4 Jun 2019 15:24:08 +0000 Subject: [PATCH 060/200] Bump maven-gpg-plugin from 1.5 to 1.6 Bumps maven-gpg-plugin from 1.5 to 1.6. --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 15ea0874..c631aa9c 100644 --- a/pom.xml +++ b/pom.xml @@ -111,7 +111,7 @@ org.apache.maven.plugins maven-gpg-plugin - 1.5 + 1.6 sign-artifacts @@ -199,7 +199,7 @@ org.apache.maven.plugins maven-gpg-plugin - 1.5 + 1.6 sign-artifacts From d7de3ada03e2ba18c5616f015a63b3dda017b081 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Tue, 4 Jun 2019 15:24:56 +0000 Subject: [PATCH 061/200] Bump maven-source-plugin from 2.2.1 to 3.1.0 Bumps [maven-source-plugin](https://github.com/apache/maven-source-plugin) from 2.2.1 to 3.1.0. 
- [Release notes](https://github.com/apache/maven-source-plugin/releases) - [Commits](https://github.com/apache/maven-source-plugin/compare/maven-source-plugin-2.2.1...maven-source-plugin-3.1.0) --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index c631aa9c..336bc473 100644 --- a/pom.xml +++ b/pom.xml @@ -85,7 +85,7 @@ org.apache.maven.plugins maven-source-plugin - 2.2.1 + 3.1.0 attach-sources @@ -186,7 +186,7 @@ org.apache.maven.plugins maven-source-plugin - 2.2.1 + 3.1.0 attach-sources From 93be9bea468beb7e9290e843e1e346b569987b03 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Fri, 7 Jun 2019 11:45:18 +0000 Subject: [PATCH 062/200] Bump bcprov-jdk15on from 1.61 to 1.62 Bumps [bcprov-jdk15on](https://github.com/bcgit/bc-java) from 1.61 to 1.62. - [Release notes](https://github.com/bcgit/bc-java/releases) - [Changelog](https://github.com/bcgit/bc-java/blob/master/docs/releasenotes.html) - [Commits](https://github.com/bcgit/bc-java/commits) Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 336bc473..ab153b7b 100644 --- a/pom.xml +++ b/pom.xml @@ -243,7 +243,7 @@ org.bouncycastle bcprov-jdk15on - 1.61 + 1.62 From 45512978ed34f7c39cd0bb9560d67d15f31c03d8 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Fri, 7 Jun 2019 11:45:39 +0000 Subject: [PATCH 063/200] Bump bcmail-jdk15on from 1.61 to 1.62 Bumps [bcmail-jdk15on](https://github.com/bcgit/bc-java) from 1.61 to 1.62. 
- [Release notes](https://github.com/bcgit/bc-java/releases) - [Changelog](https://github.com/bcgit/bc-java/blob/master/docs/releasenotes.html) - [Commits](https://github.com/bcgit/bc-java/commits) Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ab153b7b..b8f1f810 100644 --- a/pom.xml +++ b/pom.xml @@ -249,7 +249,7 @@ org.bouncycastle bcmail-jdk15on - 1.61 + 1.62 From 975592853792e98e109cba110e0db29887fca522 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Mon, 10 Jun 2019 13:51:07 -0300 Subject: [PATCH 064/200] Update supported JDKs in travis --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index cb01b361..1bd4cbdc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,9 +2,12 @@ language: java install: mvn install -DskipTests=true -Dmaven.javadoc.skip=true -Dgpg.skip=true -B -V script: mvn test -Dgpg.skip=true jdk: - - openjdk7 - oraclejdk8 - oraclejdk9 + - oraclejdk10 + - openjdk8 + - openjdk9 + - openjdk10 sudo: false From d7a3a7221c18c1a93ee00f104de32b7fafb7ecb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Mon, 10 Jun 2019 13:53:13 -0300 Subject: [PATCH 065/200] Remove deprecated oraclejdk10 from Travis config --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1bd4cbdc..7a69aedf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ script: mvn test -Dgpg.skip=true jdk: - oraclejdk8 - oraclejdk9 - - oraclejdk10 - openjdk8 - openjdk9 - openjdk10 From d562ca68acffc467bccff8f7984b48ff21a3a140 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Thu, 6 Jun 2019 12:02:15 +0000 Subject: [PATCH 066/200] Bump commons-csv from 1.6 to 1.7 Bumps commons-csv from 1.6 to 1.7. 
Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b8f1f810..aa6314ee 100644 --- a/pom.xml +++ b/pom.xml @@ -268,7 +268,7 @@ org.apache.commons commons-csv - 1.6 + 1.7 From ff9476f6af9cb976336caef250cd73774047a670 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Mon, 24 Jun 2019 18:10:16 -0300 Subject: [PATCH 067/200] remove unused code --- .../tabula/ObjectExtractorStreamEngine.java | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java index ff58dfc6..ae249a40 100644 --- a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java +++ b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java @@ -23,8 +23,6 @@ class ObjectExtractorStreamEngine extends PDFGraphicsStreamEngine { - private static final String NBSP = "\u00A0"; - protected List rulings; private AffineTransform pageTransform; private boolean debugClippingPaths; @@ -32,7 +30,6 @@ class ObjectExtractorStreamEngine extends PDFGraphicsStreamEngine { private Logger log; private int clipWindingRule = -1; private GeneralPath currentPath = new GeneralPath(); - public List clippingPaths; protected ObjectExtractorStreamEngine(PDPage page) { super(page); @@ -249,14 +246,6 @@ public Rectangle2D currentClippingPath() { return transformedClippingPath.getBounds2D(); } - public boolean isDebugClippingPaths() { - return debugClippingPaths; - } - - public void setDebugClippingPaths(boolean debugClippingPaths) { - this.debugClippingPaths = debugClippingPaths; - } - class PointComparator implements Comparator { @Override public int compare(Point2D o1, Point2D o2) { From 2b3cc7ddc244ba517b65a6ac9d18d6b7d7b02eae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Mon, 24 Jun 2019 18:10:30 -0300 Subject: [PATCH 068/200] adjust tests --- 
...andLineApp_testGuessOption_no_guessing.csv | 110 +++++++++--------- ...dLineApp_testGuessOption_with_guessing.csv | 80 ++++++------- .../argentina_diputados_voting_record.json | 2 +- 3 files changed, 96 insertions(+), 96 deletions(-) diff --git a/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_no_guessing.csv b/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_no_guessing.csv index cd546d5a..de63c5c0 100644 --- a/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_no_guessing.csv +++ b/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_no_guessing.csv @@ -1,65 +1,65 @@ -"E-PRTR pollutants and their thresholds" +E-PRTR pollutants and their thresholds "" -"A facility has to report data under E-PRTR if it fulfils the following criteria:" -"• the facility falls under at least one of the 65 E-PRTR economic activities. The" -"activities are also reported using a statistical classification of economic activities" -"(NACE rev 2)" -"• the facility has a capacity exceeding at least one of the E-PRTR capacity" -"thresholds" -"• the facility releases pollutants or transfers waste off-site which exceed specific" -"thresholds set out in Article 5 of the E-PRTR Regulation. These thresholds for" +A facility has to report data under E-PRTR if it fulfils the following criteria: +• the facility falls under at least one of the 65 E-PRTR economic activities. The +activities are also reported using a statistical classification of economic activities +(NACE rev 2) +• the facility has a capacity exceeding at least one of the E-PRTR capacity +thresholds +• the facility releases pollutants or transfers waste off-site which exceed specific +thresholds set out in Article 5 of the E-PRTR Regulation. These thresholds for "releases of pollutants are specified for each media - air, water and land - in Annex" -"II of the E-PRTR Regulation." +II of the E-PRTR Regulation. 
"" -"In the following tables you will find the 91 E-PRTR pollutants and their thresholds broken" -"down by the 7 groups used in all the searches of the E-PRTR website." +In the following tables you will find the 91 E-PRTR pollutants and their thresholds broken +down by the 7 groups used in all the searches of the E-PRTR website. "" "" -"Greenhouse gases" +Greenhouse gases "" -"THRESHOLD FOR RELEASES" -"to air to water to land" -"kg/year kg/year kg/year" -"Carbon dioxide (CO2) 100 million - -" -"Hydro-fluorocarbons (HFCs) 100 - -" -"Methane (CH4) 100 000 - -" -"Nitrous oxide (N2O) 10 000 - -" -"Perfluorocarbons (PFCs) 100 - -" -"Sulphur hexafluoride (SF6) 50 - -" +THRESHOLD FOR RELEASES +to air to water to land +kg/year kg/year kg/year +Carbon dioxide (CO2) 100 million - - +Hydro-fluorocarbons (HFCs) 100 - - +Methane (CH4) 100 000 - - +Nitrous oxide (N2O) 10 000 - - +Perfluorocarbons (PFCs) 100 - - +Sulphur hexafluoride (SF6) 50 - - "" -"Other gases" +Other gases "" -"THRESHOLD FOR RELEASES" -"to air to water to land" -"kg/year kg/year kg/year" -"Ammonia (NH3) 10 000 - -" -"Carbon monoxide (CO) 500 000 - -" -"Chlorine and inorganic compounds" -"(as HCl)" -"10 000 - -" -"Chlorofluorocarbons (CFCs) 1 - -" -"Flourine and inorganic compounds" -"(as HF)" -"5 000 - -" -"Halons 1 - -" -"Hydrochlorofluorocarbons (HCFCs) 1 - -" -"Hydrogen Cyanide (HCN) 200 - -" -"Nitrogen oxides (NOx/NO2) 100 000 - -" -"Non-methane volatile organic" -"compounds (NMVOC)" -"100 000 - -" -"Sulphur oxides (SOx/SO2) 150 000 - -" +THRESHOLD FOR RELEASES +to air to water to land +kg/year kg/year kg/year +Ammonia (NH3) 10 000 - - +Carbon monoxide (CO) 500 000 - - +Chlorine and inorganic compounds +10 000 - - +(as HCl) +Chlorofluorocarbons (CFCs) 1 - - +Flourine and inorganic compounds +5 000 - - +(as HF) +Halons 1 - - +Hydrochlorofluorocarbons (HCFCs) 1 - - +Hydrogen Cyanide (HCN) 200 - - +Nitrogen oxides (NOx/NO2) 100 000 - - +Non-methane volatile organic +100 000 - - +compounds (NMVOC) +Sulphur 
oxides (SOx/SO2) 150 000 - - "" -"Heavy metals" +Heavy metals "" -"THRESHOLD FOR RELEASES" -"to air to water to land" -"kg/year kg/year kg/year" -"Arsenic and compounds (as As) 20 5 5" -"Cadmium and compounds (as Cd) 10 5 5" -"Chromium and compounds (as Cr) 100 50 50" -"Copper and compounds (as Cu) 100 50 50" -"Lead and compounds (as Pb) 200 20 20" -"Mercury and compounds (as Hg) 10 1 1" -"Nickel and compounds (as Ni) 50 20 20" -"Zinc and compounds (as Zn) 200 100 100" +THRESHOLD FOR RELEASES +to air to water to land +kg/year kg/year kg/year +Arsenic and compounds (as As) 20 5 5 +Cadmium and compounds (as Cd) 10 5 5 +Chromium and compounds (as Cr) 100 50 50 +Copper and compounds (as Cu) 100 50 50 +Lead and compounds (as Pb) 200 20 20 +Mercury and compounds (as Hg) 10 1 1 +Nickel and compounds (as Ni) 50 20 20 +Zinc and compounds (as Zn) 200 100 100 diff --git a/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_with_guessing.csv b/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_with_guessing.csv index 40a51d00..e683abd3 100644 --- a/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_with_guessing.csv +++ b/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_with_guessing.csv @@ -1,40 +1,40 @@ -"",,"THRESHOLD FOR RELEASES", -"","to air","to water","to land" -"","kg/year","kg/year","kg/year" -"Carbon dioxide (CO2)","100 million","-","-" -"Hydro-fluorocarbons (HFCs)","100","-","-" -"Methane (CH4)","100 000","-","-" -"Nitrous oxide (N2O)","10 000","-","-" -"Perfluorocarbons (PFCs)","100","-","-" -"Sulphur hexafluoride (SF6)","50","-","-" -"","THRESHOLD FOR RELEASES" -"","to air to water to land" -"","kg/year kg/year kg/year" -"Ammonia (NH3)","10 000 - -" -"Carbon monoxide (CO)","500 000 - -" -"Chlorine and inorganic compounds", -"(as HCl)", -"","10 000 - -" -"Chlorofluorocarbons (CFCs)","1 - -" -"Flourine and inorganic compounds", -"(as HF)", -"","5 000 - -" 
-"Halons","1 - -" -"Hydrochlorofluorocarbons (HCFCs)","1 - -" -"Hydrogen Cyanide (HCN)","200 - -" -"Nitrogen oxides (NOx/NO2)","100 000 - -" -"Non-methane volatile organic", -"compounds (NMVOC)", -"","100 000 - -" -"Sulphur oxides (SOx/SO2)","150 000 - -" -"","THRESHOLD FOR RELEASES" -"","to air to water to land" -"","kg/year kg/year kg/year" -"Arsenic and compounds (as As)","20 5 5" -"Cadmium and compounds (as Cd)","10 5 5" -"Chromium and compounds (as Cr)","100 50 50" -"Copper and compounds (as Cu)","100 50 50" -"Lead and compounds (as Pb)","200 20 20" -"Mercury and compounds (as Hg)","10 1 1" -"Nickel and compounds (as Ni)","50 20 20" -"Zinc and compounds (as Zn)","200 100 100" +"",,THRESHOLD FOR RELEASES, +"",to air,to water,to land +"",kg/year,kg/year,kg/year +Carbon dioxide (CO2),100 million,-,- +Hydro-fluorocarbons (HFCs),100,-,- +Methane (CH4),100 000,-,- +Nitrous oxide (N2O),10 000,-,- +Perfluorocarbons (PFCs),100,-,- +Sulphur hexafluoride (SF6),50,-,- +"",THRESHOLD FOR RELEASES +"",to air to water to land +"",kg/year kg/year kg/year +Ammonia (NH3),10 000 - - +Carbon monoxide (CO),500 000 - - +Chlorine and inorganic compounds, +"",10 000 - - +(as HCl), +Chlorofluorocarbons (CFCs),1 - - +Flourine and inorganic compounds, +"",5 000 - - +(as HF), +Halons,1 - - +Hydrochlorofluorocarbons (HCFCs),1 - - +Hydrogen Cyanide (HCN),200 - - +Nitrogen oxides (NOx/NO2),100 000 - - +Non-methane volatile organic, +"",100 000 - - +compounds (NMVOC), +Sulphur oxides (SOx/SO2),150 000 - - +"",THRESHOLD FOR RELEASES +"",to air to water to land +"",kg/year kg/year kg/year +Arsenic and compounds (as As),20 5 5 +Cadmium and compounds (as Cd),10 5 5 +Chromium and compounds (as Cr),100 50 50 +Copper and compounds (as Cu),100 50 50 +Lead and compounds (as Pb),200 20 20 +Mercury and compounds (as Hg),10 1 1 +Nickel and compounds (as Ni),50 20 20 +Zinc and compounds (as Zn),200 100 100 diff --git a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json 
b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json index 6b60f8a7..21ad2880 100644 --- a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json +++ b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json @@ -1 +1 @@ -{"extraction_method":"stream","top":269.875,"left":12.75,"width":548.25,"height":520.625,"right":561.0,"bottom":790.5,"data":[[{"top":279.87,"left":28.56,"width":175.21029663085938,"height":6.449999809265137,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":279.87,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":280.59,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":279.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":296.31,"left":28.56,"width":141.71029663085938,"height":6.449999809265137,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":296.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":297.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":296.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":312.87,"left":28.56,"width":83.75028228759766,"height":6.449999809265137,"text":"ALONSO, María Luz"},{"top":312.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":313.59,"left":397.56,"width":42.250274658203125,"height":6.449999809265137,"text":"La Pampa"},{"top":312.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":329.31,"left":28.56,"width":84.25028228759766,"height":6.449999809265137,"text":"ARENA, Celia Isabel"},{"top":329.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - 
PJ"},{"top":330.15,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":329.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":345.87,"left":28.56,"width":110.29029846191406,"height":6.449999809265137,"text":"ARREGUI, Andrés Roberto"},{"top":345.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":346.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":345.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":362.31,"left":28.56,"width":115.69029235839844,"height":6.449999809265137,"text":"AVOSCAN, Herman Horacio"},{"top":362.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":363.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":362.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":378.87,"left":28.56,"width":95.6902847290039,"height":6.449999809265137,"text":"BALCEDO, María Ester"},{"top":378.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":379.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":378.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":395.31,"left":28.56,"width":127.69029235839844,"height":6.449999809265137,"text":"BARRANDEGUY, Raúl Enrique"},{"top":395.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":396.15,"left":397.56,"width":43.6702880859375,"height":6.449999809265137,"text":"Entre 
Ríos"},{"top":395.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":411.87,"left":28.56,"width":106.69029235839844,"height":6.449999809265137,"text":"BASTERRA, Luis Eugenio"},{"top":411.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":412.59,"left":397.56,"width":36.73028564453125,"height":6.449999809265137,"text":"Formosa"},{"top":411.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":428.31,"left":28.56,"width":92.21028137207031,"height":6.449999809265137,"text":"BEDANO, Nora Esther"},{"top":428.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":429.15,"left":397.56,"width":35.6602783203125,"height":6.449999809265137,"text":"Córdoba"},{"top":428.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":444.87,"left":28.56,"width":100.69029235839844,"height":6.449999809265137,"text":"BERNAL, María Eugenia"},{"top":444.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":445.59,"left":397.56,"width":22.200286865234375,"height":6.449999809265137,"text":"Jujuy"},{"top":444.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":461.31,"left":28.56,"width":112.21029663085938,"height":6.449999809265137,"text":"BERTONE, Rosana Andrea"},{"top":461.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":462.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra del Fuego"},{"top":461.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":477.87,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"BIANCHI, María del 
Carmen"},{"top":477.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":478.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. As."},{"top":477.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":494.31,"left":28.56,"width":115.19029235839844,"height":6.449999809265137,"text":"BIDEGAIN, Gloria Mercedes"},{"top":494.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":495.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":494.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":510.87,"left":28.56,"width":66.25028228759766,"height":6.449999809265137,"text":"BRAWER, Mara"},{"top":510.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":511.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. 
As."},{"top":510.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":527.31,"left":28.56,"width":90.73028564453125,"height":6.449999809265137,"text":"BRILLO, José Ricardo"},{"top":527.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular Neuquino"},{"top":528.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":527.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":543.87,"left":28.56,"width":120.73028564453125,"height":6.449999809265137,"text":"BROMBERG, Isaac Benjamín"},{"top":543.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":544.59,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":543.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":560.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"BRUE, Daniel Agustín"},{"top":560.31,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":561.15,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":560.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":576.87,"left":28.56,"width":72.23028564453125,"height":6.449999809265137,"text":"CALCAGNO, Eric"},{"top":576.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":577.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":576.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":593.31,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"CARLOTTO, Remo 
Gerardo"},{"top":593.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":594.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":593.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":609.87,"left":28.56,"width":122.1702880859375,"height":6.449999809265137,"text":"CARMONA, Guillermo Ramón"},{"top":609.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":610.59,"left":397.56,"width":38.1602783203125,"height":6.449999809265137,"text":"Mendoza"},{"top":609.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":626.31,"left":28.56,"width":124.73028564453125,"height":6.449999809265137,"text":"CATALAN MAGNI, Julio César"},{"top":626.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":627.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra del Fuego"},{"top":626.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":642.87,"left":28.56,"width":88.6902847290039,"height":6.449999809265137,"text":"CEJAS, Jorge Alberto"},{"top":642.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":643.59,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":642.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":659.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"CHIENO, María Elena"},{"top":659.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - 
PJ"},{"top":660.15,"left":397.56,"width":42.72027587890625,"height":6.449999809265137,"text":"Corrientes"},{"top":659.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":675.87,"left":28.56,"width":96.25028228759766,"height":6.449999809265137,"text":"CIAMPINI, José Alberto"},{"top":675.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":676.59,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":675.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":692.31,"left":28.56,"width":131.77029418945312,"height":6.449999809265137,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":692.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":693.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":692.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":708.87,"left":28.56,"width":62.15028381347656,"height":6.449999809265137,"text":"CLERI, Marcos"},{"top":708.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":709.59,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":708.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":725.31,"left":28.56,"width":101.77029418945312,"height":6.449999809265137,"text":"COMELLI, Alicia Marcela"},{"top":725.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular 
Neuquino"},{"top":726.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":725.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":741.87,"left":28.56,"width":88.1902847290039,"height":6.449999809265137,"text":"CONTI, Diana Beatriz"},{"top":741.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":742.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":741.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":758.31,"left":28.56,"width":98.75028228759766,"height":6.449999809265137,"text":"CORDOBA, Stella Maris"},{"top":758.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":759.15,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":758.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":774.87,"left":28.56,"width":105.25028991699219,"height":6.449999809265137,"text":"CURRILEN, Oscar Rubén"},{"top":774.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":775.59,"left":397.56,"width":30.2802734375,"height":6.449999809265137,"text":"Chubut"},{"top":774.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}]]} +{"extraction_method":"stream","top":269.875,"left":12.75,"width":548.25,"height":520.625,"right":561.0,"bottom":790.5,"data":[[{"top":281.82,"left":28.56,"width":175.21029663085938,"height":4.5,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":281.82,"left":213.72,"width":108.25027465820312,"height":4.5,"text":"Frente Cívico por Santiago"},{"top":282.54,"left":397.56,"width":79.69027709960938,"height":4.5,"text":"Santiago del 
Estero"},{"top":281.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":298.26,"left":28.56,"width":141.71029663085938,"height":4.5,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":298.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":299.1,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio Negro"},{"top":298.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":314.82,"left":28.56,"width":83.75028228759766,"height":4.5,"text":"ALONSO, María Luz"},{"top":314.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":315.54,"left":397.56,"width":42.250274658203125,"height":4.5,"text":"La Pampa"},{"top":314.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":331.26,"left":28.56,"width":84.25028228759766,"height":4.5,"text":"ARENA, Celia Isabel"},{"top":331.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":332.1,"left":397.56,"width":37.690277099609375,"height":4.5,"text":"Santa Fe"},{"top":331.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":347.82,"left":28.56,"width":110.29029846191406,"height":4.5,"text":"ARREGUI, Andrés Roberto"},{"top":347.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":348.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":347.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":364.26,"left":28.56,"width":115.69029235839844,"height":4.5,"text":"AVOSCAN, Herman Horacio"},{"top":364.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":365.1,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio 
Negro"},{"top":364.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":380.82,"left":28.56,"width":95.6902847290039,"height":4.5,"text":"BALCEDO, María Ester"},{"top":380.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":381.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":380.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":397.26,"left":28.56,"width":127.69029235839844,"height":4.5,"text":"BARRANDEGUY, Raúl Enrique"},{"top":397.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":398.1,"left":397.56,"width":43.6702880859375,"height":4.5,"text":"Entre Ríos"},{"top":397.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":413.82,"left":28.56,"width":106.69029235839844,"height":4.5,"text":"BASTERRA, Luis Eugenio"},{"top":413.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":414.54,"left":397.56,"width":36.73028564453125,"height":4.5,"text":"Formosa"},{"top":413.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":430.26,"left":28.56,"width":92.21028137207031,"height":4.5,"text":"BEDANO, Nora Esther"},{"top":430.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":431.1,"left":397.56,"width":35.6602783203125,"height":4.5,"text":"Córdoba"},{"top":430.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":446.82,"left":28.56,"width":100.69029235839844,"height":4.5,"text":"BERNAL, María Eugenia"},{"top":446.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - 
PJ"},{"top":447.54,"left":397.56,"width":22.200286865234375,"height":4.5,"text":"Jujuy"},{"top":446.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":463.26,"left":28.56,"width":112.21029663085938,"height":4.5,"text":"BERTONE, Rosana Andrea"},{"top":463.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":464.1,"left":397.56,"width":67.21026611328125,"height":4.5,"text":"Tierra del Fuego"},{"top":463.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":479.82,"left":28.56,"width":114.73028564453125,"height":4.5,"text":"BIANCHI, María del Carmen"},{"top":479.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":480.54,"left":397.56,"width":74.6702880859375,"height":4.5,"text":"Cdad. Aut. Bs. As."},{"top":479.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":496.26,"left":28.56,"width":115.19029235839844,"height":4.5,"text":"BIDEGAIN, Gloria Mercedes"},{"top":496.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":497.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":496.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":512.82,"left":28.56,"width":66.25028228759766,"height":4.5,"text":"BRAWER, Mara"},{"top":512.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":513.54,"left":397.56,"width":74.6702880859375,"height":4.5,"text":"Cdad. Aut. Bs. 
As."},{"top":512.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":529.26,"left":28.56,"width":90.73028564453125,"height":4.5,"text":"BRILLO, José Ricardo"},{"top":529.26,"left":213.72,"width":121.81027221679688,"height":4.5,"text":"Movimiento Popular Neuquino"},{"top":530.1,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":529.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":545.82,"left":28.56,"width":120.73028564453125,"height":4.5,"text":"BROMBERG, Isaac Benjamín"},{"top":545.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":546.54,"left":397.56,"width":38.6602783203125,"height":4.5,"text":"Tucumán"},{"top":545.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":562.26,"left":28.56,"width":89.7702865600586,"height":4.5,"text":"BRUE, Daniel Agustín"},{"top":562.26,"left":213.72,"width":108.25027465820312,"height":4.5,"text":"Frente Cívico por Santiago"},{"top":563.1,"left":397.56,"width":79.69027709960938,"height":4.5,"text":"Santiago del Estero"},{"top":562.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":578.82,"left":28.56,"width":72.23028564453125,"height":4.5,"text":"CALCAGNO, Eric"},{"top":578.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":579.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":578.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":595.26,"left":28.56,"width":114.73028564453125,"height":4.5,"text":"CARLOTTO, Remo Gerardo"},{"top":595.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":596.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos 
Aires"},{"top":595.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":611.82,"left":28.56,"width":122.1702880859375,"height":4.5,"text":"CARMONA, Guillermo Ramón"},{"top":611.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":612.54,"left":397.56,"width":38.1602783203125,"height":4.5,"text":"Mendoza"},{"top":611.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":628.26,"left":28.56,"width":124.73028564453125,"height":4.5,"text":"CATALAN MAGNI, Julio César"},{"top":628.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":629.1,"left":397.56,"width":67.21026611328125,"height":4.5,"text":"Tierra del Fuego"},{"top":628.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":644.82,"left":28.56,"width":88.6902847290039,"height":4.5,"text":"CEJAS, Jorge Alberto"},{"top":644.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":645.54,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio Negro"},{"top":644.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":661.26,"left":28.56,"width":89.7702865600586,"height":4.5,"text":"CHIENO, María Elena"},{"top":661.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":662.1,"left":397.56,"width":42.72027587890625,"height":4.5,"text":"Corrientes"},{"top":661.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":677.82,"left":28.56,"width":96.25028228759766,"height":4.5,"text":"CIAMPINI, José Alberto"},{"top":677.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - 
PJ"},{"top":678.54,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":677.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":694.26,"left":28.56,"width":131.77029418945312,"height":4.5,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":694.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":695.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":694.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":710.82,"left":28.56,"width":62.15028381347656,"height":4.5,"text":"CLERI, Marcos"},{"top":710.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":711.54,"left":397.56,"width":37.690277099609375,"height":4.5,"text":"Santa Fe"},{"top":710.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":727.26,"left":28.56,"width":101.77029418945312,"height":4.5,"text":"COMELLI, Alicia Marcela"},{"top":727.26,"left":213.72,"width":121.81027221679688,"height":4.5,"text":"Movimiento Popular Neuquino"},{"top":728.1,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":727.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":743.82,"left":28.56,"width":88.1902847290039,"height":4.5,"text":"CONTI, Diana Beatriz"},{"top":743.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":744.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":743.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":760.26,"left":28.56,"width":98.75028228759766,"height":4.5,"text":"CORDOBA, Stella Maris"},{"top":760.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - 
PJ"},{"top":761.1,"left":397.56,"width":38.6602783203125,"height":4.5,"text":"Tucumán"},{"top":760.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":776.82,"left":28.56,"width":105.25028991699219,"height":4.5,"text":"CURRILEN, Oscar Rubén"},{"top":776.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":777.54,"left":397.56,"width":30.2802734375,"height":4.5,"text":"Chubut"},{"top":776.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}]]} \ No newline at end of file From 9cf6726b761040ff0e91f79c5e650e74e09194de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Mon, 24 Jun 2019 18:12:22 -0300 Subject: [PATCH 069/200] upgrade to PDFBox 2.0.15 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index aa6314ee..5b4b3f9f 100644 --- a/pom.xml +++ b/pom.xml @@ -237,7 +237,7 @@ org.apache.pdfbox pdfbox - 2.0.12 + 2.0.15 From c28fcec8d804d8cb7c1eecdadae1db27453cfc4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Mon, 24 Jun 2019 19:23:27 -0300 Subject: [PATCH 070/200] prepare for next release --- pom.xml | 2 +- src/main/java/technology/tabula/CommandLineApp.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 5b4b3f9f..2c478bd8 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 technology.tabula tabula - 1.0.3 + 1.0.4-SNAPSHOT Tabula Extract tables from PDF files http://github.com/tabulapdf/tabula-java diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 0de4e4a5..e23d90cd 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -32,7 +32,7 @@ public class CommandLineApp { - private static String VERSION = "1.0.2"; + private static String VERSION = "1.0.4"; private static String 
VERSION_STRING = String.format("tabula %s (c) 2012-2018 Manuel Aristarán", VERSION); private static String BANNER = "\nTabula helps you extract tables from PDFs\n\n"; From 78eff7eefb3de2e3a4e353378d3269009b9ba8e9 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Tue, 4 Jun 2019 15:21:57 +0000 Subject: [PATCH 071/200] Bump jts-core from 1.15.0 to 1.16.1 Bumps jts-core from 1.15.0 to 1.16.1. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 2c478bd8..ca3f6471 100644 --- a/pom.xml +++ b/pom.xml @@ -219,7 +219,7 @@ org.locationtech.jts jts-core - 1.15.0 + 1.16.1 From acee4c387b3c731516d251d112fc0ebe47161408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Mon, 24 Jun 2019 19:38:34 -0300 Subject: [PATCH 072/200] target java 1.8 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index ca3f6471..b3685394 100644 --- a/pom.xml +++ b/pom.xml @@ -126,8 +126,8 @@ maven-compiler-plugin 3.8.1 - 1.7 - 1.7 + 1.8 + 1.8 From 21b124660a90127d2867a48db04d6412d9c4f438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Mon, 24 Jun 2019 19:42:15 -0300 Subject: [PATCH 073/200] add jdk11 to travis --- .travis.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7a69aedf..a17315e7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,5 @@ jdk: - openjdk8 - openjdk9 - openjdk10 + - openjdk11 sudo: false - - - From c688dc7045ffedcf4827d8a3729332807d7c0f8e Mon Sep 17 00:00:00 2001 From: "Jeremy B. 
Merrill" Date: Sun, 28 Jul 2019 13:45:41 -0400 Subject: [PATCH 074/200] clarify -a command's coordinate order --- src/main/java/technology/tabula/CommandLineApp.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index e23d90cd..a4cace6f 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -335,7 +335,8 @@ public static Options buildOptions() { .build()); o.addOption(Option.builder("a") .longOpt("area") - .desc("-a/--area = Portion of the page to analyze. Accepts top,left,bottom,right . Example: --area 269.875,12.75,790.5,561. " + .desc("-a/--area = Portion of the page to analyze. Example: --area 269.875,12.75,790.5,561. " + + "Accepts top,left,bottom,right i.e. y1,x1,y2,x2 " + "If all values are between 0-100 (inclusive) and preceded by '%', input will be taken as % of actual height or width of the page. " + "Example: --area %0,0,100,50. To specify multiple areas, -a option should be repeated. Default is entire page") .hasArg() From 9960775528f6ff09dcb41830ea48eb89a73f3b49 Mon Sep 17 00:00:00 2001 From: "Jeremy B. Merrill" Date: Sun, 28 Jul 2019 13:46:50 -0400 Subject: [PATCH 075/200] copy over clarified -a flag's coordinate order --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index a13ea913..6b8a52c1 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,9 @@ Tabula helps you extract tables from PDFs -a,--area Portion of the page to analyze. Accepts top, left,bottom,right. + Portion of the page to analyze. Example: --area 269.875,12.75,790.5,561. + Accepts top,left,bottom,right i.e. y1,x1,y2,x2 If all values are between 0-100 (inclusive) and preceded by '%', input will be taken as % of actual height or width of the page. 
From 8eb34f9ddc37b509e58b66c9da4a0d89a15ef0d9 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" Date: Thu, 6 Jun 2019 12:01:53 +0000 Subject: [PATCH 076/200] Bump nexus-staging-maven-plugin from 1.6.3 to 1.6.8 Bumps nexus-staging-maven-plugin from 1.6.3 to 1.6.8. Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b3685394..43f4f4d7 100644 --- a/pom.xml +++ b/pom.xml @@ -73,7 +73,7 @@ org.sonatype.plugins nexus-staging-maven-plugin - 1.6.3 + 1.6.8 true ossrh From f4595f1f9504be0ca6b962ee57f96eaffd589231 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 8 Jul 2019 06:11:03 +0000 Subject: [PATCH 077/200] Bump maven-javadoc-plugin from 2.10.3 to 3.1.1 Bumps [maven-javadoc-plugin](https://github.com/apache/maven-javadoc-plugin) from 2.10.3 to 3.1.1. - [Release notes](https://github.com/apache/maven-javadoc-plugin/releases) - [Commits](https://github.com/apache/maven-javadoc-plugin/compare/maven-javadoc-plugin-2.10.3...maven-javadoc-plugin-3.1.1) Signed-off-by: dependabot-preview[bot] --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 43f4f4d7..e5b9805d 100644 --- a/pom.xml +++ b/pom.xml @@ -60,7 +60,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 2.10.3 + 3.1.1 true @@ -98,7 +98,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 2.9.1 + 3.1.1 attach-javadocs @@ -173,7 +173,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 2.9.1 + 3.1.1 attach-javadocs From 462bdaa219d0688b9d0b500a97a9884e9511a724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 29 Oct 2019 11:33:20 -0300 Subject: [PATCH 078/200] remove OracleJDK from Travis --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index a17315e7..7397abbf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,8 +2,6 @@ 
language: java install: mvn install -DskipTests=true -Dmaven.javadoc.skip=true -Dgpg.skip=true -B -V script: mvn test -Dgpg.skip=true jdk: - - oraclejdk8 - - oraclejdk9 - openjdk8 - openjdk9 - openjdk10 From e00a74c73444ac3563dbfb09ffa4d3e0ddc1decf Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 4 Nov 2019 11:55:40 +0000 Subject: [PATCH 079/200] Bump slf4j-simple from 1.7.26 to 1.7.29 Bumps [slf4j-simple](https://github.com/qos-ch/slf4j) from 1.7.26 to 1.7.29. - [Release notes](https://github.com/qos-ch/slf4j/releases) - [Commits](https://github.com/qos-ch/slf4j/commits) Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e5b9805d..71143426 100644 --- a/pom.xml +++ b/pom.xml @@ -231,7 +231,7 @@ org.slf4j slf4j-simple - 1.7.26 + 1.7.29 From c94fe95a8d0b3f3a3ccc4ce57399089b4d453039 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 4 Nov 2019 11:53:56 +0000 Subject: [PATCH 080/200] Bump slf4j-api from 1.7.26 to 1.7.29 Bumps [slf4j-api](https://github.com/qos-ch/slf4j) from 1.7.26 to 1.7.29. 
- [Release notes](https://github.com/qos-ch/slf4j/releases) - [Commits](https://github.com/qos-ch/slf4j/commits) Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 71143426..082d6df7 100644 --- a/pom.xml +++ b/pom.xml @@ -225,7 +225,7 @@ org.slf4j slf4j-api - 1.7.26 + 1.7.29 From dc3bc6700452b5a6f79f60a0927ffa779ab76626 Mon Sep 17 00:00:00 2001 From: Harry Biddle Date: Thu, 20 Feb 2020 14:41:37 +0100 Subject: [PATCH 081/200] Extra information on coordinate system in command-line help text --- README.md | 26 +++++++++---------- .../technology/tabula/CommandLineApp.java | 17 ++++++------ 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 6b8a52c1..76e25663 100644 --- a/README.md +++ b/README.md @@ -23,20 +23,20 @@ usage: tabula [-a ] [-b ] [-c ] [-d] [-f Tabula helps you extract tables from PDFs - -a,--area Portion of the page to analyze. Accepts top, - left,bottom,right. - Portion of the page to analyze. - Example: --area 269.875,12.75,790.5,561. - Accepts top,left,bottom,right i.e. y1,x1,y2,x2 - If all values are between 0-100 (inclusive) - and preceded by '%', input will be taken as - % of actual height or width of the page. - Example: --area %0,0,100,50. - To specify multiple areas, -a option should - be repeated. Default is entire page + -a,--area Portion of the page to analyze. Example: --area + 269.875,12.75,790.5,561. Accepts + top,left,bottom,right i.e. y1,x1,y2,x2 where all + values are in points relative to the top left + corner. If all values are between 0-100 + (inclusive) and preceded by '%', input will be + taken as % of actual height or width of the page. + Example: --area %0,0,100,50. To specify multiple + areas, -a option should be repeated. Default is + entire page -b,--batch Convert all .pdfs in the provided directory. - -c,--columns X coordinates of column boundaries. 
Example - --columns 10.1,20.2,30.3 + -c,--columns X coordinates of column boundaries where values + are in points and relative to the left of the + page. Example --columns 10.1,20.2,30.3 -d,--debug Print detected table areas instead of processing. -f,--format Output format: (CSV,TSV,JSON). Default: CSV diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index a4cace6f..ec16db6f 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -35,13 +35,13 @@ public class CommandLineApp { private static String VERSION = "1.0.4"; private static String VERSION_STRING = String.format("tabula %s (c) 2012-2018 Manuel Aristarán", VERSION); private static String BANNER = "\nTabula helps you extract tables from PDFs\n\n"; - + private static final int RELATIVE_AREA_CALCULATION_MODE = 0; private static final int ABSOLUTE_AREA_CALCULATION_MODE = 1; private Appendable defaultOutput; - + private List> pageAreas; private List pages; private OutputFormat outputFormat; @@ -167,10 +167,10 @@ private void extractFile(File pdfFile, Appendable outFile) throws ParseException if (pageAreas != null) { for (Pair areaPair : pageAreas) { Rectangle area = areaPair.getRight(); - if (areaPair.getLeft() == RELATIVE_AREA_CALCULATION_MODE) { + if (areaPair.getLeft() == RELATIVE_AREA_CALCULATION_MODE) { area = new Rectangle((float) (area.getTop() / 100 * page.getHeight()), (float) (area.getLeft() / 100 * page.getWidth()), (float) (area.getWidth() / 100 * page.getWidth()), - (float) (area.getHeight() / 100 * page.getHeight())); + (float) (area.getHeight() / 100 * page.getHeight())); } tables.addAll(tableExtractor.extractTables(page.getArea(area))); } @@ -220,10 +220,10 @@ private static List> whichAreas(CommandLine line) throw if (!line.hasOption('a')) { return null; } - + String[] optionValues = line.getOptionValues('a'); - List> areaList = new ArrayList>(); + List> 
areaList = new ArrayList>(); for (String optionValue: optionValues) { int areaCalculationMode = ABSOLUTE_AREA_CALCULATION_MODE; int startIndex = 0; @@ -329,14 +329,15 @@ public static Options buildOptions() { .build()); o.addOption(Option.builder("c") .longOpt("columns") - .desc("X coordinates of column boundaries. Example --columns 10.1,20.2,30.3") + .desc("X coordinates of column boundaries where values are in points and relative to the left of the page. " + + "Example --columns 10.1,20.2,30.3") .hasArg() .argName("COLUMNS") .build()); o.addOption(Option.builder("a") .longOpt("area") .desc("-a/--area = Portion of the page to analyze. Example: --area 269.875,12.75,790.5,561. " - + "Accepts top,left,bottom,right i.e. y1,x1,y2,x2 " + + "Accepts top,left,bottom,right i.e. y1,x1,y2,x2 where all values are in points relative to the top left corner. " + "If all values are between 0-100 (inclusive) and preceded by '%', input will be taken as % of actual height or width of the page. " + "Example: --area %0,0,100,50. To specify multiple areas, -a option should be repeated. 
Default is entire page") .hasArg() From e8e0cfccc8ae97008d9e86c34ca11d32ad819caf Mon Sep 17 00:00:00 2001 From: Nat Date: Sat, 28 Dec 2019 23:04:30 -0600 Subject: [PATCH 082/200] Fix excessive memory usage issue with large (many pages) PDFs --- .../tabula/detectors/NurminenDetectionAlgorithm.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java index 8f155ae3..fe30ae8d 100644 --- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java @@ -117,6 +117,7 @@ public List detect(Page page) { PDDocument removeTextDocument = null; try { removeTextDocument = this.removeText(pdfPage); + pdfPage = removeTextDocument.getPage(0); image = Utils.pageConvertToImage(pdfPage, 144, ImageType.GRAY); } catch (Exception e) { return new ArrayList<>(); @@ -856,16 +857,15 @@ private PDDocument removeText(PDPage page) throws IOException { } PDDocument document = new PDDocument(); - document.addPage(page); + PDPage newPage = document.importPage(page); + newPage.setResources(page.getResources()); PDStream newContents = new PDStream(document); OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE); ContentStreamWriter writer = new ContentStreamWriter(out); writer.writeTokens(newTokens); out.close(); - page.setContents(newContents); - + newPage.setContents(newContents); return document; - } } From 0d8228168b36b224020e546734b903670874d3b3 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Fri, 7 Feb 2020 05:44:27 +0000 Subject: [PATCH 083/200] Bump commons-csv from 1.7 to 1.8 Bumps commons-csv from 1.7 to 1.8. 
Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 082d6df7..46a27c58 100644 --- a/pom.xml +++ b/pom.xml @@ -268,7 +268,7 @@ org.apache.commons commons-csv - 1.7 + 1.8 From 97b6f204110b5393ab40aa993b8bf393e0041a80 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2019 05:40:58 +0000 Subject: [PATCH 084/200] Bump slf4j-simple from 1.7.29 to 1.7.30 Bumps [slf4j-simple](https://github.com/qos-ch/slf4j) from 1.7.29 to 1.7.30. - [Release notes](https://github.com/qos-ch/slf4j/releases) - [Commits](https://github.com/qos-ch/slf4j/compare/v_1.7.29...v_1.7.30) Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 46a27c58..d155a797 100644 --- a/pom.xml +++ b/pom.xml @@ -231,7 +231,7 @@ org.slf4j slf4j-simple - 1.7.29 + 1.7.30 From 7ea31a051c9c5a2697668a26fb5fdb0c23c6b962 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2019 05:41:22 +0000 Subject: [PATCH 085/200] Bump slf4j-api from 1.7.29 to 1.7.30 Bumps [slf4j-api](https://github.com/qos-ch/slf4j) from 1.7.29 to 1.7.30. 
- [Release notes](https://github.com/qos-ch/slf4j/releases) - [Commits](https://github.com/qos-ch/slf4j/compare/v_1.7.29...v_1.7.30) Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d155a797..b42765cf 100644 --- a/pom.xml +++ b/pom.xml @@ -225,7 +225,7 @@ org.slf4j slf4j-api - 1.7.29 + 1.7.30 From 708d753c36529e15695080fc8cf0a865c694ac38 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Wed, 18 Dec 2019 05:43:25 +0000 Subject: [PATCH 086/200] Bump jbig2-imageio from 3.0.2 to 3.0.3 Bumps jbig2-imageio from 3.0.2 to 3.0.3. Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b42765cf..776b49d1 100644 --- a/pom.xml +++ b/pom.xml @@ -292,7 +292,7 @@ org.apache.pdfbox jbig2-imageio - 3.0.2 + 3.0.3 From 7742439e2f7c5a9e3bcf99b57e7998af9555b8b7 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2020 06:12:36 +0000 Subject: [PATCH 087/200] Bump bcmail-jdk15on from 1.62 to 1.65 Bumps [bcmail-jdk15on](https://github.com/bcgit/bc-java) from 1.62 to 1.65. 
- [Release notes](https://github.com/bcgit/bc-java/releases) - [Changelog](https://github.com/bcgit/bc-java/blob/master/docs/releasenotes.html) - [Commits](https://github.com/bcgit/bc-java/commits) Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 776b49d1..876f45c2 100644 --- a/pom.xml +++ b/pom.xml @@ -249,7 +249,7 @@ org.bouncycastle bcmail-jdk15on - 1.62 + 1.65 From b341dfe8323abb9de6826bc0683e950af510e5a0 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2020 06:12:15 +0000 Subject: [PATCH 088/200] Bump bcprov-jdk15on from 1.62 to 1.65 Bumps [bcprov-jdk15on](https://github.com/bcgit/bc-java) from 1.62 to 1.65. - [Release notes](https://github.com/bcgit/bc-java/releases) - [Changelog](https://github.com/bcgit/bc-java/blob/master/docs/releasenotes.html) - [Commits](https://github.com/bcgit/bc-java/commits) Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 876f45c2..4cbaa1d4 100644 --- a/pom.xml +++ b/pom.xml @@ -243,7 +243,7 @@ org.bouncycastle bcprov-jdk15on - 1.62 + 1.65 From e35816791c3a12175882acb4fb3e1add39614411 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 16 Mar 2020 05:56:26 +0000 Subject: [PATCH 089/200] Bump maven-javadoc-plugin from 3.1.1 to 3.2.0 Bumps [maven-javadoc-plugin](https://github.com/apache/maven-javadoc-plugin) from 3.1.1 to 3.2.0. 
- [Release notes](https://github.com/apache/maven-javadoc-plugin/releases) - [Commits](https://github.com/apache/maven-javadoc-plugin/compare/maven-javadoc-plugin-3.1.1...maven-javadoc-plugin-3.2.0) Signed-off-by: dependabot-preview[bot] --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 4cbaa1d4..033cdeaa 100644 --- a/pom.xml +++ b/pom.xml @@ -60,7 +60,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.1.1 + 3.2.0 true @@ -98,7 +98,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.1.1 + 3.2.0 attach-javadocs @@ -173,7 +173,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.1.1 + 3.2.0 attach-javadocs From ce25679c0cbf20d790eaf908c8c0ef7332cc5ad6 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 2 Jan 2020 05:42:32 +0000 Subject: [PATCH 090/200] Bump junit from 4.12 to 4.13 Bumps [junit](https://github.com/junit-team/junit4) from 4.12 to 4.13. - [Release notes](https://github.com/junit-team/junit4/releases) - [Changelog](https://github.com/junit-team/junit4/blob/master/doc/ReleaseNotes4.12.md) - [Commits](https://github.com/junit-team/junit4/compare/r4.12...r4.13) Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 033cdeaa..3d4bd826 100644 --- a/pom.xml +++ b/pom.xml @@ -255,7 +255,7 @@ junit junit - 4.12 + 4.13 test From 62780644614802e8185b36982b494a220798aef6 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 23 Dec 2019 05:51:03 +0000 Subject: [PATCH 091/200] Bump maven-source-plugin from 3.1.0 to 3.2.1 Bumps [maven-source-plugin](https://github.com/apache/maven-source-plugin) from 3.1.0 to 3.2.1. 
- [Release notes](https://github.com/apache/maven-source-plugin/releases) - [Commits](https://github.com/apache/maven-source-plugin/compare/maven-source-plugin-3.1.0...maven-source-plugin-3.2.1) Signed-off-by: dependabot-preview[bot] --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 3d4bd826..6b0adb7c 100644 --- a/pom.xml +++ b/pom.xml @@ -85,7 +85,7 @@ org.apache.maven.plugins maven-source-plugin - 3.1.0 + 3.2.1 attach-sources @@ -186,7 +186,7 @@ org.apache.maven.plugins maven-source-plugin - 3.1.0 + 3.2.1 attach-sources From eec86f517e9782c7534283cfde9cf0e4bad4bf1f Mon Sep 17 00:00:00 2001 From: Tilman Hausherr Date: Sat, 18 Apr 2020 17:56:40 +0200 Subject: [PATCH 092/200] format with Locale.US (#355) --- src/main/java/technology/tabula/QuickSort.java | 2 +- src/main/java/technology/tabula/Rectangle.java | 5 +++-- src/main/java/technology/tabula/Ruling.java | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/technology/tabula/QuickSort.java b/src/main/java/technology/tabula/QuickSort.java index 34757ca3..03388a15 100644 --- a/src/main/java/technology/tabula/QuickSort.java +++ b/src/main/java/technology/tabula/QuickSort.java @@ -25,7 +25,7 @@ /** * An implementation of Quicksort. * - * @see http://de.wikipedia.org/wiki/Quicksort + * @see wikipedia * * @author UWe Pachler */ diff --git a/src/main/java/technology/tabula/Rectangle.java b/src/main/java/technology/tabula/Rectangle.java index e4522a0a..b96fcd77 100644 --- a/src/main/java/technology/tabula/Rectangle.java +++ b/src/main/java/technology/tabula/Rectangle.java @@ -4,6 +4,7 @@ import java.awt.geom.Rectangle2D; import java.util.Comparator; import java.util.List; +import java.util.Locale; @SuppressWarnings("serial") public class Rectangle extends Rectangle2D.Float { @@ -11,7 +12,7 @@ public class Rectangle extends Rectangle2D.Float { /** * Ill-defined comparator, from when Rectangle was Comparable. 
* - * @see https://github.com/tabulapdf/tabula-java/issues/116 + * @see PR 116 * @deprecated with no replacement */ @Deprecated @@ -151,7 +152,7 @@ public String toString() { StringBuilder sb = new StringBuilder(); String s = super.toString(); sb.append(s.substring(0, s.length() - 1)); - sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight())); + sb.append(String.format(Locale.US, ",bottom=%f,right=%f]", this.getBottom(), this.getRight())); return sb.toString(); } diff --git a/src/main/java/technology/tabula/Ruling.java b/src/main/java/technology/tabula/Ruling.java index 2ad33aaa..cdab6c4f 100644 --- a/src/main/java/technology/tabula/Ruling.java +++ b/src/main/java/technology/tabula/Ruling.java @@ -8,6 +8,7 @@ import java.util.Comparator; import java.util.Formatter; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.TreeMap; @@ -291,7 +292,7 @@ public double getAngle() { public String toString() { StringBuilder sb = new StringBuilder(); Formatter formatter = new Formatter(sb); - String rv = formatter.format("%s[x1=%f y1=%f x2=%f y2=%f]", this.getClass().toString(), this.x1, this.y1, this.x2, this.y2).toString(); + String rv = formatter.format(Locale.US, "%s[x1=%f y1=%f x2=%f y2=%f]", this.getClass().toString(), this.x1, this.y1, this.x2, this.y2).toString(); formatter.close(); return rv; } From 227d92fddd5320e07cb11b0195acba0052b61dd8 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Wed, 8 Jul 2020 22:50:32 -0300 Subject: [PATCH 093/200] Bump jts-core from 1.16.1 to 1.17.0 (#363) Bumps jts-core from 1.16.1 to 1.17.0. 
Signed-off-by: dependabot-preview[bot] Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6b0adb7c..123b69d2 100644 --- a/pom.xml +++ b/pom.xml @@ -219,7 +219,7 @@ org.locationtech.jts jts-core - 1.16.1 + 1.17.0 From 92c330e5f4855c18a4e84cc23f1041fb5369578a Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Wed, 8 Jul 2020 22:51:53 -0300 Subject: [PATCH 094/200] Bump bcprov-jdk15on from 1.65 to 1.66 (#364) Bumps [bcprov-jdk15on](https://github.com/bcgit/bc-java) from 1.65 to 1.66. - [Release notes](https://github.com/bcgit/bc-java/releases) - [Changelog](https://github.com/bcgit/bc-java/blob/master/docs/releasenotes.html) - [Commits](https://github.com/bcgit/bc-java/commits) Signed-off-by: dependabot-preview[bot] Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 123b69d2..0f4787b4 100644 --- a/pom.xml +++ b/pom.xml @@ -243,7 +243,7 @@ org.bouncycastle bcprov-jdk15on - 1.65 + 1.66 From a6421332832835626c0cda16ecda72471812e0f2 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Wed, 8 Jul 2020 22:52:08 -0300 Subject: [PATCH 095/200] Bump bcmail-jdk15on from 1.65 to 1.66 (#365) Bumps [bcmail-jdk15on](https://github.com/bcgit/bc-java) from 1.65 to 1.66. 
- [Release notes](https://github.com/bcgit/bc-java/releases) - [Changelog](https://github.com/bcgit/bc-java/blob/master/docs/releasenotes.html) - [Commits](https://github.com/bcgit/bc-java/commits) Signed-off-by: dependabot-preview[bot] Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0f4787b4..366b6bce 100644 --- a/pom.xml +++ b/pom.xml @@ -249,7 +249,7 @@ org.bouncycastle bcmail-jdk15on - 1.65 + 1.66 From 5bb1a15bbdf9378c46cbcaa7843e857966708833 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Fri, 10 Jul 2020 16:25:46 -0300 Subject: [PATCH 096/200] Upgrade PDFbox to 2.0.21 - Closes #325 --- pom.xml | 15 +- .../technology/tabula/ObjectExtractor.java | 2 +- src/main/java/technology/tabula/Page.java | 438 +++++++++--------- .../java/technology/tabula/TextStripper.java | 54 ++- src/main/java/technology/tabula/Utils.java | 5 + .../java/technology/tabula/debug/Debug.java | 2 +- .../detectors/NurminenDetectionAlgorithm.java | 4 +- .../tabula/TestProjectionProfile.java | 4 +- .../java/technology/tabula/TestUtils.java | 2 +- 9 files changed, 302 insertions(+), 224 deletions(-) diff --git a/pom.xml b/pom.xml index 366b6bce..e327d1af 100644 --- a/pom.xml +++ b/pom.xml @@ -32,6 +32,19 @@ + + + snapshots + https://repository.apache.org/content/repositories/snapshots/ + + false + + + true + + + + scm:git:git@github.com:tabulapdf/tabula-java.git scm:git:git@github.com:tabulapdf/tabula-java.git @@ -237,7 +250,7 @@ org.apache.pdfbox pdfbox - 2.0.15 + 2.0.21-SNAPSHOT diff --git a/src/main/java/technology/tabula/ObjectExtractor.java b/src/main/java/technology/tabula/ObjectExtractor.java index 87c2a2f9..3998ba6f 100644 --- a/src/main/java/technology/tabula/ObjectExtractor.java +++ b/src/main/java/technology/tabula/ObjectExtractor.java @@ -42,7 +42,7 @@ protected Page extractPage(Integer pageNumber) throws 
IOException { h = p.getCropBox().getHeight(); } - return new Page(0, 0, w, h, pageRotation, pageNumber, p, pdfTextStripper.textElements, + return new Page(0, 0, w, h, pageRotation, pageNumber, p, this.pdfDocument, pdfTextStripper.textElements, se.rulings, pdfTextStripper.minCharWidth, pdfTextStripper.minCharHeight, pdfTextStripper.spatialIndex); } diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index e8c1d636..3207bcb9 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -6,237 +6,243 @@ import java.util.Comparator; import java.util.List; +import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; @SuppressWarnings("serial") // TODO: this class should probably be called "PageArea" or something like that public class Page extends Rectangle { - private Integer rotation; - private int pageNumber; - private List texts; - private List rulings, cleanRulings = null, verticalRulingLines = null, horizontalRulingLines = null; - private float minCharWidth; - private float minCharHeight; - private RectangleSpatialIndex spatial_index; - private PDPage pdPage; - - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage) { - super(top, left, width, height); - this.rotation = rotation; - this.pageNumber = page_number; - this.pdPage = pdPage; - } - - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, - List characters, List rulings) { - - this(top, left, width, height, rotation, page_number, pdPage); - this.texts = characters; - this.rulings = rulings; - } - - - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, - List characters, List rulings, - float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { - - this(top, left, width, height, rotation, page_number, pdPage, 
characters, rulings); - this.minCharHeight = minCharHeight; - this.minCharWidth = minCharWidth; - this.spatial_index = index; - } + private Integer rotation; + private int pageNumber; + private List texts; + private List rulings, cleanRulings = null, verticalRulingLines = null, horizontalRulingLines = null; + private float minCharWidth; + private float minCharHeight; + private RectangleSpatialIndex spatial_index; + private PDPage pdPage; + private PDDocument pdDoc; + + public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, PDDocument doc) { + super(top, left, width, height); + this.rotation = rotation; + this.pageNumber = page_number; + this.pdPage = pdPage; + this.pdDoc = doc; + } + + public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, PDDocument doc, + List characters, List rulings) { + + this(top, left, width, height, rotation, page_number, pdPage, doc); + this.texts = characters; + this.rulings = rulings; + } + + + public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, PDDocument doc, + List characters, List rulings, + float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { + + this(top, left, width, height, rotation, page_number, pdPage, doc, characters, rulings); + this.minCharHeight = minCharHeight; + this.minCharWidth = minCharWidth; + this.spatial_index = index; + } + + public Page getArea(Rectangle area) { + List t = getText(area); + float min_char_width = 7; + float min_char_height = 7; + + if(t.size() > 0){ + min_char_width = Collections.min(t, new Comparator() { + @Override + public int compare(TextElement te1, TextElement te2) { + return java.lang.Float.compare(te1.width, te2.width); + }}).width; + min_char_height = Collections.min(t, new Comparator() { + @Override + public int compare(TextElement te1, TextElement te2) { + return java.lang.Float.compare(te1.height, 
te2.height); + }}).height; + } + Page rv = new Page( + area.getTop(), + area.getLeft(), + (float) area.getWidth(), + (float) area.getHeight(), + rotation, + pageNumber, + pdPage, + pdDoc, + t, + Ruling.cropRulingsToArea(getRulings(), area), + min_char_width, + min_char_height, + spatial_index); + + rv.addRuling(new Ruling( + new Point2D.Double(rv.getLeft(), + rv.getTop()), + new Point2D.Double(rv.getRight(), + rv.getTop()))); + rv.addRuling(new Ruling( + new Point2D.Double(rv.getRight(), + rv.getTop()), + new Point2D.Double(rv.getRight(), + rv.getBottom()))); + rv.addRuling(new Ruling( + new Point2D.Double(rv.getRight(), + rv.getBottom()), + new Point2D.Double(rv.getLeft(), + rv.getBottom()))); + rv.addRuling(new Ruling( + new Point2D.Double(rv.getLeft(), + rv.getBottom()), + new Point2D.Double(rv.getLeft(), + rv.getTop()))); + + return rv; + } + + public Page getArea(float top, float left, float bottom, float right) { + Rectangle area = new Rectangle(top, left, right - left, bottom - top); + return this.getArea(area); + } + + public List getText() { + return texts; + } + + public List getText(Rectangle area) { + return this.spatial_index.contains(area); + } + + /** @deprecated use {@linkplain #getText(Rectangle)} instead */ + @Deprecated public List getText(float top, float left, float bottom, float right) { + return this.getText(new Rectangle(top, left, right - left, bottom - top)); + } + + public Integer getRotation() { + return rotation; + } + + public int getPageNumber() { + return pageNumber; + } + + /** @deprecated use {@linkplain #getText()} instead */ + @Deprecated public List getTexts() { + return texts; + } + + /** + * Returns the minimum bounding box that contains all the TextElements on this Page + */ + public Rectangle getTextBounds() { + List texts = this.getText(); + if (!texts.isEmpty()) { + return Utils.bounds(texts); + } + else { + return new Rectangle(); + } + + } + + public List getRulings() { + if (this.cleanRulings != null) { + return 
this.cleanRulings; + } + + if (this.rulings == null || this.rulings.isEmpty()) { + this.verticalRulingLines = new ArrayList<>(); + this.horizontalRulingLines = new ArrayList<>(); + return new ArrayList<>(); + } + + Utils.snapPoints(this.rulings, this.minCharWidth, this.minCharHeight); + + List vrs = new ArrayList<>(); + for (Ruling vr: this.rulings) { + if (vr.vertical()) { + vrs.add(vr); + } + } + this.verticalRulingLines = Ruling.collapseOrientedRulings(vrs); + + List hrs = new ArrayList<>(); + for (Ruling hr: this.rulings) { + if (hr.horizontal()) { + hrs.add(hr); + } + } + this.horizontalRulingLines = Ruling.collapseOrientedRulings(hrs); + + this.cleanRulings = new ArrayList<>(this.verticalRulingLines); + this.cleanRulings.addAll(this.horizontalRulingLines); + + return this.cleanRulings; + + } + + public List getVerticalRulings() { + if (this.verticalRulingLines != null) { + return this.verticalRulingLines; + } + this.getRulings(); + return this.verticalRulingLines; + } - public Page getArea(Rectangle area) { - List t = getText(area); - float min_char_width = 7; - float min_char_height = 7; - - if(t.size() > 0){ - min_char_width = Collections.min(t, new Comparator() { - @Override - public int compare(TextElement te1, TextElement te2) { - return java.lang.Float.compare(te1.width, te2.width); - }}).width; - min_char_height = Collections.min(t, new Comparator() { - @Override - public int compare(TextElement te1, TextElement te2) { - return java.lang.Float.compare(te1.height, te2.height); - }}).height; - } - Page rv = new Page( - area.getTop(), - area.getLeft(), - (float) area.getWidth(), - (float) area.getHeight(), - rotation, - pageNumber, - pdPage, - t, - Ruling.cropRulingsToArea(getRulings(), area), - min_char_width, - min_char_height, - spatial_index); - - rv.addRuling(new Ruling( - new Point2D.Double(rv.getLeft(), - rv.getTop()), - new Point2D.Double(rv.getRight(), - rv.getTop()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getRight(), - 
rv.getTop()), - new Point2D.Double(rv.getRight(), - rv.getBottom()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getRight(), - rv.getBottom()), - new Point2D.Double(rv.getLeft(), - rv.getBottom()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getLeft(), - rv.getBottom()), - new Point2D.Double(rv.getLeft(), - rv.getTop()))); - - return rv; - } - - public Page getArea(float top, float left, float bottom, float right) { - Rectangle area = new Rectangle(top, left, right - left, bottom - top); - return this.getArea(area); - } - - public List getText() { - return texts; - } - - public List getText(Rectangle area) { - return this.spatial_index.contains(area); - } - - /** @deprecated use {@linkplain #getText(Rectangle)} instead */ - @Deprecated public List getText(float top, float left, float bottom, float right) { - return this.getText(new Rectangle(top, left, right - left, bottom - top)); + public List getHorizontalRulings() { + if (this.horizontalRulingLines != null) { + return this.horizontalRulingLines; } + this.getRulings(); + return this.horizontalRulingLines; + } - public Integer getRotation() { - return rotation; + public void addRuling(Ruling r) { + if (r.oblique()) { + throw new UnsupportedOperationException("Can't add an oblique ruling"); } + this.rulings.add(r); + // clear caches + this.verticalRulingLines = null; + this.horizontalRulingLines = null; + this.cleanRulings = null; + } - public int getPageNumber() { - return pageNumber; - } + public List getUnprocessedRulings() { + return this.rulings; + } - /** @deprecated use {@linkplain #getText()} instead */ - @Deprecated public List getTexts() { - return texts; - } - - /** - * Returns the minimum bounding box that contains all the TextElements on this Page - */ - public Rectangle getTextBounds() { - List texts = this.getText(); - if (!texts.isEmpty()) { - return Utils.bounds(texts); - } - else { - return new Rectangle(); - } - - } + /** @deprecated with no replacement */ + @Deprecated public 
float getMinCharWidth() { + return minCharWidth; + } - public List getRulings() { - if (this.cleanRulings != null) { - return this.cleanRulings; - } - - if (this.rulings == null || this.rulings.isEmpty()) { - this.verticalRulingLines = new ArrayList<>(); - this.horizontalRulingLines = new ArrayList<>(); - return new ArrayList<>(); - } - - Utils.snapPoints(this.rulings, this.minCharWidth, this.minCharHeight); - - List vrs = new ArrayList<>(); - for (Ruling vr: this.rulings) { - if (vr.vertical()) { - vrs.add(vr); - } - } - this.verticalRulingLines = Ruling.collapseOrientedRulings(vrs); - - List hrs = new ArrayList<>(); - for (Ruling hr: this.rulings) { - if (hr.horizontal()) { - hrs.add(hr); - } - } - this.horizontalRulingLines = Ruling.collapseOrientedRulings(hrs); - - this.cleanRulings = new ArrayList<>(this.verticalRulingLines); - this.cleanRulings.addAll(this.horizontalRulingLines); - - return this.cleanRulings; - - } - - public List getVerticalRulings() { - if (this.verticalRulingLines != null) { - return this.verticalRulingLines; - } - this.getRulings(); - return this.verticalRulingLines; - } - - public List getHorizontalRulings() { - if (this.horizontalRulingLines != null) { - return this.horizontalRulingLines; - } - this.getRulings(); - return this.horizontalRulingLines; - } - - public void addRuling(Ruling r) { - if (r.oblique()) { - throw new UnsupportedOperationException("Can't add an oblique ruling"); - } - this.rulings.add(r); - // clear caches - this.verticalRulingLines = null; - this.horizontalRulingLines = null; - this.cleanRulings = null; - } - - public List getUnprocessedRulings() { - return this.rulings; - } + /** @deprecated with no replacement */ + @Deprecated public float getMinCharHeight() { + return minCharHeight; + } - /** @deprecated with no replacement */ - @Deprecated public float getMinCharWidth() { - return minCharWidth; - } + public PDPage getPDPage() { + return pdPage; + } - /** @deprecated with no replacement */ - @Deprecated public 
float getMinCharHeight() { - return minCharHeight; - } + public PDDocument getPDDoc() { + return pdDoc; + } - public PDPage getPDPage() { - return pdPage; - } + /** @deprecated with no replacement */ + @Deprecated public RectangleSpatialIndex getSpatialIndex() { + return this.spatial_index; + } - /** @deprecated with no replacement */ - @Deprecated public RectangleSpatialIndex getSpatialIndex() { - return this.spatial_index; - } - - /** @deprecated with no replacement */ - @Deprecated public boolean hasText() { - return this.texts.size() > 0; - } - - + /** @deprecated with no replacement */ + @Deprecated public boolean hasText() { + return this.texts.size() > 0; + } } diff --git a/src/main/java/technology/tabula/TextStripper.java b/src/main/java/technology/tabula/TextStripper.java index 3e1c82a1..219dfa42 100644 --- a/src/main/java/technology/tabula/TextStripper.java +++ b/src/main/java/technology/tabula/TextStripper.java @@ -1,6 +1,10 @@ package technology.tabula; +import org.apache.fontbox.util.BoundingBox; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDFontDescriptor; +import org.apache.pdfbox.pdmodel.font.PDType3Font; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.TextPosition; @@ -68,6 +72,54 @@ protected void writeString(String string, List textPositions) thro } } + @Override + protected float computeFontHeight(PDFont font) throws IOException + { + BoundingBox bbox = font.getBoundingBox(); + if (bbox.getLowerLeftY() < Short.MIN_VALUE) + { + // PDFBOX-2158 and PDFBOX-3130 + // files by Salmat eSolutions / ClibPDF Library + bbox.setLowerLeftY(- (bbox.getLowerLeftY() + 65536)); + } + // 1/2 the bbox is used as the height todo: why? 
+ float glyphHeight = bbox.getHeight() / 2; + + // sometimes the bbox has very high values, but CapHeight is OK + PDFontDescriptor fontDescriptor = font.getFontDescriptor(); + if (fontDescriptor != null) + { + float capHeight = fontDescriptor.getCapHeight(); + if (Float.compare(capHeight, 0) != 0 && + (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = capHeight; + } + // PDFBOX-3464, PDFBOX-448: + // sometimes even CapHeight has very high value, but Ascent and Descent are ok + float ascent = fontDescriptor.getAscent(); + float descent = fontDescriptor.getDescent(); + if (ascent > 0 && descent < 0 && + ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = (ascent - descent) / 2; + } + } + + // transformPoint from glyph space -> text space + float height; + if (font instanceof PDType3Font) + { + height = font.getFontMatrix().transformPoint(0, glyphHeight).y; + } + else + { + height = glyphHeight / 1000; + } + + return height; + } + private boolean isPrintable(String s) { Character c; Character.UnicodeBlock block; @@ -79,4 +131,4 @@ private boolean isPrintable(String s) { } return printable; } -} \ No newline at end of file +} diff --git a/src/main/java/technology/tabula/Utils.java b/src/main/java/technology/tabula/Utils.java index 35c6cc4d..00814429 100644 --- a/src/main/java/technology/tabula/Utils.java +++ b/src/main/java/technology/tabula/Utils.java @@ -280,4 +280,9 @@ public static BufferedImage pageConvertToImage(PDPage page, int dpi, ImageType i } } + public static BufferedImage pageConvertToImage(PDDocument doc, PDPage page, int dpi, ImageType imageType) throws IOException { + PDFRenderer renderer = new PDFRenderer(doc); + return renderer.renderImageWithDPI(doc.getPages().indexOf(page), dpi, imageType); + } + } diff --git a/src/main/java/technology/tabula/debug/Debug.java b/src/main/java/technology/tabula/debug/Debug.java index f9f923b5..91609045 100644 --- 
a/src/main/java/technology/tabula/debug/Debug.java +++ b/src/main/java/technology/tabula/debug/Debug.java @@ -227,7 +227,7 @@ public static void renderPage(String pdfPath, String outPath, int pageNumber, Re PDPage p = document.getPage(pageNumber); - BufferedImage image = Utils.pageConvertToImage(p, 72, ImageType.RGB); + BufferedImage image = Utils.pageConvertToImage(document, p, 72, ImageType.RGB); Graphics2D g = (Graphics2D) image.getGraphics(); diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java index fe30ae8d..74898e3e 100644 --- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java @@ -106,7 +106,7 @@ public List detect(Page page) { BufferedImage image; PDPage pdfPage = page.getPDPage(); try { - image = Utils.pageConvertToImage(pdfPage, 144, ImageType.GRAY); + image = Utils.pageConvertToImage(page.getPDDoc(), pdfPage, 144, ImageType.GRAY); } catch (IOException e) { return new ArrayList<>(); } @@ -118,7 +118,7 @@ public List detect(Page page) { try { removeTextDocument = this.removeText(pdfPage); pdfPage = removeTextDocument.getPage(0); - image = Utils.pageConvertToImage(pdfPage, 144, ImageType.GRAY); + image = Utils.pageConvertToImage(removeTextDocument, pdfPage, 144, ImageType.GRAY); } catch (Exception e) { return new ArrayList<>(); } finally { diff --git a/src/test/java/technology/tabula/TestProjectionProfile.java b/src/test/java/technology/tabula/TestProjectionProfile.java index 44d488eb..4a3462f9 100644 --- a/src/test/java/technology/tabula/TestProjectionProfile.java +++ b/src/test/java/technology/tabula/TestProjectionProfile.java @@ -5,6 +5,7 @@ import java.util.ArrayList; import java.util.List; +import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.font.PDType1Font; import 
org.junit.Before; @@ -18,6 +19,7 @@ public class TestProjectionProfile { @Before public void setUpProjectionProfile() { PDPage pdPage = new PDPage(); + PDDocument pdDocument = new PDDocument(); TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f); TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f); @@ -30,7 +32,7 @@ public void setUpProjectionProfile() { rulingList.add(ruling); - page = new Page(0, 0, 1, 1, 0, 1, pdPage, textList, rulingList); + page = new Page(0, 0, 1, 1, 0, 1, pdPage, pdDocument, textList, rulingList); List rectangles = new ArrayList<>(); rectangles.add(new Rectangle(0f, 0f, 500f, 5f)); diff --git a/src/test/java/technology/tabula/TestUtils.java b/src/test/java/technology/tabula/TestUtils.java index db634d89..e68411df 100644 --- a/src/test/java/technology/tabula/TestUtils.java +++ b/src/test/java/technology/tabula/TestUtils.java @@ -124,7 +124,7 @@ public void testQuickSortLongList() { public void testJPEG2000DoesNotRaise() throws IOException { PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/jpeg2000.pdf")); PDPage page = pdf_document.getPage(0); - Utils.pageConvertToImage(page, 360, ImageType.RGB); + Utils.pageConvertToImage(pdf_document, page, 360, ImageType.RGB); } } From b2b8b69488fc9f4e502d40dda1f7c60f8b9c6fa7 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Fri, 10 Jul 2020 17:00:32 -0300 Subject: [PATCH 097/200] Bump gson from 2.8.5 to 2.8.6 (#322) Bumps [gson](https://github.com/google/gson) from 2.8.5 to 2.8.6. 
- [Release notes](https://github.com/google/gson/releases) - [Changelog](https://github.com/google/gson/blob/master/CHANGELOG.md) - [Commits](https://github.com/google/gson/compare/gson-parent-2.8.5...gson-parent-2.8.6) Signed-off-by: dependabot-preview[bot] Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 366b6bce..3c019b7d 100644 --- a/pom.xml +++ b/pom.xml @@ -274,7 +274,7 @@ com.google.code.gson gson - 2.8.5 + 2.8.6 From f2f64cfd2e1813b1d85e36192f775d1561c05e5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Sun, 12 Jul 2020 13:54:00 -0300 Subject: [PATCH 098/200] getFontSize -> getFontSizeInPt (Closes #277) --- src/main/java/technology/tabula/TextStripper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/technology/tabula/TextStripper.java b/src/main/java/technology/tabula/TextStripper.java index 3e1c82a1..2b150177 100644 --- a/src/main/java/technology/tabula/TextStripper.java +++ b/src/main/java/technology/tabula/TextStripper.java @@ -55,7 +55,7 @@ protected void writeString(String string, List textPositions) thro TextElement te = new TextElement(Utils.round(textPosition.getYDirAdj() - h, 2), Utils.round(textPosition.getXDirAdj(), 2), Utils.round(textPosition.getWidthDirAdj(), 2), - Utils.round(textPosition.getHeightDir(), 2), textPosition.getFont(), textPosition.getFontSize(), c, + Utils.round(textPosition.getHeightDir(), 2), textPosition.getFont(), textPosition.getFontSizeInPt(), c, // workaround a possible bug in PDFBox: // https://issues.apache.org/jira/browse/PDFBOX-1755 wos, textPosition.getDir()); From 34a829ff376572ca6953d53077126061808c600d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Sun, 12 Jul 2020 14:40:58 -0300 Subject: [PATCH 099/200] fix tests --- src/test/java/technology/tabula/TestCommandLineApp.java | 5 
++--- src/test/resources/technology/tabula/json/AnimalSounds1.json | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/test/java/technology/tabula/TestCommandLineApp.java b/src/test/java/technology/tabula/TestCommandLineApp.java index 4a9a91bd..341c5d55 100644 --- a/src/test/java/technology/tabula/TestCommandLineApp.java +++ b/src/test/java/technology/tabula/TestCommandLineApp.java @@ -199,7 +199,7 @@ public void testLatticeModeWithColumnOption() throws ParseException, IOException "src/test/resources/technology/tabula/AnimalSounds.pdf", "-p", "1", "-c", "59,218,331,551", "-l", - "-f", "CSV" + "-r", "CSV" })); } @@ -211,9 +211,8 @@ public void testLatticeModeWithColumnAndMultipleAreasOption() throws ParseExcept "src/test/resources/technology/tabula/AnimalSounds1.pdf", "-p", "1", "-c", "57,136,197,296,314,391,457,553", "-a", "%0,0,100,50", "-a", "%0,50,100,100", - "-l", "-f", "JSON" + "-r", "-f", "JSON" }); - System.out.println("Returned Json: \n" + resultJson); assertEquals(expectedJson, resultJson); } diff --git a/src/test/resources/technology/tabula/json/AnimalSounds1.json b/src/test/resources/technology/tabula/json/AnimalSounds1.json index c9fd334a..c13c6759 100644 --- a/src/test/resources/technology/tabula/json/AnimalSounds1.json +++ b/src/test/resources/technology/tabula/json/AnimalSounds1.json @@ -1 +1 @@ 
-[{"extraction_method":"lattice","top":0.006499578,"left":56.8,"width":241.1999969482422,"height":315.36407470703125,"data":[[{"top":0.006499578,"left":56.8,"width":79.19999694824219,"height":95.31405639648438,"text":"Animal"},{"top":0.006499578,"left":136.0,"width":61.0,"height":95.31405639648438,"text":"Action"},{"top":0.006499578,"left":197.0,"width":101.0,"height":95.31405639648438,"text":"Result"}],[{"top":95.32056,"left":56.8,"width":79.19999694824219,"height":23.050010681152344,"text":"Cat"},{"top":95.32056,"left":136.0,"width":61.0,"height":23.050010681152344,"text":"Says"},{"top":95.32056,"left":197.0,"width":101.0,"height":23.050010681152344,"text":"Meow"}],[{"top":118.37057,"left":56.8,"width":79.19999694824219,"height":63.99999237060547,"text":"Parastratiosph\recomyiastratio\rsphecomyioid\res"},{"top":118.37057,"left":136.0,"width":61.0,"height":63.99999237060547,"text":"Says"},{"top":118.37057,"left":197.0,"width":101.0,"height":63.99999237060547,"text":"bzzzzzzz"}],[{"top":182.37056,"left":56.8,"width":79.19999694824219,"height":133.00001525878906,"text":"Fox"},{"top":182.37056,"left":136.0,"width":61.0,"height":133.00001525878906,"text":"Says"},{"top":182.37056,"left":197.0,"width":101.0,"height":133.00001525878906,"text":"Ring-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding"}]]},{"extraction_method":"lattice","top":0.006499578,"left":313.35715,"width":241.55941772460938,"height":259.2640380859375,"data":[[{"top":0.006499578,"left":313.35715,"width":77.64285278320312,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":391.0,"width":66.0,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":457.0,"width":97.91656494140625,"height":72.26405334472656,"text":""}],[{"top":72.27055,"left":313.35715,"width":77.64285278320312,"height":23.050003051757812,"text":"Animal"},{"top":72.27055,"left":391.0,"width":66.0,"height":23.050003051757812,"text":"Action"},{"top":72.27055,"left"
:457.0,"width":97.91656494140625,"height":23.050003051757812,"text":"Result"}],[{"top":95.32056,"left":313.35715,"width":77.64285278320312,"height":35.94999694824219,"text":"Dogs/wolves/\rMore dogs"},{"top":95.32056,"left":391.0,"width":66.0,"height":35.94999694824219,"text":"Says"},{"top":95.32056,"left":457.0,"width":97.91656494140625,"height":35.94999694824219,"text":"Bow-wow/\rruff-ruff"}],[{"top":131.27055,"left":313.35715,"width":77.64285278320312,"height":36.40000915527344,"text":"Donkey"},{"top":131.27055,"left":391.0,"width":66.0,"height":36.40000915527344,"text":"Says"},{"top":131.27055,"left":457.0,"width":97.91656494140625,"height":36.40000915527344,"text":"Hee-Haw Hee-\rHaw"}],[{"top":167.67056,"left":313.35715,"width":77.64285278320312,"height":91.5999755859375,"text":"Fox"},{"top":167.67056,"left":391.0,"width":66.0,"height":91.5999755859375,"text":"Says"},{"top":167.67056,"left":457.0,"width":97.91656494140625,"height":91.5999755859375,"text":"Wa-pa-pa-pa-\rpa-pa-pow\rWa-pa-pa-pa-\rpa-pow\rWa-pa-pa-pa-\rpa-pa-pow"}]]}] \ No newline at end of file 
+[{"extraction_method":"lattice","top":0.006499578,"left":56.8,"width":241.1999969482422,"height":315.36407470703125,"right":298.0,"bottom":315.37057,"data":[[{"top":0.006499578,"left":56.8,"width":79.19999694824219,"height":95.31405639648438,"text":"Animal"},{"top":0.006499578,"left":136.0,"width":61.0,"height":95.31405639648438,"text":"Action"},{"top":0.006499578,"left":197.0,"width":101.0,"height":95.31405639648438,"text":"Result"}],[{"top":95.32056,"left":56.8,"width":79.19999694824219,"height":23.050010681152344,"text":"Cat"},{"top":95.32056,"left":136.0,"width":61.0,"height":23.050010681152344,"text":"Says"},{"top":95.32056,"left":197.0,"width":101.0,"height":23.050010681152344,"text":"Meow"}],[{"top":118.37057,"left":56.8,"width":79.19999694824219,"height":63.99999237060547,"text":"Parastratiosph\recomyiastratio\rsphecomyioid\res"},{"top":118.37057,"left":136.0,"width":61.0,"height":63.99999237060547,"text":"Says"},{"top":118.37057,"left":197.0,"width":101.0,"height":63.99999237060547,"text":"bzzzzzzz"}],[{"top":182.37056,"left":56.8,"width":79.19999694824219,"height":133.00001525878906,"text":"Fox"},{"top":182.37056,"left":136.0,"width":61.0,"height":133.00001525878906,"text":"Says"},{"top":182.37056,"left":197.0,"width":101.0,"height":133.00001525878906,"text":"Ring-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding"}]]},{"extraction_method":"lattice","top":0.006499578,"left":313.35715,"width":241.55941772460938,"height":259.2640380859375,"right":554.91656,"bottom":259.27054,"data":[[{"top":0.006499578,"left":313.35715,"width":77.64285278320312,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":391.0,"width":66.0,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":457.0,"width":97.91656494140625,"height":72.26405334472656,"text":""}],[{"top":72.27055,"left":313.35715,"width":77.64285278320312,"height":23.050003051757812,"text":"Animal"},{"top":72.27055,"left":391.0,"width":66
.0,"height":23.050003051757812,"text":"Action"},{"top":72.27055,"left":457.0,"width":97.91656494140625,"height":23.050003051757812,"text":"Result"}],[{"top":95.32056,"left":313.35715,"width":77.64285278320312,"height":35.94999694824219,"text":"Dogs/wolves/\rMore dogs"},{"top":95.32056,"left":391.0,"width":66.0,"height":35.94999694824219,"text":"Says"},{"top":95.32056,"left":457.0,"width":97.91656494140625,"height":35.94999694824219,"text":"Bow-wow/\rruff-ruff"}],[{"top":131.27055,"left":313.35715,"width":77.64285278320312,"height":36.40000915527344,"text":"Donkey"},{"top":131.27055,"left":391.0,"width":66.0,"height":36.40000915527344,"text":"Says"},{"top":131.27055,"left":457.0,"width":97.91656494140625,"height":36.40000915527344,"text":"Hee-Haw Hee-\rHaw"}],[{"top":167.67056,"left":313.35715,"width":77.64285278320312,"height":91.5999755859375,"text":"Fox"},{"top":167.67056,"left":391.0,"width":66.0,"height":91.5999755859375,"text":"Says"},{"top":167.67056,"left":457.0,"width":97.91656494140625,"height":91.5999755859375,"text":"Wa-pa-pa-pa-\rpa-pa-pow\rWa-pa-pa-pa-\rpa-pow\rWa-pa-pa-pa-\rpa-pa-pow"}]]}] From 82ad2ed7632ce72a55331d98dc766ee26f2fdbc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Sun, 12 Jul 2020 14:46:56 -0300 Subject: [PATCH 100/200] fix test --- src/test/java/technology/tabula/TestCommandLineApp.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/technology/tabula/TestCommandLineApp.java b/src/test/java/technology/tabula/TestCommandLineApp.java index 341c5d55..5a4e3af5 100644 --- a/src/test/java/technology/tabula/TestCommandLineApp.java +++ b/src/test/java/technology/tabula/TestCommandLineApp.java @@ -198,8 +198,8 @@ public void testLatticeModeWithColumnOption() throws ParseException, IOException assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ "src/test/resources/technology/tabula/AnimalSounds.pdf", "-p", "1", "-c", - "59,218,331,551", "-l", - "-r", "CSV" + 
"59,218,331,551", + "-r", "-f", "CSV" })); } From dbeff6e393f995281c3aa8f669ed71edf587b593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Wed, 29 Jul 2020 10:11:19 -0300 Subject: [PATCH 101/200] CommandLineApp: disable unused --debug flag --- src/main/java/technology/tabula/CommandLineApp.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 6431dbb2..6e26e7cd 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -315,7 +315,7 @@ public static Options buildOptions() { o.addOption("t", "stream", false, "Force PDF to be extracted using stream-mode extraction (if there are no ruling lines separating each cell)"); o.addOption("i", "silent", false, "Suppress all stderr output."); o.addOption("u", "use-line-returns", false, "Use embedded line returns in cells. (Only in spreadsheet mode.)"); - o.addOption("d", "debug", false, "Print detected table areas instead of processing."); + // o.addOption("d", "debug", false, "Print detected table areas instead of processing."); o.addOption(Option.builder("b") .longOpt("batch") .desc("Convert all .pdfs in the provided directory.") From 5bfe84d409d661d9c4c9de04102829177207f9aa Mon Sep 17 00:00:00 2001 From: travisbeale Date: Fri, 14 Aug 2020 09:22:49 -0400 Subject: [PATCH 102/200] Added a heuristic to filter out tall-ish whitespace elements that can throw off text chunking --- src/main/java/technology/tabula/TextStripper.java | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/main/java/technology/tabula/TextStripper.java b/src/main/java/technology/tabula/TextStripper.java index 2b150177..429ff304 100644 --- a/src/main/java/technology/tabula/TextStripper.java +++ b/src/main/java/technology/tabula/TextStripper.java @@ -10,11 +10,14 @@ public class TextStripper extends 
PDFTextStripper { private static final String NBSP = "\u00A0"; + private static final float AVG_HEIGHT_MULT_THRESHOLD = 6.0f; private PDDocument document; public ArrayList textElements; public RectangleSpatialIndex spatialIndex; public float minCharWidth = Float.MAX_VALUE; public float minCharHeight = Float.MAX_VALUE; + public float totalHeight = 0.0f; + public int countHeight = 0; public TextStripper(PDDocument document, int pageNumber) throws IOException { super(); @@ -62,7 +65,17 @@ protected void writeString(String string, List textPositions) thro this.minCharWidth = (float) Math.min(this.minCharWidth, te.getWidth()); this.minCharHeight = (float) Math.min(this.minCharHeight, te.getHeight()); - + + countHeight++; + totalHeight += te.getHeight(); + float avgHeight = totalHeight / countHeight; + + if (avgHeight > 0 + && te.getHeight() >= (avgHeight * AVG_HEIGHT_MULT_THRESHOLD) + && (te.getText() == null || te.getText().trim().equals(""))) { + continue; + } + this.spatialIndex.add(te); this.textElements.add(te); } From 85c27ab88285c903ee89db94725543fe314482f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Fri, 10 Jul 2020 16:25:46 -0300 Subject: [PATCH 103/200] Upgrade PDFbox to 2.0.21 - Closes #325 --- pom.xml | 15 +- .../technology/tabula/ObjectExtractor.java | 2 +- src/main/java/technology/tabula/Page.java | 438 +++++++++--------- .../java/technology/tabula/TextStripper.java | 54 ++- src/main/java/technology/tabula/Utils.java | 5 + .../java/technology/tabula/debug/Debug.java | 2 +- .../detectors/NurminenDetectionAlgorithm.java | 4 +- .../tabula/TestProjectionProfile.java | 4 +- .../java/technology/tabula/TestUtils.java | 2 +- 9 files changed, 302 insertions(+), 224 deletions(-) diff --git a/pom.xml b/pom.xml index 3c019b7d..52f29f50 100644 --- a/pom.xml +++ b/pom.xml @@ -32,6 +32,19 @@ + + + snapshots + https://repository.apache.org/content/repositories/snapshots/ + + false + + + true + + + + 
scm:git:git@github.com:tabulapdf/tabula-java.git scm:git:git@github.com:tabulapdf/tabula-java.git @@ -237,7 +250,7 @@ org.apache.pdfbox pdfbox - 2.0.15 + 2.0.21-SNAPSHOT diff --git a/src/main/java/technology/tabula/ObjectExtractor.java b/src/main/java/technology/tabula/ObjectExtractor.java index 87c2a2f9..3998ba6f 100644 --- a/src/main/java/technology/tabula/ObjectExtractor.java +++ b/src/main/java/technology/tabula/ObjectExtractor.java @@ -42,7 +42,7 @@ protected Page extractPage(Integer pageNumber) throws IOException { h = p.getCropBox().getHeight(); } - return new Page(0, 0, w, h, pageRotation, pageNumber, p, pdfTextStripper.textElements, + return new Page(0, 0, w, h, pageRotation, pageNumber, p, this.pdfDocument, pdfTextStripper.textElements, se.rulings, pdfTextStripper.minCharWidth, pdfTextStripper.minCharHeight, pdfTextStripper.spatialIndex); } diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index e8c1d636..3207bcb9 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -6,237 +6,243 @@ import java.util.Comparator; import java.util.List; +import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; @SuppressWarnings("serial") // TODO: this class should probably be called "PageArea" or something like that public class Page extends Rectangle { - private Integer rotation; - private int pageNumber; - private List texts; - private List rulings, cleanRulings = null, verticalRulingLines = null, horizontalRulingLines = null; - private float minCharWidth; - private float minCharHeight; - private RectangleSpatialIndex spatial_index; - private PDPage pdPage; - - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage) { - super(top, left, width, height); - this.rotation = rotation; - this.pageNumber = page_number; - this.pdPage = pdPage; - } - - public Page(float top, float left, float width, 
float height, int rotation, int page_number, PDPage pdPage, - List characters, List rulings) { - - this(top, left, width, height, rotation, page_number, pdPage); - this.texts = characters; - this.rulings = rulings; - } - - - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, - List characters, List rulings, - float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { - - this(top, left, width, height, rotation, page_number, pdPage, characters, rulings); - this.minCharHeight = minCharHeight; - this.minCharWidth = minCharWidth; - this.spatial_index = index; - } + private Integer rotation; + private int pageNumber; + private List texts; + private List rulings, cleanRulings = null, verticalRulingLines = null, horizontalRulingLines = null; + private float minCharWidth; + private float minCharHeight; + private RectangleSpatialIndex spatial_index; + private PDPage pdPage; + private PDDocument pdDoc; + + public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, PDDocument doc) { + super(top, left, width, height); + this.rotation = rotation; + this.pageNumber = page_number; + this.pdPage = pdPage; + this.pdDoc = doc; + } + + public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, PDDocument doc, + List characters, List rulings) { + + this(top, left, width, height, rotation, page_number, pdPage, doc); + this.texts = characters; + this.rulings = rulings; + } + + + public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, PDDocument doc, + List characters, List rulings, + float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { + + this(top, left, width, height, rotation, page_number, pdPage, doc, characters, rulings); + this.minCharHeight = minCharHeight; + this.minCharWidth = minCharWidth; + this.spatial_index = index; + } + + public Page 
getArea(Rectangle area) { + List t = getText(area); + float min_char_width = 7; + float min_char_height = 7; + + if(t.size() > 0){ + min_char_width = Collections.min(t, new Comparator() { + @Override + public int compare(TextElement te1, TextElement te2) { + return java.lang.Float.compare(te1.width, te2.width); + }}).width; + min_char_height = Collections.min(t, new Comparator() { + @Override + public int compare(TextElement te1, TextElement te2) { + return java.lang.Float.compare(te1.height, te2.height); + }}).height; + } + Page rv = new Page( + area.getTop(), + area.getLeft(), + (float) area.getWidth(), + (float) area.getHeight(), + rotation, + pageNumber, + pdPage, + pdDoc, + t, + Ruling.cropRulingsToArea(getRulings(), area), + min_char_width, + min_char_height, + spatial_index); + + rv.addRuling(new Ruling( + new Point2D.Double(rv.getLeft(), + rv.getTop()), + new Point2D.Double(rv.getRight(), + rv.getTop()))); + rv.addRuling(new Ruling( + new Point2D.Double(rv.getRight(), + rv.getTop()), + new Point2D.Double(rv.getRight(), + rv.getBottom()))); + rv.addRuling(new Ruling( + new Point2D.Double(rv.getRight(), + rv.getBottom()), + new Point2D.Double(rv.getLeft(), + rv.getBottom()))); + rv.addRuling(new Ruling( + new Point2D.Double(rv.getLeft(), + rv.getBottom()), + new Point2D.Double(rv.getLeft(), + rv.getTop()))); + + return rv; + } + + public Page getArea(float top, float left, float bottom, float right) { + Rectangle area = new Rectangle(top, left, right - left, bottom - top); + return this.getArea(area); + } + + public List getText() { + return texts; + } + + public List getText(Rectangle area) { + return this.spatial_index.contains(area); + } + + /** @deprecated use {@linkplain #getText(Rectangle)} instead */ + @Deprecated public List getText(float top, float left, float bottom, float right) { + return this.getText(new Rectangle(top, left, right - left, bottom - top)); + } + + public Integer getRotation() { + return rotation; + } + + public int getPageNumber() 
{ + return pageNumber; + } + + /** @deprecated use {@linkplain #getText()} instead */ + @Deprecated public List getTexts() { + return texts; + } + + /** + * Returns the minimum bounding box that contains all the TextElements on this Page + */ + public Rectangle getTextBounds() { + List texts = this.getText(); + if (!texts.isEmpty()) { + return Utils.bounds(texts); + } + else { + return new Rectangle(); + } + + } + + public List getRulings() { + if (this.cleanRulings != null) { + return this.cleanRulings; + } + + if (this.rulings == null || this.rulings.isEmpty()) { + this.verticalRulingLines = new ArrayList<>(); + this.horizontalRulingLines = new ArrayList<>(); + return new ArrayList<>(); + } + + Utils.snapPoints(this.rulings, this.minCharWidth, this.minCharHeight); + + List vrs = new ArrayList<>(); + for (Ruling vr: this.rulings) { + if (vr.vertical()) { + vrs.add(vr); + } + } + this.verticalRulingLines = Ruling.collapseOrientedRulings(vrs); + + List hrs = new ArrayList<>(); + for (Ruling hr: this.rulings) { + if (hr.horizontal()) { + hrs.add(hr); + } + } + this.horizontalRulingLines = Ruling.collapseOrientedRulings(hrs); + + this.cleanRulings = new ArrayList<>(this.verticalRulingLines); + this.cleanRulings.addAll(this.horizontalRulingLines); + + return this.cleanRulings; + + } + + public List getVerticalRulings() { + if (this.verticalRulingLines != null) { + return this.verticalRulingLines; + } + this.getRulings(); + return this.verticalRulingLines; + } - public Page getArea(Rectangle area) { - List t = getText(area); - float min_char_width = 7; - float min_char_height = 7; - - if(t.size() > 0){ - min_char_width = Collections.min(t, new Comparator() { - @Override - public int compare(TextElement te1, TextElement te2) { - return java.lang.Float.compare(te1.width, te2.width); - }}).width; - min_char_height = Collections.min(t, new Comparator() { - @Override - public int compare(TextElement te1, TextElement te2) { - return java.lang.Float.compare(te1.height, 
te2.height); - }}).height; - } - Page rv = new Page( - area.getTop(), - area.getLeft(), - (float) area.getWidth(), - (float) area.getHeight(), - rotation, - pageNumber, - pdPage, - t, - Ruling.cropRulingsToArea(getRulings(), area), - min_char_width, - min_char_height, - spatial_index); - - rv.addRuling(new Ruling( - new Point2D.Double(rv.getLeft(), - rv.getTop()), - new Point2D.Double(rv.getRight(), - rv.getTop()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getRight(), - rv.getTop()), - new Point2D.Double(rv.getRight(), - rv.getBottom()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getRight(), - rv.getBottom()), - new Point2D.Double(rv.getLeft(), - rv.getBottom()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getLeft(), - rv.getBottom()), - new Point2D.Double(rv.getLeft(), - rv.getTop()))); - - return rv; - } - - public Page getArea(float top, float left, float bottom, float right) { - Rectangle area = new Rectangle(top, left, right - left, bottom - top); - return this.getArea(area); - } - - public List getText() { - return texts; - } - - public List getText(Rectangle area) { - return this.spatial_index.contains(area); - } - - /** @deprecated use {@linkplain #getText(Rectangle)} instead */ - @Deprecated public List getText(float top, float left, float bottom, float right) { - return this.getText(new Rectangle(top, left, right - left, bottom - top)); + public List getHorizontalRulings() { + if (this.horizontalRulingLines != null) { + return this.horizontalRulingLines; } + this.getRulings(); + return this.horizontalRulingLines; + } - public Integer getRotation() { - return rotation; + public void addRuling(Ruling r) { + if (r.oblique()) { + throw new UnsupportedOperationException("Can't add an oblique ruling"); } + this.rulings.add(r); + // clear caches + this.verticalRulingLines = null; + this.horizontalRulingLines = null; + this.cleanRulings = null; + } - public int getPageNumber() { - return pageNumber; - } + public List 
getUnprocessedRulings() { + return this.rulings; + } - /** @deprecated use {@linkplain #getText()} instead */ - @Deprecated public List getTexts() { - return texts; - } - - /** - * Returns the minimum bounding box that contains all the TextElements on this Page - */ - public Rectangle getTextBounds() { - List texts = this.getText(); - if (!texts.isEmpty()) { - return Utils.bounds(texts); - } - else { - return new Rectangle(); - } - - } + /** @deprecated with no replacement */ + @Deprecated public float getMinCharWidth() { + return minCharWidth; + } - public List getRulings() { - if (this.cleanRulings != null) { - return this.cleanRulings; - } - - if (this.rulings == null || this.rulings.isEmpty()) { - this.verticalRulingLines = new ArrayList<>(); - this.horizontalRulingLines = new ArrayList<>(); - return new ArrayList<>(); - } - - Utils.snapPoints(this.rulings, this.minCharWidth, this.minCharHeight); - - List vrs = new ArrayList<>(); - for (Ruling vr: this.rulings) { - if (vr.vertical()) { - vrs.add(vr); - } - } - this.verticalRulingLines = Ruling.collapseOrientedRulings(vrs); - - List hrs = new ArrayList<>(); - for (Ruling hr: this.rulings) { - if (hr.horizontal()) { - hrs.add(hr); - } - } - this.horizontalRulingLines = Ruling.collapseOrientedRulings(hrs); - - this.cleanRulings = new ArrayList<>(this.verticalRulingLines); - this.cleanRulings.addAll(this.horizontalRulingLines); - - return this.cleanRulings; - - } - - public List getVerticalRulings() { - if (this.verticalRulingLines != null) { - return this.verticalRulingLines; - } - this.getRulings(); - return this.verticalRulingLines; - } - - public List getHorizontalRulings() { - if (this.horizontalRulingLines != null) { - return this.horizontalRulingLines; - } - this.getRulings(); - return this.horizontalRulingLines; - } - - public void addRuling(Ruling r) { - if (r.oblique()) { - throw new UnsupportedOperationException("Can't add an oblique ruling"); - } - this.rulings.add(r); - // clear caches - 
this.verticalRulingLines = null; - this.horizontalRulingLines = null; - this.cleanRulings = null; - } - - public List getUnprocessedRulings() { - return this.rulings; - } + /** @deprecated with no replacement */ + @Deprecated public float getMinCharHeight() { + return minCharHeight; + } - /** @deprecated with no replacement */ - @Deprecated public float getMinCharWidth() { - return minCharWidth; - } + public PDPage getPDPage() { + return pdPage; + } - /** @deprecated with no replacement */ - @Deprecated public float getMinCharHeight() { - return minCharHeight; - } + public PDDocument getPDDoc() { + return pdDoc; + } - public PDPage getPDPage() { - return pdPage; - } + /** @deprecated with no replacement */ + @Deprecated public RectangleSpatialIndex getSpatialIndex() { + return this.spatial_index; + } - /** @deprecated with no replacement */ - @Deprecated public RectangleSpatialIndex getSpatialIndex() { - return this.spatial_index; - } - - /** @deprecated with no replacement */ - @Deprecated public boolean hasText() { - return this.texts.size() > 0; - } - - + /** @deprecated with no replacement */ + @Deprecated public boolean hasText() { + return this.texts.size() > 0; + } } diff --git a/src/main/java/technology/tabula/TextStripper.java b/src/main/java/technology/tabula/TextStripper.java index 429ff304..e437146e 100644 --- a/src/main/java/technology/tabula/TextStripper.java +++ b/src/main/java/technology/tabula/TextStripper.java @@ -1,6 +1,10 @@ package technology.tabula; +import org.apache.fontbox.util.BoundingBox; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDFontDescriptor; +import org.apache.pdfbox.pdmodel.font.PDType3Font; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.TextPosition; @@ -81,6 +85,54 @@ protected void writeString(String string, List textPositions) thro } } + @Override + protected float computeFontHeight(PDFont font) throws 
IOException + { + BoundingBox bbox = font.getBoundingBox(); + if (bbox.getLowerLeftY() < Short.MIN_VALUE) + { + // PDFBOX-2158 and PDFBOX-3130 + // files by Salmat eSolutions / ClibPDF Library + bbox.setLowerLeftY(- (bbox.getLowerLeftY() + 65536)); + } + // 1/2 the bbox is used as the height todo: why? + float glyphHeight = bbox.getHeight() / 2; + + // sometimes the bbox has very high values, but CapHeight is OK + PDFontDescriptor fontDescriptor = font.getFontDescriptor(); + if (fontDescriptor != null) + { + float capHeight = fontDescriptor.getCapHeight(); + if (Float.compare(capHeight, 0) != 0 && + (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = capHeight; + } + // PDFBOX-3464, PDFBOX-448: + // sometimes even CapHeight has very high value, but Ascent and Descent are ok + float ascent = fontDescriptor.getAscent(); + float descent = fontDescriptor.getDescent(); + if (ascent > 0 && descent < 0 && + ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = (ascent - descent) / 2; + } + } + + // transformPoint from glyph space -> text space + float height; + if (font instanceof PDType3Font) + { + height = font.getFontMatrix().transformPoint(0, glyphHeight).y; + } + else + { + height = glyphHeight / 1000; + } + + return height; + } + private boolean isPrintable(String s) { Character c; Character.UnicodeBlock block; @@ -92,4 +144,4 @@ private boolean isPrintable(String s) { } return printable; } -} \ No newline at end of file +} diff --git a/src/main/java/technology/tabula/Utils.java b/src/main/java/technology/tabula/Utils.java index 35c6cc4d..00814429 100644 --- a/src/main/java/technology/tabula/Utils.java +++ b/src/main/java/technology/tabula/Utils.java @@ -280,4 +280,9 @@ public static BufferedImage pageConvertToImage(PDPage page, int dpi, ImageType i } } + public static BufferedImage pageConvertToImage(PDDocument doc, PDPage page, int dpi, ImageType imageType) throws IOException { + 
PDFRenderer renderer = new PDFRenderer(doc); + return renderer.renderImageWithDPI(doc.getPages().indexOf(page), dpi, imageType); + } + } diff --git a/src/main/java/technology/tabula/debug/Debug.java b/src/main/java/technology/tabula/debug/Debug.java index f9f923b5..91609045 100644 --- a/src/main/java/technology/tabula/debug/Debug.java +++ b/src/main/java/technology/tabula/debug/Debug.java @@ -227,7 +227,7 @@ public static void renderPage(String pdfPath, String outPath, int pageNumber, Re PDPage p = document.getPage(pageNumber); - BufferedImage image = Utils.pageConvertToImage(p, 72, ImageType.RGB); + BufferedImage image = Utils.pageConvertToImage(document, p, 72, ImageType.RGB); Graphics2D g = (Graphics2D) image.getGraphics(); diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java index fe30ae8d..74898e3e 100644 --- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java @@ -106,7 +106,7 @@ public List detect(Page page) { BufferedImage image; PDPage pdfPage = page.getPDPage(); try { - image = Utils.pageConvertToImage(pdfPage, 144, ImageType.GRAY); + image = Utils.pageConvertToImage(page.getPDDoc(), pdfPage, 144, ImageType.GRAY); } catch (IOException e) { return new ArrayList<>(); } @@ -118,7 +118,7 @@ public List detect(Page page) { try { removeTextDocument = this.removeText(pdfPage); pdfPage = removeTextDocument.getPage(0); - image = Utils.pageConvertToImage(pdfPage, 144, ImageType.GRAY); + image = Utils.pageConvertToImage(removeTextDocument, pdfPage, 144, ImageType.GRAY); } catch (Exception e) { return new ArrayList<>(); } finally { diff --git a/src/test/java/technology/tabula/TestProjectionProfile.java b/src/test/java/technology/tabula/TestProjectionProfile.java index 44d488eb..4a3462f9 100644 --- a/src/test/java/technology/tabula/TestProjectionProfile.java 
+++ b/src/test/java/technology/tabula/TestProjectionProfile.java @@ -5,6 +5,7 @@ import java.util.ArrayList; import java.util.List; +import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.junit.Before; @@ -18,6 +19,7 @@ public class TestProjectionProfile { @Before public void setUpProjectionProfile() { PDPage pdPage = new PDPage(); + PDDocument pdDocument = new PDDocument(); TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f); TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f); @@ -30,7 +32,7 @@ public void setUpProjectionProfile() { rulingList.add(ruling); - page = new Page(0, 0, 1, 1, 0, 1, pdPage, textList, rulingList); + page = new Page(0, 0, 1, 1, 0, 1, pdPage, pdDocument, textList, rulingList); List rectangles = new ArrayList<>(); rectangles.add(new Rectangle(0f, 0f, 500f, 5f)); diff --git a/src/test/java/technology/tabula/TestUtils.java b/src/test/java/technology/tabula/TestUtils.java index db634d89..e68411df 100644 --- a/src/test/java/technology/tabula/TestUtils.java +++ b/src/test/java/technology/tabula/TestUtils.java @@ -124,7 +124,7 @@ public void testQuickSortLongList() { public void testJPEG2000DoesNotRaise() throws IOException { PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/jpeg2000.pdf")); PDPage page = pdf_document.getPage(0); - Utils.pageConvertToImage(page, 360, ImageType.RGB); + Utils.pageConvertToImage(pdf_document, page, 360, ImageType.RGB); } } From 3ece59c538b89b20325d216613d5283b965cc808 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Fri, 21 Aug 2020 16:51:14 -0300 Subject: [PATCH 104/200] Update to PDFBox 2.0.21 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 52f29f50..de437a39 100644 --- a/pom.xml +++ b/pom.xml @@ -250,7 +250,7 @@ 
org.apache.pdfbox pdfbox - 2.0.21-SNAPSHOT + 2.0.21 From a4e0d6c41a06bc6c06ef766562f3eeef5a38c22a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Sat, 22 Aug 2020 20:34:34 -0300 Subject: [PATCH 105/200] prepare for 1.0.4 release --- README.md | 40 +++++++++---------- pom.xml | 2 +- .../technology/tabula/CommandLineApp.java | 2 +- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 76e25663..2a08d3ac 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,7 @@ tabula-java [![Build Status](https://travis-ci.org/tabulapdf/tabula-java.svg?bra `tabula-java` is a library for extracting tables from PDF files — it is the table extraction engine that powers [Tabula](http://tabula.technology/) ([repo](http://github.com/tabulapdf/tabula)). You can use `tabula-java` as a command-line tool to programmatically extract tables from PDFs. -(This is the new version of the extraction engine; the previous code can be found at [`tabula-extractor`](http://github.com/tabulapdf/tabula-extractor).) - -© 2014-2018 Manuel Aristarán. Available under MIT License. See [`LICENSE`](LICENSE). +© 2014-2020 Manuel Aristarán. Available under MIT License. See [`LICENSE`](LICENSE). ## Download @@ -17,28 +15,28 @@ Download a version of the tabula-java's jar, with all dependencies included, tha ``` $ java -jar target/tabula-1.0.2-jar-with-dependencies.jar --help -usage: tabula [-a ] [-b ] [-c ] [-d] [-f - ] [-g] [-h] [-i] [-l] [-n] [-o ] [-p ] [-r] - [-s ] [-t] [-u] [-v] +usage: tabula [-a ] [-b ] [-c ] [-f ] + [-g] [-h] [-i] [-l] [-n] [-o ] [-p ] [-r] [-s + ] [-t] [-u] [-v] Tabula helps you extract tables from PDFs - -a,--area Portion of the page to analyze. Example: --area - 269.875,12.75,790.5,561. Accepts - top,left,bottom,right i.e. y1,x1,y2,x2 where all - values are in points relative to the top left - corner. If all values are between 0-100 - (inclusive) and preceded by '%', input will be - taken as % of actual height or width of the page. 
- Example: --area %0,0,100,50. To specify multiple - areas, -a option should be repeated. Default is - entire page + -a,--area -a/--area = Portion of the page to analyze. + Example: --area 269.875,12.75,790.5,561. + Accepts top,left,bottom,right i.e. y1,x1,y2,x2 + where all values are in points relative to the + top left corner. If all values are between + 0-100 (inclusive) and preceded by '%', input + will be taken as % of actual height or width + of the page. Example: --area %0,0,100,50. To + specify multiple areas, -a option should be + repeated. Default is entire page -b,--batch Convert all .pdfs in the provided directory. - -c,--columns X coordinates of column boundaries where values - are in points and relative to the left of the - page. Example --columns 10.1,20.2,30.3 - -d,--debug Print detected table areas instead of - processing. + -c,--columns X coordinates of column boundaries. Example + --columns 10.1,20.2,30.3. If all values are + between 0-100 (inclusive) and preceded by '%', + input will be taken as % of actual width of + the page. Example: --columns %25,50,80.6 -f,--format Output format: (CSV,TSV,JSON). Default: CSV -g,--guess Guess the portion of the page to analyze per page. 
diff --git a/pom.xml b/pom.xml index de437a39..c4d97b4b 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 technology.tabula tabula - 1.0.4-SNAPSHOT + 1.0.4 Tabula Extract tables from PDF files http://github.com/tabulapdf/tabula-java diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 6e26e7cd..e18f603a 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -30,7 +30,7 @@ public class CommandLineApp { private static String VERSION = "1.0.4"; - private static String VERSION_STRING = String.format("tabula %s (c) 2012-2018 Manuel Aristarán", VERSION); + private static String VERSION_STRING = String.format("tabula %s (c) 2012-2020 Manuel Aristarán", VERSION); private static String BANNER = "\nTabula helps you extract tables from PDFs\n\n"; private static final int RELATIVE_AREA_CALCULATION_MODE = 0; From ebc83ac2bb1a1cbe54ab8081d70f3c9fe81886ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Thu, 3 Sep 2020 10:30:58 -0300 Subject: [PATCH 106/200] Prepare for next release --- pom.xml | 14 +++++++++++++- .../java/technology/tabula/CommandLineApp.java | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index c4d97b4b..b88e7f71 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 technology.tabula tabula - 1.0.4 + 1.0.5-SNAPSHOT Tabula Extract tables from PDF files http://github.com/tabulapdf/tabula-java @@ -112,6 +112,9 @@ org.apache.maven.plugins maven-javadoc-plugin 3.2.0 + + 8 + attach-javadocs @@ -132,6 +135,12 @@ sign + + + --pinentry-mode + loopback + + @@ -187,6 +196,9 @@ org.apache.maven.plugins maven-javadoc-plugin 3.2.0 + + 8 + attach-javadocs diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index e18f603a..1957f6f0 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java 
+++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -29,7 +29,7 @@ public class CommandLineApp { - private static String VERSION = "1.0.4"; + private static String VERSION = "1.0.5"; private static String VERSION_STRING = String.format("tabula %s (c) 2012-2020 Manuel Aristarán", VERSION); private static String BANNER = "\nTabula helps you extract tables from PDFs\n\n"; From 21a4932b892acc9762a8b6632d0a6c7e178d4d49 Mon Sep 17 00:00:00 2001 From: travisbeale Date: Tue, 22 Sep 2020 09:11:15 -0400 Subject: [PATCH 107/200] Refine heuristic to filter out tall-ish whitespace elements that can throw off text chunking by considering realistic font sizes --- .../java/technology/tabula/TextStripper.java | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/main/java/technology/tabula/TextStripper.java b/src/main/java/technology/tabula/TextStripper.java index e437146e..329d45a2 100644 --- a/src/main/java/technology/tabula/TextStripper.java +++ b/src/main/java/technology/tabula/TextStripper.java @@ -13,8 +13,11 @@ import java.util.List; public class TextStripper extends PDFTextStripper { + private static final String NBSP = "\u00A0"; private static final float AVG_HEIGHT_MULT_THRESHOLD = 6.0f; + private static final float MAX_BLANK_FONT_SIZE = 40.0f; + private static final float MIN_BLANK_FONT_SIZE = 2.0f; private PDDocument document; public ArrayList textElements; public RectangleSpatialIndex spatialIndex; @@ -69,15 +72,24 @@ protected void writeString(String string, List textPositions) thro this.minCharWidth = (float) Math.min(this.minCharWidth, te.getWidth()); this.minCharHeight = (float) Math.min(this.minCharHeight, te.getHeight()); - + countHeight++; totalHeight += te.getHeight(); float avgHeight = totalHeight / countHeight; - if (avgHeight > 0 - && te.getHeight() >= (avgHeight * AVG_HEIGHT_MULT_THRESHOLD) - && (te.getText() == null || te.getText().trim().equals(""))) { - continue; + //We have an issue where tall blank cells 
throw off the row height calculation + //Introspect a blank cell a bit here to see if it should be thrown away + if ((te.getText() == null || te.getText().trim().equals(""))) { + //if the cell height is more than AVG_HEIGHT_MULT_THRESHOLDxaverage, throw it away + if (avgHeight > 0 + && te.getHeight() >= (avgHeight * AVG_HEIGHT_MULT_THRESHOLD)) { + continue; + } + + //if the font size is outside of reasonable ranges, throw it away + if (textPosition.getFontSizeInPt() > MAX_BLANK_FONT_SIZE || textPosition.getFontSizeInPt() < MIN_BLANK_FONT_SIZE) { + continue; + } } this.spatialIndex.add(te); From 00bee456790c16d9d972de1e5d513b58ced37a48 Mon Sep 17 00:00:00 2001 From: Nick Date: Sat, 3 Oct 2020 13:08:56 +0200 Subject: [PATCH 108/200] Fixes #379 --- .../technology/tabula/CommandLineApp.java | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 1957f6f0..3081b9d4 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -44,7 +44,6 @@ public class CommandLineApp { private OutputFormat outputFormat; private String password; private TableExtractor tableExtractor; - private List verticalRulingPositions; public CommandLineApp(Appendable defaultOutput, CommandLine line) throws ParseException { this.defaultOutput = defaultOutput; @@ -56,10 +55,6 @@ public CommandLineApp(Appendable defaultOutput, CommandLine line) throws ParseEx if (line.hasOption('s')) { this.password = line.getOptionValue('s'); } - if (line.hasOption('c')) { - this.verticalRulingPositions = parseFloatList(line.getOptionValue('c')); - } - } public static void main(String[] args) { @@ -165,12 +160,6 @@ private void extractFile(File pdfFile, Appendable outFile) throws ParseException while (pageIterator.hasNext()) { Page page = pageIterator.next(); - if (verticalRulingPositions != null) { - for (Float 
verticalRulingPosition: verticalRulingPositions) { - page.addRuling(new Ruling(0, verticalRulingPosition, 0.0f, (float) page.getHeight())); - } - } - if (pageAreas != null) { for (Pair areaPair : pageAreas) { Rectangle area = areaPair.getRight(); @@ -290,7 +279,15 @@ public static List parseFloatList(String option) throws ParseException { List rv = new ArrayList<>(); try { for (int i = 0; i < f.length; i++) { - rv.add(Float.parseFloat(f[i])); + final String element = f[i]; + + if(element.startsWith("%")) { + + rv.add(Float.parseFloat(element)); + } else { + rv.add(Float.parseFloat(element)); + } + } return rv; } catch (NumberFormatException e) { From 2ab8579ef7dd5a7f9913f243b6e0e766f25f99bd Mon Sep 17 00:00:00 2001 From: Nick Date: Sun, 4 Oct 2020 00:36:11 +0200 Subject: [PATCH 109/200] Bugfix (#379) --- .../java/technology/tabula/CommandLineApp.java | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 3081b9d4..2efb278e 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -160,11 +160,17 @@ private void extractFile(File pdfFile, Appendable outFile) throws ParseException while (pageIterator.hasNext()) { Page page = pageIterator.next(); + if (tableExtractor.verticalRulingPositions != null) { + for (Float verticalRulingPosition : tableExtractor.verticalRulingPositions) { + page.addRuling(new Ruling(0, verticalRulingPosition, 0.0f, (float) page.getHeight())); + } + } + if (pageAreas != null) { for (Pair areaPair : pageAreas) { Rectangle area = areaPair.getRight(); if (areaPair.getLeft() == RELATIVE_AREA_CALCULATION_MODE) { - area = new Rectangle((float) (area.getTop() / 100 * page.getHeight()), + area = new Rectangle((float) (area.getTop() / 100 * page.getHeight()), (float) (area.getLeft() / 100 * page.getWidth()), (float) (area.getWidth() / 100 * 
page.getWidth()), (float) (area.getHeight() / 100 * page.getHeight())); } @@ -220,7 +226,7 @@ private static List> whichAreas(CommandLine line) throw String[] optionValues = line.getOptionValues('a'); List> areaList = new ArrayList>(); - for (String optionValue: optionValues) { + for (String optionValue : optionValues) { int areaCalculationMode = ABSOLUTE_AREA_CALCULATION_MODE; int startIndex = 0; if (optionValue.startsWith("%")) { @@ -281,7 +287,7 @@ public static List parseFloatList(String option) throws ParseException { for (int i = 0; i < f.length; i++) { final String element = f[i]; - if(element.startsWith("%")) { + if (element.startsWith("%")) { rv.add(Float.parseFloat(element)); } else { @@ -381,6 +387,7 @@ public TableExtractor() { public void setVerticalRulingPositions(List positions) { this.verticalRulingPositions = positions; } + public void setVerticalRulingPositionsRelative(boolean relative) { this.verticalRulingPositionsRelative = relative; } @@ -435,8 +442,8 @@ public List
extractTablesBasic(Page page) { if (this.verticalRulingPositionsRelative) { // convert relative to absolute absoluteRulingPositions = new ArrayList<>(verticalRulingPositions.size()); - for (float relative: this.verticalRulingPositions) { - float absolute = (float)(relative / 100.0 * page.getWidth()); + for (float relative : this.verticalRulingPositions) { + float absolute = (float) (relative / 100.0 * page.getWidth()); absoluteRulingPositions.add(absolute); } } else { From 6ea9d3ad3f410aaf5d5f099847483286a52b609b Mon Sep 17 00:00:00 2001 From: Nick Date: Wed, 7 Oct 2020 15:16:41 +0200 Subject: [PATCH 110/200] Removes unnecessary code (#379) --- src/main/java/technology/tabula/CommandLineApp.java | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 2efb278e..0228df4b 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -284,16 +284,8 @@ public static List parseFloatList(String option) throws ParseException { String[] f = option.split(","); List rv = new ArrayList<>(); try { - for (int i = 0; i < f.length; i++) { - final String element = f[i]; - - if (element.startsWith("%")) { - - rv.add(Float.parseFloat(element)); - } else { - rv.add(Float.parseFloat(element)); - } - + for (final String element : f) { + rv.add(Float.parseFloat(element)); } return rv; } catch (NumberFormatException e) { From 6fdf5545b0897f5847c27148189a0966d9fb7b4e Mon Sep 17 00:00:00 2001 From: Zaqueu Cavalcante Date: Wed, 18 Nov 2020 10:55:14 -0300 Subject: [PATCH 111/200] Refactoring JSON serializers. 
--- .../RectangularTextContainerSerializer.java | 30 +++++----- .../tabula/json/RulingSerializer.java | 20 ------- .../tabula/json/TableSerializer.java | 57 +++++++++---------- 3 files changed, 41 insertions(+), 66 deletions(-) delete mode 100644 src/main/java/technology/tabula/json/RulingSerializer.java diff --git a/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java b/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java index f6359b91..4a61aa31 100644 --- a/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java +++ b/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java @@ -11,21 +11,19 @@ public final class RectangularTextContainerSerializer implements JsonSerializer> { - public static final RectangularTextContainerSerializer INSTANCE = new RectangularTextContainerSerializer(); - - private RectangularTextContainerSerializer() { - // singleton - } - - @Override - public JsonElement serialize(RectangularTextContainer src, Type typeOfSrc, JsonSerializationContext context) { - JsonObject result = new JsonObject(); - result.addProperty("top", src.getTop()); - result.addProperty("left", src.getLeft()); - result.addProperty("width", src.getWidth()); - result.addProperty("height", src.getHeight()); - result.addProperty("text", src.getText()); - return result; - } + public static final RectangularTextContainerSerializer INSTANCE = new RectangularTextContainerSerializer(); + + private RectangularTextContainerSerializer() {} + + @Override + public JsonElement serialize(RectangularTextContainer textContainer, Type type, JsonSerializationContext context) { + JsonObject json = new JsonObject(); + json.addProperty("top", textContainer.getTop()); + json.addProperty("left", textContainer.getLeft()); + json.addProperty("width", textContainer.getWidth()); + json.addProperty("height", textContainer.getHeight()); + json.addProperty("text", textContainer.getText()); + return json; + } } \ No 
newline at end of file diff --git a/src/main/java/technology/tabula/json/RulingSerializer.java b/src/main/java/technology/tabula/json/RulingSerializer.java deleted file mode 100644 index 19fe8bcd..00000000 --- a/src/main/java/technology/tabula/json/RulingSerializer.java +++ /dev/null @@ -1,20 +0,0 @@ -package technology.tabula.json; - -import java.lang.reflect.Type; - -import com.google.gson.JsonElement; -import com.google.gson.JsonSerializationContext; -import com.google.gson.JsonSerializer; - -import technology.tabula.Ruling; - -@Deprecated -/** @deprecated This class is unused (Aug 2017) and will be removed at some later point */ -public class RulingSerializer implements JsonSerializer { - - @Override - public JsonElement serialize(Ruling src, Type typeOfSrc, JsonSerializationContext context) { - return null; - } - -} diff --git a/src/main/java/technology/tabula/json/TableSerializer.java b/src/main/java/technology/tabula/json/TableSerializer.java index ba04a7cf..2ba20bcd 100644 --- a/src/main/java/technology/tabula/json/TableSerializer.java +++ b/src/main/java/technology/tabula/json/TableSerializer.java @@ -14,35 +14,32 @@ public final class TableSerializer implements JsonSerializer
{ - public static final TableSerializer INSTANCE = new TableSerializer(); - - private TableSerializer() { - // singleton - } - - @Override - public JsonElement serialize(Table src, Type typeOfSrc, JsonSerializationContext context) { - - JsonObject result = new JsonObject(); - - result.addProperty("extraction_method", src.getExtractionMethod()); - result.addProperty("top", src.getTop()); - result.addProperty("left", src.getLeft()); - result.addProperty("width", src.getWidth()); - result.addProperty("height", src.getHeight()); - result.addProperty("right", src.getRight()); - result.addProperty("bottom", src.getBottom()); - - JsonArray data; - result.add("data", data = new JsonArray()); - - for (List srcRow : src.getRows()) { - JsonArray row = new JsonArray(); - for (RectangularTextContainer textChunk : srcRow) row.add(context.serialize(textChunk)); - data.add(row); - } - - return result; - } + public static final TableSerializer INSTANCE = new TableSerializer(); + + private TableSerializer() {} + + @Override + public JsonElement serialize(Table table, Type type, JsonSerializationContext context) { + JsonObject json = new JsonObject(); + JsonArray data = new JsonArray(); + + json.addProperty("extraction_method", table.getExtractionMethod()); + json.addProperty("top", table.getTop()); + json.addProperty("left", table.getLeft()); + json.addProperty("width", table.getWidth()); + json.addProperty("height", table.getHeight()); + json.addProperty("right", table.getRight()); + json.addProperty("bottom", table.getBottom()); + json.add("data", data); + + for (List tableRow : table.getRows()) { + JsonArray jsonRow = new JsonArray(); + for (RectangularTextContainer textChunk : tableRow) + jsonRow.add(context.serialize(textChunk)); + data.add(jsonRow); + } + + return json; + } } From a9932a82dea23fe257368219684ccbc7faa0b27f Mon Sep 17 00:00:00 2001 From: Zaqueu Cavalcante Date: Wed, 18 Nov 2020 11:13:43 -0300 Subject: [PATCH 112/200] Refactoring writers. 
--- .../technology/tabula/writers/CSVWriter.java | 54 +++++++------- .../technology/tabula/writers/JSONWriter.java | 72 +++++++++++-------- .../technology/tabula/writers/TSVWriter.java | 6 +- .../technology/tabula/writers/Writer.java | 3 + 4 files changed, 76 insertions(+), 59 deletions(-) diff --git a/src/main/java/technology/tabula/writers/CSVWriter.java b/src/main/java/technology/tabula/writers/CSVWriter.java index 2466cc6e..682397b8 100644 --- a/src/main/java/technology/tabula/writers/CSVWriter.java +++ b/src/main/java/technology/tabula/writers/CSVWriter.java @@ -13,33 +13,35 @@ public class CSVWriter implements Writer { - public CSVWriter() { - this(CSVFormat.EXCEL); - } - - protected CSVWriter(CSVFormat format) { - this.format = format; - } - private final CSVFormat format; - @Override - public void write(Appendable out, Table table) throws IOException { - write(out, Collections.singletonList(table)); - } - - @Override - public void write(Appendable out, List
tables) throws IOException { - try (CSVPrinter printer = new CSVPrinter(out, format)) { - for (Table table : tables) { - for (List row : table.getRows()) { - List cells = new ArrayList<>(row.size()); - for (RectangularTextContainer tc : row) cells.add(tc.getText()); - printer.printRecord(cells); - } - } - printer.flush(); - } - } + public CSVWriter() { + this(CSVFormat.EXCEL); + } + + protected CSVWriter(CSVFormat format) { + this.format = format; + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + @Override + public void write(Appendable out, Table table) throws IOException { + write(out, Collections.singletonList(table)); + } + + @Override + public void write(Appendable out, List
tables) throws IOException { + try (CSVPrinter printer = new CSVPrinter(out, format)) { + for (Table table : tables) { + for (List row : table.getRows()) { + List cells = new ArrayList<>(row.size()); + for (RectangularTextContainer cell : row) + cells.add(cell.getText()); + printer.printRecord(cells); + } + } + printer.flush(); + } + } } diff --git a/src/main/java/technology/tabula/writers/JSONWriter.java b/src/main/java/technology/tabula/writers/JSONWriter.java index 59e9b274..bb566f2d 100644 --- a/src/main/java/technology/tabula/writers/JSONWriter.java +++ b/src/main/java/technology/tabula/writers/JSONWriter.java @@ -1,9 +1,5 @@ package technology.tabula.writers; -import java.io.IOException; -import java.lang.reflect.Modifier; -import java.util.List; - import com.google.gson.ExclusionStrategy; import com.google.gson.FieldAttributes; import com.google.gson.Gson; @@ -17,33 +13,49 @@ import technology.tabula.json.RectangularTextContainerSerializer; import technology.tabula.json.TableSerializer; +import java.io.IOException; +import java.util.List; + +import static java.lang.reflect.Modifier.PUBLIC; + public class JSONWriter implements Writer { - private static final ExclusionStrategy ALLCLASSES_SKIPNONPUBLIC = new ExclusionStrategy() { - @Override public boolean shouldSkipClass(Class c) { return false; } - @Override public boolean shouldSkipField(FieldAttributes fa) { return !fa.hasModifier(Modifier.PUBLIC); } - }; - - @Override - public void write(Appendable out, Table table) throws IOException { - out.append(gson().toJson(table, Table.class)); - } - - @Override public void write(Appendable out, List
tables) throws IOException { - Gson gson = gson(); - JsonArray array = new JsonArray(); - for (Table table : tables) array.add(gson.toJsonTree(table, Table.class)); - out.append(gson.toJson(array)); - } - - private static Gson gson() { - return new GsonBuilder() - .addSerializationExclusionStrategy(ALLCLASSES_SKIPNONPUBLIC) - .registerTypeAdapter(Table.class, TableSerializer.INSTANCE) - .registerTypeAdapter(RectangularTextContainer.class, RectangularTextContainerSerializer.INSTANCE) - .registerTypeAdapter(Cell.class, RectangularTextContainerSerializer.INSTANCE) - .registerTypeAdapter(TextChunk.class, RectangularTextContainerSerializer.INSTANCE) - .create(); - } + private static final ExclusionStrategy ALL_CLASSES_SKIPPING_NON_PUBLIC_FIELDS = new ExclusionStrategy() { + @Override + public boolean shouldSkipClass(Class c) { + return false; + } + + @Override + public boolean shouldSkipField(FieldAttributes fieldAttributes) { + return !fieldAttributes.hasModifier(PUBLIC); + } + }; + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + @Override + public void write(Appendable out, Table table) throws IOException { + out.append(gson().toJson(table, Table.class)); + } + + @Override + public void write(Appendable out, List
tables) throws IOException { + Gson gson = gson(); + JsonArray jsonElements = new JsonArray(); + for (Table table : tables) + jsonElements.add(gson.toJsonTree(table, Table.class)); + out.append(gson.toJson(jsonElements)); + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + private static Gson gson() { + return new GsonBuilder() + .addSerializationExclusionStrategy(ALL_CLASSES_SKIPPING_NON_PUBLIC_FIELDS) + .registerTypeAdapter(Table.class, TableSerializer.INSTANCE) + .registerTypeAdapter(RectangularTextContainer.class, RectangularTextContainerSerializer.INSTANCE) + .registerTypeAdapter(Cell.class, RectangularTextContainerSerializer.INSTANCE) + .registerTypeAdapter(TextChunk.class, RectangularTextContainerSerializer.INSTANCE) + .create(); + } } diff --git a/src/main/java/technology/tabula/writers/TSVWriter.java b/src/main/java/technology/tabula/writers/TSVWriter.java index c3ddb737..115d0347 100644 --- a/src/main/java/technology/tabula/writers/TSVWriter.java +++ b/src/main/java/technology/tabula/writers/TSVWriter.java @@ -4,8 +4,8 @@ public class TSVWriter extends CSVWriter { - public TSVWriter() { - super(CSVFormat.TDF); - } + public TSVWriter() { + super(CSVFormat.TDF); + } } diff --git a/src/main/java/technology/tabula/writers/Writer.java b/src/main/java/technology/tabula/writers/Writer.java index 78f4faa4..99b708c6 100644 --- a/src/main/java/technology/tabula/writers/Writer.java +++ b/src/main/java/technology/tabula/writers/Writer.java @@ -6,6 +6,9 @@ import technology.tabula.Table; public interface Writer { + void write(Appendable out, Table table) throws IOException; + void write(Appendable out, List
tables) throws IOException; + } From 008c395b9af49655e829eff58851c9c08fc2bf7c Mon Sep 17 00:00:00 2001 From: Zaqueu Cavalcante Date: Wed, 18 Nov 2020 12:26:13 -0300 Subject: [PATCH 113/200] Refactoring classes related to a pdf page. --- .../technology/tabula/ObjectExtractor.java | 49 +++++++++---------- src/main/java/technology/tabula/Page.java | 7 +++ .../java/technology/tabula/PageIterator.java | 22 ++++----- 3 files changed, 40 insertions(+), 38 deletions(-) diff --git a/src/main/java/technology/tabula/ObjectExtractor.java b/src/main/java/technology/tabula/ObjectExtractor.java index 3998ba6f..97960243 100644 --- a/src/main/java/technology/tabula/ObjectExtractor.java +++ b/src/main/java/technology/tabula/ObjectExtractor.java @@ -13,55 +13,50 @@ public ObjectExtractor(PDDocument pdfDocument) { this.pdfDocument = pdfDocument; } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // protected Page extractPage(Integer pageNumber) throws IOException { - - if (pageNumber > this.pdfDocument.getNumberOfPages() || pageNumber < 1) { - throw new java.lang.IndexOutOfBoundsException( - "Page number does not exist"); + if (pageNumber > pdfDocument.getNumberOfPages() || pageNumber < 1) { + throw new java.lang.IndexOutOfBoundsException("Page number does not exist."); } + PDPage page = pdfDocument.getPage(pageNumber - 1); - PDPage p = this.pdfDocument.getPage(pageNumber - 1); - - ObjectExtractorStreamEngine se = new ObjectExtractorStreamEngine(p); - se.processPage(p); - - - TextStripper pdfTextStripper = new TextStripper(this.pdfDocument, pageNumber); + ObjectExtractorStreamEngine streamEngine = new ObjectExtractorStreamEngine(page); + streamEngine.processPage(page); - pdfTextStripper.process(); + TextStripper textStripper = new TextStripper(pdfDocument, pageNumber); + textStripper.process(); - Utils.sort(pdfTextStripper.textElements, Rectangle.ILL_DEFINED_ORDER); + Utils.sort(textStripper.textElements, Rectangle.ILL_DEFINED_ORDER); - float w, h; - int 
pageRotation = p.getRotation(); - if (Math.abs(pageRotation) == 90 || Math.abs(pageRotation) == 270) { - w = p.getCropBox().getHeight(); - h = p.getCropBox().getWidth(); + float width, height; + int rotation = page.getRotation(); + if (Math.abs(rotation) == 90 || Math.abs(rotation) == 270) { + width = page.getCropBox().getHeight(); + height = page.getCropBox().getWidth(); } else { - w = p.getCropBox().getWidth(); - h = p.getCropBox().getHeight(); + width = page.getCropBox().getWidth(); + height = page.getCropBox().getHeight(); } - return new Page(0, 0, w, h, pageRotation, pageNumber, p, this.pdfDocument, pdfTextStripper.textElements, - se.rulings, pdfTextStripper.minCharWidth, pdfTextStripper.minCharHeight, pdfTextStripper.spatialIndex); + return new Page(0, 0, width, height, rotation, pageNumber, page, pdfDocument, streamEngine, textStripper); } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // public PageIterator extract(Iterable pages) { return new PageIterator(this, pages); } public PageIterator extract() { - return extract(Utils.range(1, this.pdfDocument.getNumberOfPages() + 1)); + return extract(Utils.range(1, pdfDocument.getNumberOfPages() + 1)); } public Page extract(int pageNumber) { return extract(Utils.range(pageNumber, pageNumber + 1)).next(); } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // public void close() throws IOException { - this.pdfDocument.close(); + pdfDocument.close(); } - - - + } diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index 3207bcb9..ef04c29f 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -39,6 +39,13 @@ public Page(float top, float left, float width, float height, int rotation, int this.rulings = rulings; } + public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, + ObjectExtractorStreamEngine 
streamEngine, TextStripper textStripper) { + this(top, left, width, height, rotation, number, pdPage, doc, textStripper.textElements, streamEngine.rulings); + this.minCharWidth = textStripper.minCharWidth; + this.minCharHeight = textStripper.minCharHeight; + this.spatial_index = textStripper.spatialIndex; + } public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, PDDocument doc, List characters, List rulings, diff --git a/src/main/java/technology/tabula/PageIterator.java b/src/main/java/technology/tabula/PageIterator.java index 5fec2a77..052ed54a 100644 --- a/src/main/java/technology/tabula/PageIterator.java +++ b/src/main/java/technology/tabula/PageIterator.java @@ -5,39 +5,39 @@ public class PageIterator implements Iterator { - private ObjectExtractor oe; + private ObjectExtractor objectExtractor; private Iterator pageIndexIterator; - - public PageIterator(ObjectExtractor oe, Iterable pages) { + + public PageIterator(ObjectExtractor objectExtractor, Iterable pages) { super(); - this.oe = oe; + this.objectExtractor = objectExtractor; this.pageIndexIterator = pages.iterator(); } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // @Override public boolean hasNext() { - return this.pageIndexIterator.hasNext(); + return pageIndexIterator.hasNext(); } @Override public Page next() { - Page page = null; + Page nextPage = null; if (!this.hasNext()) { throw new IllegalStateException(); } try { - page = oe.extractPage(this.pageIndexIterator.next()); + nextPage = objectExtractor.extractPage(pageIndexIterator.next()); } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); } - return page; + return nextPage; } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // @Override public void remove() { throw new UnsupportedOperationException(); - } -} \ No newline at end of file +} From c0592627efc4f608343b5c3f4800cb3f7e2e3900 Mon Sep 17 00:00:00 
2001 From: Zaqueu Cavalcante Date: Wed, 18 Nov 2020 13:36:41 -0300 Subject: [PATCH 114/200] Grouping methods by context. --- src/main/java/technology/tabula/Page.java | 484 ++++++++++++---------- 1 file changed, 256 insertions(+), 228 deletions(-) diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index ef04c29f..cb03dc98 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -13,243 +13,271 @@ // TODO: this class should probably be called "PageArea" or something like that public class Page extends Rectangle { - private Integer rotation; - private int pageNumber; - private List texts; - private List rulings, cleanRulings = null, verticalRulingLines = null, horizontalRulingLines = null; - private float minCharWidth; - private float minCharHeight; - private RectangleSpatialIndex spatial_index; - private PDPage pdPage; - private PDDocument pdDoc; - - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, PDDocument doc) { - super(top, left, width, height); - this.rotation = rotation; - this.pageNumber = page_number; - this.pdPage = pdPage; - this.pdDoc = doc; - } - - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, PDDocument doc, - List characters, List rulings) { - - this(top, left, width, height, rotation, page_number, pdPage, doc); - this.texts = characters; - this.rulings = rulings; - } - - public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, - ObjectExtractorStreamEngine streamEngine, TextStripper textStripper) { - this(top, left, width, height, rotation, number, pdPage, doc, textStripper.textElements, streamEngine.rulings); - this.minCharWidth = textStripper.minCharWidth; - this.minCharHeight = textStripper.minCharHeight; - this.spatial_index = textStripper.spatialIndex; - } - - public 
Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, PDDocument doc, - List characters, List rulings, - float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { - - this(top, left, width, height, rotation, page_number, pdPage, doc, characters, rulings); - this.minCharHeight = minCharHeight; - this.minCharWidth = minCharWidth; - this.spatial_index = index; - } - - public Page getArea(Rectangle area) { - List t = getText(area); - float min_char_width = 7; - float min_char_height = 7; - - if(t.size() > 0){ - min_char_width = Collections.min(t, new Comparator() { - @Override - public int compare(TextElement te1, TextElement te2) { - return java.lang.Float.compare(te1.width, te2.width); - }}).width; - min_char_height = Collections.min(t, new Comparator() { - @Override - public int compare(TextElement te1, TextElement te2) { - return java.lang.Float.compare(te1.height, te2.height); - }}).height; - } - Page rv = new Page( - area.getTop(), - area.getLeft(), - (float) area.getWidth(), - (float) area.getHeight(), - rotation, - pageNumber, - pdPage, - pdDoc, - t, - Ruling.cropRulingsToArea(getRulings(), area), - min_char_width, - min_char_height, - spatial_index); - - rv.addRuling(new Ruling( - new Point2D.Double(rv.getLeft(), - rv.getTop()), - new Point2D.Double(rv.getRight(), - rv.getTop()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getRight(), - rv.getTop()), - new Point2D.Double(rv.getRight(), - rv.getBottom()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getRight(), - rv.getBottom()), - new Point2D.Double(rv.getLeft(), - rv.getBottom()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getLeft(), - rv.getBottom()), - new Point2D.Double(rv.getLeft(), - rv.getTop()))); - - return rv; - } - - public Page getArea(float top, float left, float bottom, float right) { - Rectangle area = new Rectangle(top, left, right - left, bottom - top); - return this.getArea(area); - } - - public List 
getText() { - return texts; - } - - public List getText(Rectangle area) { - return this.spatial_index.contains(area); - } - - /** @deprecated use {@linkplain #getText(Rectangle)} instead */ - @Deprecated public List getText(float top, float left, float bottom, float right) { - return this.getText(new Rectangle(top, left, right - left, bottom - top)); - } - - public Integer getRotation() { - return rotation; - } - - public int getPageNumber() { - return pageNumber; - } - - /** @deprecated use {@linkplain #getText()} instead */ - @Deprecated public List getTexts() { - return texts; - } - - /** - * Returns the minimum bounding box that contains all the TextElements on this Page - */ - public Rectangle getTextBounds() { - List texts = this.getText(); - if (!texts.isEmpty()) { - return Utils.bounds(texts); - } - else { - return new Rectangle(); - } - - } - - public List getRulings() { - if (this.cleanRulings != null) { - return this.cleanRulings; - } - - if (this.rulings == null || this.rulings.isEmpty()) { - this.verticalRulingLines = new ArrayList<>(); - this.horizontalRulingLines = new ArrayList<>(); - return new ArrayList<>(); - } - - Utils.snapPoints(this.rulings, this.minCharWidth, this.minCharHeight); - - List vrs = new ArrayList<>(); - for (Ruling vr: this.rulings) { - if (vr.vertical()) { - vrs.add(vr); - } - } - this.verticalRulingLines = Ruling.collapseOrientedRulings(vrs); - - List hrs = new ArrayList<>(); - for (Ruling hr: this.rulings) { - if (hr.horizontal()) { - hrs.add(hr); - } - } - this.horizontalRulingLines = Ruling.collapseOrientedRulings(hrs); - - this.cleanRulings = new ArrayList<>(this.verticalRulingLines); - this.cleanRulings.addAll(this.horizontalRulingLines); - - return this.cleanRulings; - - } - - public List getVerticalRulings() { - if (this.verticalRulingLines != null) { - return this.verticalRulingLines; - } - this.getRulings(); - return this.verticalRulingLines; - } + private int number; + private Integer rotation; + private float 
minCharWidth; + private float minCharHeight; - public List getHorizontalRulings() { - if (this.horizontalRulingLines != null) { - return this.horizontalRulingLines; + private List textElements; + + private List rulings, + cleanRulings = null, + verticalRulingLines = null, + horizontalRulingLines = null; + + private PDPage pdPage; + private PDDocument pdDoc; + + private RectangleSpatialIndex spatialIndex; + + public Page(float top, float left, float width, float height, int rotation, int pageNumber, PDPage pdPage, PDDocument doc) { + super(top, left, width, height); + this.rotation = rotation; + this.number = pageNumber; + this.pdPage = pdPage; + this.pdDoc = doc; + } + + public Page(float top, float left, float width, float height, int rotation, int pageNumber, PDPage pdPage, PDDocument doc, + List characters, List rulings) { + this(top, left, width, height, rotation, pageNumber, pdPage, doc); + this.textElements = characters; + this.rulings = rulings; + } + + public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, + ObjectExtractorStreamEngine streamEngine, TextStripper textStripper) { + this(top, left, width, height, rotation, number, pdPage, doc, textStripper.textElements, streamEngine.rulings); + this.minCharWidth = textStripper.minCharWidth; + this.minCharHeight = textStripper.minCharHeight; + this.spatialIndex = textStripper.spatialIndex; + } + + public Page(float top, float left, float width, float height, int rotation, int pageNumber, PDPage pdPage, PDDocument doc, + List characters, List rulings, + float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { + this(top, left, width, height, rotation, pageNumber, pdPage, doc, characters, rulings); + this.minCharHeight = minCharHeight; + this.minCharWidth = minCharWidth; + this.spatialIndex = index; + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + public Page getArea(Rectangle area) { + List t = 
getText(area); + float min_char_width = 7; + float min_char_height = 7; + + if (t.size() > 0) { + min_char_width = Collections.min(t, new Comparator() { + @Override + public int compare(TextElement te1, TextElement te2) { + return java.lang.Float.compare(te1.width, te2.width); + } + }).width; + min_char_height = Collections.min(t, new Comparator() { + @Override + public int compare(TextElement te1, TextElement te2) { + return java.lang.Float.compare(te1.height, te2.height); + } + }).height; + } + Page rv = new Page( + area.getTop(), + area.getLeft(), + (float) area.getWidth(), + (float) area.getHeight(), + rotation, number, + pdPage, + pdDoc, + t, + Ruling.cropRulingsToArea(getRulings(), area), + min_char_width, + min_char_height, + spatialIndex); + + rv.addRuling(new Ruling( + new Point2D.Double(rv.getLeft(), + rv.getTop()), + new Point2D.Double(rv.getRight(), + rv.getTop()))); + rv.addRuling(new Ruling( + new Point2D.Double(rv.getRight(), + rv.getTop()), + new Point2D.Double(rv.getRight(), + rv.getBottom()))); + rv.addRuling(new Ruling( + new Point2D.Double(rv.getRight(), + rv.getBottom()), + new Point2D.Double(rv.getLeft(), + rv.getBottom()))); + rv.addRuling(new Ruling( + new Point2D.Double(rv.getLeft(), + rv.getBottom()), + new Point2D.Double(rv.getLeft(), + rv.getTop()))); + + return rv; + } + + public Page getArea(float top, float left, float bottom, float right) { + Rectangle area = new Rectangle(top, left, right - left, bottom - top); + return getArea(area); + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + public Integer getRotation() { + return rotation; + } + + public int getPageNumber() { + return number; + } + + /** + * @deprecated with no replacement + */ + @Deprecated + public float getMinCharWidth() { + return minCharWidth; + } + + /** + * @deprecated with no replacement + */ + @Deprecated + public float getMinCharHeight() { + return minCharHeight; } - this.getRulings(); - return this.horizontalRulingLines; - 
} - public void addRuling(Ruling r) { - if (r.oblique()) { - throw new UnsupportedOperationException("Can't add an oblique ruling"); + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + public List getText() { + return textElements; } - this.rulings.add(r); - // clear caches - this.verticalRulingLines = null; - this.horizontalRulingLines = null; - this.cleanRulings = null; - } - public List getUnprocessedRulings() { - return this.rulings; - } + public List getText(Rectangle area) { + return spatialIndex.contains(area); + } - /** @deprecated with no replacement */ - @Deprecated public float getMinCharWidth() { - return minCharWidth; - } + /** + * @deprecated use {@linkplain #getText(Rectangle)} instead + */ + @Deprecated + public List getText(float top, float left, float bottom, float right) { + return getText(new Rectangle(top, left, right - left, bottom - top)); + } - /** @deprecated with no replacement */ - @Deprecated public float getMinCharHeight() { - return minCharHeight; - } + /** + * @deprecated use {@linkplain #getText()} instead + */ + @Deprecated + public List getTexts() { + return textElements; + } - public PDPage getPDPage() { - return pdPage; - } + /** + * Returns the minimum bounding box that contains all the TextElements on this Page + */ + public Rectangle getTextBounds() { + List texts = this.getText(); + if (!texts.isEmpty()) { + return Utils.bounds(texts); + } else { + return new Rectangle(); + } + } - public PDDocument getPDDoc() { - return pdDoc; - } + /** + * @deprecated with no replacement + */ + @Deprecated + public boolean hasText() { + return textElements.size() > 0; + } - /** @deprecated with no replacement */ - @Deprecated public RectangleSpatialIndex getSpatialIndex() { - return this.spatial_index; - } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + public List getRulings() { + if (this.cleanRulings != null) { + return this.cleanRulings; + } + + if (this.rulings == null 
|| this.rulings.isEmpty()) { + this.verticalRulingLines = new ArrayList<>(); + this.horizontalRulingLines = new ArrayList<>(); + return new ArrayList<>(); + } + + Utils.snapPoints(this.rulings, this.minCharWidth, this.minCharHeight); + + List vrs = new ArrayList<>(); + for (Ruling vr : this.rulings) { + if (vr.vertical()) { + vrs.add(vr); + } + } + this.verticalRulingLines = Ruling.collapseOrientedRulings(vrs); + + List hrs = new ArrayList<>(); + for (Ruling hr : this.rulings) { + if (hr.horizontal()) { + hrs.add(hr); + } + } + this.horizontalRulingLines = Ruling.collapseOrientedRulings(hrs); + + this.cleanRulings = new ArrayList<>(this.verticalRulingLines); + this.cleanRulings.addAll(this.horizontalRulingLines); + + return this.cleanRulings; + } + + public List getVerticalRulings() { + if (this.verticalRulingLines != null) { + return this.verticalRulingLines; + } + this.getRulings(); + return this.verticalRulingLines; + } + + public List getHorizontalRulings() { + if (this.horizontalRulingLines != null) { + return this.horizontalRulingLines; + } + this.getRulings(); + return this.horizontalRulingLines; + } + + public void addRuling(Ruling r) { + if (r.oblique()) { + throw new UnsupportedOperationException("Can't add an oblique ruling"); + } + this.rulings.add(r); + // clear caches + this.verticalRulingLines = null; + this.horizontalRulingLines = null; + this.cleanRulings = null; + } + + public List getUnprocessedRulings() { + return rulings; + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + public PDPage getPDPage() { + return pdPage; + } + + public PDDocument getPDDoc() { + return pdDoc; + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + /** + * @deprecated with no replacement + */ + @Deprecated + public RectangleSpatialIndex getSpatialIndex() { + return spatialIndex; + } - /** @deprecated with no replacement */ - @Deprecated public boolean hasText() { - return this.texts.size() > 0; - 
} } From 62e6b5fc81d7c6536b886cc4406aafd709a57d95 Mon Sep 17 00:00:00 2001 From: Zaqueu Cavalcante Date: Wed, 18 Nov 2020 14:21:12 -0300 Subject: [PATCH 115/200] Refactoring methods related to 'rulings' attributes. --- src/main/java/technology/tabula/Page.java | 84 +++++++++++++---------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index cb03dc98..ce3841c8 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -20,6 +20,7 @@ public class Page extends Rectangle { private List textElements; + // TODO: Create a class for 'List ' that encapsulates all of these lists and their behaviors? private List rulings, cleanRulings = null, verticalRulingLines = null, @@ -197,65 +198,74 @@ public boolean hasText() { // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // public List getRulings() { - if (this.cleanRulings != null) { - return this.cleanRulings; + if (cleanRulings != null) { + return cleanRulings; } - if (this.rulings == null || this.rulings.isEmpty()) { - this.verticalRulingLines = new ArrayList<>(); - this.horizontalRulingLines = new ArrayList<>(); + if (rulings == null || rulings.isEmpty()) { + verticalRulingLines = new ArrayList<>(); + horizontalRulingLines = new ArrayList<>(); return new ArrayList<>(); } - Utils.snapPoints(this.rulings, this.minCharWidth, this.minCharHeight); + // TODO: Move as a static method to the Ruling class? 
+ Utils.snapPoints(rulings, minCharWidth, minCharHeight); - List vrs = new ArrayList<>(); - for (Ruling vr : this.rulings) { - if (vr.vertical()) { - vrs.add(vr); + verticalRulingLines = getCollapsedVerticalRulings(); + horizontalRulingLines = getCollapsedHorizontalRulings(); + + cleanRulings = new ArrayList<>(verticalRulingLines); + cleanRulings.addAll(horizontalRulingLines); + + return cleanRulings; + } + + // TODO: Create a class for 'List ' and encapsulate these behaviors within it? + private List getCollapsedVerticalRulings() { + List verticalRulings = new ArrayList<>(); + for (Ruling ruling : rulings) { + if (ruling.vertical()) { + verticalRulings.add(ruling); } } - this.verticalRulingLines = Ruling.collapseOrientedRulings(vrs); + return Ruling.collapseOrientedRulings(verticalRulings); + } - List hrs = new ArrayList<>(); - for (Ruling hr : this.rulings) { - if (hr.horizontal()) { - hrs.add(hr); + private List getCollapsedHorizontalRulings() { + List horizontalRulings = new ArrayList<>(); + for (Ruling ruling : rulings) { + if (ruling.horizontal()) { + horizontalRulings.add(ruling); } } - this.horizontalRulingLines = Ruling.collapseOrientedRulings(hrs); - - this.cleanRulings = new ArrayList<>(this.verticalRulingLines); - this.cleanRulings.addAll(this.horizontalRulingLines); - - return this.cleanRulings; + return Ruling.collapseOrientedRulings(horizontalRulings); } public List getVerticalRulings() { - if (this.verticalRulingLines != null) { - return this.verticalRulingLines; + if (verticalRulingLines != null) { + return verticalRulingLines; } - this.getRulings(); - return this.verticalRulingLines; + getRulings(); + return verticalRulingLines; } public List getHorizontalRulings() { - if (this.horizontalRulingLines != null) { - return this.horizontalRulingLines; + if (horizontalRulingLines != null) { + return horizontalRulingLines; } - this.getRulings(); - return this.horizontalRulingLines; + getRulings(); + return horizontalRulingLines; } - public void 
addRuling(Ruling r) { - if (r.oblique()) { - throw new UnsupportedOperationException("Can't add an oblique ruling"); + public void addRuling(Ruling ruling) { + if (ruling.oblique()) { + throw new UnsupportedOperationException("Can't add an oblique ruling."); } - this.rulings.add(r); - // clear caches - this.verticalRulingLines = null; - this.horizontalRulingLines = null; - this.cleanRulings = null; + rulings.add(ruling); + // Clear caches: + verticalRulingLines = null; + horizontalRulingLines = null; + cleanRulings = null; } public List getUnprocessedRulings() { From c3c3b9d5832e58d8706e91fa702812874eab7df6 Mon Sep 17 00:00:00 2001 From: Zaqueu Cavalcante Date: Wed, 18 Nov 2020 15:37:27 -0300 Subject: [PATCH 116/200] Refactoring getArea() method. --- src/main/java/technology/tabula/Page.java | 109 ++++++++++------------ 1 file changed, 50 insertions(+), 59 deletions(-) diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index ce3841c8..87d7c01d 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -9,6 +9,9 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; +import static java.lang.Float.compare; +import static java.util.Collections.min; + @SuppressWarnings("serial") // TODO: this class should probably be called "PageArea" or something like that public class Page extends Rectangle { @@ -31,17 +34,20 @@ public class Page extends Rectangle { private RectangleSpatialIndex spatialIndex; - public Page(float top, float left, float width, float height, int rotation, int pageNumber, PDPage pdPage, PDDocument doc) { + private static final float DEFAULT_MIN_CHAR_LENGTH = 7; + + // TODO: Use a creational design patterns here? 
+ public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc) { super(top, left, width, height); this.rotation = rotation; - this.number = pageNumber; + this.number = number; this.pdPage = pdPage; this.pdDoc = doc; } - public Page(float top, float left, float width, float height, int rotation, int pageNumber, PDPage pdPage, PDDocument doc, + public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, List characters, List rulings) { - this(top, left, width, height, rotation, pageNumber, pdPage, doc); + this(top, left, width, height, rotation, number, pdPage, doc); this.textElements = characters; this.rulings = rulings; } @@ -54,10 +60,10 @@ public Page(float top, float left, float width, float height, int rotation, int this.spatialIndex = textStripper.spatialIndex; } - public Page(float top, float left, float width, float height, int rotation, int pageNumber, PDPage pdPage, PDDocument doc, + public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, List characters, List rulings, float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { - this(top, left, width, height, rotation, pageNumber, pdPage, doc, characters, rulings); + this(top, left, width, height, rotation, number, pdPage, doc, characters, rulings); this.minCharHeight = minCharHeight; this.minCharWidth = minCharWidth; this.spatialIndex = index; @@ -65,60 +71,44 @@ public Page(float top, float left, float width, float height, int rotation, int // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // public Page getArea(Rectangle area) { - List t = getText(area); - float min_char_width = 7; - float min_char_height = 7; - - if (t.size() > 0) { - min_char_width = Collections.min(t, new Comparator() { - @Override - public int compare(TextElement te1, TextElement te2) { - return 
java.lang.Float.compare(te1.width, te2.width); - } - }).width; - min_char_height = Collections.min(t, new Comparator() { - @Override - public int compare(TextElement te1, TextElement te2) { - return java.lang.Float.compare(te1.height, te2.height); - } - }).height; - } - Page rv = new Page( - area.getTop(), - area.getLeft(), - (float) area.getWidth(), - (float) area.getHeight(), - rotation, number, - pdPage, - pdDoc, - t, + List areaTextElements = getText(area); + + float minimumCharWidth = getMinimumCharWidthFrom(areaTextElements); + float minimumCharHeight = getMinimumCharHeightFrom(areaTextElements); + + Page page = new Page(area.getTop(), area.getLeft(), (float) area.getWidth(), (float) area.getHeight(), + rotation, number, pdPage, pdDoc, areaTextElements, Ruling.cropRulingsToArea(getRulings(), area), - min_char_width, - min_char_height, - spatialIndex); - - rv.addRuling(new Ruling( - new Point2D.Double(rv.getLeft(), - rv.getTop()), - new Point2D.Double(rv.getRight(), - rv.getTop()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getRight(), - rv.getTop()), - new Point2D.Double(rv.getRight(), - rv.getBottom()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getRight(), - rv.getBottom()), - new Point2D.Double(rv.getLeft(), - rv.getBottom()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getLeft(), - rv.getBottom()), - new Point2D.Double(rv.getLeft(), - rv.getTop()))); - - return rv; + minimumCharWidth, minimumCharHeight, spatialIndex); + + addBorderRulingsTo(page); + + return page; + } + + private float getMinimumCharWidthFrom(List areaTextElements) { + if (!areaTextElements.isEmpty()) { + return min(areaTextElements, (te1, te2) -> compare(te1.width, te2.width)).width; + } + return DEFAULT_MIN_CHAR_LENGTH; + } + + private float getMinimumCharHeightFrom(List areaTextElements) { + if (!areaTextElements.isEmpty()) { + return min(areaTextElements, (te1, te2) -> compare(te1.height, te2.height)).height; + } + return DEFAULT_MIN_CHAR_LENGTH; + } + + 
private void addBorderRulingsTo(Page page) { + Point2D.Double leftTop = new Point2D.Double(page.getLeft(), page.getTop()), + rightTop = new Point2D.Double(page.getRight(), page.getTop()), + rightBottom = new Point2D.Double(page.getRight(), page.getBottom()), + leftBottom = new Point2D.Double(page.getLeft(), page.getBottom()); + page.addRuling(new Ruling(leftTop, rightTop)); + page.addRuling(new Ruling(rightTop, rightBottom)); + page.addRuling(new Ruling(rightBottom, leftBottom)); + page.addRuling(new Ruling(leftBottom, leftTop)); } public Page getArea(float top, float left, float bottom, float right) { @@ -282,6 +272,7 @@ public PDDocument getPDDoc() { } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + /** * @deprecated with no replacement */ From 8a78148d9a3abc752f681837ca135d5f1424b37d Mon Sep 17 00:00:00 2001 From: Zaqueu Cavalcante Date: Wed, 18 Nov 2020 21:07:18 -0300 Subject: [PATCH 117/200] Refactoring constructor. --- .../tabula/ObjectExtractorStreamEngine.java | 90 +++++++++---------- 1 file changed, 40 insertions(+), 50 deletions(-) diff --git a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java index ae249a40..d3f79458 100644 --- a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java +++ b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java @@ -7,7 +7,6 @@ import java.awt.geom.PathIterator; import java.awt.geom.Point2D; import java.awt.geom.Rectangle2D; -import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; import java.util.List; @@ -17,7 +16,6 @@ import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.graphics.image.PDImage; -import org.apache.pdfbox.util.Matrix; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -25,72 +23,66 @@ class ObjectExtractorStreamEngine extends PDFGraphicsStreamEngine { protected 
List rulings; private AffineTransform pageTransform; - private boolean debugClippingPaths; private boolean extractRulingLines = true; - private Logger log; + private Logger logger; private int clipWindingRule = -1; private GeneralPath currentPath = new GeneralPath(); protected ObjectExtractorStreamEngine(PDPage page) { super(page); + logger = LoggerFactory.getLogger(ObjectExtractorStreamEngine.class); + rulings = new ArrayList<>(); - this.log = LoggerFactory.getLogger(ObjectExtractorStreamEngine.class); + // Calculate page transform: + pageTransform = new AffineTransform(); + PDRectangle pageCropBox = getPage().getCropBox(); + int rotationAngleInDegrees = getPage().getRotation(); - this.rulings = new ArrayList<>(); - this.pageTransform = null; - - // calculate page transform - PDRectangle cb = this.getPage().getCropBox(); - int rotation = this.getPage().getRotation(); - - this.pageTransform = new AffineTransform(); - - if (Math.abs(rotation) == 90 || Math.abs(rotation) == 270) { - this.pageTransform = AffineTransform.getRotateInstance(rotation * (Math.PI / 180.0), 0, 0); - this.pageTransform.concatenate(AffineTransform.getScaleInstance(1, -1)); + if (Math.abs(rotationAngleInDegrees) == 90 || Math.abs(rotationAngleInDegrees) == 270) { + double rotationAngleInRadians = rotationAngleInDegrees * (Math.PI / 180.0); + pageTransform = AffineTransform.getRotateInstance(rotationAngleInRadians, 0, 0); } else { - this.pageTransform.concatenate(AffineTransform.getTranslateInstance(0, cb.getHeight())); - this.pageTransform.concatenate(AffineTransform.getScaleInstance(1, -1)); + double deltaX = 0; + double deltaY = pageCropBox.getHeight(); + pageTransform.concatenate(AffineTransform.getTranslateInstance(deltaX, deltaY)); } - this.pageTransform.translate(-cb.getLowerLeftX(), -cb.getLowerLeftY()); + pageTransform.concatenate(AffineTransform.getScaleInstance(1, -1)); + pageTransform.translate(-pageCropBox.getLowerLeftX(), -pageCropBox.getLowerLeftY()); } + // - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - // @Override - public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) { + public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) { currentPath.moveTo((float) p0.getX(), (float) p0.getY()); currentPath.lineTo((float) p1.getX(), (float) p1.getY()); currentPath.lineTo((float) p2.getX(), (float) p2.getY()); currentPath.lineTo((float) p3.getX(), (float) p3.getY()); - currentPath.closePath(); } @Override - public void clip(int windingRule) { - // the clipping path will not be updated until the succeeding painting - // operator is called + public void clip(int windingRule) { + // The clipping path will not be updated until the succeeding painting + // operator is called. clipWindingRule = windingRule; } @Override - public void closePath() { + public void closePath() { currentPath.closePath(); } @Override - public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) { + public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) { currentPath.curveTo(x1, y1, x2, y2, x3, y3); } @Override - public void drawImage(PDImage arg0) { - // TODO Auto-generated method stub - - } + public void drawImage(PDImage arg0) {} @Override - public void endPath() { + public void endPath() { if (clipWindingRule != -1) { currentPath.setWindingRule(clipWindingRule); getGraphicsState().intersectClippingPath(currentPath); @@ -105,7 +97,7 @@ public void fillAndStrokePath(int arg0) { } @Override - public void fillPath(int arg0) { + public void fillPath(int arg0) { strokeOrFillPath(true); } @@ -125,10 +117,7 @@ public void moveTo(float x, float y) { } @Override - public void shadingFill(COSName arg0) { - // TODO Auto-generated method stub - - } + public void shadingFill(COSName arg0) {} @Override public void strokePath() { @@ -236,33 +225,34 @@ private void strokeOrFillPath(boolean isFill) { } public AffineTransform getPageTransform() { - return this.pageTransform; + 
return pageTransform; } public Rectangle2D currentClippingPath() { - Shape clippingPath = this.getGraphicsState().getCurrentClippingPath(); - Shape transformedClippingPath = this.getPageTransform().createTransformedShape(clippingPath); - + Shape currentClippingPath = getGraphicsState().getCurrentClippingPath(); + Shape transformedClippingPath = getPageTransform().createTransformedShape(currentClippingPath); return transformedClippingPath.getBounds2D(); } + // TODO: repeated in SpreadsheetExtractionAlgorithm. class PointComparator implements Comparator { @Override - public int compare(Point2D o1, Point2D o2) { - float o1X = Utils.round(o1.getX(), 2); - float o1Y = Utils.round(o1.getY(), 2); - float o2X = Utils.round(o2.getX(), 2); - float o2Y = Utils.round(o2.getY(), 2); + public int compare(Point2D p1, Point2D p2) { + float p1X = Utils.round(p1.getX(), 2); + float p1Y = Utils.round(p1.getY(), 2); + float p2X = Utils.round(p2.getX(), 2); + float p2Y = Utils.round(p2.getY(), 2); - if (o1Y > o2Y) + if (p1Y > p2Y) return 1; - if (o1Y < o2Y) + if (p1Y < p2Y) return -1; - if (o1X > o2X) + if (p1X > p2X) return 1; - if (o1X < o2X) + if (p1X < p2X) return -1; return 0; } } + } From f7e19764b4035e3f7db5ddb4f2155cfbd0cda0b8 Mon Sep 17 00:00:00 2001 From: Zaqueu Cavalcante Date: Wed, 18 Nov 2020 21:36:25 -0300 Subject: [PATCH 118/200] Filter path by segment type. 
--- .../tabula/ObjectExtractorStreamEngine.java | 72 ++++++++++--------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java index d3f79458..e1c28c68 100644 --- a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java +++ b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java @@ -19,6 +19,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static java.awt.geom.PathIterator.*; + class ObjectExtractorStreamEngine extends PDFGraphicsStreamEngine { protected List rulings; @@ -119,64 +121,47 @@ public void moveTo(float x, float y) { @Override public void shadingFill(COSName arg0) {} + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // @Override public void strokePath() { strokeOrFillPath(false); } private void strokeOrFillPath(boolean isFill) { - GeneralPath path = this.currentPath; - - if (!this.extractRulingLines) { - this.currentPath.reset(); + if (!extractRulingLines) { + currentPath.reset(); return; } - PathIterator pi = path.getPathIterator(this.getPageTransform()); - float[] c = new float[6]; - int currentSegment; - - // skip paths whose first operation is not a MOVETO - // or contains operations other than LINETO, MOVETO or CLOSE - if ((pi.currentSegment(c) != PathIterator.SEG_MOVETO)) { - path.reset(); - return; - } - pi.next(); - while (!pi.isDone()) { - currentSegment = pi.currentSegment(c); - if (currentSegment != PathIterator.SEG_LINETO && currentSegment != PathIterator.SEG_CLOSE - && currentSegment != PathIterator.SEG_MOVETO) { - path.reset(); - return; - } - pi.next(); - } + boolean didNotPassedTheFilter = filterPathBySegmentType(); + if (didNotPassedTheFilter) return; // TODO: how to implement color filter? 
// skip the first path operation and save it as the starting position float[] first = new float[6]; - pi = path.getPathIterator(this.getPageTransform()); - pi.currentSegment(first); + PathIterator pathIterator = currentPath.getPathIterator(this.getPageTransform()); + float[] c = new float[6]; + int currentSegment; + pathIterator.currentSegment(first); // last move Point2D.Float start_pos = new Point2D.Float(Utils.round(first[0], 2), Utils.round(first[1], 2)); Point2D.Float last_move = start_pos; Point2D.Float end_pos = null; Line2D.Float line; PointComparator pc = new PointComparator(); - while (!pi.isDone()) { - pi.next(); + while (!pathIterator.isDone()) { + pathIterator.next(); // This can be the last segment, when pi.isDone, but we need to // process it // otherwise us-017.pdf fails the last value. try { - currentSegment = pi.currentSegment(c); + currentSegment = pathIterator.currentSegment(c); } catch (IndexOutOfBoundsException ex) { continue; } switch (currentSegment) { - case PathIterator.SEG_LINETO: + case SEG_LINETO: end_pos = new Point2D.Float(c[0], c[1]); if (start_pos == null || end_pos == null) { @@ -194,11 +179,11 @@ private void strokeOrFillPath(boolean isFill) { } } break; - case PathIterator.SEG_MOVETO: + case SEG_MOVETO: last_move = new Point2D.Float(c[0], c[1]); end_pos = last_move; break; - case PathIterator.SEG_CLOSE: + case SEG_CLOSE: // according to PathIterator docs: // "the preceding subpath should be closed by appending a line // segment @@ -221,9 +206,30 @@ private void strokeOrFillPath(boolean isFill) { } start_pos = end_pos; } - path.reset(); + currentPath.reset(); } + private boolean filterPathBySegmentType() { + PathIterator pathIterator = currentPath.getPathIterator(pageTransform); + float[] coordinates = new float[6]; + int currentSegmentType = pathIterator.currentSegment(coordinates); + if (currentSegmentType != SEG_MOVETO) { + currentPath.reset(); + return true; + } + pathIterator.next(); + while (!pathIterator.isDone()) { + 
currentSegmentType = pathIterator.currentSegment(coordinates); + if (currentSegmentType != SEG_LINETO && currentSegmentType != SEG_CLOSE && currentSegmentType != SEG_MOVETO) { + currentPath.reset(); + return true; + } + pathIterator.next(); + } + return false; + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // public AffineTransform getPageTransform() { return pageTransform; } From 6e81297b4723da35303a30deef297a4ce4bb09d3 Mon Sep 17 00:00:00 2001 From: Zaqueu Cavalcante Date: Thu, 19 Nov 2020 15:20:31 -0300 Subject: [PATCH 119/200] Refactoring by methods extraction. --- .../tabula/ObjectExtractorStreamEngine.java | 99 ++++++++++--------- 1 file changed, 53 insertions(+), 46 deletions(-) diff --git a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java index e1c28c68..9907eca1 100644 --- a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java +++ b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java @@ -30,6 +30,8 @@ class ObjectExtractorStreamEngine extends PDFGraphicsStreamEngine { private int clipWindingRule = -1; private GeneralPath currentPath = new GeneralPath(); + private static final float RULING_MINIMUM_LENGTH = 0.01f; + protected ObjectExtractorStreamEngine(PDPage page) { super(page); logger = LoggerFactory.getLogger(ObjectExtractorStreamEngine.class); @@ -138,73 +140,53 @@ private void strokeOrFillPath(boolean isFill) { // TODO: how to implement color filter? - // skip the first path operation and save it as the starting position - float[] first = new float[6]; - PathIterator pathIterator = currentPath.getPathIterator(this.getPageTransform()); - float[] c = new float[6]; + // Skip the first path operation and save it as the starting point. 
+ PathIterator pathIterator = currentPath.getPathIterator(getPageTransform()); + + float[] coordinates = new float[6]; int currentSegment; - pathIterator.currentSegment(first); - // last move - Point2D.Float start_pos = new Point2D.Float(Utils.round(first[0], 2), Utils.round(first[1], 2)); - Point2D.Float last_move = start_pos; - Point2D.Float end_pos = null; + + Point2D.Float startPoint = getStartPoint(pathIterator); + Point2D.Float last_move = startPoint; + Point2D.Float endPoint = null; Line2D.Float line; - PointComparator pc = new PointComparator(); + PointComparator pointComparator = new PointComparator(); + while (!pathIterator.isDone()) { pathIterator.next(); - // This can be the last segment, when pi.isDone, but we need to - // process it - // otherwise us-017.pdf fails the last value. + // This can be the last segment, when pathIterator.isDone, but we need to + // process it otherwise us-017.pdf fails the last value. try { - currentSegment = pathIterator.currentSegment(c); + currentSegment = pathIterator.currentSegment(coordinates); } catch (IndexOutOfBoundsException ex) { continue; } switch (currentSegment) { case SEG_LINETO: - end_pos = new Point2D.Float(c[0], c[1]); - - if (start_pos == null || end_pos == null) { + endPoint = new Point2D.Float(coordinates[0], coordinates[1]); + if (startPoint == null || endPoint == null) { break; } - - line = pc.compare(start_pos, end_pos) == -1 ? 
new Line2D.Float(start_pos, end_pos) - : new Line2D.Float(end_pos, start_pos); - - if (line.intersects(this.currentClippingPath())) { - Ruling r = new Ruling(line.getP1(), line.getP2()).intersect(this.currentClippingPath()); - - if (r.length() > 0.01) { - this.rulings.add(r); - } - } + line = getLineBetween(startPoint, endPoint, pointComparator); + verifyLineIntersectsClipping(line); break; case SEG_MOVETO: - last_move = new Point2D.Float(c[0], c[1]); - end_pos = last_move; + last_move = new Point2D.Float(coordinates[0], coordinates[1]); + endPoint = last_move; break; case SEG_CLOSE: - // according to PathIterator docs: - // "the preceding subpath should be closed by appending a line - // segment - // back to the point corresponding to the most recent + // According to PathIterator docs: + // "The preceding sub-path should be closed by appending a line + // segment back to the point corresponding to the most recent // SEG_MOVETO." - if (start_pos == null || end_pos == null) { + if (startPoint == null || endPoint == null) { break; } - line = pc.compare(end_pos, last_move) == -1 ? 
new Line2D.Float(end_pos, last_move) - : new Line2D.Float(last_move, end_pos); - - if (line.intersects(this.currentClippingPath())) { - Ruling r = new Ruling(line.getP1(), line.getP2()).intersect(this.currentClippingPath()); - - if (r.length() > 0.01) { - this.rulings.add(r); - } - } + line = getLineBetween(endPoint, last_move, pointComparator); + verifyLineIntersectsClipping(line); break; } - start_pos = end_pos; + startPoint = endPoint; } currentPath.reset(); } @@ -229,6 +211,31 @@ private boolean filterPathBySegmentType() { return false; } + private Point2D.Float getStartPoint(PathIterator pathIterator) { + float[] startPointCoordinates = new float[6]; + pathIterator.currentSegment(startPointCoordinates); + float x = Utils.round(startPointCoordinates[0], 2); + float y = Utils.round(startPointCoordinates[1], 2); + return new Point2D.Float(x, y); + } + + private Line2D.Float getLineBetween(Point2D.Float pointA, Point2D.Float pointB, PointComparator pointComparator) { + if (pointComparator.compare(pointA, pointB) == -1) { + return new Line2D.Float(pointA, pointB); + } + return new Line2D.Float(pointB, pointA); + } + + private void verifyLineIntersectsClipping(Line2D.Float line) { + Rectangle2D currentClippingPath = currentClippingPath(); + if (line.intersects(currentClippingPath)) { + Ruling ruling = new Ruling(line.getP1(), line.getP2()).intersect(currentClippingPath); + if (ruling.length() > RULING_MINIMUM_LENGTH) { + rulings.add(ruling); + } + } + } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // public AffineTransform getPageTransform() { return pageTransform; From 4519596d5234d076784db86705039f7cb24be9a4 Mon Sep 17 00:00:00 2001 From: Zaqueu Cavalcante Date: Thu, 19 Nov 2020 16:52:17 -0300 Subject: [PATCH 120/200] Adding some tests before refactor. 
--- .../tabula/CohenSutherlandClipping.java | 67 ++++++------ .../tabula/TestCohenSutherland.java | 101 ++++++++++++++++++ 2 files changed, 132 insertions(+), 36 deletions(-) create mode 100644 src/test/java/technology/tabula/TestCohenSutherland.java diff --git a/src/main/java/technology/tabula/CohenSutherlandClipping.java b/src/main/java/technology/tabula/CohenSutherlandClipping.java index 5e170ad8..ce6e2a40 100644 --- a/src/main/java/technology/tabula/CohenSutherlandClipping.java +++ b/src/main/java/technology/tabula/CohenSutherlandClipping.java @@ -18,18 +18,23 @@ * Implements the well known Cohen Sutherland line * clipping algorithm (line against clip rectangle). */ -public final class CohenSutherlandClipping -{ +public final class CohenSutherlandClipping { + private double xMin; private double yMin; private double xMax; private double yMax; + private static final int INSIDE = 0; + private static final int LEFT = 1; + private static final int RIGHT = 2; + private static final int BOTTOM = 4; + private static final int TOP = 8; + /** * Creates a Cohen Sutherland clipper with clip rect (0, 0, 0, 0). */ - public CohenSutherlandClipping() { - } + public CohenSutherlandClipping() {} /** * Creates a Cohen Sutherland clipper with the given clip rectangle. @@ -50,20 +55,12 @@ public void setClip(Rectangle2D clip) { yMax = yMin + clip.getHeight(); } - private static final int INSIDE = 0; - private static final int LEFT = 1; - private static final int RIGHT = 2; - private static final int BOTTOM = 4; - private static final int TOP = 8; - private final int regionCode(double x, double y) { - int code = x < xMin - ? LEFT - : x > xMax - ? RIGHT - : INSIDE; - if (y < yMin) code |= BOTTOM; - else if (y > yMax) code |= TOP; + int code = (x < xMin) ? LEFT : (x > xMax) ? 
RIGHT : INSIDE; + if (y < yMin) + code |= BOTTOM; + else if (y > yMax) + code |= TOP; return code; } @@ -84,56 +81,54 @@ public boolean clip(Line2D.Float line) { double qx = 0d; double qy = 0d; - boolean vertical = p1x == p2x; + boolean lineIsVertical = (p1x == p2x); - double slope = vertical - ? 0d - : (p2y-p1y)/(p2x-p1x); + double lineSlope = lineIsVertical ? 0d : (p2y-p1y)/(p2x-p1x); - int c1 = regionCode(p1x, p1y); - int c2 = regionCode(p2x, p2y); + int p1Region = regionCode(p1x, p1y); + int p2Region = regionCode(p2x, p2y); - while (c1 != INSIDE || c2 != INSIDE) { + while (p1Region != INSIDE || p2Region != INSIDE) { - if ((c1 & c2) != INSIDE) + if ((p1Region & p2Region) != INSIDE) return false; - int c = c1 == INSIDE ? c2 : c1; + int c = (p1Region == INSIDE) ? p2Region : p1Region; if ((c & LEFT) != INSIDE) { qx = xMin; - qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y; + qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*lineSlope + p1y; } else if ((c & RIGHT) != INSIDE) { qx = xMax; - qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y; + qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*lineSlope + p1y; } else if ((c & BOTTOM) != INSIDE) { qy = yMin; - qx = vertical + qx = lineIsVertical ? p1x - : (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x; + : (Utils.feq(qy, p1y) ? 0 : qy-p1y)/lineSlope + p1x; } else if ((c & TOP) != INSIDE) { qy = yMax; - qx = vertical + qx = lineIsVertical ? p1x - : (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x; + : (Utils.feq(qy, p1y) ? 
0 : qy-p1y)/lineSlope + p1x; } - if (c == c1) { + if (c == p1Region) { p1x = qx; p1y = qy; - c1 = regionCode(p1x, p1y); + p1Region = regionCode(p1x, p1y); } else { p2x = qx; p2y = qy; - c2 = regionCode(p2x, p2y); + p2Region = regionCode(p2x, p2y); } } line.setLine(p1x, p1y, p2x, p2y); return true; } + } -// end of file \ No newline at end of file diff --git a/src/test/java/technology/tabula/TestCohenSutherland.java b/src/test/java/technology/tabula/TestCohenSutherland.java new file mode 100644 index 00000000..b2738d2b --- /dev/null +++ b/src/test/java/technology/tabula/TestCohenSutherland.java @@ -0,0 +1,101 @@ +package technology.tabula; + +import org.junit.Before; +import org.junit.Test; + +import java.awt.geom.Line2D; +import java.awt.geom.Rectangle2D; + +import static org.junit.Assert.*; + +public class TestCohenSutherland { + + private Rectangle2D clipWindow; + private CohenSutherlandClipping algorithm; + private static final double DELTA = 0.001; + + @Before + public void set() { + clipWindow = new Rectangle(10, 10, 50, 50); + algorithm = new CohenSutherlandClipping(clipWindow); + } + + // TODO: How to parameterize the tests? 
+ + @Test + public void theLineIsCompletelyInside() { + Line2D.Float line = new Line2D.Float(20, 20, 30, 30); + assertTrue(algorithm.clip(line)); + assertEquals(20, line.x1, DELTA); + assertEquals(20, line.y1, DELTA); + assertEquals(30, line.x2, DELTA); + assertEquals(30, line.y2, DELTA); + } + + @Test + public void theLineIsCompletelyOnTheLeft() { + float x1 = 3, y1 = 13, x2 = 6, y2 = 16; + Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); + assertFalse(algorithm.clip(line)); + assertEquals(x1, line.x1, DELTA); + assertEquals(y1, line.y1, DELTA); + assertEquals(x2, line.x2, DELTA); + assertEquals(y2, line.y2, DELTA); + } + + @Test + public void theLineIsCompletelyOnTheUp() { + float x1 = 15, y1 = 5, x2 = 25, y2 = 2; + Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); + assertFalse(algorithm.clip(line)); + assertEquals(x1, line.x1, DELTA); + assertEquals(y1, line.y1, DELTA); + assertEquals(x2, line.x2, DELTA); + assertEquals(y2, line.y2, DELTA); + } + + @Test + public void theLineIsCompletelyOnTheRight() { + float x1 = 65, y1 = 15, x2 = 70, y2 = 20; + Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); + assertFalse(algorithm.clip(line)); + assertEquals(x1, line.x1, DELTA); + assertEquals(y1, line.y1, DELTA); + assertEquals(x2, line.x2, DELTA); + assertEquals(y2, line.y2, DELTA); + } + + @Test + public void theLineIsCompletelyOnTheBottom() { + float x1 = 15, y1 = 65, x2 = 25, y2 = 70; + Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); + assertFalse(algorithm.clip(line)); + assertEquals(x1, line.x1, DELTA); + assertEquals(y1, line.y1, DELTA); + assertEquals(x2, line.x2, DELTA); + assertEquals(y2, line.y2, DELTA); + } + + @Test + public void lineCrossesTopLeftCorner() { + float x1 = 5, y1 = 25, x2 = 25, y2 = 5; + Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); + assertTrue(algorithm.clip(line)); + assertEquals(10, line.x1, DELTA); + assertEquals(20, line.y1, DELTA); + assertEquals(20, line.x2, DELTA); + assertEquals(10, line.y2, DELTA); + } + + 
@Test + public void lineCrossesPartiallyTopLeftCorner() { + float x1 = 15, y1 = 15, x2 = 25, y2 = 5; + Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); + assertTrue(algorithm.clip(line)); + assertEquals(x1, line.x1, DELTA); + assertEquals(y1, line.y1, DELTA); + assertEquals(20, line.x2, DELTA); + assertEquals(10, line.y2, DELTA); + } + +} From b8d44f6dec19b0bb45501e804bfc7a0240149a65 Mon Sep 17 00:00:00 2001 From: Zaqueu Cavalcante Date: Fri, 20 Nov 2020 22:49:29 -0300 Subject: [PATCH 121/200] Renaming variables. --- .../tabula/CohenSutherlandClipping.java | 77 +++++++++---------- .../tabula/TestCohenSutherland.java | 3 +- 2 files changed, 37 insertions(+), 43 deletions(-) diff --git a/src/main/java/technology/tabula/CohenSutherlandClipping.java b/src/main/java/technology/tabula/CohenSutherlandClipping.java index ce6e2a40..9a95ad52 100644 --- a/src/main/java/technology/tabula/CohenSutherlandClipping.java +++ b/src/main/java/technology/tabula/CohenSutherlandClipping.java @@ -67,67 +67,60 @@ else if (y > yMax) /** * Clips a given line against the clip rectangle. * The modification (if needed) is done in place. - * @param line the line to clip + * @param line the line to clip. * @return true if line is clipped, false if line is * totally outside the clip rect. */ public boolean clip(Line2D.Float line) { + double point1X = line.getX1(), point1Y = line.getY1(); + double point2X = line.getX2(), point2Y = line.getY2(); + double outsidePointX = 0d, outsidePointY = 0d; - double p1x = line.getX1(); - double p1y = line.getY1(); - double p2x = line.getX2(); - double p2y = line.getY2(); + boolean lineIsVertical = (point1X == point2X); + double lineSlope = lineIsVertical ? 0d : (point2Y-point1Y)/(point2X-point1X); - double qx = 0d; - double qy = 0d; + int point1Region = regionCode(point1X, point1Y); + int point2Region = regionCode(point2X, point2Y); - boolean lineIsVertical = (p1x == p2x); - - double lineSlope = lineIsVertical ? 
0d : (p2y-p1y)/(p2x-p1x); - - int p1Region = regionCode(p1x, p1y); - int p2Region = regionCode(p2x, p2y); - - while (p1Region != INSIDE || p2Region != INSIDE) { - - if ((p1Region & p2Region) != INSIDE) + while (point1Region != INSIDE || point2Region != INSIDE) { + if ((point1Region & point2Region) != INSIDE) return false; - int c = (p1Region == INSIDE) ? p2Region : p1Region; + int outsidePointRegion = (point1Region == INSIDE) ? point2Region : point1Region; - if ((c & LEFT) != INSIDE) { - qx = xMin; - qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*lineSlope + p1y; + if ((outsidePointRegion & LEFT) != INSIDE) { + outsidePointX = xMin; + outsidePointY = (Utils.feq(outsidePointX, point1X) ? 0 : outsidePointX-point1X)*lineSlope + point1Y; } - else if ((c & RIGHT) != INSIDE) { - qx = xMax; - qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*lineSlope + p1y; + else if ((outsidePointRegion & RIGHT) != INSIDE) { + outsidePointX = xMax; + outsidePointY = (Utils.feq(outsidePointX, point1X) ? 0 : outsidePointX-point1X)*lineSlope + point1Y; } - else if ((c & BOTTOM) != INSIDE) { - qy = yMin; - qx = lineIsVertical - ? p1x - : (Utils.feq(qy, p1y) ? 0 : qy-p1y)/lineSlope + p1x; + else if ((outsidePointRegion & BOTTOM) != INSIDE) { + outsidePointY = yMin; + outsidePointX = lineIsVertical + ? point1X + : (Utils.feq(outsidePointY, point1Y) ? 0 : outsidePointY-point1Y)/lineSlope + point1X; } - else if ((c & TOP) != INSIDE) { - qy = yMax; - qx = lineIsVertical - ? p1x - : (Utils.feq(qy, p1y) ? 0 : qy-p1y)/lineSlope + p1x; + else if ((outsidePointRegion & TOP) != INSIDE) { + outsidePointY = yMax; + outsidePointX = lineIsVertical + ? point1X + : (Utils.feq(outsidePointY, point1Y) ? 
0 : outsidePointY-point1Y)/lineSlope + point1X; } - if (c == p1Region) { - p1x = qx; - p1y = qy; - p1Region = regionCode(p1x, p1y); + if (outsidePointRegion == point1Region) { + point1X = outsidePointX; + point1Y = outsidePointY; + point1Region = regionCode(point1X, point1Y); } else { - p2x = qx; - p2y = qy; - p2Region = regionCode(p2x, p2y); + point2X = outsidePointX; + point2Y = outsidePointY; + point2Region = regionCode(point2X, point2Y); } } - line.setLine(p1x, p1y, p2x, p2y); + line.setLine(point1X, point1Y, point2X, point2Y); return true; } diff --git a/src/test/java/technology/tabula/TestCohenSutherland.java b/src/test/java/technology/tabula/TestCohenSutherland.java index b2738d2b..2d747608 100644 --- a/src/test/java/technology/tabula/TestCohenSutherland.java +++ b/src/test/java/technology/tabula/TestCohenSutherland.java @@ -20,8 +20,8 @@ public void set() { algorithm = new CohenSutherlandClipping(clipWindow); } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // // TODO: How to parameterize the tests? - @Test public void theLineIsCompletelyInside() { Line2D.Float line = new Line2D.Float(20, 20, 30, 30); @@ -76,6 +76,7 @@ public void theLineIsCompletelyOnTheBottom() { assertEquals(y2, line.y2, DELTA); } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // @Test public void lineCrossesTopLeftCorner() { float x1 = 5, y1 = 25, x2 = 25, y2 = 5; From d498a5ec7fc6de7fc3de938c2699d979a4c7a15c Mon Sep 17 00:00:00 2001 From: Zaqueu Cavalcante Date: Sat, 21 Nov 2020 08:34:32 -0300 Subject: [PATCH 122/200] Moving logic. 
--- .../technology/tabula/CohenSutherlandClipping.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/main/java/technology/tabula/CohenSutherlandClipping.java b/src/main/java/technology/tabula/CohenSutherlandClipping.java index 9a95ad52..fcf9a394 100644 --- a/src/main/java/technology/tabula/CohenSutherlandClipping.java +++ b/src/main/java/technology/tabula/CohenSutherlandClipping.java @@ -31,6 +31,8 @@ public final class CohenSutherlandClipping { private static final int BOTTOM = 4; private static final int TOP = 8; + private final static float MINIMUM_DELTA = 0.01f; + /** * Creates a Cohen Sutherland clipper with clip rect (0, 0, 0, 0). */ @@ -90,23 +92,23 @@ public boolean clip(Line2D.Float line) { if ((outsidePointRegion & LEFT) != INSIDE) { outsidePointX = xMin; - outsidePointY = (Utils.feq(outsidePointX, point1X) ? 0 : outsidePointX-point1X)*lineSlope + point1Y; + outsidePointY = delta(outsidePointX, point1X)*lineSlope + point1Y; } else if ((outsidePointRegion & RIGHT) != INSIDE) { outsidePointX = xMax; - outsidePointY = (Utils.feq(outsidePointX, point1X) ? 0 : outsidePointX-point1X)*lineSlope + point1Y; + outsidePointY = delta(outsidePointX, point1X)*lineSlope + point1Y; } else if ((outsidePointRegion & BOTTOM) != INSIDE) { outsidePointY = yMin; outsidePointX = lineIsVertical ? point1X - : (Utils.feq(outsidePointY, point1Y) ? 0 : outsidePointY-point1Y)/lineSlope + point1X; + : delta(outsidePointY, point1Y)/lineSlope + point1X; } else if ((outsidePointRegion & TOP) != INSIDE) { outsidePointY = yMax; outsidePointX = lineIsVertical ? point1X - : (Utils.feq(outsidePointY, point1Y) ? 0 : outsidePointY-point1Y)/lineSlope + point1X; + : delta(outsidePointY, point1Y)/lineSlope + point1X; } if (outsidePointRegion == point1Region) { @@ -124,4 +126,8 @@ else if ((outsidePointRegion & TOP) != INSIDE) { return true; } + private static double delta(double value1, double value2) { + return (Math.abs(value1 - value2) < MINIMUM_DELTA) ? 
0 : (value1 - value2); + } + } From 1deb2c9273e78de3261cb972fadd0b5270c7d57e Mon Sep 17 00:00:00 2001 From: Zaqueu Cavalcante Date: Sat, 21 Nov 2020 10:13:08 -0300 Subject: [PATCH 123/200] Encapsulating point attributes. --- .../tabula/CohenSutherlandClipping.java | 124 ++++++++++-------- 1 file changed, 66 insertions(+), 58 deletions(-) diff --git a/src/main/java/technology/tabula/CohenSutherlandClipping.java b/src/main/java/technology/tabula/CohenSutherlandClipping.java index fcf9a394..5094d26e 100644 --- a/src/main/java/technology/tabula/CohenSutherlandClipping.java +++ b/src/main/java/technology/tabula/CohenSutherlandClipping.java @@ -34,100 +34,108 @@ public final class CohenSutherlandClipping { private final static float MINIMUM_DELTA = 0.01f; /** - * Creates a Cohen Sutherland clipper with clip rect (0, 0, 0, 0). + * Creates a Cohen Sutherland clipper with clip window (0, 0, 0, 0). */ public CohenSutherlandClipping() {} /** - * Creates a Cohen Sutherland clipper with the given clip rectangle. - * @param clip the clip rectangle to use + * Creates a Cohen Sutherland clipper with the given clip window. + * @param clipWindow the clip window to use. */ - public CohenSutherlandClipping(Rectangle2D clip) { - setClip(clip); + public CohenSutherlandClipping(Rectangle2D clipWindow) { + setClip(clipWindow); } /** * Sets the clip rectangle. - * @param clip the clip rectangle + * @param clipWindow the clip window. */ - public void setClip(Rectangle2D clip) { - xMin = clip.getX(); - xMax = xMin + clip.getWidth(); - yMin = clip.getY(); - yMax = yMin + clip.getHeight(); - } - - private final int regionCode(double x, double y) { - int code = (x < xMin) ? LEFT : (x > xMax) ? 
RIGHT : INSIDE; - if (y < yMin) - code |= BOTTOM; - else if (y > yMax) - code |= TOP; - return code; + public void setClip(Rectangle2D clipWindow) { + xMin = clipWindow.getX(); + xMax = xMin + clipWindow.getWidth(); + yMin = clipWindow.getY(); + yMax = yMin + clipWindow.getHeight(); } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // /** - * Clips a given line against the clip rectangle. + * Clips a given line against the clip window. * The modification (if needed) is done in place. * @param line the line to clip. * @return true if line is clipped, false if line is - * totally outside the clip rect. + * totally outside the clip window. */ public boolean clip(Line2D.Float line) { - double point1X = line.getX1(), point1Y = line.getY1(); - double point2X = line.getX2(), point2Y = line.getY2(); - double outsidePointX = 0d, outsidePointY = 0d; - - boolean lineIsVertical = (point1X == point2X); - double lineSlope = lineIsVertical ? 0d : (point2Y-point1Y)/(point2X-point1X); + Point point1 = new Point(line.getX1(), line.getY1()); + Point point2 = new Point(line.getX2(), line.getY2()); + Point outsidePoint = new Point(0d, 0d); - int point1Region = regionCode(point1X, point1Y); - int point2Region = regionCode(point2X, point2Y); + boolean lineIsVertical = (point1.x == point2.x); + double lineSlope = lineIsVertical ? 0d : (point2.y-point1.y)/(point2.x-point1.x); - while (point1Region != INSIDE || point2Region != INSIDE) { - if ((point1Region & point2Region) != INSIDE) - return false; + while (point1.region != INSIDE || point2.region != INSIDE) { + if ((point1.region & point2.region) != INSIDE) return false; - int outsidePointRegion = (point1Region == INSIDE) ? point2Region : point1Region; + outsidePoint.region = (point1.region == INSIDE) ? 
point2.region : point1.region; - if ((outsidePointRegion & LEFT) != INSIDE) { - outsidePointX = xMin; - outsidePointY = delta(outsidePointX, point1X)*lineSlope + point1Y; + if ((outsidePoint.region & LEFT) != INSIDE) { + outsidePoint.x = xMin; + outsidePoint.y = delta(outsidePoint.x, point1.x)*lineSlope + point1.y; } - else if ((outsidePointRegion & RIGHT) != INSIDE) { - outsidePointX = xMax; - outsidePointY = delta(outsidePointX, point1X)*lineSlope + point1Y; + else if ((outsidePoint.region & RIGHT) != INSIDE) { + outsidePoint.x = xMax; + outsidePoint.y = delta(outsidePoint.x, point1.x)*lineSlope + point1.y; } - else if ((outsidePointRegion & BOTTOM) != INSIDE) { - outsidePointY = yMin; - outsidePointX = lineIsVertical - ? point1X - : delta(outsidePointY, point1Y)/lineSlope + point1X; + else if ((outsidePoint.region & BOTTOM) != INSIDE) { + outsidePoint.y = yMin; + outsidePoint.x = lineIsVertical + ? point1.x + : delta(outsidePoint.y, point1.y)/lineSlope + point1.x; } - else if ((outsidePointRegion & TOP) != INSIDE) { - outsidePointY = yMax; - outsidePointX = lineIsVertical - ? point1X - : delta(outsidePointY, point1Y)/lineSlope + point1X; + else if ((outsidePoint.region & TOP) != INSIDE) { + outsidePoint.y = yMax; + outsidePoint.x = lineIsVertical + ? 
point1.x + : delta(outsidePoint.y, point1.y)/lineSlope + point1.x; } - if (outsidePointRegion == point1Region) { - point1X = outsidePointX; - point1Y = outsidePointY; - point1Region = regionCode(point1X, point1Y); + if (outsidePoint.isInTheSameRegionAs(point1)) { + point1.setPositionAndRegion(outsidePoint.x, outsidePoint.y); } else { - point2X = outsidePointX; - point2Y = outsidePointY; - point2Region = regionCode(point2X, point2Y); + point2.setPositionAndRegion(outsidePoint.x, outsidePoint.y); } } - line.setLine(point1X, point1Y, point2X, point2Y); + line.setLine(point1.x, point1.y, point2.x, point2.y); return true; } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // private static double delta(double value1, double value2) { return (Math.abs(value1 - value2) < MINIMUM_DELTA) ? 0 : (value1 - value2); } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + class Point { + double x, y; + int region; + + Point(double x, double y) { + setPositionAndRegion(x, y); + } + + void setPositionAndRegion(double x, double y) { + this.x = x; this.y = y; + region = (x < xMin) ? LEFT : (x > xMax) ? RIGHT : INSIDE; + if (y < yMin) + region |= BOTTOM; + else if (y > yMax) + region |= TOP; + } + + boolean isInTheSameRegionAs(Point otherPoint) { + return this.region == otherPoint.region; + } + } + } From 9c219de5e1d3d300da917f7bb3201acf1be16035 Mon Sep 17 00:00:00 2001 From: Zaqueu Cavalcante Date: Sat, 21 Nov 2020 11:39:38 -0300 Subject: [PATCH 124/200] Correcting conceptual mistake. 
--- .../technology/tabula/CohenSutherlandClipping.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/technology/tabula/CohenSutherlandClipping.java b/src/main/java/technology/tabula/CohenSutherlandClipping.java index 5094d26e..db9153e9 100644 --- a/src/main/java/technology/tabula/CohenSutherlandClipping.java +++ b/src/main/java/technology/tabula/CohenSutherlandClipping.java @@ -74,25 +74,25 @@ public boolean clip(Line2D.Float line) { double lineSlope = lineIsVertical ? 0d : (point2.y-point1.y)/(point2.x-point1.x); while (point1.region != INSIDE || point2.region != INSIDE) { - if ((point1.region & point2.region) != INSIDE) return false; + if ((point1.region & point2.region) != 0) return false; outsidePoint.region = (point1.region == INSIDE) ? point2.region : point1.region; - if ((outsidePoint.region & LEFT) != INSIDE) { + if ((outsidePoint.region & LEFT) != 0) { outsidePoint.x = xMin; outsidePoint.y = delta(outsidePoint.x, point1.x)*lineSlope + point1.y; } - else if ((outsidePoint.region & RIGHT) != INSIDE) { + else if ((outsidePoint.region & RIGHT) != 0) { outsidePoint.x = xMax; outsidePoint.y = delta(outsidePoint.x, point1.x)*lineSlope + point1.y; } - else if ((outsidePoint.region & BOTTOM) != INSIDE) { + else if ((outsidePoint.region & BOTTOM) != 0) { outsidePoint.y = yMin; outsidePoint.x = lineIsVertical ? point1.x : delta(outsidePoint.y, point1.y)/lineSlope + point1.x; } - else if ((outsidePoint.region & TOP) != INSIDE) { + else if ((outsidePoint.region & TOP) != 0) { outsidePoint.y = yMax; outsidePoint.x = lineIsVertical ? point1.x From e8f9c15803d61d853412021677b3e387ecc03084 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 24 Dec 2020 05:23:57 +0000 Subject: [PATCH 125/200] Bump jts-core from 1.17.0 to 1.18.0 Bumps jts-core from 1.17.0 to 1.18.0. 
Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b88e7f71..05fec848 100644 --- a/pom.xml +++ b/pom.xml @@ -244,7 +244,7 @@ org.locationtech.jts jts-core - 1.17.0 + 1.18.0 From 47f784f7f180ceeea1f600cf62fdc3a239be43de Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 12 Oct 2020 05:35:30 +0000 Subject: [PATCH 126/200] Bump junit from 4.13 to 4.13.1 Bumps [junit](https://github.com/junit-team/junit4) from 4.13 to 4.13.1. - [Release notes](https://github.com/junit-team/junit4/releases) - [Changelog](https://github.com/junit-team/junit4/blob/main/doc/ReleaseNotes4.13.1.md) - [Commits](https://github.com/junit-team/junit4/compare/r4.13...r4.13.1) Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 05fec848..8fd99723 100644 --- a/pom.xml +++ b/pom.xml @@ -280,7 +280,7 @@ junit junit - 4.13 + 4.13.1 test From f8cac70da7678534bb8843992a563223c5380f2d Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Fri, 11 Dec 2020 05:25:18 +0000 Subject: [PATCH 127/200] Bump jai-imageio-jpeg2000 from 1.3.0 to 1.4.0 Bumps [jai-imageio-jpeg2000](https://github.com/jai-imageio/jai-imageio-jpeg2000) from 1.3.0 to 1.4.0. 
- [Release notes](https://github.com/jai-imageio/jai-imageio-jpeg2000/releases) - [Commits](https://github.com/jai-imageio/jai-imageio-jpeg2000/compare/jai-imageio-jpeg2000-1.3.0...jai-imageio-jpeg2000-1.4.0) Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8fd99723..c9108802 100644 --- a/pom.xml +++ b/pom.xml @@ -311,7 +311,7 @@ com.github.jai-imageio jai-imageio-jpeg2000 - 1.3.0 + 1.4.0 From 7f3f039d49d3ce97ee996d6e3a35fb8ee8d3492f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Sun, 27 Dec 2020 14:31:06 -0300 Subject: [PATCH 128/200] pdfbox 2.0.22 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index c9108802..2e209198 100644 --- a/pom.xml +++ b/pom.xml @@ -262,7 +262,7 @@ org.apache.pdfbox pdfbox - 2.0.21 + 2.0.22 From 8c4a0027c521b40681f9c193e43b97ee2d12f507 Mon Sep 17 00:00:00 2001 From: Andreas Tscheinig Date: Fri, 15 Jan 2021 19:32:23 +0100 Subject: [PATCH 129/200] Refactor data-clumps in Page class --- .../technology/tabula/ObjectExtractor.java | 13 +- src/main/java/technology/tabula/Page.java | 135 ++++++++++++++---- src/main/java/technology/tabula/PageDims.java | 35 +++++ .../tabula/TestProjectionProfile.java | 11 +- 4 files changed, 164 insertions(+), 30 deletions(-) create mode 100644 src/main/java/technology/tabula/PageDims.java diff --git a/src/main/java/technology/tabula/ObjectExtractor.java b/src/main/java/technology/tabula/ObjectExtractor.java index 97960243..907312a9 100644 --- a/src/main/java/technology/tabula/ObjectExtractor.java +++ b/src/main/java/technology/tabula/ObjectExtractor.java @@ -38,7 +38,18 @@ protected Page extractPage(Integer pageNumber) throws IOException { height = page.getCropBox().getHeight(); } - return new Page(0, 0, width, height, rotation, pageNumber, page, pdfDocument, streamEngine, textStripper); + return Page.Builder.newInstance() + 
.withPageDims(PageDims.of(0, 0, width, height)) + .withRotation(rotation) + .withNumber(pageNumber) + .withPdPage(page) + .withPdDocument(pdfDocument) + .withRulings(streamEngine.rulings) + .withTextElements(textStripper.textElements) + .withMinCharWidth(textStripper.minCharWidth) + .withMinCharHeight(textStripper.minCharHeight) + .withIndex(textStripper.spatialIndex) + .build(); } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index 87d7c01d..20c148d9 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -36,36 +36,27 @@ public class Page extends Rectangle { private static final float DEFAULT_MIN_CHAR_LENGTH = 7; - // TODO: Use a creational design patterns here? - public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc) { - super(top, left, width, height); + private Page( + PageDims pageDims, + int rotation, + int number, + PDPage pdPage, + PDDocument doc, + List characters, + List rulings, + float minCharWidth, + float minCharHeight, + RectangleSpatialIndex index + ) { + super(pageDims.getTop(), pageDims.getLeft(), pageDims.getWidth(), pageDims.getHeight()); this.rotation = rotation; this.number = number; this.pdPage = pdPage; this.pdDoc = doc; - } - - public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, - List characters, List rulings) { - this(top, left, width, height, rotation, number, pdPage, doc); this.textElements = characters; this.rulings = rulings; - } - - public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, - ObjectExtractorStreamEngine streamEngine, TextStripper textStripper) { - this(top, left, width, height, rotation, number, pdPage, doc, textStripper.textElements, 
streamEngine.rulings); - this.minCharWidth = textStripper.minCharWidth; - this.minCharHeight = textStripper.minCharHeight; - this.spatialIndex = textStripper.spatialIndex; - } - - public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, - List characters, List rulings, - float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { - this(top, left, width, height, rotation, number, pdPage, doc, characters, rulings); - this.minCharHeight = minCharHeight; this.minCharWidth = minCharWidth; + this.minCharHeight = minCharHeight; this.spatialIndex = index; } @@ -76,10 +67,18 @@ public Page getArea(Rectangle area) { float minimumCharWidth = getMinimumCharWidthFrom(areaTextElements); float minimumCharHeight = getMinimumCharHeightFrom(areaTextElements); - Page page = new Page(area.getTop(), area.getLeft(), (float) area.getWidth(), (float) area.getHeight(), - rotation, number, pdPage, pdDoc, areaTextElements, - Ruling.cropRulingsToArea(getRulings(), area), - minimumCharWidth, minimumCharHeight, spatialIndex); + final Page page = Page.Builder.newInstance() + .withPageDims(PageDims.of(area.getTop(), area.getLeft(), (float) area.getWidth(), (float) area.getHeight())) + .withRotation(rotation) + .withNumber(number) + .withPdPage(pdPage) + .withPdDocument(pdDoc) + .withTextElements(areaTextElements) + .withRulings(Ruling.cropRulingsToArea(getRulings(), area)) + .withMinCharWidth(minimumCharWidth) + .withMinCharHeight(minimumCharHeight) + .withIndex(spatialIndex) + .build(); addBorderRulingsTo(page); @@ -281,4 +280,86 @@ public RectangleSpatialIndex getSpatialIndex() { return spatialIndex; } + public static class Builder { + private PageDims pageDims; + private int rotation; + private int number; + private PDPage pdPage; + private PDDocument pdDocument; + private List textElements; + private List rulings; + private float minCharWidth; + private float minCharHeight; + private RectangleSpatialIndex index; + + 
private Builder() {} + + public static Builder newInstance() { + return new Builder(); + } + + public Builder withPageDims(PageDims pageDims) { + this.pageDims = pageDims; + + return this; + } + + public Builder withRotation(int rotation) { + this.rotation = rotation; + + return this; + } + + public Builder withNumber(int number) { + this.number = number; + + return this; + } + + public Builder withPdPage(PDPage pdPage) { + this.pdPage = pdPage; + + return this; + } + + public Builder withPdDocument(PDDocument pdDocument) { + this.pdDocument = pdDocument; + + return this; + } + + public Builder withTextElements(List textElements) { + this.textElements = textElements; + + return this; + } + + public Builder withRulings(List rulings) { + this.rulings = rulings; + + return this; + } + + public Builder withMinCharWidth(float minCharWidth) { + this.minCharWidth = minCharWidth; + + return this; + } + + public Builder withMinCharHeight(float minCharHeight) { + this.minCharHeight = minCharHeight; + + return this; + } + + public Builder withIndex(RectangleSpatialIndex index) { + this.index = index; + + return this; + } + + public Page build() { + return new Page(pageDims, rotation, number, pdPage, pdDocument, textElements, rulings, minCharWidth, minCharHeight, index); + } + } } diff --git a/src/main/java/technology/tabula/PageDims.java b/src/main/java/technology/tabula/PageDims.java new file mode 100644 index 00000000..1598d125 --- /dev/null +++ b/src/main/java/technology/tabula/PageDims.java @@ -0,0 +1,35 @@ +package technology.tabula; + +public class PageDims { + private final float top; + private final float left; + private final float width; + private final float height; + + private PageDims(final float top, final float left, final float width, final float height) { + this.top = top; + this.left = left; + this.width = width; + this.height = height; + } + + public static PageDims of(final float top, final float left, final float width, final float height) { + return new 
PageDims(top, left, width, height); + } + + public float getTop() { + return top; + } + + public float getLeft() { + return left; + } + + public float getWidth() { + return width; + } + + public float getHeight() { + return height; + } +} diff --git a/src/test/java/technology/tabula/TestProjectionProfile.java b/src/test/java/technology/tabula/TestProjectionProfile.java index 4a3462f9..e7af882f 100644 --- a/src/test/java/technology/tabula/TestProjectionProfile.java +++ b/src/test/java/technology/tabula/TestProjectionProfile.java @@ -31,9 +31,16 @@ public void setUpProjectionProfile() { List rulingList = new ArrayList<>(); rulingList.add(ruling); + page = Page.Builder.newInstance() + .withPageDims(PageDims.of(0, 0, 1, 1)) + .withRotation(0) + .withNumber(1) + .withPdPage(pdPage) + .withPdDocument(pdDocument) + .withTextElements(textList) + .withRulings(rulingList) + .build(); - page = new Page(0, 0, 1, 1, 0, 1, pdPage, pdDocument, textList, rulingList); - List rectangles = new ArrayList<>(); rectangles.add(new Rectangle(0f, 0f, 500f, 5f)); From 4fd6cafeea2abc572cbd5d704ea8ad9fdd4b7896 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Wed, 20 Jan 2021 09:27:56 -0300 Subject: [PATCH 130/200] Keep public constructors of Page class. 
Also, mark them as deprecated --- src/main/java/technology/tabula/Page.java | 51 +++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index 20c148d9..8b23066a 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -60,6 +60,57 @@ private Page( this.spatialIndex = index; } + /** + * + * @deprecated use {@link Builder} instead + */ + @Deprecated + public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc) { + super(top, left, width, height); + this.rotation = rotation; + this.number = number; + this.pdPage = pdPage; + this.pdDoc = doc; + } + + /** + * + * @deprecated use {@link Builder} instead + */ + public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, + List characters, List rulings) { + this(top, left, width, height, rotation, number, pdPage, doc); + this.textElements = characters; + this.rulings = rulings; + } + + /** + * + * @deprecated use {@link Builder} instead + */ + public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, + ObjectExtractorStreamEngine streamEngine, TextStripper textStripper) { + this(top, left, width, height, rotation, number, pdPage, doc, textStripper.textElements, streamEngine.rulings); + this.minCharWidth = textStripper.minCharWidth; + this.minCharHeight = textStripper.minCharHeight; + this.spatialIndex = textStripper.spatialIndex; + } + + + + /** + * + * @deprecated use {@link Builder} instead + */ + public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, + List characters, List rulings, + float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { + this(top, left, width, height, rotation, number, pdPage, doc, characters, 
rulings); + this.minCharHeight = minCharHeight; + this.minCharWidth = minCharWidth; + this.spatialIndex = index; + } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // public Page getArea(Rectangle area) { List areaTextElements = getText(area); From 6286f85fddc36d6bac5ec181b1120c19c7410331 Mon Sep 17 00:00:00 2001 From: Christoph Wedenig Date: Fri, 15 Jan 2021 20:58:09 +0100 Subject: [PATCH 131/200] Extracted duplicate code into methods * Enhanced readability with computeIfAbsent --- .../detectors/NurminenDetectionAlgorithm.java | 181 +++++++----------- 1 file changed, 72 insertions(+), 109 deletions(-) diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java index 74898e3e..fb43622a 100644 --- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java @@ -6,15 +6,7 @@ import java.awt.image.Raster; import java.io.IOException; import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; +import java.util.*; import org.apache.pdfbox.contentstream.operator.Operator; import org.apache.pdfbox.cos.COSName; @@ -526,132 +518,103 @@ private TextEdges getTextEdges(List lines) { Map> currMidEdges = new HashMap<>(); Map> currRightEdges = new HashMap<>(); + + int numOfLines = lines.size(); for (Line textRow : lines) { for (TextChunk text : textRow.getTextElements()) { - Integer left = new Integer((int) Math.floor(text.getLeft())); - Integer right = new Integer((int) Math.floor(text.getRight())); - Integer mid = new Integer(left + ((right - left) / 2)); + Integer left = (int) Math.floor(text.getLeft()); + Integer right = (int) 
Math.floor(text.getRight()); + Integer mid = left + ((right - left) / 2); // first put this chunk into any edge buckets it belongs to - List leftEdge = currLeftEdges.get(left); - if (leftEdge == null) { - leftEdge = new ArrayList<>(); - currLeftEdges.put(left, leftEdge); - } + List leftEdge = currLeftEdges.computeIfAbsent(left, k -> new ArrayList<>()); leftEdge.add(text); - List midEdge = currMidEdges.get(mid); - if (midEdge == null) { - midEdge = new ArrayList<>(); - currMidEdges.put(mid, midEdge); - } + List midEdge = currMidEdges.computeIfAbsent(mid, k -> new ArrayList<>()); midEdge.add(text); - List rightEdge = currRightEdges.get(right); - if (rightEdge == null) { - rightEdge = new ArrayList<>(); - currRightEdges.put(right, rightEdge); - } + List rightEdge = currRightEdges.computeIfAbsent(right, k -> new ArrayList<>()); rightEdge.add(text); // now see if this text chunk blows up any other edges - for (Iterator>> iterator = currLeftEdges.entrySet().iterator(); iterator.hasNext(); ) { - Map.Entry> entry = iterator.next(); - Integer key = entry.getKey(); - if (key > left && key < right) { - iterator.remove(); - List edgeChunks = entry.getValue(); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); - - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); - - leftTextEdges.add(edge); - } - } - } - - for (Iterator>> iterator = currMidEdges.entrySet().iterator(); iterator.hasNext(); ) { - Map.Entry> entry = iterator.next(); - Integer key = entry.getKey(); - if (key > left && key < right && Math.abs(key - mid) > 2) { - iterator.remove(); - List edgeChunks = entry.getValue(); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); - - TextEdge edge = new TextEdge(key, 
first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); - - midTextEdges.add(edge); - } - } - } - - for (Iterator>> iterator = currRightEdges.entrySet().iterator(); iterator.hasNext(); ) { - Map.Entry> entry = iterator.next(); - Integer key = entry.getKey(); - if (key > left && key < right) { - iterator.remove(); - List edgeChunks = entry.getValue(); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); + leftTextEdges.addAll( + calculateExtendedEdges(numOfLines, currLeftEdges, left, right) + ); - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + midTextEdges.addAll( + calculateExtendedEdges(numOfLines, currMidEdges, left, right, mid, 2) + ); - rightTextEdges.add(edge); - } - } - } + rightTextEdges.addAll( + calculateExtendedEdges(numOfLines, currRightEdges, left, right) + ); } } // add the leftovers - for (Integer key : currLeftEdges.keySet()) { - List edgeChunks = currLeftEdges.get(key); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); + leftTextEdges.addAll( + calculateLeftoverEdges(numOfLines, currLeftEdges) + ); - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + midTextEdges.addAll( + calculateLeftoverEdges(numOfLines, currMidEdges) + ); - leftTextEdges.add(edge); - } - } + rightTextEdges.addAll( + calculateLeftoverEdges(numOfLines, currRightEdges) + ); - for (Integer key : currMidEdges.keySet()) { - List edgeChunks = currMidEdges.get(key); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 
1); + return new TextEdges(leftTextEdges, midTextEdges, rightTextEdges); + } - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + private Set calculateLeftoverEdges(int numOfLines, Map> currDirectedEdges) { + Set leftoverEdges = new HashSet<>(); + for (Integer key : currDirectedEdges.keySet()) { + List edgeChunks = currDirectedEdges.get(key); + if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { + TextEdge edge = getEdgeFromChunks(numOfLines, key, edgeChunks); - midTextEdges.add(edge); + leftoverEdges.add(edge); } } + return leftoverEdges; + } - for (Integer key : currRightEdges.keySet()) { - List edgeChunks = currRightEdges.get(key); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); + private TextEdge getEdgeFromChunks(int numOfLines, Integer key, List edgeChunks) { + TextChunk first = edgeChunks.get(0); + TextChunk last = edgeChunks.get(edgeChunks.size() - 1); - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); + edge.intersectingTextRowCount = Math.min(edgeChunks.size(), numOfLines); + return edge; + } - rightTextEdges.add(edge); + + private Collection calculateExtendedEdges(Integer numOfLines, Map> currDirectedEdges, Integer left, Integer right) { + return calculateExtendedEdges(numOfLines, currDirectedEdges, left, right, null, null); + } + + private Collection calculateExtendedEdges(Integer numOfLines, Map> currDirectedEdges, Integer left, Integer right, Integer mid, Integer minDistToMid) { + Set extendedEdges = new HashSet<>(); + Iterator>> edgeIterator = currDirectedEdges.entrySet().iterator(); + while (edgeIterator.hasNext()) { + Map.Entry> entry = edgeIterator.next(); + 
Integer key = entry.getKey(); + + // if mid and minDistToMid are set, we calculate if the distance to mid is actually above, + // otherwise we ignore it + boolean hasMinDistToMid = mid == null || minDistToMid == null || Math.abs(key - mid) > minDistToMid; + + if (key > left && key < right && hasMinDistToMid) { + edgeIterator.remove(); + List edgeChunks = entry.getValue(); + if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { + TextEdge edge = getEdgeFromChunks(numOfLines, key, edgeChunks); + extendedEdges.add(edge); + } } } - - return new TextEdges(leftTextEdges, midTextEdges, rightTextEdges); + return extendedEdges; } private List getTableAreasFromCells(List cells) { @@ -665,9 +628,9 @@ private List getTableAreasFromCells(List cells) Point2D[] groupCellCorners = groupCell.getPoints(); Point2D[] candidateCorners = cell.getPoints(); - for (int i = 0; i < candidateCorners.length; i++) { - for (int j = 0; j < groupCellCorners.length; j++) { - if (candidateCorners[i].distance(groupCellCorners[j]) < CELL_CORNER_DISTANCE_MAXIMUM) { + for (Point2D candidateCorner : candidateCorners) { + for (Point2D groupCellCorner : groupCellCorners) { + if (candidateCorner.distance(groupCellCorner) < CELL_CORNER_DISTANCE_MAXIMUM) { cellGroup.add(cell); addedToGroup = true; break cellCheck; From 11928877d03545264ff4411111a577199e3fba2c Mon Sep 17 00:00:00 2001 From: Andreas Tscheinig Date: Fri, 15 Jan 2021 21:08:04 +0100 Subject: [PATCH 132/200] Resolve dependency inversion code smell --- src/main/java/technology/tabula/Cell.java | 15 +---------- src/main/java/technology/tabula/HasText.java | 1 + .../tabula/RectangularTextContainer.java | 25 +++++++++++++++---- .../java/technology/tabula/TextChunk.java | 15 ++++------- .../java/technology/tabula/TextElement.java | 8 +++++- 5 files changed, 34 insertions(+), 30 deletions(-) diff --git a/src/main/java/technology/tabula/Cell.java b/src/main/java/technology/tabula/Cell.java index 79c64fbc..3b42b4ca 100644 --- 
a/src/main/java/technology/tabula/Cell.java +++ b/src/main/java/technology/tabula/Cell.java @@ -1,9 +1,7 @@ package technology.tabula; import java.awt.geom.Point2D; -import java.util.ArrayList; import java.util.Collections; -import java.util.List; @SuppressWarnings("serial") public class Cell extends RectangularTextContainer { @@ -12,19 +10,16 @@ public Cell(float top, float left, float width, float height) { super(top, left, width, height); this.setPlaceholder(false); this.setSpanning(false); - this.setTextElements(new ArrayList()); } public Cell(Point2D topLeft, Point2D bottomRight) { super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY())); this.setPlaceholder(false); this.setSpanning(false); - this.setTextElements(new ArrayList()); } private boolean spanning; private boolean placeholder; - private List textElements; @Override public String getText(boolean useLineReturns) { @@ -44,6 +39,7 @@ public String getText(boolean useLineReturns) { return sb.toString().trim(); } + @Override public String getText() { return getText(true); } @@ -63,13 +59,4 @@ public boolean isPlaceholder() { public void setPlaceholder(boolean placeholder) { this.placeholder = placeholder; } - - public List getTextElements() { - return textElements; - } - - public void setTextElements(List textElements) { - this.textElements = textElements; - } - } diff --git a/src/main/java/technology/tabula/HasText.java b/src/main/java/technology/tabula/HasText.java index 99455afb..1a9bda99 100644 --- a/src/main/java/technology/tabula/HasText.java +++ b/src/main/java/technology/tabula/HasText.java @@ -3,5 +3,6 @@ public interface HasText { String getText(); + String getText(boolean useLineReturns); } diff --git a/src/main/java/technology/tabula/RectangularTextContainer.java b/src/main/java/technology/tabula/RectangularTextContainer.java index 5f4d3716..934b5f13 100644 --- 
a/src/main/java/technology/tabula/RectangularTextContainer.java +++ b/src/main/java/technology/tabula/RectangularTextContainer.java @@ -1,11 +1,14 @@ package technology.tabula; +import java.util.ArrayList; import java.util.List; @SuppressWarnings("serial") -public abstract class RectangularTextContainer extends Rectangle { +public class RectangularTextContainer extends Rectangle implements HasText { - public RectangularTextContainer(float top, float left, float width, float height) { + protected List textElements = new ArrayList<>(); + + protected RectangularTextContainer(float top, float left, float width, float height) { super(top, left, width, height); } @@ -19,11 +22,23 @@ public RectangularTextContainer merge(RectangularTextContainer other) { return this; } - public abstract String getText(); + public List getTextElements() { + return textElements; + } + + public void setTextElements(List textElements) { + this.textElements = textElements; + } - public abstract String getText(boolean useLineReturns); + @Override + public String getText() { + throw new UnsupportedOperationException(); + } - public abstract List getTextElements(); + @Override + public String getText(boolean useLineReturns) { + throw new UnsupportedOperationException(); + } @Override public String toString() { StringBuilder sb = new StringBuilder(); diff --git a/src/main/java/technology/tabula/TextChunk.java b/src/main/java/technology/tabula/TextChunk.java index d387a5de..6e7722a3 100644 --- a/src/main/java/technology/tabula/TextChunk.java +++ b/src/main/java/technology/tabula/TextChunk.java @@ -8,9 +8,9 @@ import java.text.Normalizer; @SuppressWarnings("serial") -public class TextChunk extends RectangularTextContainer implements HasText { +public class TextChunk extends RectangularTextContainer { public static final TextChunk EMPTY = new TextChunk(0, 0, 0, 0); - List textElements = new ArrayList<>(); +// List textElements = new ArrayList<>(); public TextChunk(float top, float left, float width, 
float height) { super(top, left, width, height); @@ -165,11 +165,8 @@ public void add(List elements) { } } - @Override public List getTextElements() { - return textElements; - } - - @Override public String getText() { + @Override + public String getText() { if (this.textElements.size() == 0) { return ""; } @@ -183,11 +180,9 @@ public void add(List elements) { @Override public String getText(boolean useLineReturns) { - // TODO Auto-generated method stub - return null; + return getText(); } - /** * Returns true if text contained in this TextChunk is the same repeated character */ diff --git a/src/main/java/technology/tabula/TextElement.java b/src/main/java/technology/tabula/TextElement.java index f54c4e2f..a0f24fa0 100644 --- a/src/main/java/technology/tabula/TextElement.java +++ b/src/main/java/technology/tabula/TextElement.java @@ -30,7 +30,13 @@ public TextElement(float y, float x, float width, float height, this.dir = dir; } - @Override public String getText() { + @Override + public String getText() { + return text; + } + + @Override + public String getText(boolean useLineReturns) { return text; } From 3452fe14c2c333985a74544667b06fe069bd5321 Mon Sep 17 00:00:00 2001 From: Andreas Tscheinig Date: Fri, 15 Jan 2021 21:46:25 +0100 Subject: [PATCH 133/200] Resolve data-class code smell --- .../technology/tabula/ObjectExtractor.java | 10 +++---- .../java/technology/tabula/TextStripper.java | 30 ++++++++++++++----- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/main/java/technology/tabula/ObjectExtractor.java b/src/main/java/technology/tabula/ObjectExtractor.java index 907312a9..c4348cd3 100644 --- a/src/main/java/technology/tabula/ObjectExtractor.java +++ b/src/main/java/technology/tabula/ObjectExtractor.java @@ -26,7 +26,7 @@ protected Page extractPage(Integer pageNumber) throws IOException { TextStripper textStripper = new TextStripper(pdfDocument, pageNumber); textStripper.process(); - Utils.sort(textStripper.textElements, 
Rectangle.ILL_DEFINED_ORDER); + Utils.sort(textStripper.getTextElements(), Rectangle.ILL_DEFINED_ORDER); float width, height; int rotation = page.getRotation(); @@ -45,10 +45,10 @@ protected Page extractPage(Integer pageNumber) throws IOException { .withPdPage(page) .withPdDocument(pdfDocument) .withRulings(streamEngine.rulings) - .withTextElements(textStripper.textElements) - .withMinCharWidth(textStripper.minCharWidth) - .withMinCharHeight(textStripper.minCharHeight) - .withIndex(textStripper.spatialIndex) + .withTextElements(textStripper.getTextElements()) + .withMinCharWidth(textStripper.getMinCharWidth()) + .withMinCharHeight(textStripper.getMinCharHeight()) + .withIndex(textStripper.getSpatialIndex()) .build(); } diff --git a/src/main/java/technology/tabula/TextStripper.java b/src/main/java/technology/tabula/TextStripper.java index 329d45a2..557fa439 100644 --- a/src/main/java/technology/tabula/TextStripper.java +++ b/src/main/java/technology/tabula/TextStripper.java @@ -18,13 +18,13 @@ public class TextStripper extends PDFTextStripper { private static final float AVG_HEIGHT_MULT_THRESHOLD = 6.0f; private static final float MAX_BLANK_FONT_SIZE = 40.0f; private static final float MIN_BLANK_FONT_SIZE = 2.0f; - private PDDocument document; - public ArrayList textElements; - public RectangleSpatialIndex spatialIndex; - public float minCharWidth = Float.MAX_VALUE; - public float minCharHeight = Float.MAX_VALUE; - public float totalHeight = 0.0f; - public int countHeight = 0; + private final PDDocument document; + private final ArrayList textElements; + private final RectangleSpatialIndex spatialIndex; + private float minCharWidth = Float.MAX_VALUE; + private float minCharHeight = Float.MAX_VALUE; + private float totalHeight = 0.0f; + private int countHeight = 0; public TextStripper(PDDocument document, int pageNumber) throws IOException { super(); @@ -156,4 +156,20 @@ private boolean isPrintable(String s) { } return printable; } + + public List getTextElements() { 
+ return this.textElements; + } + + public RectangleSpatialIndex getSpatialIndex() { + return spatialIndex; + } + + public float getMinCharWidth() { + return minCharWidth; + } + + public float getMinCharHeight() { + return minCharHeight; + } } From 6923895ef37c0f03c7cd44ee755e875b078e82d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Wed, 20 Jan 2021 09:40:51 -0300 Subject: [PATCH 134/200] adapt to refactor --- src/main/java/technology/tabula/Page.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index 8b23066a..ed74d14a 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -90,10 +90,10 @@ public Page(float top, float left, float width, float height, int rotation, int */ public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, ObjectExtractorStreamEngine streamEngine, TextStripper textStripper) { - this(top, left, width, height, rotation, number, pdPage, doc, textStripper.textElements, streamEngine.rulings); - this.minCharWidth = textStripper.minCharWidth; - this.minCharHeight = textStripper.minCharHeight; - this.spatialIndex = textStripper.spatialIndex; + this(top, left, width, height, rotation, number, pdPage, doc, textStripper.getTextElements(), streamEngine.rulings); + this.minCharWidth = textStripper.getMinCharWidth(); + this.minCharHeight = textStripper.getMinCharHeight(); + this.spatialIndex = textStripper.getSpatialIndex(); } From df3653b1122f7feb0dfa919746be46253e25490f Mon Sep 17 00:00:00 2001 From: Christoph Wedenig Date: Fri, 15 Jan 2021 23:22:18 +0100 Subject: [PATCH 135/200] Extracted rounded comparator --- .../SpreadsheetExtractionAlgorithm.java | 65 +++++-------------- 1 file changed, 18 insertions(+), 47 deletions(-) diff --git 
a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java index c377507c..0acf2092 100644 --- a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java @@ -27,57 +27,28 @@ public class SpreadsheetExtractionAlgorithm implements ExtractionAlgorithm { private static final float MAGIC_HEURISTIC_NUMBER = 0.65f; - private static final Comparator POINT_COMPARATOR = new Comparator() { - @Override - public int compare(Point2D arg0, Point2D arg1) { - int rv = 0; - float arg0X = Utils.round(arg0.getX(), 2); - float arg0Y = Utils.round(arg0.getY(), 2); - float arg1X = Utils.round(arg1.getX(), 2); - float arg1Y = Utils.round(arg1.getY(), 2); - - - if (arg0Y > arg1Y) { - rv = 1; - } - else if (arg0Y < arg1Y) { - rv = -1; - } - else if (arg0X > arg1X) { - rv = 1; - } - else if (arg0X < arg1X) { - rv = -1; - } - return rv; + private static final Comparator Y_FIRST_POINT_COMPARATOR = (point1, point2) -> { + int compareY = compareRounded(point1.getY(), point2.getY()); + if (compareY == 0) { + return compareRounded(point1.getX(), point2.getX()); } + return compareY; }; - private static final Comparator X_FIRST_POINT_COMPARATOR = new Comparator() { - @Override - public int compare(Point2D arg0, Point2D arg1) { - int rv = 0; - float arg0X = Utils.round(arg0.getX(), 2); - float arg0Y = Utils.round(arg0.getY(), 2); - float arg1X = Utils.round(arg1.getX(), 2); - float arg1Y = Utils.round(arg1.getY(), 2); - - if (arg0X > arg1X) { - rv = 1; - } - else if (arg0X < arg1X) { - rv = -1; - } - else if (arg0Y > arg1Y) { - rv = 1; - } - else if (arg0Y < arg1Y) { - rv = -1; - } - return rv; + private static final Comparator X_FIRST_POINT_COMPARATOR = (point1, point2) -> { + int compareX = compareRounded(point1.getX(), point2.getX()); + if (compareX == 0) { + return 
compareRounded(point1.getY(), point2.getY()); } + return compareX; }; + private static int compareRounded(double d1, double d2) { + float d1Rounded = Utils.round(d1, 2); + float d2Rounded = Utils.round(d2, 2); + + return Float.compare(d1Rounded, d2Rounded); + } @Override public List
extract(Page page) { @@ -175,7 +146,7 @@ public static List findCells(List horizontalRulingLines, List cellsFound = new ArrayList<>(); Map intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines); List intersectionPointsList = new ArrayList<>(intersectionPoints.keySet()); - Collections.sort(intersectionPointsList, POINT_COMPARATOR); + intersectionPointsList.sort(Y_FIRST_POINT_COMPARATOR); boolean doBreak = false; for (int i = 0; i < intersectionPointsList.size(); i++) { @@ -256,7 +227,7 @@ public static List findSpreadsheetsFromCells(List pointsSortY = new ArrayList<>(pointSet); - Collections.sort(pointsSortY, POINT_COMPARATOR); + Collections.sort(pointsSortY, Y_FIRST_POINT_COMPARATOR); while (i < pointSet.size()) { float currY = (float) pointsSortY.get(i).getY(); From cbb6d73a9eed856cdf1ec29d3658e07177382404 Mon Sep 17 00:00:00 2001 From: Christoph Wedenig Date: Fri, 15 Jan 2021 23:38:39 +0100 Subject: [PATCH 136/200] Cleaned up SpreadsheetExtractionAlgorithm --- .../SpreadsheetExtractionAlgorithm.java | 65 +++++++------------ 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java index 0acf2092..44998cfb 100644 --- a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java @@ -1,23 +1,9 @@ package technology.tabula.extractors; -import java.awt.geom.Point2D; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import technology.tabula.*; -import technology.tabula.Cell; -import technology.tabula.Page; -import technology.tabula.Rectangle; -import technology.tabula.Ruling; -import technology.tabula.Table; 
-import technology.tabula.TableWithRulingLines; -import technology.tabula.TextElement; -import technology.tabula.Utils; +import java.awt.geom.Point2D; +import java.util.*; /** * @author manuel @@ -60,8 +46,8 @@ public List
extract(Page page) { */ public List
extract(Page page, List rulings) { // split rulings into horizontal and vertical - List horizontalR = new ArrayList<>(), - verticalR = new ArrayList<>(); + List horizontalR = new ArrayList<>(); + List verticalR = new ArrayList<>(); for (Ruling r: rulings) { if (r.horizontal()) { @@ -113,7 +99,7 @@ public boolean isTabular(Page page) { // if there's no text at all on the page, it's not a table // (we won't be able to do anything with it though) - if(page.getText().isEmpty()){ + if (page.getText().isEmpty()){ return false; } @@ -122,7 +108,7 @@ public boolean isTabular(Page page) { Page minimalRegion = page.getArea(Utils.bounds(page.getText())); List tables = new SpreadsheetExtractionAlgorithm().extract(minimalRegion); - if (tables.size() == 0) { + if (tables.isEmpty()) { return false; } Table table = tables.get(0); @@ -130,16 +116,17 @@ public boolean isTabular(Page page) { int colsDefinedByLines = table.getColCount(); tables = new BasicExtractionAlgorithm().extract(minimalRegion); - if (tables.size() == 0) { - // TODO WHAT DO WE DO HERE? 
+ if (tables.isEmpty()) { + return false; } table = tables.get(0); int rowsDefinedWithoutLines = table.getRowCount(); int colsDefinedWithoutLines = table.getColCount(); - float ratio = (((float) colsDefinedByLines / colsDefinedWithoutLines) + ((float) rowsDefinedByLines / rowsDefinedWithoutLines)) / 2.0f; + float ratio = (((float) colsDefinedByLines / colsDefinedWithoutLines) + + ((float) rowsDefinedByLines / rowsDefinedWithoutLines)) / 2.0f; - return ratio > MAGIC_HEURISTIC_NUMBER && ratio < (1/MAGIC_HEURISTIC_NUMBER); + return ratio > MAGIC_HEURISTIC_NUMBER && ratio < (1 / MAGIC_HEURISTIC_NUMBER); } public static List findCells(List horizontalRulingLines, List verticalRulingLines) { @@ -147,16 +134,12 @@ public static List findCells(List horizontalRulingLines, List intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines); List intersectionPointsList = new ArrayList<>(intersectionPoints.keySet()); intersectionPointsList.sort(Y_FIRST_POINT_COMPARATOR); - boolean doBreak = false; for (int i = 0; i < intersectionPointsList.size(); i++) { Point2D topLeft = intersectionPointsList.get(i); Ruling[] hv = intersectionPoints.get(topLeft); - doBreak = false; - - // CrossingPointsDirectlyBelow( topLeft ); + List xPoints = new ArrayList<>(); - // CrossingPointsDirectlyToTheRight( topLeft ); List yPoints = new ArrayList<>(); for (Point2D p: intersectionPointsList.subList(i, intersectionPointsList.size())) { @@ -169,7 +152,6 @@ public static List findCells(List horizontalRulingLines, List findCells(List horizontalRulingLines, List findSpreadsheetsFromCells(List pointsSortX = new ArrayList<>(pointSet); - Collections.sort(pointsSortX, X_FIRST_POINT_COMPARATOR); + pointsSortX.sort(X_FIRST_POINT_COMPARATOR); // Y first sort List pointsSortY = new ArrayList<>(pointSet); - Collections.sort(pointsSortY, Y_FIRST_POINT_COMPARATOR); + pointsSortY.sort(Y_FIRST_POINT_COMPARATOR); while (i < pointSet.size()) { float currY = (float) pointsSortY.get(i).getY(); 
@@ -263,16 +244,15 @@ public static List findSpreadsheetsFromCells(List Date: Sun, 7 Mar 2021 15:45:56 -0300 Subject: [PATCH 137/200] ObjectExtractor: implement java.io.Closeable (fixes #408) --- src/main/java/technology/tabula/ObjectExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/technology/tabula/ObjectExtractor.java b/src/main/java/technology/tabula/ObjectExtractor.java index c4348cd3..9f3f6a03 100644 --- a/src/main/java/technology/tabula/ObjectExtractor.java +++ b/src/main/java/technology/tabula/ObjectExtractor.java @@ -5,7 +5,7 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; -public class ObjectExtractor { +public class ObjectExtractor implements java.io.Closeable { private final PDDocument pdfDocument; From 6ba8ad8978c0dac9c7774c4953adee174d2edd17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Sun, 7 Mar 2021 16:25:48 -0300 Subject: [PATCH 138/200] Fix unclosed document warnings --- .../technology/tabula/TestBasicExtractor.java | 11 ++ .../tabula/TestObjectExtractor.java | 102 ++++++++---------- 2 files changed, 58 insertions(+), 55 deletions(-) diff --git a/src/test/java/technology/tabula/TestBasicExtractor.java b/src/test/java/technology/tabula/TestBasicExtractor.java index 5d5d985c..d120546f 100644 --- a/src/test/java/technology/tabula/TestBasicExtractor.java +++ b/src/test/java/technology/tabula/TestBasicExtractor.java @@ -135,6 +135,7 @@ public void testRemoveSequentialSpaces() throws IOException { assertTrue(firstRow.get(1).getText().equals("ALLEGIANT AIR")); assertTrue(firstRow.get(2).getText().equals("ALLEGIANT AIR LLC")); + page.getPDDoc().close(); } @Test @@ -143,6 +144,7 @@ public void testColumnRecognition() throws IOException { BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); assertArrayEquals(ARGENTINA_DIPUTADOS_VOTING_RECORD_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); + 
page.getPDDoc().close(); } @Test @@ -162,6 +164,7 @@ public void testVerticalRulingsPreventMergingOfColumns() throws IOException { assertTrue(sixthRow.get(0).getText().equals("VALSANGIACOMO BLANC")); assertTrue(sixthRow.get(1).getText().equals("OFERNANDO JORGE")); + page.getPDDoc().close(); } @Test @@ -170,6 +173,7 @@ public void testExtractColumnsCorrectly() throws IOException { BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); assertArrayEquals(EU_002_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); + page.getPDDoc().close(); } @Test @@ -178,6 +182,7 @@ public void testExtractColumnsCorrectly2() throws IOException { BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(page.getVerticalRulings()); Table table = bea.extract(page.getArea(299.625f, 148.44f, 711.875f, 452.32f)).get(0); assertArrayEquals(EU_017_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); + page.getPDDoc().close(); } @Test @@ -186,6 +191,7 @@ public void testExtractColumnsCorrectly3() throws IOException { BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); assertArrayEquals(FRX_2012_DISCLOSURE_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); + page.getPDDoc().close(); } @Test @@ -199,6 +205,7 @@ public void testCheckSqueezeDoesntBreak() throws IOException { List lastRow = rows.get(rows.size() - 1); assertTrue(firstRow.get(0).getText().equals("Violent crime . . . . . . . . . . . . . . . . . 
.")); assertTrue(lastRow.get(lastRow.size() - 1).getText().equals("(X)")); + page.getPDDoc().close(); } @Test @@ -274,6 +281,8 @@ public void testNaturalOrderOfRectangles() throws IOException { assertEquals("DOD, and NIH", cells.get(38).getText()); assertEquals("and networks", cells.get(39).getText()); + page.getPDDoc().close(); + } @Test @@ -314,6 +323,7 @@ public void testRealLifeRTL2() throws IOException { StringBuilder sb = new StringBuilder(); (new CSVWriter()).write(sb, table); assertEquals(expectedCsv, sb.toString()); + page.getPDDoc().close(); } @@ -323,6 +333,7 @@ public void testEmptyRegion() throws IOException { BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); assertArrayEquals(EXPECTED_EMPTY_TABLE, UtilsForTesting.tableToArrayOfRows(table)); + page.getPDDoc().close(); } diff --git a/src/test/java/technology/tabula/TestObjectExtractor.java b/src/test/java/technology/tabula/TestObjectExtractor.java index fe458b87..9db7ad18 100644 --- a/src/test/java/technology/tabula/TestObjectExtractor.java +++ b/src/test/java/technology/tabula/TestObjectExtractor.java @@ -22,74 +22,80 @@ public void testWrongPasswordRaisesException() throws IOException { @Test(expected = IOException.class) public void testEmptyOnEncryptedFileRaisesException() throws IOException { PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - oe.extract().next(); + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + oe.extract().next(); + } } @Test public void testCanReadPDFWithOwnerEncryption() throws IOException { PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - PageIterator pi = oe.extract(); - int i = 0; - while (pi.hasNext()) { + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { 
+ PageIterator pi = oe.extract(); + int i = 0; + while (pi.hasNext()) { i++; pi.next(); + } + assertEquals(2, i); } - assertEquals(2, i); } @Test public void testGoodPassword() throws IOException { PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword"); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - List pages = new ArrayList<>(); - PageIterator pi = oe.extract(); - while (pi.hasNext()) { + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + List pages = new ArrayList<>(); + PageIterator pi = oe.extract(); + while (pi.hasNext()) { pages.add(pi.next()); + } + assertEquals(1, pages.size()); } - assertEquals(1, pages.size()); } @Test public void testTextExtractionDoesNotRaise() throws IOException { PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/rotated_page.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - PageIterator pi = oe.extract(); - - assertTrue(pi.hasNext()); - assertNotNull(pi.next()); - assertFalse(pi.hasNext()); + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + PageIterator pi = oe.extract(); + assertTrue(pi.hasNext()); + assertNotNull(pi.next()); + assertFalse(pi.hasNext()); + } } @Test public void testShouldDetectRulings() throws IOException { PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - PageIterator pi = oe.extract(); + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + PageIterator pi = oe.extract(); - Page page = pi.next(); - List rulings = page.getRulings(); + Page page = pi.next(); + List rulings = page.getRulings(); - for (Ruling r: rulings) { + for (Ruling r: rulings) { assertTrue(page.contains(r.getBounds())); + } } } @Test public void testDontThrowNPEInShfill() throws IOException { PDDocument pdf_document = PDDocument.load(new 
File("src/test/resources/technology/tabula/labor.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - PageIterator pi = oe.extract(); - assertTrue(pi.hasNext()); - try { + + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + PageIterator pi = oe.extract(); + assertTrue(pi.hasNext()); + try { Page p = pi.next(); assertNotNull(p); - } catch (NullPointerException e) { + } catch (NullPointerException e) { fail("NPE in ObjectExtractor " + e.toString()); + } } } @@ -98,10 +104,11 @@ public void testExtractOnePage() throws IOException { PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); assertEquals(2, pdf_document.getNumberOfPages()); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - Page page = oe.extract(2); + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + Page page = oe.extract(2); - assertNotNull(page); + assertNotNull(page); + } } @@ -110,48 +117,33 @@ public void testExtractWrongPageNumber() throws IOException { PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); assertEquals(2, pdf_document.getNumberOfPages()); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - oe.extract(3); - + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + oe.extract(3); + } } @Test public void testTextElementsContainedInPage() throws IOException { PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - Page page = oe.extractPage(1); + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + Page page = oe.extractPage(1); - for (TextElement te: page.getText()) { + for (TextElement te: page.getText()) { assertTrue(page.contains(te)); + } } + } @Test public void testDoNotNPEInPointComparator() throws IOException { PDDocument pdf_document = PDDocument.load(new 
File("src/test/resources/technology/tabula/npe_issue_206.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - try { + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { Page p = oe.extractPage(1); assertNotNull(p); } catch (NullPointerException e) { fail("NPE in ObjectExtractor " + e.toString()); } } - - /* - @Test - public void testExtractWithoutExtractingRulings() throws IOException { - PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/should_detect_rulings.pdf"); - ObjectExtractor oe = new ObjectExtractor(pdf_document, null, false, false); - PageIterator pi = oe.extract(); - - assertTrue(pi.hasNext()); - Page page = pi.next(); - assertNotNull(page); - assertEquals(0, page.getRulings().size()); - assertFalse(pi.hasNext()); - } - */ - } From 96ac1829c09aa2df7434d0ed96a3f5424bef6c89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Sun, 7 Mar 2021 16:35:55 -0300 Subject: [PATCH 139/200] fix more warnings in tests --- .../tabula/TestSpreadsheetExtractor.java | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java b/src/test/java/technology/tabula/TestSpreadsheetExtractor.java index 0abd578b..f8bd4074 100644 --- a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java +++ b/src/test/java/technology/tabula/TestSpreadsheetExtractor.java @@ -189,6 +189,7 @@ public void testSpreadsheetExtraction() throws IOException { 269.875f, 12.75f, 790.5f, 561f); SpreadsheetExtractionAlgorithm.findCells(page.getHorizontalRulings(), page.getVerticalRulings()); + page.getPDDoc().close(); } @Test @@ -204,7 +205,7 @@ public void testSpanningCells() throws IOException { StringBuilder sb = new StringBuilder(); (new JSONWriter()).write(sb, tables); assertEquals(expectedJson, sb.toString()); - + page.getPDDoc().close(); } @Test @@ -220,7 +221,7 @@ public void testSpanningCellsToCsv() throws IOException { 
StringBuilder sb = new StringBuilder(); (new CSVWriter()).write(sb, tables); assertEquals(expectedCsv, sb.toString()); - + page.getPDDoc().close(); } @Test @@ -229,6 +230,7 @@ public void testIncompleteGrid() throws IOException { SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); List tables = se.extract(page); assertEquals(2, tables.size()); + page.getPDDoc().close(); } @Test @@ -244,6 +246,7 @@ public void testNaturalOrderOfRectanglesDoesNotBreakContract() throws IOExceptio String expected = "Project,Agency,Institution\r\nNanotechnology and its publics,NSF,Pennsylvania State University\r\n\"Public information and deliberation in nanoscience and\rnanotechnology policy (SGER)\",Interagency,\"North Carolina State\rUniversity\"\r\n\"Social and ethical research and education in agrifood\rnanotechnology (NIRT)\",NSF,Michigan State University\r\n\"From laboratory to society: developing an informed\rapproach to nanoscale science and engineering (NIRT)\",NSF,University of South Carolina\r\nDatabase and innovation timeline for nanotechnology,NSF,UCLA\r\nSocial and ethical dimensions of nanotechnology,NSF,University of Virginia\r\n\"Undergraduate exploration of nanoscience,\rapplications and societal implications (NUE)\",NSF,\"Michigan Technological\rUniversity\"\r\n\"Ethics and belief inside the development of\rnanotechnology (CAREER)\",NSF,University of Virginia\r\n\"All centers, NNIN and NCN have a societal\rimplications components\",\"NSF, DOE,\rDOD, and NIH\",\"All nanotechnology centers\rand networks\"\r\n"; assertEquals(expected, result); + page.getPDDoc().close(); } @Test @@ -255,8 +258,7 @@ public void testMergeLinesCloseToEachOther() throws IOException { assertEquals(expectedRulings[i], rulings.get(i).getLeft(), 0.1); } assertEquals(6, rulings.size()); - - + page.getPDDoc().close(); } @Test @@ -274,6 +276,7 @@ public void testSpreadsheetWithNoBoundingFrameShouldBeSpreadsheet() throws IOExc (new CSVWriter()).write(sb, tables.get(0)); 
assertEquals(expectedCsv, sb.toString()); + page.getPDDoc().close(); } @@ -333,7 +336,7 @@ public void testExtractSpreadsheetWithinAnArea() throws IOException { for (int i = 0; i < parsedResult.size(); i++) { assertEquals(parsedResult.get(i).size(), parsedExpected.get(i).size()); } - + page.getPDDoc().close(); } @Test @@ -354,6 +357,7 @@ public void testDontRaiseSortException() throws IOException { page.getText(); SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); bea.extract(page).get(0); + page.getPDDoc().close(); } @Test @@ -365,6 +369,7 @@ public void testShouldDetectASingleSpreadsheet() throws IOException { SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); List
tables = bea.extract(page); assertEquals(1, tables.size()); + page.getPDDoc().close(); } @Test @@ -395,6 +400,7 @@ public void testExtractTableWithExternallyDefinedRulings() throws IOException { assertEquals("3,700.00", table.getRows().get(7).get(1).getText()); assertEquals("Daily or Miscellaneous\r(each day of the payroll period)", table.getRows().get(8).get(0).getText()); assertEquals("14.23", table.getRows().get(8).get(1).getText()); + page.getPDDoc().close(); } @@ -410,6 +416,7 @@ public void testAnotherExtractTableWithExternallyDefinedRulings() throws IOExcep assertEquals("Total Supply", table.getRows().get(4).get(0).getText()); assertEquals("6.6", table.getRows().get(6).get(2).getText()); + page.getPDDoc().close(); } @Test @@ -422,6 +429,7 @@ public void testSpreadsheetsSortedByTopAndRight() throws IOException { for (int i = 1; i < tables.size(); i++) { assert (tables.get(i - 1).getTop() <= tables.get(i).getTop()); } + page.getPDDoc().close(); } @Test @@ -434,6 +442,7 @@ public void testDontStackOverflowQuicksort() throws IOException { for (int i = 1; i < tables.size(); i++) { assert (tables.get(i - 1).getTop() <= tables.get(i).getTop()); } + page.getPDDoc().close(); } @Test @@ -464,6 +473,7 @@ public void testRTL() throws IOException { // which is not currently possible because of the two problems listed above // assertEquals("مرحباً", table.getRows().get(0).get(0).getText()); // really ought to be ً, but this is forgiveable for now + page.getPDDoc().close(); } @@ -497,6 +507,7 @@ public void testRealLifeRTL() throws IOException { // these (commented-out) tests reflect the theoretical correct answer, // which is not currently possible because of the two problems listed above // assertEquals("مرحباً", table.getRows().get(0).get(0).getText()); // really ought to be ً, but this is forgiveable for now + page.getPDDoc().close(); } @@ -509,6 +520,7 @@ public void testExtractColumnsCorrectly3() throws IOException { Table table = sea.extract(page).get(0); 
assertEquals("REGIONAL PULMONARY & SLEEP\rMEDICINE", table.getRows().get(8).get(1).getText()); + page.getPDDoc().close(); } @@ -529,6 +541,7 @@ public void testSpreadsheetExtractionIssue656() throws IOException { (new CSVWriter()).write(sb, table); String result = sb.toString(); assertEquals(expectedCsv, result); + page.getPDDoc().close(); } } From 7fca22e0810ef1643f6a5a18437a71fecf1af6a4 Mon Sep 17 00:00:00 2001 From: Max Rydahl Andersen Date: Sun, 14 Mar 2021 01:20:18 +0100 Subject: [PATCH 140/200] add jbang catalog --- jbang-catalog.json | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 jbang-catalog.json diff --git a/jbang-catalog.json b/jbang-catalog.json new file mode 100644 index 00000000..b7f71347 --- /dev/null +++ b/jbang-catalog.json @@ -0,0 +1,8 @@ +{ + "catalogs": {}, + "aliases": { + "tabula": { + "script-ref": "https://github.com/tabulapdf/tabula-java/releases/download/v1.0.4/tabula-1.0.4-jar-with-dependencies.jar" + } + } +} \ No newline at end of file From 4e23be7e8449223b8ef3b660cb7aca864ee732f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Fri, 19 Mar 2021 13:21:28 -0300 Subject: [PATCH 141/200] pdfbox 2.0.23 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 2e209198..17c49742 100644 --- a/pom.xml +++ b/pom.xml @@ -262,7 +262,7 @@ org.apache.pdfbox pdfbox - 2.0.22 + 2.0.23 From c355a342672117c791b3409cadf95ead54418fc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Mon, 22 Mar 2021 08:02:58 -0300 Subject: [PATCH 142/200] fix logo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2a08d3ac..885d3eb8 100644 --- a/README.md +++ b/README.md @@ -112,5 +112,5 @@ Special thanks to the following users and organizations for generously supportin -The John S. and James L. Knight Foundation +The John S. and James L. 
Knight Foundation The Shuttleworth Foundation From 14b3d261bf7e5d3bc452b78f2f6539b6c894fcef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Thu, 8 Apr 2021 10:29:35 -0300 Subject: [PATCH 143/200] Light cleanup --- src/main/java/technology/tabula/Cell.java | 2 +- .../technology/tabula/RectangleSpatialIndex.java | 3 +-- src/main/java/technology/tabula/Ruling.java | 8 -------- src/main/java/technology/tabula/TextChunk.java | 16 ++++++---------- .../extractors/BasicExtractionAlgorithm.java | 2 +- 5 files changed, 9 insertions(+), 22 deletions(-) diff --git a/src/main/java/technology/tabula/Cell.java b/src/main/java/technology/tabula/Cell.java index 3b42b4ca..d02c8c50 100644 --- a/src/main/java/technology/tabula/Cell.java +++ b/src/main/java/technology/tabula/Cell.java @@ -27,7 +27,7 @@ public String getText(boolean useLineReturns) { return ""; } StringBuilder sb = new StringBuilder(); - Collections.sort(this.textElements, Rectangle.ILL_DEFINED_ORDER); + this.textElements.sort(Rectangle.ILL_DEFINED_ORDER); double curTop = this.textElements.get(0).getTop(); for (TextChunk tc : this.textElements) { if (useLineReturns && tc.getTop() > curTop) { diff --git a/src/main/java/technology/tabula/RectangleSpatialIndex.java b/src/main/java/technology/tabula/RectangleSpatialIndex.java index a39114a2..0e942545 100644 --- a/src/main/java/technology/tabula/RectangleSpatialIndex.java +++ b/src/main/java/technology/tabula/RectangleSpatialIndex.java @@ -32,8 +32,7 @@ public List contains(Rectangle r) { } public List intersects(Rectangle r) { - List rv = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); - return rv; + return si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); } /** diff --git a/src/main/java/technology/tabula/Ruling.java b/src/main/java/technology/tabula/Ruling.java index cdab6c4f..213ce87f 100644 --- a/src/main/java/technology/tabula/Ruling.java +++ b/src/main/java/technology/tabula/Ruling.java 
@@ -40,9 +40,6 @@ public void normalize() { else if (Utils.within(angle, 90, 1) || Utils.within(angle, 270, 1)) { // almost vertical this.setLine(this.x1, this.y1, this.x1, this.y2); } -// else { -// System.out.println("oblique: " + this + " ("+ this.getAngle() + ")"); -// } } public boolean vertical() { @@ -231,11 +228,6 @@ public boolean equals(Object other) { return this.getP1().equals(o.getP1()) && this.getP2().equals(o.getP2()); } - @Override - public int hashCode() { - return super.hashCode(); - } - public float getTop() { return this.y1; } diff --git a/src/main/java/technology/tabula/TextChunk.java b/src/main/java/technology/tabula/TextChunk.java index 6e7722a3..9f5adbd5 100644 --- a/src/main/java/technology/tabula/TextChunk.java +++ b/src/main/java/technology/tabula/TextChunk.java @@ -209,11 +209,10 @@ public TextChunk[] splitAt(int i) { throw new IllegalArgumentException(); } - TextChunk[] rv = new TextChunk[]{ - new TextChunk(this.getTextElements().subList(0, i)), - new TextChunk(this.getTextElements().subList(i, this.getTextElements().size())) - }; - return rv; + return new TextChunk[]{ + new TextChunk(this.getTextElements().subList(0, i)), + new TextChunk(this.getTextElements().subList(i, this.getTextElements().size())) + }; } /** @@ -293,11 +292,8 @@ public boolean equals(Object obj) { return false; TextChunk other = (TextChunk) obj; if (textElements == null) { - if (other.textElements != null) - return false; - } else if (!textElements.equals(other.textElements)) - return false; - return true; + return other.textElements == null; + } else return textElements.equals(other.textElements); } public static boolean allSameChar(List textChunks) { diff --git a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java index dcd01695..246c5342 100644 --- a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java +++ 
b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java @@ -74,7 +74,7 @@ public int compare(Ruling arg0, Ruling arg1) { @Override public int compare(TextChunk o1, TextChunk o2) { - return new java.lang.Float(o1.getLeft()).compareTo(o2.getLeft()); + return Float.compare(o1.getLeft(), o2.getLeft()); } }); From ae9d2ebf53dcdb5b6f436a66db739343e2dc0f7c Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 22 Apr 2021 05:29:02 +0000 Subject: [PATCH 144/200] Bump junit from 4.13.1 to 4.13.2 Bumps [junit](https://github.com/junit-team/junit4) from 4.13.1 to 4.13.2. - [Release notes](https://github.com/junit-team/junit4/releases) - [Changelog](https://github.com/junit-team/junit4/blob/main/doc/ReleaseNotes4.13.1.md) - [Commits](https://github.com/junit-team/junit4/compare/r4.13.1...r4.13.2) Signed-off-by: dependabot-preview[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 17c49742..5c979351 100644 --- a/pom.xml +++ b/pom.xml @@ -280,7 +280,7 @@ junit junit - 4.13.1 + 4.13.2 test From c89831109f6892c62d9597ba593161b4cf2b0c1f Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 29 Apr 2021 15:11:21 +0000 Subject: [PATCH 145/200] Upgrade to GitHub-native Dependabot --- .github/dependabot.yml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..a217b347 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +version: 2 +updates: +- package-ecosystem: maven + directory: "/" + schedule: + interval: daily + open-pull-requests-limit: 10 From eac87d0b25dfd6d176b3e3e9c18782a66f74a99f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 30 Apr 2021 05:57:52 +0000 
Subject: [PATCH 146/200] Bump jts-core from 1.18.0 to 1.18.1 Bumps jts-core from 1.18.0 to 1.18.1. Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 5c979351..80fd3c04 100644 --- a/pom.xml +++ b/pom.xml @@ -244,7 +244,7 @@ org.locationtech.jts jts-core - 1.18.0 + 1.18.1 From 39253c5963b5506f1a302dc53d7976997765527a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 30 Apr 2021 05:57:43 +0000 Subject: [PATCH 147/200] Bump bcprov-jdk15on from 1.66 to 1.68 Bumps [bcprov-jdk15on](https://github.com/bcgit/bc-java) from 1.66 to 1.68. - [Release notes](https://github.com/bcgit/bc-java/releases) - [Changelog](https://github.com/bcgit/bc-java/blob/master/docs/releasenotes.html) - [Commits](https://github.com/bcgit/bc-java/commits) Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 80fd3c04..3500e21a 100644 --- a/pom.xml +++ b/pom.xml @@ -268,7 +268,7 @@ org.bouncycastle bcprov-jdk15on - 1.66 + 1.68 From 92a69b444ed18a6493b583998bdbd1f205042d8b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 30 Apr 2021 05:57:49 +0000 Subject: [PATCH 148/200] Bump bcmail-jdk15on from 1.66 to 1.68 Bumps [bcmail-jdk15on](https://github.com/bcgit/bc-java) from 1.66 to 1.68. 
- [Release notes](https://github.com/bcgit/bc-java/releases) - [Changelog](https://github.com/bcgit/bc-java/blob/master/docs/releasenotes.html) - [Commits](https://github.com/bcgit/bc-java/commits) Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3500e21a..fa52a851 100644 --- a/pom.xml +++ b/pom.xml @@ -274,7 +274,7 @@ org.bouncycastle bcmail-jdk15on - 1.66 + 1.68 From b0b0860ee46bcf150cd02a86480392d9882ee2d0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 May 2021 05:38:59 +0000 Subject: [PATCH 149/200] Bump gson from 2.8.6 to 2.8.7 Bumps [gson](https://github.com/google/gson) from 2.8.6 to 2.8.7. - [Release notes](https://github.com/google/gson/releases) - [Changelog](https://github.com/google/gson/blob/master/CHANGELOG.md) - [Commits](https://github.com/google/gson/compare/gson-parent-2.8.6...gson-parent-2.8.7) Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index fa52a851..b139db15 100644 --- a/pom.xml +++ b/pom.xml @@ -299,7 +299,7 @@ com.google.code.gson gson - 2.8.6 + 2.8.7 From 6b7ec97688071c728801a1857517d52826831794 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 11 Jun 2021 06:43:12 +0000 Subject: [PATCH 150/200] Bump pdfbox from 2.0.23 to 2.0.24 Bumps pdfbox from 2.0.23 to 2.0.24. --- updated-dependencies: - dependency-name: org.apache.pdfbox:pdfbox dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b139db15..cceb918a 100644 --- a/pom.xml +++ b/pom.xml @@ -262,7 +262,7 @@ org.apache.pdfbox pdfbox - 2.0.23 + 2.0.24 From ae281f671c1872f8ede2eaed14b5d0fb717085a0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 30 Jul 2021 21:03:52 +0000 Subject: [PATCH 151/200] Bump commons-csv from 1.8 to 1.9.0 Bumps commons-csv from 1.8 to 1.9.0. --- updated-dependencies: - dependency-name: org.apache.commons:commons-csv dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index cceb918a..01722eab 100644 --- a/pom.xml +++ b/pom.xml @@ -293,7 +293,7 @@ org.apache.commons commons-csv - 1.8 + 1.9.0 From 80042f581b99bd42b1d9fde8d1b5f37a3daa5404 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 10 Jun 2021 05:20:07 +0000 Subject: [PATCH 152/200] Bump bcprov-jdk15on from 1.68 to 1.69 Bumps [bcprov-jdk15on](https://github.com/bcgit/bc-java) from 1.68 to 1.69. - [Release notes](https://github.com/bcgit/bc-java/releases) - [Changelog](https://github.com/bcgit/bc-java/blob/master/docs/releasenotes.html) - [Commits](https://github.com/bcgit/bc-java/commits) --- updated-dependencies: - dependency-name: org.bouncycastle:bcprov-jdk15on dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 01722eab..5ba35709 100644 --- a/pom.xml +++ b/pom.xml @@ -268,7 +268,7 @@ org.bouncycastle bcprov-jdk15on - 1.68 + 1.69 From ce74f121a35d80c7df36a55c926ecc847a733021 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 10 Jun 2021 05:20:19 +0000 Subject: [PATCH 153/200] Bump bcmail-jdk15on from 1.68 to 1.69 Bumps [bcmail-jdk15on](https://github.com/bcgit/bc-java) from 1.68 to 1.69. - [Release notes](https://github.com/bcgit/bc-java/releases) - [Changelog](https://github.com/bcgit/bc-java/blob/master/docs/releasenotes.html) - [Commits](https://github.com/bcgit/bc-java/commits) --- updated-dependencies: - dependency-name: org.bouncycastle:bcmail-jdk15on dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 5ba35709..3b53575e 100644 --- a/pom.xml +++ b/pom.xml @@ -274,7 +274,7 @@ org.bouncycastle bcmail-jdk15on - 1.68 + 1.69 From 1e0d7512c318bfc3f3ba05b71643b59235b2d9b2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 20 Jul 2021 21:03:26 +0000 Subject: [PATCH 154/200] Bump slf4j-api from 1.7.30 to 1.7.32 Bumps [slf4j-api](https://github.com/qos-ch/slf4j) from 1.7.30 to 1.7.32. - [Release notes](https://github.com/qos-ch/slf4j/releases) - [Commits](https://github.com/qos-ch/slf4j/commits) --- updated-dependencies: - dependency-name: org.slf4j:slf4j-api dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3b53575e..3333a420 100644 --- a/pom.xml +++ b/pom.xml @@ -250,7 +250,7 @@ org.slf4j slf4j-api - 1.7.30 + 1.7.32 From a3bba8b0bbaa67217dda2ec1368ea6358f5b0601 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 20 Jul 2021 21:03:28 +0000 Subject: [PATCH 155/200] Bump slf4j-simple from 1.7.30 to 1.7.32 Bumps [slf4j-simple](https://github.com/qos-ch/slf4j) from 1.7.30 to 1.7.32. - [Release notes](https://github.com/qos-ch/slf4j/releases) - [Commits](https://github.com/qos-ch/slf4j/commits) --- updated-dependencies: - dependency-name: org.slf4j:slf4j-simple dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3333a420..05865e9a 100644 --- a/pom.xml +++ b/pom.xml @@ -256,7 +256,7 @@ org.slf4j slf4j-simple - 1.7.30 + 1.7.32 From 20e3c2e2052ad806905d24c3f15976dcea46f143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 17 Aug 2021 09:39:03 -0300 Subject: [PATCH 156/200] prepare release 1.0.5 --- README.md | 4 ++-- pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 885d3eb8..b5651af6 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Download a version of the tabula-java's jar, with all dependencies included, tha `tabula-java` provides a command line application: ``` -$ java -jar target/tabula-1.0.2-jar-with-dependencies.jar --help +$ java -jar target/tabula-1.0.5-jar-with-dependencies.jar --help usage: tabula [-a ] [-b ] [-c ] [-f ] [-g] [-h] [-i] [-l] [-n] [-o ] [-p ] [-r] [-s ] [-t] [-u] [-v] @@ -69,7 +69,7 @@ Tabula helps you extract tables from PDFs -v,--version Print version and exit. 
``` -It also includes a debugging tool, run `java -cp ./target/tabula-1.0.2-jar-with-dependencies.jar technology.tabula.debug.Debug -h` for the available options. +It also includes a debugging tool, run `java -cp ./target/tabula-1.0.5-jar-with-dependencies.jar technology.tabula.debug.Debug -h` for the available options. You can also integrate `tabula-java` with any JVM language. For Java examples, see the [`tests`](src/test/java/technology/tabula/) folder. diff --git a/pom.xml b/pom.xml index 05865e9a..7cc0473a 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 technology.tabula tabula - 1.0.5-SNAPSHOT + 1.0.5 Tabula Extract tables from PDF files http://github.com/tabulapdf/tabula-java From adb7738c87f0019cf95519ff37b58e4d4992c51d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 17 Aug 2021 11:49:50 -0300 Subject: [PATCH 157/200] prepare for next release --- pom.xml | 2 +- src/main/java/technology/tabula/CommandLineApp.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 7cc0473a..23aa7700 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 technology.tabula tabula - 1.0.5 + 1.0.6-SNAPSHOT Tabula Extract tables from PDF files http://github.com/tabulapdf/tabula-java diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 0228df4b..4a771fb2 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -29,7 +29,7 @@ public class CommandLineApp { - private static String VERSION = "1.0.5"; + private static String VERSION = "1.0.6-SNAPSHOT"; private static String VERSION_STRING = String.format("tabula %s (c) 2012-2020 Manuel Aristarán", VERSION); private static String BANNER = "\nTabula helps you extract tables from PDFs\n\n"; From 01c25598e050cbc1145c7fb3ce67b7d47a225dcd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 Dec 2021 21:41:36 +0000 Subject: [PATCH 158/200] Bump pdfbox from 2.0.24 to 2.0.25 Bumps pdfbox from 2.0.24 to 2.0.25. --- updated-dependencies: - dependency-name: org.apache.pdfbox:pdfbox dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 23aa7700..6c66027c 100644 --- a/pom.xml +++ b/pom.xml @@ -262,7 +262,7 @@ org.apache.pdfbox pdfbox - 2.0.24 + 2.0.25 From a5f59ed10699120fa827203c1f5fbeac6519f846 Mon Sep 17 00:00:00 2001 From: Tilman Hausherr Date: Thu, 23 Dec 2021 08:41:08 +0100 Subject: [PATCH 159/200] update pdfbox to latest version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 23aa7700..6c66027c 100644 --- a/pom.xml +++ b/pom.xml @@ -262,7 +262,7 @@ org.apache.pdfbox pdfbox - 2.0.24 + 2.0.25 From fa9363b322970a1665bfd16ba113f4a6f71ab431 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Jan 2022 21:43:51 +0000 Subject: [PATCH 160/200] Bump slf4j-api from 1.7.32 to 1.7.35 Bumps [slf4j-api](https://github.com/qos-ch/slf4j) from 1.7.32 to 1.7.35. - [Release notes](https://github.com/qos-ch/slf4j/releases) - [Commits](https://github.com/qos-ch/slf4j/compare/v_1.7.32...v_1.7.35) --- updated-dependencies: - dependency-name: org.slf4j:slf4j-api dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6c66027c..e2947ada 100644 --- a/pom.xml +++ b/pom.xml @@ -250,7 +250,7 @@ org.slf4j slf4j-api - 1.7.32 + 1.7.35 From d175879527e3c47980f631ac1797231562d01059 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 25 Jan 2022 18:59:56 -0300 Subject: [PATCH 161/200] goodbye travis, hello github actions --- .github/workflows/tests.yml | 17 +++++++++++++++++ .travis.yml | 9 --------- 2 files changed, 17 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/tests.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..9fe8e8f8 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,17 @@ +name: Java CI + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v2 + with: + java-version: '11' + distribution: 'adopt' + - name: Build with Maven + run: mvn --batch-mode test diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 7397abbf..00000000 --- a/.travis.yml +++ /dev/null @@ -1,9 +0,0 @@ -language: java -install: mvn install -DskipTests=true -Dmaven.javadoc.skip=true -Dgpg.skip=true -B -V -script: mvn test -Dgpg.skip=true -jdk: - - openjdk8 - - openjdk9 - - openjdk10 - - openjdk11 -sudo: false From 1739fbf9915820109d4937963949b6f39b5f4ef8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 25 Jan 2022 19:03:21 -0300 Subject: [PATCH 162/200] goodbye appveyor --- appveyor.yml | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 appveyor.yml diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index b2c4a0ae..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,21 +0,0 @@ -version: '{build}' -install: - - ps: | - Add-Type -AssemblyName 
System.IO.Compression.FileSystem - if (!(Test-Path -Path "C:\maven\apache-maven-3.5.4" )) { - (new-object System.Net.WebClient).DownloadFile( - 'http://www-us.apache.org/dist/maven/maven-3/3.5.4/binaries/apache-maven-3.5.4-bin.zip', - 'C:\maven-bin.zip' - ) - [System.IO.Compression.ZipFile]::ExtractToDirectory("C:\maven-bin.zip", "C:\maven") - } - - cmd: SET PATH=C:\maven\apache-maven-3.5.4\bin;%JAVA_HOME%\bin;%PATH% - - cmd: SET MAVEN_OPTS=-Xmx2g - - cmd: SET JAVA_OPTS=-Xmx2g -build_script: - - mvn clean package -B -DskipTests -Dmaven.javadoc.skip=true -test_script: - - mvn install -B -Dmaven.javadoc.skip=true -Dgpg.skip -cache: - - C:\maven -> appveyor.yml - - C:\Users\appveyor\.m2 -> appveyor.yml From 5f43a939ddec5f6b5adddf3a4d84def6eef9dfcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 25 Jan 2022 19:03:47 -0300 Subject: [PATCH 163/200] Remove Appveyor badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 27cbc770..8d3c8210 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -tabula-java [![Build Status](https://travis-ci.org/tabulapdf/tabula-java.svg?branch=master)](https://travis-ci.org/tabulapdf/tabula-java) [![Build status](https://ci.appveyor.com/api/projects/status/l5gym1mjhrd2v8yn?svg=true)](https://ci.appveyor.com/project/jazzido/tabula-java) +tabula-java [![Build Status](https://travis-ci.org/tabulapdf/tabula-java.svg?branch=master)](https://travis-ci.org/tabulapdf/tabula-java) =========== `tabula-java` is a library for extracting tables from PDF files — it is the table extraction engine that powers [Tabula](http://tabula.technology/) ([repo](http://github.com/tabulapdf/tabula)). You can use `tabula-java` as a command-line tool to programmatically extract tables from PDFs. 
From ef23f62ad5bd3afb669d54950ffbb983346cc248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 25 Jan 2022 19:06:02 -0300 Subject: [PATCH 164/200] cache maven deps --- .github/workflows/tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9fe8e8f8..cf36b0ce 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,5 +13,6 @@ jobs: with: java-version: '11' distribution: 'adopt' + cache: maven - name: Build with Maven run: mvn --batch-mode test From c6de348f4c5241d832b37130bcc4ee858ffd3e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 25 Jan 2022 19:06:26 -0300 Subject: [PATCH 165/200] windows test runner --- .github/workflows/tests-windows.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .github/workflows/tests-windows.yml diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml new file mode 100644 index 00000000..4ff7f542 --- /dev/null +++ b/.github/workflows/tests-windows.yml @@ -0,0 +1,18 @@ +name: Java CI + +on: [push] + +jobs: + build: + runs-on: windows-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v2 + with: + java-version: '11' + distribution: 'adopt' + cache: maven + - name: Build with Maven + run: mvn --batch-mode test From 19ddc51b888c8f43a81c7862e2e126c377ec4f8d Mon Sep 17 00:00:00 2001 From: Martin Swanson Date: Mon, 7 Mar 2022 18:38:20 +0000 Subject: [PATCH 166/200] add page number to JSON output --- src/main/java/technology/tabula/Table.java | 3 +++ src/main/java/technology/tabula/TableWithRulingLines.java | 3 ++- .../technology/tabula/extractors/BasicExtractionAlgorithm.java | 1 + .../tabula/extractors/SpreadsheetExtractionAlgorithm.java | 2 +- src/main/java/technology/tabula/json/TableSerializer.java | 1 + 5 files changed, 8 insertions(+), 2 deletions(-) diff --git 
a/src/main/java/technology/tabula/Table.java b/src/main/java/technology/tabula/Table.java index c031c9ed..1e73bedf 100644 --- a/src/main/java/technology/tabula/Table.java +++ b/src/main/java/technology/tabula/Table.java @@ -23,11 +23,14 @@ public Table(ExtractionAlgorithm extractionAlgorithm) { private int rowCount = 0; private int colCount = 0; + private int pageNumber = 0; /* visible for testing */ final TreeMap cells = new TreeMap<>(); public int getRowCount() { return rowCount; } public int getColCount() { return colCount; } + public int getPageNumber() { return pageNumber; } + public void setPageNumber(int pageNumber) { this.pageNumber = pageNumber; } public String getExtractionMethod() { return extractionMethod; } diff --git a/src/main/java/technology/tabula/TableWithRulingLines.java b/src/main/java/technology/tabula/TableWithRulingLines.java index c119f191..cde0ce72 100644 --- a/src/main/java/technology/tabula/TableWithRulingLines.java +++ b/src/main/java/technology/tabula/TableWithRulingLines.java @@ -14,12 +14,13 @@ public class TableWithRulingLines extends Table { List verticalRulings, horizontalRulings; RectangleSpatialIndex si = new RectangleSpatialIndex<>(); - public TableWithRulingLines(Rectangle area, List cells, List horizontalRulings, List verticalRulings, ExtractionAlgorithm extractionAlgorithm) { + public TableWithRulingLines(Rectangle area, List cells, List horizontalRulings, List verticalRulings, ExtractionAlgorithm extractionAlgorithm, int pageNumber) { super(extractionAlgorithm); this.setRect(area); this.verticalRulings = verticalRulings; this.horizontalRulings = horizontalRulings; this.addCells(cells); + this.setPageNumber(pageNumber); } private void addCells(List cells) { diff --git a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java index 246c5342..ed2e78e3 100644 --- a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java 
+++ b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java @@ -65,6 +65,7 @@ public int compare(Ruling arg0, Ruling arg1) { Table table = new Table(this); table.setRect(page.getLeft(), page.getTop(), page.getWidth(), page.getHeight()); + table.setPageNumber(page.getPageNumber()); for (int i = 0; i < lines.size(); i++) { Line line = lines.get(i); diff --git a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java index 44998cfb..5b4af3d5 100644 --- a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java @@ -88,7 +88,7 @@ else if (r.vertical()) { } } - TableWithRulingLines t = new TableWithRulingLines(area, overlappingCells, horizontalOverlappingRulings, verticalOverlappingRulings, this); + TableWithRulingLines t = new TableWithRulingLines(area, overlappingCells, horizontalOverlappingRulings, verticalOverlappingRulings, this, page.getPageNumber()); spreadsheets.add(t); } Utils.sort(spreadsheets, Rectangle.ILL_DEFINED_ORDER); diff --git a/src/main/java/technology/tabula/json/TableSerializer.java b/src/main/java/technology/tabula/json/TableSerializer.java index 2ba20bcd..0caaf0e5 100644 --- a/src/main/java/technology/tabula/json/TableSerializer.java +++ b/src/main/java/technology/tabula/json/TableSerializer.java @@ -24,6 +24,7 @@ public JsonElement serialize(Table table, Type type, JsonSerializationContext co JsonArray data = new JsonArray(); json.addProperty("extraction_method", table.getExtractionMethod()); + json.addProperty("page_number", table.getPageNumber()); json.addProperty("top", table.getTop()); json.addProperty("left", table.getLeft()); json.addProperty("width", table.getWidth()); From 56cd71313f288b55d1a50ee222e821e2497df1a6 Mon Sep 17 00:00:00 2001 From: Martin Swanson Date: Mon, 7 Mar 2022 19:48:46 +0000 Subject: [PATCH 
167/200] tests passing --- src/test/resources/technology/tabula/json/AnimalSounds1.json | 2 +- .../tabula/json/argentina_diputados_voting_record.json | 2 +- src/test/resources/technology/tabula/json/schools.json | 2 +- src/test/resources/technology/tabula/json/spanning_cells.json | 2 +- .../resources/technology/tabula/json/spanning_cells_basic.json | 2 +- src/test/resources/technology/tabula/json/twotables.json | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/resources/technology/tabula/json/AnimalSounds1.json b/src/test/resources/technology/tabula/json/AnimalSounds1.json index c13c6759..8511b786 100644 --- a/src/test/resources/technology/tabula/json/AnimalSounds1.json +++ b/src/test/resources/technology/tabula/json/AnimalSounds1.json @@ -1 +1 @@ -[{"extraction_method":"lattice","top":0.006499578,"left":56.8,"width":241.1999969482422,"height":315.36407470703125,"right":298.0,"bottom":315.37057,"data":[[{"top":0.006499578,"left":56.8,"width":79.19999694824219,"height":95.31405639648438,"text":"Animal"},{"top":0.006499578,"left":136.0,"width":61.0,"height":95.31405639648438,"text":"Action"},{"top":0.006499578,"left":197.0,"width":101.0,"height":95.31405639648438,"text":"Result"}],[{"top":95.32056,"left":56.8,"width":79.19999694824219,"height":23.050010681152344,"text":"Cat"},{"top":95.32056,"left":136.0,"width":61.0,"height":23.050010681152344,"text":"Says"},{"top":95.32056,"left":197.0,"width":101.0,"height":23.050010681152344,"text":"Meow"}],[{"top":118.37057,"left":56.8,"width":79.19999694824219,"height":63.99999237060547,"text":"Parastratiosph\recomyiastratio\rsphecomyioid\res"},{"top":118.37057,"left":136.0,"width":61.0,"height":63.99999237060547,"text":"Says"},{"top":118.37057,"left":197.0,"width":101.0,"height":63.99999237060547,"text":"bzzzzzzz"}],[{"top":182.37056,"left":56.8,"width":79.19999694824219,"height":133.00001525878906,"text":"Fox"},{"top":182.37056,"left":136.0,"width":61.0,"height":133.00001525878906,"text":"Says"},{"t
op":182.37056,"left":197.0,"width":101.0,"height":133.00001525878906,"text":"Ring-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding"}]]},{"extraction_method":"lattice","top":0.006499578,"left":313.35715,"width":241.55941772460938,"height":259.2640380859375,"right":554.91656,"bottom":259.27054,"data":[[{"top":0.006499578,"left":313.35715,"width":77.64285278320312,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":391.0,"width":66.0,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":457.0,"width":97.91656494140625,"height":72.26405334472656,"text":""}],[{"top":72.27055,"left":313.35715,"width":77.64285278320312,"height":23.050003051757812,"text":"Animal"},{"top":72.27055,"left":391.0,"width":66.0,"height":23.050003051757812,"text":"Action"},{"top":72.27055,"left":457.0,"width":97.91656494140625,"height":23.050003051757812,"text":"Result"}],[{"top":95.32056,"left":313.35715,"width":77.64285278320312,"height":35.94999694824219,"text":"Dogs/wolves/\rMore dogs"},{"top":95.32056,"left":391.0,"width":66.0,"height":35.94999694824219,"text":"Says"},{"top":95.32056,"left":457.0,"width":97.91656494140625,"height":35.94999694824219,"text":"Bow-wow/\rruff-ruff"}],[{"top":131.27055,"left":313.35715,"width":77.64285278320312,"height":36.40000915527344,"text":"Donkey"},{"top":131.27055,"left":391.0,"width":66.0,"height":36.40000915527344,"text":"Says"},{"top":131.27055,"left":457.0,"width":97.91656494140625,"height":36.40000915527344,"text":"Hee-Haw Hee-\rHaw"}],[{"top":167.67056,"left":313.35715,"width":77.64285278320312,"height":91.5999755859375,"text":"Fox"},{"top":167.67056,"left":391.0,"width":66.0,"height":91.5999755859375,"text":"Says"},{"top":167.67056,"left":457.0,"width":97.91656494140625,"height":91.5999755859375,"text":"Wa-pa-pa-pa-\rpa-pa-pow\rWa-pa-pa-pa-\rpa-pow\rWa-pa-pa-pa-\rpa-pa-pow"}]]}] 
+[{"extraction_method":"lattice","page_number":1,"top":0.006499578,"left":56.8,"width":241.1999969482422,"height":315.36407470703125,"right":298.0,"bottom":315.37057,"data":[[{"top":0.006499578,"left":56.8,"width":79.19999694824219,"height":95.31405639648438,"text":"Animal"},{"top":0.006499578,"left":136.0,"width":61.0,"height":95.31405639648438,"text":"Action"},{"top":0.006499578,"left":197.0,"width":101.0,"height":95.31405639648438,"text":"Result"}],[{"top":95.32056,"left":56.8,"width":79.19999694824219,"height":23.050010681152344,"text":"Cat"},{"top":95.32056,"left":136.0,"width":61.0,"height":23.050010681152344,"text":"Says"},{"top":95.32056,"left":197.0,"width":101.0,"height":23.050010681152344,"text":"Meow"}],[{"top":118.37057,"left":56.8,"width":79.19999694824219,"height":63.99999237060547,"text":"Parastratiosph\recomyiastratio\rsphecomyioid\res"},{"top":118.37057,"left":136.0,"width":61.0,"height":63.99999237060547,"text":"Says"},{"top":118.37057,"left":197.0,"width":101.0,"height":63.99999237060547,"text":"bzzzzzzz"}],[{"top":182.37056,"left":56.8,"width":79.19999694824219,"height":133.00001525878906,"text":"Fox"},{"top":182.37056,"left":136.0,"width":61.0,"height":133.00001525878906,"text":"Says"},{"top":182.37056,"left":197.0,"width":101.0,"height":133.00001525878906,"text":"Ring-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding"}]]},{"extraction_method":"lattice","page_number":1,"top":0.006499578,"left":313.35715,"width":241.55941772460938,"height":259.2640380859375,"right":554.91656,"bottom":259.27054,"data":[[{"top":0.006499578,"left":313.35715,"width":77.64285278320312,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":391.0,"width":66.0,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":457.0,"width":97.91656494140625,"height":72.26405334472656,"text":""}],[{"top":72.27055,"left":313.35715,"width":77.64285278320312,"height":23.050003051757812,"text":"Animal"},{"top":
72.27055,"left":391.0,"width":66.0,"height":23.050003051757812,"text":"Action"},{"top":72.27055,"left":457.0,"width":97.91656494140625,"height":23.050003051757812,"text":"Result"}],[{"top":95.32056,"left":313.35715,"width":77.64285278320312,"height":35.94999694824219,"text":"Dogs/wolves/\rMore dogs"},{"top":95.32056,"left":391.0,"width":66.0,"height":35.94999694824219,"text":"Says"},{"top":95.32056,"left":457.0,"width":97.91656494140625,"height":35.94999694824219,"text":"Bow-wow/\rruff-ruff"}],[{"top":131.27055,"left":313.35715,"width":77.64285278320312,"height":36.40000915527344,"text":"Donkey"},{"top":131.27055,"left":391.0,"width":66.0,"height":36.40000915527344,"text":"Says"},{"top":131.27055,"left":457.0,"width":97.91656494140625,"height":36.40000915527344,"text":"Hee-Haw Hee-\rHaw"}],[{"top":167.67056,"left":313.35715,"width":77.64285278320312,"height":91.5999755859375,"text":"Fox"},{"top":167.67056,"left":391.0,"width":66.0,"height":91.5999755859375,"text":"Says"},{"top":167.67056,"left":457.0,"width":97.91656494140625,"height":91.5999755859375,"text":"Wa-pa-pa-pa-\rpa-pa-pow\rWa-pa-pa-pa-\rpa-pow\rWa-pa-pa-pa-\rpa-pa-pow"}]]}] diff --git a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json index 21ad2880..fb2d478e 100644 --- a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json +++ b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json @@ -1 +1 @@ -{"extraction_method":"stream","top":269.875,"left":12.75,"width":548.25,"height":520.625,"right":561.0,"bottom":790.5,"data":[[{"top":281.82,"left":28.56,"width":175.21029663085938,"height":4.5,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":281.82,"left":213.72,"width":108.25027465820312,"height":4.5,"text":"Frente Cívico por Santiago"},{"top":282.54,"left":397.56,"width":79.69027709960938,"height":4.5,"text":"Santiago del 
Estero"},{"top":281.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":298.26,"left":28.56,"width":141.71029663085938,"height":4.5,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":298.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":299.1,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio Negro"},{"top":298.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":314.82,"left":28.56,"width":83.75028228759766,"height":4.5,"text":"ALONSO, María Luz"},{"top":314.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":315.54,"left":397.56,"width":42.250274658203125,"height":4.5,"text":"La Pampa"},{"top":314.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":331.26,"left":28.56,"width":84.25028228759766,"height":4.5,"text":"ARENA, Celia Isabel"},{"top":331.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":332.1,"left":397.56,"width":37.690277099609375,"height":4.5,"text":"Santa Fe"},{"top":331.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":347.82,"left":28.56,"width":110.29029846191406,"height":4.5,"text":"ARREGUI, Andrés Roberto"},{"top":347.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":348.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":347.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":364.26,"left":28.56,"width":115.69029235839844,"height":4.5,"text":"AVOSCAN, Herman Horacio"},{"top":364.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":365.1,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio 
Negro"},{"top":364.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":380.82,"left":28.56,"width":95.6902847290039,"height":4.5,"text":"BALCEDO, María Ester"},{"top":380.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":381.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":380.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":397.26,"left":28.56,"width":127.69029235839844,"height":4.5,"text":"BARRANDEGUY, Raúl Enrique"},{"top":397.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":398.1,"left":397.56,"width":43.6702880859375,"height":4.5,"text":"Entre Ríos"},{"top":397.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":413.82,"left":28.56,"width":106.69029235839844,"height":4.5,"text":"BASTERRA, Luis Eugenio"},{"top":413.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":414.54,"left":397.56,"width":36.73028564453125,"height":4.5,"text":"Formosa"},{"top":413.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":430.26,"left":28.56,"width":92.21028137207031,"height":4.5,"text":"BEDANO, Nora Esther"},{"top":430.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":431.1,"left":397.56,"width":35.6602783203125,"height":4.5,"text":"Córdoba"},{"top":430.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":446.82,"left":28.56,"width":100.69029235839844,"height":4.5,"text":"BERNAL, María Eugenia"},{"top":446.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - 
PJ"},{"top":447.54,"left":397.56,"width":22.200286865234375,"height":4.5,"text":"Jujuy"},{"top":446.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":463.26,"left":28.56,"width":112.21029663085938,"height":4.5,"text":"BERTONE, Rosana Andrea"},{"top":463.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":464.1,"left":397.56,"width":67.21026611328125,"height":4.5,"text":"Tierra del Fuego"},{"top":463.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":479.82,"left":28.56,"width":114.73028564453125,"height":4.5,"text":"BIANCHI, María del Carmen"},{"top":479.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":480.54,"left":397.56,"width":74.6702880859375,"height":4.5,"text":"Cdad. Aut. Bs. As."},{"top":479.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":496.26,"left":28.56,"width":115.19029235839844,"height":4.5,"text":"BIDEGAIN, Gloria Mercedes"},{"top":496.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":497.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":496.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":512.82,"left":28.56,"width":66.25028228759766,"height":4.5,"text":"BRAWER, Mara"},{"top":512.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":513.54,"left":397.56,"width":74.6702880859375,"height":4.5,"text":"Cdad. Aut. Bs. 
As."},{"top":512.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":529.26,"left":28.56,"width":90.73028564453125,"height":4.5,"text":"BRILLO, José Ricardo"},{"top":529.26,"left":213.72,"width":121.81027221679688,"height":4.5,"text":"Movimiento Popular Neuquino"},{"top":530.1,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":529.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":545.82,"left":28.56,"width":120.73028564453125,"height":4.5,"text":"BROMBERG, Isaac Benjamín"},{"top":545.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":546.54,"left":397.56,"width":38.6602783203125,"height":4.5,"text":"Tucumán"},{"top":545.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":562.26,"left":28.56,"width":89.7702865600586,"height":4.5,"text":"BRUE, Daniel Agustín"},{"top":562.26,"left":213.72,"width":108.25027465820312,"height":4.5,"text":"Frente Cívico por Santiago"},{"top":563.1,"left":397.56,"width":79.69027709960938,"height":4.5,"text":"Santiago del Estero"},{"top":562.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":578.82,"left":28.56,"width":72.23028564453125,"height":4.5,"text":"CALCAGNO, Eric"},{"top":578.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":579.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":578.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":595.26,"left":28.56,"width":114.73028564453125,"height":4.5,"text":"CARLOTTO, Remo Gerardo"},{"top":595.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":596.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos 
Aires"},{"top":595.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":611.82,"left":28.56,"width":122.1702880859375,"height":4.5,"text":"CARMONA, Guillermo Ramón"},{"top":611.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":612.54,"left":397.56,"width":38.1602783203125,"height":4.5,"text":"Mendoza"},{"top":611.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":628.26,"left":28.56,"width":124.73028564453125,"height":4.5,"text":"CATALAN MAGNI, Julio César"},{"top":628.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":629.1,"left":397.56,"width":67.21026611328125,"height":4.5,"text":"Tierra del Fuego"},{"top":628.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":644.82,"left":28.56,"width":88.6902847290039,"height":4.5,"text":"CEJAS, Jorge Alberto"},{"top":644.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":645.54,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio Negro"},{"top":644.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":661.26,"left":28.56,"width":89.7702865600586,"height":4.5,"text":"CHIENO, María Elena"},{"top":661.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":662.1,"left":397.56,"width":42.72027587890625,"height":4.5,"text":"Corrientes"},{"top":661.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":677.82,"left":28.56,"width":96.25028228759766,"height":4.5,"text":"CIAMPINI, José Alberto"},{"top":677.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - 
PJ"},{"top":678.54,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":677.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":694.26,"left":28.56,"width":131.77029418945312,"height":4.5,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":694.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":695.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":694.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":710.82,"left":28.56,"width":62.15028381347656,"height":4.5,"text":"CLERI, Marcos"},{"top":710.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":711.54,"left":397.56,"width":37.690277099609375,"height":4.5,"text":"Santa Fe"},{"top":710.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":727.26,"left":28.56,"width":101.77029418945312,"height":4.5,"text":"COMELLI, Alicia Marcela"},{"top":727.26,"left":213.72,"width":121.81027221679688,"height":4.5,"text":"Movimiento Popular Neuquino"},{"top":728.1,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":727.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":743.82,"left":28.56,"width":88.1902847290039,"height":4.5,"text":"CONTI, Diana Beatriz"},{"top":743.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":744.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":743.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":760.26,"left":28.56,"width":98.75028228759766,"height":4.5,"text":"CORDOBA, Stella Maris"},{"top":760.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - 
PJ"},{"top":761.1,"left":397.56,"width":38.6602783203125,"height":4.5,"text":"Tucumán"},{"top":760.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":776.82,"left":28.56,"width":105.25028991699219,"height":4.5,"text":"CURRILEN, Oscar Rubén"},{"top":776.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":777.54,"left":397.56,"width":30.2802734375,"height":4.5,"text":"Chubut"},{"top":776.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}]]} \ No newline at end of file +{"extraction_method":"stream","page_number":1,"top":269.875,"left":12.75,"width":548.25,"height":520.625,"right":561.0,"bottom":790.5,"data":[[{"top":281.82,"left":28.56,"width":175.21029663085938,"height":4.5,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":281.82,"left":213.72,"width":108.25027465820312,"height":4.5,"text":"Frente Cívico por Santiago"},{"top":282.54,"left":397.56,"width":79.69027709960938,"height":4.5,"text":"Santiago del Estero"},{"top":281.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":298.26,"left":28.56,"width":141.71029663085938,"height":4.5,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":298.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":299.1,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio Negro"},{"top":298.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":314.82,"left":28.56,"width":83.75028228759766,"height":4.5,"text":"ALONSO, María Luz"},{"top":314.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":315.54,"left":397.56,"width":42.250274658203125,"height":4.5,"text":"La Pampa"},{"top":314.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":331.26,"left":28.56,"width":84.25028228759766,"height":4.5,"text":"ARENA, Celia 
Isabel"},{"top":331.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":332.1,"left":397.56,"width":37.690277099609375,"height":4.5,"text":"Santa Fe"},{"top":331.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":347.82,"left":28.56,"width":110.29029846191406,"height":4.5,"text":"ARREGUI, Andrés Roberto"},{"top":347.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":348.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":347.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":364.26,"left":28.56,"width":115.69029235839844,"height":4.5,"text":"AVOSCAN, Herman Horacio"},{"top":364.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":365.1,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio Negro"},{"top":364.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":380.82,"left":28.56,"width":95.6902847290039,"height":4.5,"text":"BALCEDO, María Ester"},{"top":380.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":381.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":380.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":397.26,"left":28.56,"width":127.69029235839844,"height":4.5,"text":"BARRANDEGUY, Raúl Enrique"},{"top":397.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":398.1,"left":397.56,"width":43.6702880859375,"height":4.5,"text":"Entre Ríos"},{"top":397.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":413.82,"left":28.56,"width":106.69029235839844,"height":4.5,"text":"BASTERRA, Luis 
Eugenio"},{"top":413.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":414.54,"left":397.56,"width":36.73028564453125,"height":4.5,"text":"Formosa"},{"top":413.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":430.26,"left":28.56,"width":92.21028137207031,"height":4.5,"text":"BEDANO, Nora Esther"},{"top":430.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":431.1,"left":397.56,"width":35.6602783203125,"height":4.5,"text":"Córdoba"},{"top":430.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":446.82,"left":28.56,"width":100.69029235839844,"height":4.5,"text":"BERNAL, María Eugenia"},{"top":446.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":447.54,"left":397.56,"width":22.200286865234375,"height":4.5,"text":"Jujuy"},{"top":446.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":463.26,"left":28.56,"width":112.21029663085938,"height":4.5,"text":"BERTONE, Rosana Andrea"},{"top":463.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":464.1,"left":397.56,"width":67.21026611328125,"height":4.5,"text":"Tierra del Fuego"},{"top":463.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":479.82,"left":28.56,"width":114.73028564453125,"height":4.5,"text":"BIANCHI, María del Carmen"},{"top":479.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":480.54,"left":397.56,"width":74.6702880859375,"height":4.5,"text":"Cdad. Aut. Bs. 
As."},{"top":479.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":496.26,"left":28.56,"width":115.19029235839844,"height":4.5,"text":"BIDEGAIN, Gloria Mercedes"},{"top":496.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":497.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":496.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":512.82,"left":28.56,"width":66.25028228759766,"height":4.5,"text":"BRAWER, Mara"},{"top":512.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":513.54,"left":397.56,"width":74.6702880859375,"height":4.5,"text":"Cdad. Aut. Bs. As."},{"top":512.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":529.26,"left":28.56,"width":90.73028564453125,"height":4.5,"text":"BRILLO, José Ricardo"},{"top":529.26,"left":213.72,"width":121.81027221679688,"height":4.5,"text":"Movimiento Popular Neuquino"},{"top":530.1,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":529.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":545.82,"left":28.56,"width":120.73028564453125,"height":4.5,"text":"BROMBERG, Isaac Benjamín"},{"top":545.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":546.54,"left":397.56,"width":38.6602783203125,"height":4.5,"text":"Tucumán"},{"top":545.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":562.26,"left":28.56,"width":89.7702865600586,"height":4.5,"text":"BRUE, Daniel Agustín"},{"top":562.26,"left":213.72,"width":108.25027465820312,"height":4.5,"text":"Frente Cívico por Santiago"},{"top":563.1,"left":397.56,"width":79.69027709960938,"height":4.5,"text":"Santiago del 
Estero"},{"top":562.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":578.82,"left":28.56,"width":72.23028564453125,"height":4.5,"text":"CALCAGNO, Eric"},{"top":578.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":579.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":578.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":595.26,"left":28.56,"width":114.73028564453125,"height":4.5,"text":"CARLOTTO, Remo Gerardo"},{"top":595.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":596.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":595.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":611.82,"left":28.56,"width":122.1702880859375,"height":4.5,"text":"CARMONA, Guillermo Ramón"},{"top":611.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":612.54,"left":397.56,"width":38.1602783203125,"height":4.5,"text":"Mendoza"},{"top":611.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":628.26,"left":28.56,"width":124.73028564453125,"height":4.5,"text":"CATALAN MAGNI, Julio César"},{"top":628.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":629.1,"left":397.56,"width":67.21026611328125,"height":4.5,"text":"Tierra del Fuego"},{"top":628.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":644.82,"left":28.56,"width":88.6902847290039,"height":4.5,"text":"CEJAS, Jorge Alberto"},{"top":644.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":645.54,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio 
Negro"},{"top":644.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":661.26,"left":28.56,"width":89.7702865600586,"height":4.5,"text":"CHIENO, María Elena"},{"top":661.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":662.1,"left":397.56,"width":42.72027587890625,"height":4.5,"text":"Corrientes"},{"top":661.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":677.82,"left":28.56,"width":96.25028228759766,"height":4.5,"text":"CIAMPINI, José Alberto"},{"top":677.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":678.54,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":677.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":694.26,"left":28.56,"width":131.77029418945312,"height":4.5,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":694.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":695.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":694.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":710.82,"left":28.56,"width":62.15028381347656,"height":4.5,"text":"CLERI, Marcos"},{"top":710.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":711.54,"left":397.56,"width":37.690277099609375,"height":4.5,"text":"Santa Fe"},{"top":710.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":727.26,"left":28.56,"width":101.77029418945312,"height":4.5,"text":"COMELLI, Alicia Marcela"},{"top":727.26,"left":213.72,"width":121.81027221679688,"height":4.5,"text":"Movimiento Popular 
Neuquino"},{"top":728.1,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":727.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":743.82,"left":28.56,"width":88.1902847290039,"height":4.5,"text":"CONTI, Diana Beatriz"},{"top":743.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":744.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":743.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":760.26,"left":28.56,"width":98.75028228759766,"height":4.5,"text":"CORDOBA, Stella Maris"},{"top":760.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":761.1,"left":397.56,"width":38.6602783203125,"height":4.5,"text":"Tucumán"},{"top":760.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":776.82,"left":28.56,"width":105.25028991699219,"height":4.5,"text":"CURRILEN, Oscar Rubén"},{"top":776.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":777.54,"left":397.56,"width":30.2802734375,"height":4.5,"text":"Chubut"},{"top":776.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}]]} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/json/schools.json b/src/test/resources/technology/tabula/json/schools.json index 57544ff0..d251bb20 100644 --- a/src/test/resources/technology/tabula/json/schools.json +++ b/src/test/resources/technology/tabula/json/schools.json @@ -1 +1 @@ -{"extraction_method":"lattice","top":54.315777,"left":16.97,"width":745.3303833007812,"height":483.9442443847656,"right":762.30035,"bottom":538.26,"data":[[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":54.315777,"left":18.588728,"width":153.91128540039062,"height":8.744216918945312,"text":"Last 
Name"},{"top":54.315777,"left":172.50002,"width":69.71998596191406,"height":8.744216918945312,"text":"First Name"},{"top":54.315777,"left":242.22,"width":104.760009765625,"height":8.744216918945312,"text":"Address"},{"top":54.315777,"left":346.98,"width":70.80001831054688,"height":8.744216918945312,"text":"City"},{"top":54.315777,"left":417.78003,"width":20.87994384765625,"height":8.744216918945312,"text":"State"},{"top":54.315777,"left":438.65997,"width":49.91998291015625,"height":8.744216918945312,"text":"Zip"},{"top":54.315777,"left":488.57996,"width":78.47998046875,"height":8.744216918945312,"text":"Occupation"},{"top":54.315777,"left":567.05994,"width":106.4400634765625,"height":8.744216918945312,"text":"Employer"},{"top":54.315777,"left":673.5,"width":45.52001953125,"height":8.744216918945312,"text":"Date"},{"top":54.315777,"left":719.02,"width":43.28033447265625,"height":8.744216918945312,"text":"Amount"}],[{"top":63.059994,"left":16.97,"width":1.6187286376953125,"height":10.799991607666016,"text":""},{"top":63.059994,"left":18.588728,"width":153.91128540039062,"height":10.799991607666016,"text":"Lidstad"},{"top":63.059994,"left":172.50002,"width":69.71998596191406,"height":10.799991607666016,"text":"Dick \u0026 Peg"},{"top":63.059994,"left":242.22,"width":104.760009765625,"height":10.799991607666016,"text":"62 Mississippi River Blvd N"},{"top":63.059994,"left":346.98,"width":70.80001831054688,"height":10.799991607666016,"text":"Saint 
Paul"},{"top":63.059994,"left":417.78003,"width":20.87994384765625,"height":10.799991607666016,"text":"MN"},{"top":63.059994,"left":438.65997,"width":49.91998291015625,"height":10.799991607666016,"text":"55104"},{"top":63.059994,"left":488.57996,"width":78.47998046875,"height":10.799991607666016,"text":"retired"},{"top":63.059994,"left":567.05994,"width":106.4400634765625,"height":10.799991607666016,"text":""},{"top":63.059994,"left":673.5,"width":45.52001953125,"height":10.799991607666016,"text":"10/12/2012"},{"top":63.059994,"left":719.02,"width":43.28033447265625,"height":10.799991607666016,"text":"60.00"}],[{"top":73.859985,"left":16.97,"width":1.6187286376953125,"height":10.800025939941406,"text":""},{"top":73.859985,"left":18.588728,"width":153.91128540039062,"height":10.800025939941406,"text":"Strom"},{"top":73.859985,"left":172.50002,"width":69.71998596191406,"height":10.800025939941406,"text":"Pam"},{"top":73.859985,"left":242.22,"width":104.760009765625,"height":10.800025939941406,"text":"1229 Hague Ave"},{"top":73.859985,"left":346.98,"width":70.80001831054688,"height":10.800025939941406,"text":"St. 
Paul"},{"top":73.859985,"left":417.78003,"width":20.87994384765625,"height":10.800025939941406,"text":"MN"},{"top":73.859985,"left":438.65997,"width":49.91998291015625,"height":10.800025939941406,"text":"55104"},{"top":73.859985,"left":488.57996,"width":78.47998046875,"height":10.800025939941406,"text":""},{"top":73.859985,"left":567.05994,"width":106.4400634765625,"height":10.800025939941406,"text":""},{"top":73.859985,"left":673.5,"width":45.52001953125,"height":10.800025939941406,"text":"9/12/2012"},{"top":73.859985,"left":719.02,"width":43.28033447265625,"height":10.800025939941406,"text":"60.00"}],[{"top":84.66001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":84.66001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Seeba"},{"top":84.66001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Louise \u0026 Paul"},{"top":84.66001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"1399 Sheldon St"},{"top":84.66001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":84.66001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":84.66001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55108"},{"top":84.66001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"BOE"},{"top":84.66001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"City of Saint Paul"},{"top":84.66001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/12/2012"},{"top":84.66001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":95.46001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":95.46001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Schumacher / 
Bales"},{"top":95.46001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Douglas L. / Patricia"},{"top":95.46001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"948 County Rd. D W"},{"top":95.46001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":95.46001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":95.46001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55126"},{"top":95.46001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":""},{"top":95.46001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":""},{"top":95.46001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/13/2012"},{"top":95.46001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":106.26,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":106.26,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Abrams"},{"top":106.26,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Marjorie"},{"top":106.26,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"238 8th St east"},{"top":106.26,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"St 
Paul"},{"top":106.26,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":106.26,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55101"},{"top":106.26,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"8/8/2012"},{"top":106.26,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"75.00"}],[{"top":117.06,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":117.06,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Crouse / Schroeder"},{"top":117.06,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Abigail / Jonathan"},{"top":117.06,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1545 Branston St."},{"top":117.06,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":117.06,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":117.06,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55108"},{"top":117.06,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":""},{"top":117.06,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":117.06,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":117.06,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"75.00"}],[{"top":127.859985,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":127.859985,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"O\u0027Connell"},{"top":127.859985,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Jean"},{"top":127.859985,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"888 Ivy Ave W."},{"top":127.859985,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Saint 
Paul"},{"top":127.859985,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":127.859985,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55117"},{"top":127.859985,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":""},{"top":127.859985,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":127.859985,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/30/2012"},{"top":127.859985,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":138.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":138.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Reese"},{"top":138.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheri A"},{"top":138.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"981 Davern St."},{"top":138.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":138.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":138.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55116"},{"top":138.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Public Relations"},{"top":138.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Far North 
Spirits"},{"top":138.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/3/2012"},{"top":138.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"75.00"}],[{"top":149.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":149.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Serrano"},{"top":149.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Luz Maria"},{"top":149.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"5559 Park Place Drive"},{"top":149.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Shoreview"},{"top":149.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":149.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55126"},{"top":149.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"retired"},{"top":149.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":149.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":149.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":160.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":160.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Alsiddiqui"},{"top":160.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Jaber"},{"top":160.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"13056 Euclid Ave"},{"top":160.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Apple 
Valley"},{"top":160.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":160.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55124"},{"top":160.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"SPPS budget analyst"},{"top":160.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":160.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":160.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":171.06,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":171.06,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Andrastek"},{"top":171.06,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"John"},{"top":171.06,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"774 Ashland Ave"},{"top":171.06,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"St Paul"},{"top":171.06,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":171.06,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55104"},{"top":171.06,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"asst 
principal"},{"top":171.06,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":171.06,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"8/22/2012"},{"top":171.06,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":181.86,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":181.86,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Anfang"},{"top":181.86,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Heather L. \u0026 Matt"},{"top":181.86,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1635 Bayard Ave"},{"top":181.86,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":181.86,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":181.86,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55116"},{"top":181.86,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"Ex Director"},{"top":181.86,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"BOMA"},{"top":181.86,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":181.86,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":192.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":192.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Aronson"},{"top":192.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Roger J."},{"top":192.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4852 Emerson Ave. 
S."},{"top":192.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Minneapolis"},{"top":192.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":192.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55419"},{"top":192.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Attorney at Law"},{"top":192.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":""},{"top":192.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/20/2012"},{"top":192.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":203.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":203.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Aronson"},{"top":203.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Roger J."},{"top":203.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"4852 Emerson Ave. 
S."},{"top":203.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Minneapolis"},{"top":203.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":203.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55419"},{"top":203.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"Attorney at Law"},{"top":203.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":203.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":203.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":214.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":214.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Banks"},{"top":214.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Michael or Patricia"},{"top":214.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1664 Van Buren Ave."},{"top":214.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":214.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":214.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":214.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"retired social 
worker"},{"top":214.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":214.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":214.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":225.06,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":225.06,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Bennett"},{"top":225.06,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"David"},{"top":225.06,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"25 Birchwood Rd."},{"top":225.06,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Mahtomedi"},{"top":225.06,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":225.06,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":225.06,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/3/2012"},{"top":225.06,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":235.86002,"left":16.97,"width":1.6187286376953125,"height":10.799972534179688,"text":""},{"top":235.86002,"left":18.588728,"width":153.91128540039062,"height":10.799972534179688,"text":"Berry"},{"top":235.86002,"left":172.50002,"width":69.71998596191406,"height":10.799972534179688,"text":"Margaret"},{"top":235.86002,"left":242.22,"width":104.760009765625,"height":10.799972534179688,"text":"1267 Pike Lake Dr"},{"top":235.86002,"left":346.98,"width":70.80001831054688,"height":10.799972534179688,"text":"New 
Brighton"},{"top":235.86002,"left":417.78003,"width":20.87994384765625,"height":10.799972534179688,"text":"MN"},{"top":235.86002,"left":438.65997,"width":49.91998291015625,"height":10.799972534179688,"text":"55112"},{"top":235.86002,"left":488.57996,"width":78.47998046875,"height":10.799972534179688,"text":"Asst Principal"},{"top":235.86002,"left":567.05994,"width":106.4400634765625,"height":10.799972534179688,"text":"Saint Paul Public Schools"},{"top":235.86002,"left":673.5,"width":45.52001953125,"height":10.799972534179688,"text":"8/8/2012"},{"top":235.86002,"left":719.02,"width":43.28033447265625,"height":10.799972534179688,"text":"100.00"}],[{"top":246.65999,"left":16.97,"width":1.6187286376953125,"height":10.800033569335938,"text":""},{"top":246.65999,"left":18.588728,"width":153.91128540039062,"height":10.800033569335938,"text":"Boyle"},{"top":246.65999,"left":172.50002,"width":69.71998596191406,"height":10.800033569335938,"text":"Matthew C. \u0026 Eliza"},{"top":246.65999,"left":242.22,"width":104.760009765625,"height":10.800033569335938,"text":"2165 Princeton Ave"},{"top":246.65999,"left":346.98,"width":70.80001831054688,"height":10.800033569335938,"text":"Saint 
Paul"},{"top":246.65999,"left":417.78003,"width":20.87994384765625,"height":10.800033569335938,"text":"MN"},{"top":246.65999,"left":438.65997,"width":49.91998291015625,"height":10.800033569335938,"text":"55105"},{"top":246.65999,"left":488.57996,"width":78.47998046875,"height":10.800033569335938,"text":""},{"top":246.65999,"left":567.05994,"width":106.4400634765625,"height":10.800033569335938,"text":""},{"top":246.65999,"left":673.5,"width":45.52001953125,"height":10.800033569335938,"text":"10/6/2012"},{"top":246.65999,"left":719.02,"width":43.28033447265625,"height":10.800033569335938,"text":"100.00"}],[{"top":257.46002,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":257.46002,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":257.46002,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John"},{"top":257.46002,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles"},{"top":257.46002,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"St. 
Paul"},{"top":257.46002,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":257.46002,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":257.46002,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BOE"},{"top":257.46002,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"retired"},{"top":257.46002,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"8/26/2012"},{"top":257.46002,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":268.26,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":268.26,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":268.26,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John F."},{"top":268.26,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles Ave"},{"top":268.26,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":268.26,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":268.26,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":268.26,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BoE"},{"top":268.26,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":268.26,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/4/2012"},{"top":268.26,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":279.06,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":279.06,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Brown"},{"top":279.06,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Mike"},{"top":279.06,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1440 Goodrich Ave"},{"top":279.06,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":279.06,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":279.06,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":279.06,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":279.06,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":279.06,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"9/23/2012"},{"top":279.06,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":289.84,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":289.84,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Cacy"},{"top":289.84,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Jill"},{"top":289.84,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"1409 Smith Ave So"},{"top":289.84,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"West St. 
Paul"},{"top":289.84,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":289.84,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55118"},{"top":289.84,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Administrator"},{"top":289.84,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"SPPS"},{"top":289.84,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"9/12/2012"},{"top":289.84,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":300.65997,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":300.65997,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cardwell"},{"top":300.65997,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Eileen"},{"top":300.65997,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4172 Bridgewater Ct"},{"top":300.65997,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Vadnais 
Height"},{"top":300.65997,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":300.65997,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55127"},{"top":300.65997,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/3/2012"},{"top":300.65997,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":311.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":311.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carlstrom"},{"top":311.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheryl"},{"top":311.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4244 Oakmede Ln"},{"top":311.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"St Paul"},{"top":311.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":311.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":311.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Satff"},{"top":311.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public 
Schools"},{"top":311.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/8/2012"},{"top":311.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":322.26,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":322.26,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carter"},{"top":322.26,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Melvin W. \u0026 Willet"},{"top":322.26,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"405 Western Ave N"},{"top":322.26,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":322.26,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":322.26,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55103"},{"top":322.26,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Cty Commissioner"},{"top":322.26,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Ramsey Cty"},{"top":322.26,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/8/2012"},{"top":322.26,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":333.06003,"left":16.97,"width":1.6187286376953125,"height":10.8199462890625,"text":""},{"top":333.06003,"left":18.588728,"width":153.91128540039062,"height":10.8199462890625,"text":"Caruso"},{"top":333.06003,"left":172.50002,"width":69.71998596191406,"height":10.8199462890625,"text":"Sarah"},{"top":333.06003,"left":242.22,"width":104.760009765625,"height":10.8199462890625,"text":"2615 Newton Ave 
S"},{"top":333.06003,"left":346.98,"width":70.80001831054688,"height":10.8199462890625,"text":"Minneapolis"},{"top":333.06003,"left":417.78003,"width":20.87994384765625,"height":10.8199462890625,"text":"MN"},{"top":333.06003,"left":438.65997,"width":49.91998291015625,"height":10.8199462890625,"text":"55405"},{"top":333.06003,"left":488.57996,"width":78.47998046875,"height":10.8199462890625,"text":"CEO"},{"top":333.06003,"left":567.05994,"width":106.4400634765625,"height":10.8199462890625,"text":"United Way"},{"top":333.06003,"left":673.5,"width":45.52001953125,"height":10.8199462890625,"text":"9/12/2012"},{"top":333.06003,"left":719.02,"width":43.28033447265625,"height":10.8199462890625,"text":"100.00"}],[{"top":343.87997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":343.87997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Casey /Trewartha"},{"top":343.87997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Kerry F. 
/ Kelly A."},{"top":343.87997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"2266 Goodrich Ave"},{"top":343.87997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":343.87997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":343.87997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":343.87997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":343.87997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":343.87997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/4/2012"},{"top":343.87997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":354.65997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":354.65997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cassidy"},{"top":354.65997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Paul D."},{"top":354.65997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1990 Dayton Ave"},{"top":354.65997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":354.65997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":354.65997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55104"},{"top":354.65997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":354.65997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":354.65997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/6/2012"},{"top":354.65997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":365.43997,"left":16.97,"width":1.6187286376953125,"height":10.820037841796875,"text":""},{"top":365.43997,"left":18.588728,"width":153.91128540039062,"height":10.820037841796875,"text":"Causey"},{"top":365.43997,"left":172.50002,"width":69.71998596191406,"height":10.820037841796875,"text":"Christopher"},{"top":365.43997,"left":242.22,"width":104.760009765625,"height":10.820037841796875,"text":"2181 Doswell Avenue"},{"top":365.43997,"left":346.98,"width":70.80001831054688,"height":10.820037841796875,"text":"Saint 
Paul"},{"top":365.43997,"left":417.78003,"width":20.87994384765625,"height":10.820037841796875,"text":"MN"},{"top":365.43997,"left":438.65997,"width":49.91998291015625,"height":10.820037841796875,"text":"55108"},{"top":365.43997,"left":488.57996,"width":78.47998046875,"height":10.820037841796875,"text":"finance"},{"top":365.43997,"left":567.05994,"width":106.4400634765625,"height":10.820037841796875,"text":""},{"top":365.43997,"left":673.5,"width":45.52001953125,"height":10.820037841796875,"text":"9/3/2012"},{"top":365.43997,"left":719.02,"width":43.28033447265625,"height":10.820037841796875,"text":"100.00"}],[{"top":376.26,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":376.26,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Christiansen"},{"top":376.26,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Lena"},{"top":376.26,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"2453 Como Ave"},{"top":376.26,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"St Paul"},{"top":376.26,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":376.26,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55108"},{"top":376.26,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Principal"},{"top":376.26,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"Saint Paul Public 
Schools"},{"top":376.26,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"8/8/2012"},{"top":376.26,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":387.08,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":387.08,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Clapp"},{"top":387.08,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Agustus (Bill)"},{"top":387.08,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"757 Osceola Ave #1"},{"top":387.08,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":387.08,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":387.08,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":387.08,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"retired"},{"top":387.08,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":387.08,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/2/2012"},{"top":387.08,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":397.86,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":397.86,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cohen"},{"top":397.86,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Brad A."},{"top":397.86,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1460 Raymond Ave"},{"top":397.86,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":397.86,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":397.86,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55108"},{"top":397.86,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"academic technology"},{"top":397.86,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":"UMN"},{"top":397.86,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/17/2012"},{"top":397.86,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":408.63998,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":408.63998,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Commers"},{"top":408.63998,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Beth"},{"top":408.63998,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2294 Commonwealth Ave"},{"top":408.63998,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St Paul"},{"top":408.63998,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":408.63998,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":408.63998,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":"Self 
Employed"},{"top":408.63998,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":"Homemaker"},{"top":408.63998,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"8/1/2012"},{"top":408.63998,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":419.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":419.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Couture"},{"top":419.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Steven"},{"top":419.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"7950 Victoria Way"},{"top":419.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Louise Park"},{"top":419.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":419.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55426"},{"top":419.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Principal"},{"top":419.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public Schools"},{"top":419.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/10/2012"},{"top":419.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":430.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":430.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Crawford"},{"top":430.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Lydia P."},{"top":430.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"1140 Edgcumbe Rd"},{"top":430.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint 
Paul"},{"top":430.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":430.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":430.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":430.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":430.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"9/15/2012"},{"top":430.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":441.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":441.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Cudahy / Ricker"},{"top":441.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Robert \u0026 Mary C"},{"top":441.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"616 Cherokee Ave."},{"top":441.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint Paul"},{"top":441.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":441.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55107"},{"top":441.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"Teacher/Union Presid"},{"top":441.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"SPPS/SPFT"},{"top":441.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"9/18/2012"},{"top":441.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":451.85995,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":451.85995,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cudahy / 
Ricker"},{"top":451.85995,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Robert \u0026 Mary C"},{"top":451.85995,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"616 Cherokee Ave."},{"top":451.85995,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":451.85995,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":451.85995,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55107"},{"top":451.85995,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Teacher/Union Presid"},{"top":451.85995,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"SPPS/SPFT"},{"top":451.85995,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/6/2012"},{"top":451.85995,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":462.65997,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":462.65997,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Currie"},{"top":462.65997,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Elisabeth"},{"top":462.65997,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2274 Hillside Ave"},{"top":462.65997,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St. 
Paul"},{"top":462.65997,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":462.65997,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":462.65997,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":""},{"top":462.65997,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":""},{"top":462.65997,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"9/12/2012"},{"top":462.65997,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":473.47998,"left":16.97,"width":1.6187286376953125,"height":10.780029296875,"text":""},{"top":473.47998,"left":18.588728,"width":153.91128540039062,"height":10.780029296875,"text":"Doane"},{"top":473.47998,"left":172.50002,"width":69.71998596191406,"height":10.780029296875,"text":"Paul V \u0026 Helen R"},{"top":473.47998,"left":242.22,"width":104.760009765625,"height":10.780029296875,"text":"444 Portland Ave"},{"top":473.47998,"left":346.98,"width":70.80001831054688,"height":10.780029296875,"text":"Sant Paul"},{"top":473.47998,"left":417.78003,"width":20.87994384765625,"height":10.780029296875,"text":"MN"},{"top":473.47998,"left":438.65997,"width":49.91998291015625,"height":10.780029296875,"text":"55102"},{"top":473.47998,"left":488.57996,"width":78.47998046875,"height":10.780029296875,"text":"Ex director"},{"top":473.47998,"left":567.05994,"width":106.4400634765625,"height":10.780029296875,"text":"St Paul Teachers\u0027 Retirement 
A"},{"top":473.47998,"left":673.5,"width":45.52001953125,"height":10.780029296875,"text":"10/3/2012"},{"top":473.47998,"left":719.02,"width":43.28033447265625,"height":10.780029296875,"text":"100.00"}],[{"top":484.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":484.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Dougherty"},{"top":484.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Richards S \u0026 Patrici"},{"top":484.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"812 Goodrich Ave"},{"top":484.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint Paul"},{"top":484.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":484.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":484.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":484.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":484.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"10/4/2012"},{"top":484.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":495.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":495.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Driscoll"},{"top":495.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Joe \u0026 Jill"},{"top":495.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"2383 Bourne Ave"},{"top":495.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint 
Paul"},{"top":495.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":495.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55108"},{"top":495.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"HR Manager"},{"top":495.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"LexisNexis"},{"top":495.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"10/18/2012"},{"top":495.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":505.85995,"left":16.97,"width":1.6187286376953125,"height":10.780059814453125,"text":""},{"top":505.85995,"left":18.588728,"width":153.91128540039062,"height":10.780059814453125,"text":"Dubaille / Haugee"},{"top":505.85995,"left":172.50002,"width":69.71998596191406,"height":10.780059814453125,"text":"Florence M. /Eric"},{"top":505.85995,"left":242.22,"width":104.760009765625,"height":10.780059814453125,"text":"1009 Edmund Ave"},{"top":505.85995,"left":346.98,"width":70.80001831054688,"height":10.780059814453125,"text":"Saint 
Paul"},{"top":505.85995,"left":417.78003,"width":20.87994384765625,"height":10.780059814453125,"text":"MN"},{"top":505.85995,"left":438.65997,"width":49.91998291015625,"height":10.780059814453125,"text":"55104"},{"top":505.85995,"left":488.57996,"width":78.47998046875,"height":10.780059814453125,"text":"Florence‐teacher"},{"top":505.85995,"left":567.05994,"width":106.4400634765625,"height":10.780059814453125,"text":"SPPS"},{"top":505.85995,"left":673.5,"width":45.52001953125,"height":10.780059814453125,"text":"10/3/2012"},{"top":505.85995,"left":719.02,"width":43.28033447265625,"height":10.780059814453125,"text":"100.00"}],[{"top":516.64,"left":16.97,"width":1.6187286376953125,"height":10.83990478515625,"text":""},{"top":516.64,"left":18.588728,"width":153.91128540039062,"height":10.83990478515625,"text":"Eaton"},{"top":516.64,"left":172.50002,"width":69.71998596191406,"height":10.83990478515625,"text":"Jim"},{"top":516.64,"left":242.22,"width":104.760009765625,"height":10.83990478515625,"text":"2133 Berkeley Ave"},{"top":516.64,"left":346.98,"width":70.80001831054688,"height":10.83990478515625,"text":"St Paul"},{"top":516.64,"left":417.78003,"width":20.87994384765625,"height":10.83990478515625,"text":"MN"},{"top":516.64,"left":438.65997,"width":49.91998291015625,"height":10.83990478515625,"text":"55105"},{"top":516.64,"left":488.57996,"width":78.47998046875,"height":10.83990478515625,"text":"Principal"},{"top":516.64,"left":567.05994,"width":106.4400634765625,"height":10.83990478515625,"text":"Saint Paul Public Schools"},{"top":516.64,"left":673.5,"width":45.52001953125,"height":10.83990478515625,"text":"8/23/2012"},{"top":516.64,"left":719.02,"width":43.28033447265625,"height":10.83990478515625,"text":"100.00"}],[{"top":527.4799,"left":16.97,"width":1.6187286376953125,"height":10.78009033203125,"text":""},{"top":527.4799,"left":18.588728,"width":153.91128540039062,"height":10.78009033203125,"text":"Eaves 
/Alger"},{"top":527.4799,"left":172.50002,"width":69.71998596191406,"height":10.78009033203125,"text":"Patricia / Stuart"},{"top":527.4799,"left":242.22,"width":104.760009765625,"height":10.78009033203125,"text":"1143 Portladn Ave."},{"top":527.4799,"left":346.98,"width":70.80001831054688,"height":10.78009033203125,"text":"Saint Paul"},{"top":527.4799,"left":417.78003,"width":20.87994384765625,"height":10.78009033203125,"text":"MN"},{"top":527.4799,"left":438.65997,"width":49.91998291015625,"height":10.78009033203125,"text":"55104"},{"top":527.4799,"left":488.57996,"width":78.47998046875,"height":10.78009033203125,"text":""},{"top":527.4799,"left":567.05994,"width":106.4400634765625,"height":10.78009033203125,"text":""},{"top":527.4799,"left":673.5,"width":45.52001953125,"height":10.78009033203125,"text":"10/3/2012"},{"top":527.4799,"left":719.02,"width":43.28033447265625,"height":10.78009033203125,"text":"100.00"}]]} +{"extraction_method":"lattice","page_number":1,"top":54.315777,"left":16.97,"width":745.3303833007812,"height":483.9442443847656,"right":762.30035,"bottom":538.26,"data":[[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":54.315777,"left":18.588728,"width":153.91128540039062,"height":8.744216918945312,"text":"Last Name"},{"top":54.315777,"left":172.50002,"width":69.71998596191406,"height":8.744216918945312,"text":"First 
Name"},{"top":54.315777,"left":242.22,"width":104.760009765625,"height":8.744216918945312,"text":"Address"},{"top":54.315777,"left":346.98,"width":70.80001831054688,"height":8.744216918945312,"text":"City"},{"top":54.315777,"left":417.78003,"width":20.87994384765625,"height":8.744216918945312,"text":"State"},{"top":54.315777,"left":438.65997,"width":49.91998291015625,"height":8.744216918945312,"text":"Zip"},{"top":54.315777,"left":488.57996,"width":78.47998046875,"height":8.744216918945312,"text":"Occupation"},{"top":54.315777,"left":567.05994,"width":106.4400634765625,"height":8.744216918945312,"text":"Employer"},{"top":54.315777,"left":673.5,"width":45.52001953125,"height":8.744216918945312,"text":"Date"},{"top":54.315777,"left":719.02,"width":43.28033447265625,"height":8.744216918945312,"text":"Amount"}],[{"top":63.059994,"left":16.97,"width":1.6187286376953125,"height":10.799991607666016,"text":""},{"top":63.059994,"left":18.588728,"width":153.91128540039062,"height":10.799991607666016,"text":"Lidstad"},{"top":63.059994,"left":172.50002,"width":69.71998596191406,"height":10.799991607666016,"text":"Dick \u0026 Peg"},{"top":63.059994,"left":242.22,"width":104.760009765625,"height":10.799991607666016,"text":"62 Mississippi River Blvd N"},{"top":63.059994,"left":346.98,"width":70.80001831054688,"height":10.799991607666016,"text":"Saint 
Paul"},{"top":63.059994,"left":417.78003,"width":20.87994384765625,"height":10.799991607666016,"text":"MN"},{"top":63.059994,"left":438.65997,"width":49.91998291015625,"height":10.799991607666016,"text":"55104"},{"top":63.059994,"left":488.57996,"width":78.47998046875,"height":10.799991607666016,"text":"retired"},{"top":63.059994,"left":567.05994,"width":106.4400634765625,"height":10.799991607666016,"text":""},{"top":63.059994,"left":673.5,"width":45.52001953125,"height":10.799991607666016,"text":"10/12/2012"},{"top":63.059994,"left":719.02,"width":43.28033447265625,"height":10.799991607666016,"text":"60.00"}],[{"top":73.859985,"left":16.97,"width":1.6187286376953125,"height":10.800025939941406,"text":""},{"top":73.859985,"left":18.588728,"width":153.91128540039062,"height":10.800025939941406,"text":"Strom"},{"top":73.859985,"left":172.50002,"width":69.71998596191406,"height":10.800025939941406,"text":"Pam"},{"top":73.859985,"left":242.22,"width":104.760009765625,"height":10.800025939941406,"text":"1229 Hague Ave"},{"top":73.859985,"left":346.98,"width":70.80001831054688,"height":10.800025939941406,"text":"St. 
Paul"},{"top":73.859985,"left":417.78003,"width":20.87994384765625,"height":10.800025939941406,"text":"MN"},{"top":73.859985,"left":438.65997,"width":49.91998291015625,"height":10.800025939941406,"text":"55104"},{"top":73.859985,"left":488.57996,"width":78.47998046875,"height":10.800025939941406,"text":""},{"top":73.859985,"left":567.05994,"width":106.4400634765625,"height":10.800025939941406,"text":""},{"top":73.859985,"left":673.5,"width":45.52001953125,"height":10.800025939941406,"text":"9/12/2012"},{"top":73.859985,"left":719.02,"width":43.28033447265625,"height":10.800025939941406,"text":"60.00"}],[{"top":84.66001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":84.66001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Seeba"},{"top":84.66001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Louise \u0026 Paul"},{"top":84.66001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"1399 Sheldon St"},{"top":84.66001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":84.66001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":84.66001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55108"},{"top":84.66001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"BOE"},{"top":84.66001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"City of Saint Paul"},{"top":84.66001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/12/2012"},{"top":84.66001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":95.46001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":95.46001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Schumacher / 
Bales"},{"top":95.46001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Douglas L. / Patricia"},{"top":95.46001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"948 County Rd. D W"},{"top":95.46001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":95.46001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":95.46001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55126"},{"top":95.46001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":""},{"top":95.46001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":""},{"top":95.46001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/13/2012"},{"top":95.46001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":106.26,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":106.26,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Abrams"},{"top":106.26,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Marjorie"},{"top":106.26,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"238 8th St east"},{"top":106.26,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"St 
Paul"},{"top":106.26,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":106.26,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55101"},{"top":106.26,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"8/8/2012"},{"top":106.26,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"75.00"}],[{"top":117.06,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":117.06,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Crouse / Schroeder"},{"top":117.06,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Abigail / Jonathan"},{"top":117.06,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1545 Branston St."},{"top":117.06,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":117.06,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":117.06,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55108"},{"top":117.06,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":""},{"top":117.06,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":117.06,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":117.06,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"75.00"}],[{"top":127.859985,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":127.859985,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"O\u0027Connell"},{"top":127.859985,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Jean"},{"top":127.859985,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"888 Ivy Ave W."},{"top":127.859985,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Saint 
Paul"},{"top":127.859985,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":127.859985,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55117"},{"top":127.859985,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":""},{"top":127.859985,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":127.859985,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/30/2012"},{"top":127.859985,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":138.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":138.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Reese"},{"top":138.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheri A"},{"top":138.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"981 Davern St."},{"top":138.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":138.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":138.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55116"},{"top":138.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Public Relations"},{"top":138.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Far North 
Spirits"},{"top":138.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/3/2012"},{"top":138.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"75.00"}],[{"top":149.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":149.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Serrano"},{"top":149.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Luz Maria"},{"top":149.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"5559 Park Place Drive"},{"top":149.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Shoreview"},{"top":149.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":149.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55126"},{"top":149.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"retired"},{"top":149.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":149.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":149.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":160.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":160.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Alsiddiqui"},{"top":160.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Jaber"},{"top":160.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"13056 Euclid Ave"},{"top":160.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Apple 
Valley"},{"top":160.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":160.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55124"},{"top":160.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"SPPS budget analyst"},{"top":160.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":160.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":160.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":171.06,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":171.06,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Andrastek"},{"top":171.06,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"John"},{"top":171.06,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"774 Ashland Ave"},{"top":171.06,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"St Paul"},{"top":171.06,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":171.06,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55104"},{"top":171.06,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"asst 
principal"},{"top":171.06,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":171.06,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"8/22/2012"},{"top":171.06,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":181.86,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":181.86,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Anfang"},{"top":181.86,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Heather L. \u0026 Matt"},{"top":181.86,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1635 Bayard Ave"},{"top":181.86,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":181.86,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":181.86,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55116"},{"top":181.86,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"Ex Director"},{"top":181.86,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"BOMA"},{"top":181.86,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":181.86,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":192.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":192.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Aronson"},{"top":192.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Roger J."},{"top":192.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4852 Emerson Ave. 
S."},{"top":192.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Minneapolis"},{"top":192.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":192.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55419"},{"top":192.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Attorney at Law"},{"top":192.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":""},{"top":192.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/20/2012"},{"top":192.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":203.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":203.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Aronson"},{"top":203.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Roger J."},{"top":203.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"4852 Emerson Ave. 
S."},{"top":203.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Minneapolis"},{"top":203.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":203.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55419"},{"top":203.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"Attorney at Law"},{"top":203.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":203.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":203.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":214.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":214.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Banks"},{"top":214.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Michael or Patricia"},{"top":214.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1664 Van Buren Ave."},{"top":214.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":214.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":214.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":214.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"retired social 
worker"},{"top":214.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":214.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":214.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":225.06,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":225.06,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Bennett"},{"top":225.06,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"David"},{"top":225.06,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"25 Birchwood Rd."},{"top":225.06,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Mahtomedi"},{"top":225.06,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":225.06,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":225.06,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/3/2012"},{"top":225.06,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":235.86002,"left":16.97,"width":1.6187286376953125,"height":10.799972534179688,"text":""},{"top":235.86002,"left":18.588728,"width":153.91128540039062,"height":10.799972534179688,"text":"Berry"},{"top":235.86002,"left":172.50002,"width":69.71998596191406,"height":10.799972534179688,"text":"Margaret"},{"top":235.86002,"left":242.22,"width":104.760009765625,"height":10.799972534179688,"text":"1267 Pike Lake Dr"},{"top":235.86002,"left":346.98,"width":70.80001831054688,"height":10.799972534179688,"text":"New 
Brighton"},{"top":235.86002,"left":417.78003,"width":20.87994384765625,"height":10.799972534179688,"text":"MN"},{"top":235.86002,"left":438.65997,"width":49.91998291015625,"height":10.799972534179688,"text":"55112"},{"top":235.86002,"left":488.57996,"width":78.47998046875,"height":10.799972534179688,"text":"Asst Principal"},{"top":235.86002,"left":567.05994,"width":106.4400634765625,"height":10.799972534179688,"text":"Saint Paul Public Schools"},{"top":235.86002,"left":673.5,"width":45.52001953125,"height":10.799972534179688,"text":"8/8/2012"},{"top":235.86002,"left":719.02,"width":43.28033447265625,"height":10.799972534179688,"text":"100.00"}],[{"top":246.65999,"left":16.97,"width":1.6187286376953125,"height":10.800033569335938,"text":""},{"top":246.65999,"left":18.588728,"width":153.91128540039062,"height":10.800033569335938,"text":"Boyle"},{"top":246.65999,"left":172.50002,"width":69.71998596191406,"height":10.800033569335938,"text":"Matthew C. \u0026 Eliza"},{"top":246.65999,"left":242.22,"width":104.760009765625,"height":10.800033569335938,"text":"2165 Princeton Ave"},{"top":246.65999,"left":346.98,"width":70.80001831054688,"height":10.800033569335938,"text":"Saint 
Paul"},{"top":246.65999,"left":417.78003,"width":20.87994384765625,"height":10.800033569335938,"text":"MN"},{"top":246.65999,"left":438.65997,"width":49.91998291015625,"height":10.800033569335938,"text":"55105"},{"top":246.65999,"left":488.57996,"width":78.47998046875,"height":10.800033569335938,"text":""},{"top":246.65999,"left":567.05994,"width":106.4400634765625,"height":10.800033569335938,"text":""},{"top":246.65999,"left":673.5,"width":45.52001953125,"height":10.800033569335938,"text":"10/6/2012"},{"top":246.65999,"left":719.02,"width":43.28033447265625,"height":10.800033569335938,"text":"100.00"}],[{"top":257.46002,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":257.46002,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":257.46002,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John"},{"top":257.46002,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles"},{"top":257.46002,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"St. 
Paul"},{"top":257.46002,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":257.46002,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":257.46002,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BOE"},{"top":257.46002,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"retired"},{"top":257.46002,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"8/26/2012"},{"top":257.46002,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":268.26,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":268.26,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":268.26,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John F."},{"top":268.26,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles Ave"},{"top":268.26,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":268.26,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":268.26,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":268.26,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BoE"},{"top":268.26,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":268.26,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/4/2012"},{"top":268.26,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":279.06,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":279.06,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Brown"},{"top":279.06,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Mike"},{"top":279.06,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1440 Goodrich Ave"},{"top":279.06,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":279.06,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":279.06,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":279.06,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":279.06,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":279.06,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"9/23/2012"},{"top":279.06,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":289.84,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":289.84,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Cacy"},{"top":289.84,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Jill"},{"top":289.84,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"1409 Smith Ave So"},{"top":289.84,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"West St. 
Paul"},{"top":289.84,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":289.84,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55118"},{"top":289.84,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Administrator"},{"top":289.84,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"SPPS"},{"top":289.84,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"9/12/2012"},{"top":289.84,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":300.65997,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":300.65997,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cardwell"},{"top":300.65997,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Eileen"},{"top":300.65997,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4172 Bridgewater Ct"},{"top":300.65997,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Vadnais 
Height"},{"top":300.65997,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":300.65997,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55127"},{"top":300.65997,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/3/2012"},{"top":300.65997,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":311.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":311.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carlstrom"},{"top":311.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheryl"},{"top":311.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4244 Oakmede Ln"},{"top":311.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"St Paul"},{"top":311.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":311.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":311.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Satff"},{"top":311.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public 
Schools"},{"top":311.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/8/2012"},{"top":311.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":322.26,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":322.26,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carter"},{"top":322.26,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Melvin W. \u0026 Willet"},{"top":322.26,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"405 Western Ave N"},{"top":322.26,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":322.26,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":322.26,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55103"},{"top":322.26,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Cty Commissioner"},{"top":322.26,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Ramsey Cty"},{"top":322.26,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/8/2012"},{"top":322.26,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":333.06003,"left":16.97,"width":1.6187286376953125,"height":10.8199462890625,"text":""},{"top":333.06003,"left":18.588728,"width":153.91128540039062,"height":10.8199462890625,"text":"Caruso"},{"top":333.06003,"left":172.50002,"width":69.71998596191406,"height":10.8199462890625,"text":"Sarah"},{"top":333.06003,"left":242.22,"width":104.760009765625,"height":10.8199462890625,"text":"2615 Newton Ave 
S"},{"top":333.06003,"left":346.98,"width":70.80001831054688,"height":10.8199462890625,"text":"Minneapolis"},{"top":333.06003,"left":417.78003,"width":20.87994384765625,"height":10.8199462890625,"text":"MN"},{"top":333.06003,"left":438.65997,"width":49.91998291015625,"height":10.8199462890625,"text":"55405"},{"top":333.06003,"left":488.57996,"width":78.47998046875,"height":10.8199462890625,"text":"CEO"},{"top":333.06003,"left":567.05994,"width":106.4400634765625,"height":10.8199462890625,"text":"United Way"},{"top":333.06003,"left":673.5,"width":45.52001953125,"height":10.8199462890625,"text":"9/12/2012"},{"top":333.06003,"left":719.02,"width":43.28033447265625,"height":10.8199462890625,"text":"100.00"}],[{"top":343.87997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":343.87997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Casey /Trewartha"},{"top":343.87997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Kerry F. 
/ Kelly A."},{"top":343.87997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"2266 Goodrich Ave"},{"top":343.87997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":343.87997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":343.87997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":343.87997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":343.87997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":343.87997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/4/2012"},{"top":343.87997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":354.65997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":354.65997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cassidy"},{"top":354.65997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Paul D."},{"top":354.65997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1990 Dayton Ave"},{"top":354.65997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":354.65997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":354.65997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55104"},{"top":354.65997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":354.65997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":354.65997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/6/2012"},{"top":354.65997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":365.43997,"left":16.97,"width":1.6187286376953125,"height":10.820037841796875,"text":""},{"top":365.43997,"left":18.588728,"width":153.91128540039062,"height":10.820037841796875,"text":"Causey"},{"top":365.43997,"left":172.50002,"width":69.71998596191406,"height":10.820037841796875,"text":"Christopher"},{"top":365.43997,"left":242.22,"width":104.760009765625,"height":10.820037841796875,"text":"2181 Doswell Avenue"},{"top":365.43997,"left":346.98,"width":70.80001831054688,"height":10.820037841796875,"text":"Saint 
Paul"},{"top":365.43997,"left":417.78003,"width":20.87994384765625,"height":10.820037841796875,"text":"MN"},{"top":365.43997,"left":438.65997,"width":49.91998291015625,"height":10.820037841796875,"text":"55108"},{"top":365.43997,"left":488.57996,"width":78.47998046875,"height":10.820037841796875,"text":"finance"},{"top":365.43997,"left":567.05994,"width":106.4400634765625,"height":10.820037841796875,"text":""},{"top":365.43997,"left":673.5,"width":45.52001953125,"height":10.820037841796875,"text":"9/3/2012"},{"top":365.43997,"left":719.02,"width":43.28033447265625,"height":10.820037841796875,"text":"100.00"}],[{"top":376.26,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":376.26,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Christiansen"},{"top":376.26,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Lena"},{"top":376.26,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"2453 Como Ave"},{"top":376.26,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"St Paul"},{"top":376.26,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":376.26,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55108"},{"top":376.26,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Principal"},{"top":376.26,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"Saint Paul Public 
Schools"},{"top":376.26,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"8/8/2012"},{"top":376.26,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":387.08,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":387.08,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Clapp"},{"top":387.08,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Agustus (Bill)"},{"top":387.08,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"757 Osceola Ave #1"},{"top":387.08,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":387.08,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":387.08,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":387.08,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"retired"},{"top":387.08,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":387.08,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/2/2012"},{"top":387.08,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":397.86,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":397.86,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cohen"},{"top":397.86,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Brad A."},{"top":397.86,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1460 Raymond Ave"},{"top":397.86,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":397.86,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":397.86,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55108"},{"top":397.86,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"academic technology"},{"top":397.86,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":"UMN"},{"top":397.86,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/17/2012"},{"top":397.86,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":408.63998,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":408.63998,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Commers"},{"top":408.63998,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Beth"},{"top":408.63998,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2294 Commonwealth Ave"},{"top":408.63998,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St Paul"},{"top":408.63998,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":408.63998,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":408.63998,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":"Self 
Employed"},{"top":408.63998,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":"Homemaker"},{"top":408.63998,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"8/1/2012"},{"top":408.63998,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":419.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":419.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Couture"},{"top":419.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Steven"},{"top":419.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"7950 Victoria Way"},{"top":419.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Louise Park"},{"top":419.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":419.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55426"},{"top":419.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Principal"},{"top":419.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public Schools"},{"top":419.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/10/2012"},{"top":419.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":430.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":430.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Crawford"},{"top":430.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Lydia P."},{"top":430.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"1140 Edgcumbe Rd"},{"top":430.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint 
Paul"},{"top":430.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":430.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":430.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":430.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":430.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"9/15/2012"},{"top":430.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":441.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":441.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Cudahy / Ricker"},{"top":441.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Robert \u0026 Mary C"},{"top":441.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"616 Cherokee Ave."},{"top":441.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint Paul"},{"top":441.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":441.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55107"},{"top":441.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"Teacher/Union Presid"},{"top":441.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"SPPS/SPFT"},{"top":441.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"9/18/2012"},{"top":441.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":451.85995,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":451.85995,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cudahy / 
Ricker"},{"top":451.85995,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Robert \u0026 Mary C"},{"top":451.85995,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"616 Cherokee Ave."},{"top":451.85995,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":451.85995,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":451.85995,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55107"},{"top":451.85995,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Teacher/Union Presid"},{"top":451.85995,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"SPPS/SPFT"},{"top":451.85995,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/6/2012"},{"top":451.85995,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":462.65997,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":462.65997,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Currie"},{"top":462.65997,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Elisabeth"},{"top":462.65997,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2274 Hillside Ave"},{"top":462.65997,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St. 
Paul"},{"top":462.65997,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":462.65997,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":462.65997,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":""},{"top":462.65997,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":""},{"top":462.65997,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"9/12/2012"},{"top":462.65997,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":473.47998,"left":16.97,"width":1.6187286376953125,"height":10.780029296875,"text":""},{"top":473.47998,"left":18.588728,"width":153.91128540039062,"height":10.780029296875,"text":"Doane"},{"top":473.47998,"left":172.50002,"width":69.71998596191406,"height":10.780029296875,"text":"Paul V \u0026 Helen R"},{"top":473.47998,"left":242.22,"width":104.760009765625,"height":10.780029296875,"text":"444 Portland Ave"},{"top":473.47998,"left":346.98,"width":70.80001831054688,"height":10.780029296875,"text":"Sant Paul"},{"top":473.47998,"left":417.78003,"width":20.87994384765625,"height":10.780029296875,"text":"MN"},{"top":473.47998,"left":438.65997,"width":49.91998291015625,"height":10.780029296875,"text":"55102"},{"top":473.47998,"left":488.57996,"width":78.47998046875,"height":10.780029296875,"text":"Ex director"},{"top":473.47998,"left":567.05994,"width":106.4400634765625,"height":10.780029296875,"text":"St Paul Teachers\u0027 Retirement 
A"},{"top":473.47998,"left":673.5,"width":45.52001953125,"height":10.780029296875,"text":"10/3/2012"},{"top":473.47998,"left":719.02,"width":43.28033447265625,"height":10.780029296875,"text":"100.00"}],[{"top":484.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":484.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Dougherty"},{"top":484.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Richards S \u0026 Patrici"},{"top":484.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"812 Goodrich Ave"},{"top":484.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint Paul"},{"top":484.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":484.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":484.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":484.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":484.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"10/4/2012"},{"top":484.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":495.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":495.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Driscoll"},{"top":495.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Joe \u0026 Jill"},{"top":495.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"2383 Bourne Ave"},{"top":495.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint 
Paul"},{"top":495.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":495.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55108"},{"top":495.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"HR Manager"},{"top":495.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"LexisNexis"},{"top":495.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"10/18/2012"},{"top":495.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":505.85995,"left":16.97,"width":1.6187286376953125,"height":10.780059814453125,"text":""},{"top":505.85995,"left":18.588728,"width":153.91128540039062,"height":10.780059814453125,"text":"Dubaille / Haugee"},{"top":505.85995,"left":172.50002,"width":69.71998596191406,"height":10.780059814453125,"text":"Florence M. /Eric"},{"top":505.85995,"left":242.22,"width":104.760009765625,"height":10.780059814453125,"text":"1009 Edmund Ave"},{"top":505.85995,"left":346.98,"width":70.80001831054688,"height":10.780059814453125,"text":"Saint 
Paul"},{"top":505.85995,"left":417.78003,"width":20.87994384765625,"height":10.780059814453125,"text":"MN"},{"top":505.85995,"left":438.65997,"width":49.91998291015625,"height":10.780059814453125,"text":"55104"},{"top":505.85995,"left":488.57996,"width":78.47998046875,"height":10.780059814453125,"text":"Florence‐teacher"},{"top":505.85995,"left":567.05994,"width":106.4400634765625,"height":10.780059814453125,"text":"SPPS"},{"top":505.85995,"left":673.5,"width":45.52001953125,"height":10.780059814453125,"text":"10/3/2012"},{"top":505.85995,"left":719.02,"width":43.28033447265625,"height":10.780059814453125,"text":"100.00"}],[{"top":516.64,"left":16.97,"width":1.6187286376953125,"height":10.83990478515625,"text":""},{"top":516.64,"left":18.588728,"width":153.91128540039062,"height":10.83990478515625,"text":"Eaton"},{"top":516.64,"left":172.50002,"width":69.71998596191406,"height":10.83990478515625,"text":"Jim"},{"top":516.64,"left":242.22,"width":104.760009765625,"height":10.83990478515625,"text":"2133 Berkeley Ave"},{"top":516.64,"left":346.98,"width":70.80001831054688,"height":10.83990478515625,"text":"St Paul"},{"top":516.64,"left":417.78003,"width":20.87994384765625,"height":10.83990478515625,"text":"MN"},{"top":516.64,"left":438.65997,"width":49.91998291015625,"height":10.83990478515625,"text":"55105"},{"top":516.64,"left":488.57996,"width":78.47998046875,"height":10.83990478515625,"text":"Principal"},{"top":516.64,"left":567.05994,"width":106.4400634765625,"height":10.83990478515625,"text":"Saint Paul Public Schools"},{"top":516.64,"left":673.5,"width":45.52001953125,"height":10.83990478515625,"text":"8/23/2012"},{"top":516.64,"left":719.02,"width":43.28033447265625,"height":10.83990478515625,"text":"100.00"}],[{"top":527.4799,"left":16.97,"width":1.6187286376953125,"height":10.78009033203125,"text":""},{"top":527.4799,"left":18.588728,"width":153.91128540039062,"height":10.78009033203125,"text":"Eaves 
/Alger"},{"top":527.4799,"left":172.50002,"width":69.71998596191406,"height":10.78009033203125,"text":"Patricia / Stuart"},{"top":527.4799,"left":242.22,"width":104.760009765625,"height":10.78009033203125,"text":"1143 Portladn Ave."},{"top":527.4799,"left":346.98,"width":70.80001831054688,"height":10.78009033203125,"text":"Saint Paul"},{"top":527.4799,"left":417.78003,"width":20.87994384765625,"height":10.78009033203125,"text":"MN"},{"top":527.4799,"left":438.65997,"width":49.91998291015625,"height":10.78009033203125,"text":"55104"},{"top":527.4799,"left":488.57996,"width":78.47998046875,"height":10.78009033203125,"text":""},{"top":527.4799,"left":567.05994,"width":106.4400634765625,"height":10.78009033203125,"text":""},{"top":527.4799,"left":673.5,"width":45.52001953125,"height":10.78009033203125,"text":"10/3/2012"},{"top":527.4799,"left":719.02,"width":43.28033447265625,"height":10.78009033203125,"text":"100.00"}]]} diff --git a/src/test/resources/technology/tabula/json/spanning_cells.json b/src/test/resources/technology/tabula/json/spanning_cells.json index 89bb9707..7f68ee99 100644 --- a/src/test/resources/technology/tabula/json/spanning_cells.json +++ b/src/test/resources/technology/tabula/json/spanning_cells.json @@ -1 +1 @@ -[{"extraction_method":"lattice","top":146.51932,"left":119.78943,"width":355.312255859375,"height":270.5516052246094,"right":475.10168,"bottom":417.07092,"data":[[{"top":146.51932,"left":119.78943,"width":355.312255859375,"height":12.938491821289062,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.78943,"width":121.92680358886719,"height":12.785446166992188,"text":"Volume servers 
in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663360595703125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37115,"width":46.73052978515625,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.78943,"width":121.92680358886719,"height":12.832427978515625,"text":"Server closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663360595703125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37115,"width":46.73052978515625,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.78943,"width":121.92680358886719,"height":12.891036987304688,"text":"Server rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663360595703125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37115,"width":46.73052978515625,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.78943,"width":121.92680358886719,"height":12.890411376953125,"text":"Localized data 
centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663360595703125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37115,"width":46.73052978515625,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.78943,"width":121.92680358886719,"height":12.890716552734375,"text":"Mid-tier data centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663360595703125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37115,"width":46.73052978515625,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.78943,"width":121.92680358886719,"height":12.785018920898438,"text":"Enterprise-class data centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785018920898438,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785018920898438,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785018920898438,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663360595703125,"height":12.785018920898438,"text":"1,677"},{"top":223.74785,"left":428.37115,"width":46.73052978515625,"height":12.785018920898438,"text":"1,693"}],[{"top":236.53287,"left":119.78943,"width":355.312255859375,"height":13.1025390625,"text":"Best practice 
scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.6354,"left":119.78943,"width":121.92680358886719,"height":12.72686767578125,"text":"Volume servers in:"},{"top":249.6354,"left":241.71623,"width":46.66426086425781,"height":12.72686767578125,"text":"2007"},{"top":249.6354,"left":288.3805,"width":46.663482666015625,"height":12.72686767578125,"text":"2008"},{"top":249.6354,"left":335.04398,"width":46.663818359375,"height":12.72686767578125,"text":"2009"},{"top":249.6354,"left":381.7078,"width":46.663360595703125,"height":12.72686767578125,"text":"2010"},{"top":249.6354,"left":428.37115,"width":46.73052978515625,"height":12.72686767578125,"text":"2011"}],[{"top":262.36227,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Server closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.78943,"width":121.92680358886719,"height":12.8914794921875,"text":"Server 
rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663360595703125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37115,"width":46.73052978515625,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.78943,"width":121.92680358886719,"height":12.890167236328125,"text":"Localized data centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663360595703125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37115,"width":46.73052978515625,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Mid-tier data centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.78943,"width":121.92680358886719,"height":12.7855224609375,"text":"Enterprise-class data 
centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.7855224609375,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.7855224609375,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.7855224609375,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663360595703125,"height":12.7855224609375,"text":"1,334"},{"top":313.92487,"left":428.37115,"width":46.73052978515625,"height":12.7855224609375,"text":"1,371"}],[{"top":326.7104,"left":119.78943,"width":355.312255859375,"height":13.0440673828125,"text":"State-of-the-art scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75446,"left":119.78943,"width":121.92680358886719,"height":12.784912109375,"text":"Volume servers in:"},{"top":339.75446,"left":241.71623,"width":46.66426086425781,"height":12.784912109375,"text":"2007"},{"top":339.75446,"left":288.3805,"width":46.663482666015625,"height":12.784912109375,"text":"2008"},{"top":339.75446,"left":335.04398,"width":46.663818359375,"height":12.784912109375,"text":"2009"},{"top":339.75446,"left":381.7078,"width":46.663360595703125,"height":12.784912109375,"text":"2010"},{"top":339.75446,"left":428.37115,"width":46.73052978515625,"height":12.784912109375,"text":"2011"}],[{"top":352.53937,"left":119.78943,"width":121.92680358886719,"height":12.89111328125,"text":"Server 
closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663360595703125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37115,"width":46.73052978515625,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.78943,"width":121.92680358886719,"height":12.8902587890625,"text":"Server rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663360595703125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37115,"width":46.73052978515625,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.78943,"width":121.92680358886719,"height":12.891143798828125,"text":"Localized data centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663360595703125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37115,"width":46.73052978515625,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.78943,"width":121.92680358886719,"height":12.89068603515625,"text":"Mid-tier data 
centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663360595703125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37115,"width":46.73052978515625,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.78943,"width":121.92680358886719,"height":12.968353271484375,"text":"Enterprise-class data centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968353271484375,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968353271484375,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968353271484375,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663360595703125,"height":12.968353271484375,"text":"1,424"},{"top":404.10257,"left":428.37115,"width":46.73052978515625,"height":12.968353271484375,"text":"1,485"}]]},{"extraction_method":"lattice","top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.52508544921875,"right":464.1174,"bottom":589.7847,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.880615234375,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.14026,"left":130.8136,"width":99.93739318847656,"height":12.78485107421875,"text":"Server 
class"},{"top":551.14026,"left":230.75099,"width":46.66316223144531,"height":12.78485107421875,"text":"2007"},{"top":551.14026,"left":277.41415,"width":46.663726806640625,"height":12.78485107421875,"text":"2008"},{"top":551.14026,"left":324.07788,"width":46.663970947265625,"height":12.78485107421875,"text":"2009"},{"top":551.14026,"left":370.74185,"width":46.663177490234375,"height":12.78485107421875,"text":"2010"},{"top":551.14026,"left":417.40503,"width":46.712371826171875,"height":12.78485107421875,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.9691162109375,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.9691162109375,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.9691162109375,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.9691162109375,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.9691162109375,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.9691162109375,"text":"98,864"}]]}] 
+[{"extraction_method":"lattice","page_number":1,"top":146.51932,"left":119.78943,"width":355.312255859375,"height":270.5516052246094,"right":475.10168,"bottom":417.07092,"data":[[{"top":146.51932,"left":119.78943,"width":355.312255859375,"height":12.938491821289062,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.78943,"width":121.92680358886719,"height":12.785446166992188,"text":"Volume servers in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663360595703125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37115,"width":46.73052978515625,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.78943,"width":121.92680358886719,"height":12.832427978515625,"text":"Server closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663360595703125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37115,"width":46.73052978515625,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.78943,"width":121.92680358886719,"height":12.891036987304688,"text":"Server 
rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663360595703125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37115,"width":46.73052978515625,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.78943,"width":121.92680358886719,"height":12.890411376953125,"text":"Localized data centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663360595703125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37115,"width":46.73052978515625,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.78943,"width":121.92680358886719,"height":12.890716552734375,"text":"Mid-tier data centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663360595703125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37115,"width":46.73052978515625,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.78943,"width":121.92680358886719,"height":12.785018920898438,"text":"Enterprise-class data 
centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785018920898438,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785018920898438,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785018920898438,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663360595703125,"height":12.785018920898438,"text":"1,677"},{"top":223.74785,"left":428.37115,"width":46.73052978515625,"height":12.785018920898438,"text":"1,693"}],[{"top":236.53287,"left":119.78943,"width":355.312255859375,"height":13.1025390625,"text":"Best practice scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.6354,"left":119.78943,"width":121.92680358886719,"height":12.72686767578125,"text":"Volume servers in:"},{"top":249.6354,"left":241.71623,"width":46.66426086425781,"height":12.72686767578125,"text":"2007"},{"top":249.6354,"left":288.3805,"width":46.663482666015625,"height":12.72686767578125,"text":"2008"},{"top":249.6354,"left":335.04398,"width":46.663818359375,"height":12.72686767578125,"text":"2009"},{"top":249.6354,"left":381.7078,"width":46.663360595703125,"height":12.72686767578125,"text":"2010"},{"top":249.6354,"left":428.37115,"width":46.73052978515625,"height":12.72686767578125,"text":"2011"}],[{"top":262.36227,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Server 
closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.78943,"width":121.92680358886719,"height":12.8914794921875,"text":"Server rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663360595703125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37115,"width":46.73052978515625,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.78943,"width":121.92680358886719,"height":12.890167236328125,"text":"Localized data centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663360595703125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37115,"width":46.73052978515625,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Mid-tier data 
centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.78943,"width":121.92680358886719,"height":12.7855224609375,"text":"Enterprise-class data centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.7855224609375,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.7855224609375,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.7855224609375,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663360595703125,"height":12.7855224609375,"text":"1,334"},{"top":313.92487,"left":428.37115,"width":46.73052978515625,"height":12.7855224609375,"text":"1,371"}],[{"top":326.7104,"left":119.78943,"width":355.312255859375,"height":13.0440673828125,"text":"State-of-the-art scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75446,"left":119.78943,"width":121.92680358886719,"height":12.784912109375,"text":"Volume servers 
in:"},{"top":339.75446,"left":241.71623,"width":46.66426086425781,"height":12.784912109375,"text":"2007"},{"top":339.75446,"left":288.3805,"width":46.663482666015625,"height":12.784912109375,"text":"2008"},{"top":339.75446,"left":335.04398,"width":46.663818359375,"height":12.784912109375,"text":"2009"},{"top":339.75446,"left":381.7078,"width":46.663360595703125,"height":12.784912109375,"text":"2010"},{"top":339.75446,"left":428.37115,"width":46.73052978515625,"height":12.784912109375,"text":"2011"}],[{"top":352.53937,"left":119.78943,"width":121.92680358886719,"height":12.89111328125,"text":"Server closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663360595703125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37115,"width":46.73052978515625,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.78943,"width":121.92680358886719,"height":12.8902587890625,"text":"Server rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663360595703125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37115,"width":46.73052978515625,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.78943,"width":121.92680358886719,"height":12.891143798828125,"text":"Localized data 
centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663360595703125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37115,"width":46.73052978515625,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.78943,"width":121.92680358886719,"height":12.89068603515625,"text":"Mid-tier data centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663360595703125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37115,"width":46.73052978515625,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.78943,"width":121.92680358886719,"height":12.968353271484375,"text":"Enterprise-class data 
centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968353271484375,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968353271484375,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968353271484375,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663360595703125,"height":12.968353271484375,"text":"1,424"},{"top":404.10257,"left":428.37115,"width":46.73052978515625,"height":12.968353271484375,"text":"1,485"}]]},{"extraction_method":"lattice","page_number":1,"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.52508544921875,"right":464.1174,"bottom":589.7847,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.880615234375,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.14026,"left":130.8136,"width":99.93739318847656,"height":12.78485107421875,"text":"Server 
class"},{"top":551.14026,"left":230.75099,"width":46.66316223144531,"height":12.78485107421875,"text":"2007"},{"top":551.14026,"left":277.41415,"width":46.663726806640625,"height":12.78485107421875,"text":"2008"},{"top":551.14026,"left":324.07788,"width":46.663970947265625,"height":12.78485107421875,"text":"2009"},{"top":551.14026,"left":370.74185,"width":46.663177490234375,"height":12.78485107421875,"text":"2010"},{"top":551.14026,"left":417.40503,"width":46.712371826171875,"height":12.78485107421875,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.9691162109375,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.9691162109375,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.9691162109375,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.9691162109375,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.9691162109375,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.9691162109375,"text":"98,864"}]]}] diff --git a/src/test/resources/technology/tabula/json/spanning_cells_basic.json b/src/test/resources/technology/tabula/json/spanning_cells_basic.json index d0164280..6efaef5b 100644 --- 
a/src/test/resources/technology/tabula/json/spanning_cells_basic.json +++ b/src/test/resources/technology/tabula/json/spanning_cells_basic.json @@ -1 +1 @@ -[{"extraction_method":"lattice","top":150.56,"left":119.789444,"width":355.31939697265625,"height":266.5108642578125,"right":475.10883,"bottom":417.07086,"data":[[{"top":150.56,"left":119.789444,"width":355.31939697265625,"height":8.897811889648438,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.789444,"width":121.92678833007812,"height":12.785446166992188,"text":"Volume servers in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663330078125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37112,"width":46.73773193359375,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.789444,"width":121.92678833007812,"height":12.832427978515625,"text":"Server 
closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663330078125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37112,"width":46.73773193359375,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.789444,"width":121.92678833007812,"height":12.891036987304688,"text":"Server rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663330078125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37112,"width":46.73773193359375,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.789444,"width":121.92678833007812,"height":12.890411376953125,"text":"Localized data centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663330078125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37112,"width":46.73773193359375,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.789444,"width":121.92678833007812,"height":12.890716552734375,"text":"Mid-tier data 
centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663330078125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37112,"width":46.73773193359375,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.789444,"width":121.92678833007812,"height":12.785003662109375,"text":"Enterprise-class data centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785003662109375,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785003662109375,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785003662109375,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663330078125,"height":12.785003662109375,"text":"1,677"},{"top":223.74785,"left":428.37112,"width":46.73773193359375,"height":12.785003662109375,"text":"1,693"}],[{"top":236.53285,"left":119.789444,"width":355.31939697265625,"height":13.1025390625,"text":"Best practice scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.63539,"left":119.789444,"width":121.92678833007812,"height":12.726882934570312,"text":"Volume servers 
in:"},{"top":249.63539,"left":241.71623,"width":46.66426086425781,"height":12.726882934570312,"text":"2007"},{"top":249.63539,"left":288.3805,"width":46.663482666015625,"height":12.726882934570312,"text":"2008"},{"top":249.63539,"left":335.04398,"width":46.663818359375,"height":12.726882934570312,"text":"2009"},{"top":249.63539,"left":381.7078,"width":46.663330078125,"height":12.726882934570312,"text":"2010"},{"top":249.63539,"left":428.37112,"width":46.73773193359375,"height":12.726882934570312,"text":"2011"}],[{"top":262.36227,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Server closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.789444,"width":121.92678833007812,"height":12.8914794921875,"text":"Server rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663330078125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37112,"width":46.73773193359375,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.789444,"width":121.92678833007812,"height":12.890167236328125,"text":"Localized data 
centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663330078125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37112,"width":46.73773193359375,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Mid-tier data centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.789444,"width":121.92678833007812,"height":12.785552978515625,"text":"Enterprise-class data centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.785552978515625,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.785552978515625,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.785552978515625,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663330078125,"height":12.785552978515625,"text":"1,334"},{"top":313.92487,"left":428.37112,"width":46.73773193359375,"height":12.785552978515625,"text":"1,371"}],[{"top":326.71042,"left":119.789444,"width":355.31939697265625,"height":13.04400634765625,"text":"State-of-the-art 
scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75443,"left":119.789444,"width":121.92678833007812,"height":12.784942626953125,"text":"Volume servers in:"},{"top":339.75443,"left":241.71623,"width":46.66426086425781,"height":12.784942626953125,"text":"2007"},{"top":339.75443,"left":288.3805,"width":46.663482666015625,"height":12.784942626953125,"text":"2008"},{"top":339.75443,"left":335.04398,"width":46.663818359375,"height":12.784942626953125,"text":"2009"},{"top":339.75443,"left":381.7078,"width":46.663330078125,"height":12.784942626953125,"text":"2010"},{"top":339.75443,"left":428.37112,"width":46.73773193359375,"height":12.784942626953125,"text":"2011"}],[{"top":352.53937,"left":119.789444,"width":121.92678833007812,"height":12.89111328125,"text":"Server closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663330078125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37112,"width":46.73773193359375,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.789444,"width":121.92678833007812,"height":12.8902587890625,"text":"Server 
rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663330078125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37112,"width":46.73773193359375,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.789444,"width":121.92678833007812,"height":12.891143798828125,"text":"Localized data centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663330078125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37112,"width":46.73773193359375,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.789444,"width":121.92678833007812,"height":12.89068603515625,"text":"Mid-tier data centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663330078125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37112,"width":46.73773193359375,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.789444,"width":121.92678833007812,"height":12.968292236328125,"text":"Enterprise-class data 
centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968292236328125,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968292236328125,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968292236328125,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663330078125,"height":12.968292236328125,"text":"1,424"},{"top":404.10257,"left":428.37112,"width":46.73773193359375,"height":12.968292236328125,"text":"1,485"}]]},{"extraction_method":"lattice","top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.5423583984375,"right":464.1174,"bottom":589.802,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.88055419921875,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.1402,"left":130.8136,"width":99.93739318847656,"height":12.784912109375,"text":"Server 
class"},{"top":551.1402,"left":230.75099,"width":46.66316223144531,"height":12.784912109375,"text":"2007"},{"top":551.1402,"left":277.41415,"width":46.663726806640625,"height":12.784912109375,"text":"2008"},{"top":551.1402,"left":324.07788,"width":46.663970947265625,"height":12.784912109375,"text":"2009"},{"top":551.1402,"left":370.74185,"width":46.663177490234375,"height":12.784912109375,"text":"2010"},{"top":551.1402,"left":417.40503,"width":46.712371826171875,"height":12.784912109375,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.98638916015625,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.98638916015625,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.98638916015625,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.98638916015625,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.98638916015625,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.98638916015625,"text":"98,864"}]]}] 
+[{"extraction_method":"lattice","page_number":1,"top":150.56,"left":119.789444,"width":355.31939697265625,"height":266.5108642578125,"right":475.10883,"bottom":417.07086,"data":[[{"top":150.56,"left":119.789444,"width":355.31939697265625,"height":8.897811889648438,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.789444,"width":121.92678833007812,"height":12.785446166992188,"text":"Volume servers in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663330078125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37112,"width":46.73773193359375,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.789444,"width":121.92678833007812,"height":12.832427978515625,"text":"Server closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663330078125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37112,"width":46.73773193359375,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.789444,"width":121.92678833007812,"height":12.891036987304688,"text":"Server 
rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663330078125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37112,"width":46.73773193359375,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.789444,"width":121.92678833007812,"height":12.890411376953125,"text":"Localized data centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663330078125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37112,"width":46.73773193359375,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.789444,"width":121.92678833007812,"height":12.890716552734375,"text":"Mid-tier data centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663330078125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37112,"width":46.73773193359375,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.789444,"width":121.92678833007812,"height":12.785003662109375,"text":"Enterprise-class data 
centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785003662109375,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785003662109375,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785003662109375,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663330078125,"height":12.785003662109375,"text":"1,677"},{"top":223.74785,"left":428.37112,"width":46.73773193359375,"height":12.785003662109375,"text":"1,693"}],[{"top":236.53285,"left":119.789444,"width":355.31939697265625,"height":13.1025390625,"text":"Best practice scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.63539,"left":119.789444,"width":121.92678833007812,"height":12.726882934570312,"text":"Volume servers in:"},{"top":249.63539,"left":241.71623,"width":46.66426086425781,"height":12.726882934570312,"text":"2007"},{"top":249.63539,"left":288.3805,"width":46.663482666015625,"height":12.726882934570312,"text":"2008"},{"top":249.63539,"left":335.04398,"width":46.663818359375,"height":12.726882934570312,"text":"2009"},{"top":249.63539,"left":381.7078,"width":46.663330078125,"height":12.726882934570312,"text":"2010"},{"top":249.63539,"left":428.37112,"width":46.73773193359375,"height":12.726882934570312,"text":"2011"}],[{"top":262.36227,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Server 
closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.789444,"width":121.92678833007812,"height":12.8914794921875,"text":"Server rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663330078125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37112,"width":46.73773193359375,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.789444,"width":121.92678833007812,"height":12.890167236328125,"text":"Localized data centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663330078125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37112,"width":46.73773193359375,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Mid-tier data 
centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.789444,"width":121.92678833007812,"height":12.785552978515625,"text":"Enterprise-class data centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.785552978515625,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.785552978515625,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.785552978515625,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663330078125,"height":12.785552978515625,"text":"1,334"},{"top":313.92487,"left":428.37112,"width":46.73773193359375,"height":12.785552978515625,"text":"1,371"}],[{"top":326.71042,"left":119.789444,"width":355.31939697265625,"height":13.04400634765625,"text":"State-of-the-art scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75443,"left":119.789444,"width":121.92678833007812,"height":12.784942626953125,"text":"Volume servers 
in:"},{"top":339.75443,"left":241.71623,"width":46.66426086425781,"height":12.784942626953125,"text":"2007"},{"top":339.75443,"left":288.3805,"width":46.663482666015625,"height":12.784942626953125,"text":"2008"},{"top":339.75443,"left":335.04398,"width":46.663818359375,"height":12.784942626953125,"text":"2009"},{"top":339.75443,"left":381.7078,"width":46.663330078125,"height":12.784942626953125,"text":"2010"},{"top":339.75443,"left":428.37112,"width":46.73773193359375,"height":12.784942626953125,"text":"2011"}],[{"top":352.53937,"left":119.789444,"width":121.92678833007812,"height":12.89111328125,"text":"Server closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663330078125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37112,"width":46.73773193359375,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.789444,"width":121.92678833007812,"height":12.8902587890625,"text":"Server rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663330078125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37112,"width":46.73773193359375,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.789444,"width":121.92678833007812,"height":12.891143798828125,"text":"Localized data 
centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663330078125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37112,"width":46.73773193359375,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.789444,"width":121.92678833007812,"height":12.89068603515625,"text":"Mid-tier data centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663330078125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37112,"width":46.73773193359375,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.789444,"width":121.92678833007812,"height":12.968292236328125,"text":"Enterprise-class data 
centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968292236328125,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968292236328125,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968292236328125,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663330078125,"height":12.968292236328125,"text":"1,424"},{"top":404.10257,"left":428.37112,"width":46.73773193359375,"height":12.968292236328125,"text":"1,485"}]]},{"extraction_method":"lattice","page_number":1,"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.5423583984375,"right":464.1174,"bottom":589.802,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.88055419921875,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.1402,"left":130.8136,"width":99.93739318847656,"height":12.784912109375,"text":"Server 
class"},{"top":551.1402,"left":230.75099,"width":46.66316223144531,"height":12.784912109375,"text":"2007"},{"top":551.1402,"left":277.41415,"width":46.663726806640625,"height":12.784912109375,"text":"2008"},{"top":551.1402,"left":324.07788,"width":46.663970947265625,"height":12.784912109375,"text":"2009"},{"top":551.1402,"left":370.74185,"width":46.663177490234375,"height":12.784912109375,"text":"2010"},{"top":551.1402,"left":417.40503,"width":46.712371826171875,"height":12.784912109375,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.98638916015625,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.98638916015625,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.98638916015625,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.98638916015625,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.98638916015625,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.98638916015625,"text":"98,864"}]]}] diff --git a/src/test/resources/technology/tabula/json/twotables.json b/src/test/resources/technology/tabula/json/twotables.json index f6bd4f36..35a4c50f 100644 --- a/src/test/resources/technology/tabula/json/twotables.json +++ 
b/src/test/resources/technology/tabula/json/twotables.json @@ -1 +1 @@ -[{"extraction_method":"lattice","top":111.79087,"left":67.41156,"width":342.44476318359375,"height":174.0704345703125,"right":409.85632,"bottom":285.8613,"data":[[{"top":111.79087,"left":67.41156,"width":85.61141967773438,"height":28.536293029785156,"text":""},{"top":111.79087,"left":153.02298,"width":256.8333435058594,"height":14.267105102539062,"text":"株主資本"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":126.057976,"left":153.02298,"width":51.367401123046875,"height":14.269187927246094,"text":"資本金"},{"top":126.057976,"left":204.39038,"width":51.36669921875,"height":14.269187927246094,"text":"資本剰余金"},{"top":126.057976,"left":255.75708,"width":51.3656005859375,"height":14.269187927246094,"text":"利益剰余金"},{"top":126.057976,"left":307.12268,"width":51.366668701171875,"height":14.269187927246094,"text":"自己株式"},{"top":126.057976,"left":358.48935,"width":51.366973876953125,"height":14.269187927246094,"text":"株主資本合計"}],[{"top":140.32716,"left":67.41156,"width":85.61141967773438,"height":14.265777587890625,"text":"当期首残高"},{"top":140.32716,"left":153.02298,"width":51.367401123046875,"height":14.265777587890625,"text":"5,664"},{"top":140.32716,"left":204.39038,"width":51.36669921875,"height":14.265777587890625,"text":"749"},{"top":140.32716,"left":255.75708,"width":51.3656005859375,"height":14.265777587890625,"text":"12,017"},{"top":140.32716,"left":307.12268,"width":51.366668701171875,"height":14.265777587890625,"text":"△747"},{"top":140.32716,"left":358.48935,"width":51.366973876953125,"height":14.265777587890625,"text":"17,683"}],[{"top":154.59294,"left":67.41156,"width":85.61141967773438,"height":14.26910400390625,"text":"当期変動額"},{"top":154.59294,"left"
:153.02298,"width":51.367401123046875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":204.39038,"width":51.36669921875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":255.75708,"width":51.3656005859375,"height":14.26910400390625,"text":""},{"top":154.59294,"left":307.12268,"width":51.366668701171875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":358.48935,"width":51.366973876953125,"height":14.26910400390625,"text":""}],[{"top":168.86205,"left":67.41156,"width":85.61141967773438,"height":14.268997192382812,"text":"剰余金の配当"},{"top":168.86205,"left":153.02298,"width":51.367401123046875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":204.39038,"width":51.36669921875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":255.75708,"width":51.3656005859375,"height":14.268997192382812,"text":"△525"},{"top":168.86205,"left":307.12268,"width":51.366668701171875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":358.48935,"width":51.366973876953125,"height":14.268997192382812,"text":"△525"}],[{"top":183.13104,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期純利益"},{"top":183.13104,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":"1,269"},{"top":183.13104,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":"1,269"}],[{"top":197.4002,"left":67.41156,"width":85.61141967773438,"height":14.268936157226562,"text":"自己株式の取得"},{"top":197.4002,"left":153.02298,"width":51.367401123046875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":204.39038,"width":51.36669921875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":255.75
708,"width":51.3656005859375,"height":14.268936157226562,"text":""},{"top":197.4002,"left":307.12268,"width":51.366668701171875,"height":14.268936157226562,"text":"△0"},{"top":197.4002,"left":358.48935,"width":51.366973876953125,"height":14.268936157226562,"text":"△0"}],[{"top":211.66914,"left":67.41156,"width":85.61141967773438,"height":22.82952880859375,"text":"持分法の適用範囲\rの変動"},{"top":211.66914,"left":153.02298,"width":51.367401123046875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":204.39038,"width":51.36669921875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":255.75708,"width":51.3656005859375,"height":22.82952880859375,"text":"85"},{"top":211.66914,"left":307.12268,"width":51.366668701171875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":358.48935,"width":51.366973876953125,"height":22.82952880859375,"text":"85"}],[{"top":234.49867,"left":67.41156,"width":85.61141967773438,"height":22.829910278320312,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":234.49867,"left":153.02298,"width":51.367401123046875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":204.39038,"width":51.36669921875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":255.75708,"width":51.3656005859375,"height":22.829910278320312,"text":""},{"top":234.49867,"left":307.12268,"width":51.366668701171875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":358.48935,"width":51.366973876953125,"height":22.829910278320312,"text":""}],[{"top":257.32858,"left":67.41156,"width":85.61141967773438,"height":14.2686767578125,"text":"当期変動額合計"},{"top":257.32858,"left":153.02298,"width":51.367401123046875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":204.39038,"width":51.36669921875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":255.75708,"width":51.3656005859375,"height":14.2686767578125,"text":"829"},{"top":257.32858,"left":307.12268,"width":51.366668701171875,"height":14.2686767578125,"text":"△0"},{"top":257.3
2858,"left":358.48935,"width":51.366973876953125,"height":14.2686767578125,"text":"829"}],[{"top":271.59726,"left":67.41156,"width":85.61141967773438,"height":14.2640380859375,"text":"当期末残高"},{"top":271.59726,"left":153.02298,"width":51.367401123046875,"height":14.2640380859375,"text":"5,664"},{"top":271.59726,"left":204.39038,"width":51.36669921875,"height":14.2640380859375,"text":"749"},{"top":271.59726,"left":255.75708,"width":51.3656005859375,"height":14.2640380859375,"text":"12,846"},{"top":271.59726,"left":307.12268,"width":51.366668701171875,"height":14.2640380859375,"text":"△747"},{"top":271.59726,"left":358.48935,"width":51.366973876953125,"height":14.2640380859375,"text":"18,512"}]]},{"extraction_method":"lattice","top":312.25272,"left":67.41156,"width":445.17803955078125,"height":191.19696044921875,"right":512.5896,"bottom":503.44968,"data":[[{"top":312.25272,"left":67.41156,"width":85.61141967773438,"height":45.658233642578125,"text":""},{"top":312.25272,"left":153.02298,"width":256.8333435058594,"height":14.267730712890625,"text":"その他の包括利益累計額"},{"top":312.25272,"left":409.85632,"width":51.365631103515625,"height":45.658233642578125,"text":"少数株主持分"},{"top":312.25272,"left":461.22195,"width":51.367645263671875,"height":45.658233642578125,"text":"純資産合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":326.52045,"left":153.02298,"width":51.367401123046875,"height":31.3905029296875,"text":"その他有価証券\r評価差額金"},{"top":326.52045,"left":204.39038,"width":51.36669921875,"height":31.3905029296875,"text":"繰延ヘッジ\r損益"},{"top":326.52045,"left":255.75708,"width":51.3656005859375,"height":31.3905029296875,"text":"為替換算\r調整勘定"},{"top":326.52045,"left":307.12268,"width":51.366668701171875,"height":31.3905029296875,"text":"退職給付に係る
\r調整累計額"},{"top":326.52045,"left":358.48935,"width":51.366973876953125,"height":31.3905029296875,"text":"その他の\r包括利益\r累計額合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":357.91095,"left":67.41156,"width":85.61141967773438,"height":14.268768310546875,"text":"当期首残高"},{"top":357.91095,"left":153.02298,"width":51.367401123046875,"height":14.268768310546875,"text":"△669"},{"top":357.91095,"left":204.39038,"width":51.36669921875,"height":14.268768310546875,"text":"61"},{"top":357.91095,"left":255.75708,"width":51.3656005859375,"height":14.268768310546875,"text":"△109"},{"top":357.91095,"left":307.12268,"width":51.366668701171875,"height":14.268768310546875,"text":"―"},{"top":357.91095,"left":358.48935,"width":51.366973876953125,"height":14.268768310546875,"text":"△717"},{"top":357.91095,"left":409.85632,"width":51.365631103515625,"height":14.268768310546875,"text":"246"},{"top":357.91095,"left":461.22195,"width":51.367645263671875,"height":14.268768310546875,"text":"17,212"}],[{"top":372.17972,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期変動額"},{"top":372.17972,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":""},{"top":372.17972,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":""},{"top":372.17972,"left":409.85632,"width":51.365631103515625,"height":14.2691650390625,"text":""},{"top":372.17972,"left":461.22195,"width":51.367645263671875,"height":14.2691650390625,"text":""}],[{"top":386.44888,"left":67.41156,"width":85.61141967773438,"height":14.268646240234375,"text":"剰余金の配当"},{"top":386.44888,"left":153.02298,"width":51.36
7401123046875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":204.39038,"width":51.36669921875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":255.75708,"width":51.3656005859375,"height":14.268646240234375,"text":""},{"top":386.44888,"left":307.12268,"width":51.366668701171875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":358.48935,"width":51.366973876953125,"height":14.268646240234375,"text":""},{"top":386.44888,"left":409.85632,"width":51.365631103515625,"height":14.268646240234375,"text":""},{"top":386.44888,"left":461.22195,"width":51.367645263671875,"height":14.268646240234375,"text":"△525"}],[{"top":400.71753,"left":67.41156,"width":85.61141967773438,"height":14.26812744140625,"text":"当期純利益"},{"top":400.71753,"left":153.02298,"width":51.367401123046875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":204.39038,"width":51.36669921875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":255.75708,"width":51.3656005859375,"height":14.26812744140625,"text":""},{"top":400.71753,"left":307.12268,"width":51.366668701171875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":358.48935,"width":51.366973876953125,"height":14.26812744140625,"text":""},{"top":400.71753,"left":409.85632,"width":51.365631103515625,"height":14.26812744140625,"text":""},{"top":400.71753,"left":461.22195,"width":51.367645263671875,"height":14.26812744140625,"text":"1,269"}],[{"top":414.98566,"left":67.41156,"width":85.61141967773438,"height":14.26678466796875,"text":"自己株式の取得"},{"top":414.98566,"left":153.02298,"width":51.367401123046875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":204.39038,"width":51.36669921875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":255.75708,"width":51.3656005859375,"height":14.26678466796875,"text":""},{"top":414.98566,"left":307.12268,"width":51.366668701171875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":358.48935,"width":51.366973876953
125,"height":14.26678466796875,"text":""},{"top":414.98566,"left":409.85632,"width":51.365631103515625,"height":14.26678466796875,"text":""},{"top":414.98566,"left":461.22195,"width":51.367645263671875,"height":14.26678466796875,"text":"△0"}],[{"top":429.25244,"left":67.41156,"width":85.61141967773438,"height":22.8292236328125,"text":"持分法の適用範囲\rの変動"},{"top":429.25244,"left":153.02298,"width":51.367401123046875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":204.39038,"width":51.36669921875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":255.75708,"width":51.3656005859375,"height":22.8292236328125,"text":""},{"top":429.25244,"left":307.12268,"width":51.366668701171875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":358.48935,"width":51.366973876953125,"height":22.8292236328125,"text":""},{"top":429.25244,"left":409.85632,"width":51.365631103515625,"height":22.8292236328125,"text":""},{"top":429.25244,"left":461.22195,"width":51.367645263671875,"height":22.8292236328125,"text":"85"}],[{"top":452.08167,"left":67.41156,"width":85.61141967773438,"height":22.830596923828125,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":452.08167,"left":153.02298,"width":51.367401123046875,"height":22.830596923828125,"text":"556"},{"top":452.08167,"left":204.39038,"width":51.36669921875,"height":22.830596923828125,"text":"80"},{"top":452.08167,"left":255.75708,"width":51.3656005859375,"height":22.830596923828125,"text":"5"},{"top":452.08167,"left":307.12268,"width":51.366668701171875,"height":22.830596923828125,"text":"―"},{"top":452.08167,"left":358.48935,"width":51.366973876953125,"height":22.830596923828125,"text":"642"},{"top":452.08167,"left":409.85632,"width":51.365631103515625,"height":22.830596923828125,"text":"△0"},{"top":452.08167,"left":461.22195,"width":51.367645263671875,"height":22.830596923828125,"text":"642"}],[{"top":474.91226,"left":67.41156,"width":85.61141967773438,"height":14.26776123046875,"text":"当期変動額合計"},{"top":474.91226,"left":153.0
2298,"width":51.367401123046875,"height":14.26776123046875,"text":"556"},{"top":474.91226,"left":204.39038,"width":51.36669921875,"height":14.26776123046875,"text":"80"},{"top":474.91226,"left":255.75708,"width":51.3656005859375,"height":14.26776123046875,"text":"5"},{"top":474.91226,"left":307.12268,"width":51.366668701171875,"height":14.26776123046875,"text":"―"},{"top":474.91226,"left":358.48935,"width":51.366973876953125,"height":14.26776123046875,"text":"642"},{"top":474.91226,"left":409.85632,"width":51.365631103515625,"height":14.26776123046875,"text":"△0"},{"top":474.91226,"left":461.22195,"width":51.367645263671875,"height":14.26776123046875,"text":"1,471"}],[{"top":489.18002,"left":67.41156,"width":85.61141967773438,"height":14.2696533203125,"text":"当期末残高"},{"top":489.18002,"left":153.02298,"width":51.367401123046875,"height":14.2696533203125,"text":"△113"},{"top":489.18002,"left":204.39038,"width":51.36669921875,"height":14.2696533203125,"text":"142"},{"top":489.18002,"left":255.75708,"width":51.3656005859375,"height":14.2696533203125,"text":"△104"},{"top":489.18002,"left":307.12268,"width":51.366668701171875,"height":14.2696533203125,"text":"―"},{"top":489.18002,"left":358.48935,"width":51.366973876953125,"height":14.2696533203125,"text":"△75"},{"top":489.18002,"left":409.85632,"width":51.365631103515625,"height":14.2696533203125,"text":"245"},{"top":489.18002,"left":461.22195,"width":51.367645263671875,"height":14.2696533203125,"text":"18,683"}]]}] 
+[{"extraction_method":"lattice","page_number":1,"top":111.79087,"left":67.41156,"width":342.44476318359375,"height":174.0704345703125,"right":409.85632,"bottom":285.8613,"data":[[{"top":111.79087,"left":67.41156,"width":85.61141967773438,"height":28.536293029785156,"text":""},{"top":111.79087,"left":153.02298,"width":256.8333435058594,"height":14.267105102539062,"text":"株主資本"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":126.057976,"left":153.02298,"width":51.367401123046875,"height":14.269187927246094,"text":"資本金"},{"top":126.057976,"left":204.39038,"width":51.36669921875,"height":14.269187927246094,"text":"資本剰余金"},{"top":126.057976,"left":255.75708,"width":51.3656005859375,"height":14.269187927246094,"text":"利益剰余金"},{"top":126.057976,"left":307.12268,"width":51.366668701171875,"height":14.269187927246094,"text":"自己株式"},{"top":126.057976,"left":358.48935,"width":51.366973876953125,"height":14.269187927246094,"text":"株主資本合計"}],[{"top":140.32716,"left":67.41156,"width":85.61141967773438,"height":14.265777587890625,"text":"当期首残高"},{"top":140.32716,"left":153.02298,"width":51.367401123046875,"height":14.265777587890625,"text":"5,664"},{"top":140.32716,"left":204.39038,"width":51.36669921875,"height":14.265777587890625,"text":"749"},{"top":140.32716,"left":255.75708,"width":51.3656005859375,"height":14.265777587890625,"text":"12,017"},{"top":140.32716,"left":307.12268,"width":51.366668701171875,"height":14.265777587890625,"text":"△747"},{"top":140.32716,"left":358.48935,"width":51.366973876953125,"height":14.265777587890625,"text":"17,683"}],[{"top":154.59294,"left":67.41156,"width":85.61141967773438,"height":14.26910400390625,"text":"当期変動額"},{"top":154.59294,"left":153.02298,"width":51.367401123046875,"height":14.26910
400390625,"text":""},{"top":154.59294,"left":204.39038,"width":51.36669921875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":255.75708,"width":51.3656005859375,"height":14.26910400390625,"text":""},{"top":154.59294,"left":307.12268,"width":51.366668701171875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":358.48935,"width":51.366973876953125,"height":14.26910400390625,"text":""}],[{"top":168.86205,"left":67.41156,"width":85.61141967773438,"height":14.268997192382812,"text":"剰余金の配当"},{"top":168.86205,"left":153.02298,"width":51.367401123046875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":204.39038,"width":51.36669921875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":255.75708,"width":51.3656005859375,"height":14.268997192382812,"text":"△525"},{"top":168.86205,"left":307.12268,"width":51.366668701171875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":358.48935,"width":51.366973876953125,"height":14.268997192382812,"text":"△525"}],[{"top":183.13104,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期純利益"},{"top":183.13104,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":"1,269"},{"top":183.13104,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":"1,269"}],[{"top":197.4002,"left":67.41156,"width":85.61141967773438,"height":14.268936157226562,"text":"自己株式の取得"},{"top":197.4002,"left":153.02298,"width":51.367401123046875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":204.39038,"width":51.36669921875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":255.75708,"width":51.3656005859375,"height":14.26893615722656
2,"text":""},{"top":197.4002,"left":307.12268,"width":51.366668701171875,"height":14.268936157226562,"text":"△0"},{"top":197.4002,"left":358.48935,"width":51.366973876953125,"height":14.268936157226562,"text":"△0"}],[{"top":211.66914,"left":67.41156,"width":85.61141967773438,"height":22.82952880859375,"text":"持分法の適用範囲\rの変動"},{"top":211.66914,"left":153.02298,"width":51.367401123046875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":204.39038,"width":51.36669921875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":255.75708,"width":51.3656005859375,"height":22.82952880859375,"text":"85"},{"top":211.66914,"left":307.12268,"width":51.366668701171875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":358.48935,"width":51.366973876953125,"height":22.82952880859375,"text":"85"}],[{"top":234.49867,"left":67.41156,"width":85.61141967773438,"height":22.829910278320312,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":234.49867,"left":153.02298,"width":51.367401123046875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":204.39038,"width":51.36669921875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":255.75708,"width":51.3656005859375,"height":22.829910278320312,"text":""},{"top":234.49867,"left":307.12268,"width":51.366668701171875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":358.48935,"width":51.366973876953125,"height":22.829910278320312,"text":""}],[{"top":257.32858,"left":67.41156,"width":85.61141967773438,"height":14.2686767578125,"text":"当期変動額合計"},{"top":257.32858,"left":153.02298,"width":51.367401123046875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":204.39038,"width":51.36669921875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":255.75708,"width":51.3656005859375,"height":14.2686767578125,"text":"829"},{"top":257.32858,"left":307.12268,"width":51.366668701171875,"height":14.2686767578125,"text":"△0"},{"top":257.32858,"left":358.48935,"width":51.366973876953125,"heigh
t":14.2686767578125,"text":"829"}],[{"top":271.59726,"left":67.41156,"width":85.61141967773438,"height":14.2640380859375,"text":"当期末残高"},{"top":271.59726,"left":153.02298,"width":51.367401123046875,"height":14.2640380859375,"text":"5,664"},{"top":271.59726,"left":204.39038,"width":51.36669921875,"height":14.2640380859375,"text":"749"},{"top":271.59726,"left":255.75708,"width":51.3656005859375,"height":14.2640380859375,"text":"12,846"},{"top":271.59726,"left":307.12268,"width":51.366668701171875,"height":14.2640380859375,"text":"△747"},{"top":271.59726,"left":358.48935,"width":51.366973876953125,"height":14.2640380859375,"text":"18,512"}]]},{"extraction_method":"lattice","page_number":1,"top":312.25272,"left":67.41156,"width":445.17803955078125,"height":191.19696044921875,"right":512.5896,"bottom":503.44968,"data":[[{"top":312.25272,"left":67.41156,"width":85.61141967773438,"height":45.658233642578125,"text":""},{"top":312.25272,"left":153.02298,"width":256.8333435058594,"height":14.267730712890625,"text":"その他の包括利益累計額"},{"top":312.25272,"left":409.85632,"width":51.365631103515625,"height":45.658233642578125,"text":"少数株主持分"},{"top":312.25272,"left":461.22195,"width":51.367645263671875,"height":45.658233642578125,"text":"純資産合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":326.52045,"left":153.02298,"width":51.367401123046875,"height":31.3905029296875,"text":"その他有価証券\r評価差額金"},{"top":326.52045,"left":204.39038,"width":51.36669921875,"height":31.3905029296875,"text":"繰延ヘッジ\r損益"},{"top":326.52045,"left":255.75708,"width":51.3656005859375,"height":31.3905029296875,"text":"為替換算\r調整勘定"},{"top":326.52045,"left":307.12268,"width":51.366668701171875,"height":31.3905029296875,"text":"退職給付に係る\r調整累計額"},{"top":326.52045,"left":358.4
8935,"width":51.366973876953125,"height":31.3905029296875,"text":"その他の\r包括利益\r累計額合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":357.91095,"left":67.41156,"width":85.61141967773438,"height":14.268768310546875,"text":"当期首残高"},{"top":357.91095,"left":153.02298,"width":51.367401123046875,"height":14.268768310546875,"text":"△669"},{"top":357.91095,"left":204.39038,"width":51.36669921875,"height":14.268768310546875,"text":"61"},{"top":357.91095,"left":255.75708,"width":51.3656005859375,"height":14.268768310546875,"text":"△109"},{"top":357.91095,"left":307.12268,"width":51.366668701171875,"height":14.268768310546875,"text":"―"},{"top":357.91095,"left":358.48935,"width":51.366973876953125,"height":14.268768310546875,"text":"△717"},{"top":357.91095,"left":409.85632,"width":51.365631103515625,"height":14.268768310546875,"text":"246"},{"top":357.91095,"left":461.22195,"width":51.367645263671875,"height":14.268768310546875,"text":"17,212"}],[{"top":372.17972,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期変動額"},{"top":372.17972,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":""},{"top":372.17972,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":""},{"top":372.17972,"left":409.85632,"width":51.365631103515625,"height":14.2691650390625,"text":""},{"top":372.17972,"left":461.22195,"width":51.367645263671875,"height":14.2691650390625,"text":""}],[{"top":386.44888,"left":67.41156,"width":85.61141967773438,"height":14.268646240234375,"text":"剰余金の配当"},{"top":386.44888,"left":153.02298,"width":51.367401123046875,"height":14.2686462402343
75,"text":""},{"top":386.44888,"left":204.39038,"width":51.36669921875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":255.75708,"width":51.3656005859375,"height":14.268646240234375,"text":""},{"top":386.44888,"left":307.12268,"width":51.366668701171875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":358.48935,"width":51.366973876953125,"height":14.268646240234375,"text":""},{"top":386.44888,"left":409.85632,"width":51.365631103515625,"height":14.268646240234375,"text":""},{"top":386.44888,"left":461.22195,"width":51.367645263671875,"height":14.268646240234375,"text":"△525"}],[{"top":400.71753,"left":67.41156,"width":85.61141967773438,"height":14.26812744140625,"text":"当期純利益"},{"top":400.71753,"left":153.02298,"width":51.367401123046875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":204.39038,"width":51.36669921875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":255.75708,"width":51.3656005859375,"height":14.26812744140625,"text":""},{"top":400.71753,"left":307.12268,"width":51.366668701171875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":358.48935,"width":51.366973876953125,"height":14.26812744140625,"text":""},{"top":400.71753,"left":409.85632,"width":51.365631103515625,"height":14.26812744140625,"text":""},{"top":400.71753,"left":461.22195,"width":51.367645263671875,"height":14.26812744140625,"text":"1,269"}],[{"top":414.98566,"left":67.41156,"width":85.61141967773438,"height":14.26678466796875,"text":"自己株式の取得"},{"top":414.98566,"left":153.02298,"width":51.367401123046875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":204.39038,"width":51.36669921875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":255.75708,"width":51.3656005859375,"height":14.26678466796875,"text":""},{"top":414.98566,"left":307.12268,"width":51.366668701171875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":358.48935,"width":51.366973876953125,"height":14.26678466796875,"text":"
"},{"top":414.98566,"left":409.85632,"width":51.365631103515625,"height":14.26678466796875,"text":""},{"top":414.98566,"left":461.22195,"width":51.367645263671875,"height":14.26678466796875,"text":"△0"}],[{"top":429.25244,"left":67.41156,"width":85.61141967773438,"height":22.8292236328125,"text":"持分法の適用範囲\rの変動"},{"top":429.25244,"left":153.02298,"width":51.367401123046875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":204.39038,"width":51.36669921875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":255.75708,"width":51.3656005859375,"height":22.8292236328125,"text":""},{"top":429.25244,"left":307.12268,"width":51.366668701171875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":358.48935,"width":51.366973876953125,"height":22.8292236328125,"text":""},{"top":429.25244,"left":409.85632,"width":51.365631103515625,"height":22.8292236328125,"text":""},{"top":429.25244,"left":461.22195,"width":51.367645263671875,"height":22.8292236328125,"text":"85"}],[{"top":452.08167,"left":67.41156,"width":85.61141967773438,"height":22.830596923828125,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":452.08167,"left":153.02298,"width":51.367401123046875,"height":22.830596923828125,"text":"556"},{"top":452.08167,"left":204.39038,"width":51.36669921875,"height":22.830596923828125,"text":"80"},{"top":452.08167,"left":255.75708,"width":51.3656005859375,"height":22.830596923828125,"text":"5"},{"top":452.08167,"left":307.12268,"width":51.366668701171875,"height":22.830596923828125,"text":"―"},{"top":452.08167,"left":358.48935,"width":51.366973876953125,"height":22.830596923828125,"text":"642"},{"top":452.08167,"left":409.85632,"width":51.365631103515625,"height":22.830596923828125,"text":"△0"},{"top":452.08167,"left":461.22195,"width":51.367645263671875,"height":22.830596923828125,"text":"642"}],[{"top":474.91226,"left":67.41156,"width":85.61141967773438,"height":14.26776123046875,"text":"当期変動額合計"},{"top":474.91226,"left":153.02298,"width":51.367401123046875,"height
":14.26776123046875,"text":"556"},{"top":474.91226,"left":204.39038,"width":51.36669921875,"height":14.26776123046875,"text":"80"},{"top":474.91226,"left":255.75708,"width":51.3656005859375,"height":14.26776123046875,"text":"5"},{"top":474.91226,"left":307.12268,"width":51.366668701171875,"height":14.26776123046875,"text":"―"},{"top":474.91226,"left":358.48935,"width":51.366973876953125,"height":14.26776123046875,"text":"642"},{"top":474.91226,"left":409.85632,"width":51.365631103515625,"height":14.26776123046875,"text":"△0"},{"top":474.91226,"left":461.22195,"width":51.367645263671875,"height":14.26776123046875,"text":"1,471"}],[{"top":489.18002,"left":67.41156,"width":85.61141967773438,"height":14.2696533203125,"text":"当期末残高"},{"top":489.18002,"left":153.02298,"width":51.367401123046875,"height":14.2696533203125,"text":"△113"},{"top":489.18002,"left":204.39038,"width":51.36669921875,"height":14.2696533203125,"text":"142"},{"top":489.18002,"left":255.75708,"width":51.3656005859375,"height":14.2696533203125,"text":"△104"},{"top":489.18002,"left":307.12268,"width":51.366668701171875,"height":14.2696533203125,"text":"―"},{"top":489.18002,"left":358.48935,"width":51.366973876953125,"height":14.2696533203125,"text":"△75"},{"top":489.18002,"left":409.85632,"width":51.365631103515625,"height":14.2696533203125,"text":"245"},{"top":489.18002,"left":461.22195,"width":51.367645263671875,"height":14.2696533203125,"text":"18,683"}]]}] From c65783d4f089b6f94f06579f8f36d51c3d4195c5 Mon Sep 17 00:00:00 2001 From: Tilman Hausherr Date: Fri, 22 Apr 2022 11:11:01 +0200 Subject: [PATCH 168/200] update pdfbox, bc, big2 and plugins --- pom.xml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pom.xml b/pom.xml index e2947ada..ec8e4f88 100644 --- a/pom.xml +++ b/pom.xml @@ -73,7 +73,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.2.0 + 3.3.1 true @@ -111,7 +111,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.2.0 + 3.3.1 8 @@ -195,7 +195,7 @@ 
org.apache.maven.plugins maven-javadoc-plugin - 3.2.0 + 3.3.1 8 @@ -262,19 +262,19 @@ org.apache.pdfbox pdfbox - 2.0.25 + 2.0.26 org.bouncycastle bcprov-jdk15on - 1.69 + 1.70 org.bouncycastle bcmail-jdk15on - 1.69 + 1.70 @@ -317,7 +317,7 @@ org.apache.pdfbox jbig2-imageio - 3.0.3 + 3.0.4 From ab93da966b42b3384ba69556b491e82a0580bcda Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 May 2022 04:29:52 +0000 Subject: [PATCH 169/200] Bump gson from 2.8.7 to 2.9.0 Bumps [gson](https://github.com/google/gson) from 2.8.7 to 2.9.0. - [Release notes](https://github.com/google/gson/releases) - [Changelog](https://github.com/google/gson/blob/master/CHANGELOG.md) - [Commits](https://github.com/google/gson/compare/gson-parent-2.8.7...gson-parent-2.9.0) --- updated-dependencies: - dependency-name: com.google.code.gson:gson dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ec8e4f88..fb1f7e08 100644 --- a/pom.xml +++ b/pom.xml @@ -299,7 +299,7 @@ com.google.code.gson gson - 2.8.7 + 2.9.0 From 50ff2df2e62644260d519e2d875a4db7d87d6746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Tue, 9 Aug 2022 23:36:48 -0300 Subject: [PATCH 170/200] Run tests on pull request --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cf36b0ce..b8aa9c14 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,6 +1,6 @@ name: Java CI -on: [push] +on: [push, pull_request] jobs: build: From 84aef7f0c84f4ec0fdb5adee789c93c4a216073c Mon Sep 17 00:00:00 2001 From: Martin Skopp Date: Wed, 7 Dec 2022 15:11:10 +0100 Subject: [PATCH 171/200] Add a simple API usage example applying SpreadsheetExtractionAlgorithm --- README.md | 40 
+++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8d3c8210..c3a5f92f 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ tabula-java [![Build Status](https://travis-ci.org/tabulapdf/tabula-java.svg?bra Download a version of the tabula-java's jar, with all dependencies included, that works on Mac, Windows and Linux from our [releases page](../../releases). -## Usage Examples +## Commandline Usage Examples `tabula-java` provides a command line application: @@ -81,6 +81,44 @@ JVM start-up time is a lot of the cost of the `tabula` command, so if you're try - writing your own program in any JVM language (Java, JRuby, Scala) that imports tabula-java. - waiting for us to implement an API/server-style system (it's on the [roadmap](https://github.com/tabulapdf/tabula-api)) +## API Usage Examples + +A simple Java code example which extracts all rows and cells from all tables of all pages of a PDF document: + + InputStream in = this.getClass().getResourceAsStream("my.pdf"); + try (PDDocument document = PDDocument.load(in)) { + SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); + PageIterator pi = new ObjectExtractor(document).extract(); + while (pi.hasNext()) { + // iterate over the pages of the document + Page page = pi.next(); + List
table = sea.extract(page); + // iterate over the tables of the page + for(Table tables: table) { + List> rows = tables.getRows(); + // iterate over the rows of the table + for (List cells : rows) { + // print all column-cells of the row plus linefeed + for (RectangularTextContainer content : cells) { + // Note: Cell.getText() uses \r to concat text chunks + String text = content.getText().replace("\r", " "); + System.out.print(text + "|"); + } + System.out.println(); + } + } + } + } + +For more detail information check the Javadoc. +The Javadoc API documentation can be generated (see also '_Building from Source_' section) via + +``` +mvn javadoc:javadoc +``` + +which generates the HTML files to directory ```target/site/apidocs/``` + ## Building from Source Clone this repo and run: From 3c2af18f7c3daedafb6a4d33ab5f818cdc468d09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= Date: Wed, 7 Dec 2022 12:04:34 -0300 Subject: [PATCH 172/200] Fix Markdown formatting for code example --- README.md | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index c3a5f92f..db7b0023 100644 --- a/README.md +++ b/README.md @@ -85,30 +85,33 @@ JVM start-up time is a lot of the cost of the `tabula` command, so if you're try A simple Java code example which extracts all rows and cells from all tables of all pages of a PDF document: - InputStream in = this.getClass().getResourceAsStream("my.pdf"); - try (PDDocument document = PDDocument.load(in)) { - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - PageIterator pi = new ObjectExtractor(document).extract(); - while (pi.hasNext()) { - // iterate over the pages of the document - Page page = pi.next(); - List
table = sea.extract(page); - // iterate over the tables of the page - for(Table tables: table) { - List> rows = tables.getRows(); - // iterate over the rows of the table - for (List cells : rows) { - // print all column-cells of the row plus linefeed - for (RectangularTextContainer content : cells) { - // Note: Cell.getText() uses \r to concat text chunks - String text = content.getText().replace("\r", " "); - System.out.print(text + "|"); - } - System.out.println(); - } +```java +InputStream in = this.getClass().getResourceAsStream("my.pdf"); +try (PDDocument document = PDDocument.load(in)) { + SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); + PageIterator pi = new ObjectExtractor(document).extract(); + while (pi.hasNext()) { + // iterate over the pages of the document + Page page = pi.next(); + List
table = sea.extract(page); + // iterate over the tables of the page + for(Table tables: table) { + List> rows = tables.getRows(); + // iterate over the rows of the table + for (List cells : rows) { + // print all column-cells of the row plus linefeed + for (RectangularTextContainer content : cells) { + // Note: Cell.getText() uses \r to concat text chunks + String text = content.getText().replace("\r", " "); + System.out.print(text + "|"); } + System.out.println(); } } + } +} +``` + For more detail information check the Javadoc. The Javadoc API documentation can be generated (see also '_Building from Source_' section) via From b0fde49e6aa06593d16c8aa0b8da0e3172db1ec2 Mon Sep 17 00:00:00 2001 From: Aki Ariga Date: Mon, 20 Feb 2023 18:17:19 -0800 Subject: [PATCH 173/200] Enforce checkout with LF Windows CI fails when parsing CSV with line breaks within a cell. This is due to the difference of line endings between CRLF vs LF, and test CSV parser implementation. To mitigate this issue, tweak Windows CI to enforce git checkout with LF. 
--- .github/workflows/tests-windows.yml | 11 ++++++++--- .github/workflows/tests.yml | 4 ++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml index 4ff7f542..5cc1031a 100644 --- a/.github/workflows/tests-windows.yml +++ b/.github/workflows/tests-windows.yml @@ -1,4 +1,4 @@ -name: Java CI +name: Java CI (Windows) on: [push] @@ -7,9 +7,14 @@ jobs: runs-on: windows-latest steps: - - uses: actions/checkout@v2 + # https://github.com/actions/checkout/issues/135#issuecomment-602171132 + - name: Set git to use LF + run: | + git config --global core.autocrlf false + git config --global core.eol lf + - uses: actions/checkout@v3 - name: Set up JDK 11 - uses: actions/setup-java@v2 + uses: actions/setup-java@v3 with: java-version: '11' distribution: 'adopt' diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b8aa9c14..da2d019b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,9 +7,9 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up JDK 11 - uses: actions/setup-java@v2 + uses: actions/setup-java@v3 with: java-version: '11' distribution: 'adopt' From 8bfa3ad23af34f757f72fe46584a34abfc022ed3 Mon Sep 17 00:00:00 2001 From: Tilman Hausherr Date: Fri, 14 Apr 2023 17:46:46 +0200 Subject: [PATCH 174/200] update pdfbox to 2.0.28 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index fb1f7e08..27a03e73 100644 --- a/pom.xml +++ b/pom.xml @@ -262,7 +262,7 @@ org.apache.pdfbox pdfbox - 2.0.26 + 2.0.28 From bc60be27d663f40e132a21407ec81790a8aee361 Mon Sep 17 00:00:00 2001 From: young Date: Wed, 6 Mar 2024 14:53:55 +0800 Subject: [PATCH 175/200] update pdfbox to 3.0.1 --- pom.xml | 2 +- .../technology/tabula/CommandLineApp.java | 3 +- .../java/technology/tabula/debug/Debug.java | 5 +- .../detectors/NurminenDetectionAlgorithm.java | 6 +- 
src/test/java/technology/tabula/TestCell.java | 3 +- src/test/java/technology/tabula/TestLine.java | 11 +- .../tabula/TestObjectExtractor.java | 21 +- .../tabula/TestProjectionProfile.java | 8 +- .../technology/tabula/TestTableDetection.java | 3 +- .../technology/tabula/TestTextElement.java | 393 +++++++++--------- .../java/technology/tabula/TestUtils.java | 3 +- .../technology/tabula/UtilsForTesting.java | 7 +- 12 files changed, 238 insertions(+), 227 deletions(-) diff --git a/pom.xml b/pom.xml index 27a03e73..6c71b426 100644 --- a/pom.xml +++ b/pom.xml @@ -262,7 +262,7 @@ org.apache.pdfbox pdfbox - 2.0.28 + 3.0.1 diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 3a6773a9..1b422303 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -15,6 +15,7 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.DefaultParser; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import technology.tabula.detectors.DetectionAlgorithm; @@ -158,7 +159,7 @@ public void extractFileInto(File pdfFile, File outputFile) throws ParseException private void extractFile(File pdfFile, Appendable outFile) throws ParseException { PDDocument pdfDocument = null; try { - pdfDocument = this.password == null ? PDDocument.load(pdfFile) : PDDocument.load(pdfFile, this.password); + pdfDocument = this.password == null ? Loader.loadPDF(pdfFile) : Loader.loadPDF(pdfFile,password); PageIterator pageIterator = getPageIterator(pdfDocument); List
tables = new ArrayList<>(); diff --git a/src/main/java/technology/tabula/debug/Debug.java b/src/main/java/technology/tabula/debug/Debug.java index 91609045..d6d257ce 100644 --- a/src/main/java/technology/tabula/debug/Debug.java +++ b/src/main/java/technology/tabula/debug/Debug.java @@ -16,6 +16,7 @@ import java.util.List; import org.apache.commons.cli.*; +import org.apache.pdfbox.Loader; import technology.tabula.Cell; import technology.tabula.CommandLineApp; import technology.tabula.Line; @@ -215,7 +216,7 @@ public static void renderPage(String pdfPath, String outPath, int pageNumber, Re boolean drawColumns, boolean drawCharacters, boolean drawArea, boolean drawCells, boolean drawUnprocessedRulings, boolean drawProjectionProfile, boolean drawClippingPaths, boolean drawDetectedTables) throws IOException { - PDDocument document = PDDocument.load(new File(pdfPath)); + PDDocument document = Loader.loadPDF(new File(pdfPath)); ObjectExtractor oe = new ObjectExtractor(document); @@ -349,7 +350,7 @@ public static void main(String[] args) throws IOException { if (pages == null) { // user specified all pages - PDDocument document = PDDocument.load(pdfFile); + PDDocument document = Loader.loadPDF(pdfFile); int numPages = document.getNumberOfPages(); pages = new ArrayList<>(numPages); diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java index fb43622a..9a377854 100644 --- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java @@ -805,12 +805,12 @@ private PDDocument removeText(PDPage page) throws IOException { PDFStreamParser parser = new PDFStreamParser(page); parser.parse(); - List tokens = parser.getTokens(); List newTokens = new ArrayList<>(); - for (Object token : tokens) { + while (page.hasContents()) { + Object token = parser.parseNextToken(); if (token 
instanceof Operator) { Operator op = (Operator) token; - if (op.getName().equals("TJ") || op.getName().equals("Tj")) { + if ("TJ".equals(op.getName()) || "Tj".equals(op.getName())) { //remove the one argument to this operator newTokens.remove(newTokens.size() - 1); continue; diff --git a/src/test/java/technology/tabula/TestCell.java b/src/test/java/technology/tabula/TestCell.java index de1b8cb8..2795565c 100644 --- a/src/test/java/technology/tabula/TestCell.java +++ b/src/test/java/technology/tabula/TestCell.java @@ -6,6 +6,7 @@ import java.util.ArrayList; import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.junit.Test; public class TestCell { @@ -31,7 +32,7 @@ public void testGetTextElements() { Cell cell = new Cell(0, 0, 0, 0); assertTrue(cell.getTextElements().isEmpty()); - TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); TextChunk tChunk = new TextChunk(tElement); List tList = new ArrayList<>(); tList.add(tChunk); diff --git a/src/test/java/technology/tabula/TestLine.java b/src/test/java/technology/tabula/TestLine.java index 90df0e31..f7a6a88d 100644 --- a/src/test/java/technology/tabula/TestLine.java +++ b/src/test/java/technology/tabula/TestLine.java @@ -6,6 +6,7 @@ import java.util.List; import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.junit.Test; public class TestLine { @@ -14,7 +15,7 @@ public class TestLine { public void testSetTextElements() { Line line = new Line(); - TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); TextChunk tChunk = new TextChunk(tElement); List tList = new 
ArrayList<>(); tList.add(tChunk); @@ -28,7 +29,7 @@ public void testSetTextElements() { public void testAddTextChunkIntTextChunk() { Line line = new Line(); - TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); TextChunk tChunk = new TextChunk(tElement); line.addTextChunk(3, tChunk); @@ -39,7 +40,7 @@ public void testAddTextChunkIntTextChunk() { public void testLessThanAddTextChunkIntTextChunk() { Line line = new Line(); - TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); TextChunk tChunk = new TextChunk(tElement); line.addTextChunk(0, tChunk); line.addTextChunk(0, tChunk); @@ -51,7 +52,7 @@ public void testLessThanAddTextChunkIntTextChunk() { public void testErrorAddTextChunkIntTextChunk() { Line line = new Line(); - TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextElement tElement = new TextElement(0, 0, 0, 0,new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); TextChunk tChunk = new TextChunk(tElement); line.addTextChunk(-1, tChunk); } @@ -60,7 +61,7 @@ public void testErrorAddTextChunkIntTextChunk() { public void testToString() { Line line = new Line(); - TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); TextChunk tChunk = new TextChunk(tElement); line.addTextChunk(0, tChunk); line.addTextChunk(0, tChunk); diff --git a/src/test/java/technology/tabula/TestObjectExtractor.java b/src/test/java/technology/tabula/TestObjectExtractor.java index 9db7ad18..69864c61 100644 --- 
a/src/test/java/technology/tabula/TestObjectExtractor.java +++ b/src/test/java/technology/tabula/TestObjectExtractor.java @@ -7,6 +7,7 @@ import java.util.ArrayList; import java.util.List; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.Test; @@ -21,7 +22,7 @@ public void testWrongPasswordRaisesException() throws IOException { @Test(expected = IOException.class) public void testEmptyOnEncryptedFileRaisesException() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf")); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { oe.extract().next(); } @@ -29,7 +30,7 @@ public void testEmptyOnEncryptedFileRaisesException() throws IOException { @Test public void testCanReadPDFWithOwnerEncryption() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { PageIterator pi = oe.extract(); int i = 0; @@ -44,7 +45,7 @@ public void testCanReadPDFWithOwnerEncryption() throws IOException { @Test public void testGoodPassword() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword"); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword"); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { List pages = new ArrayList<>(); PageIterator pi = oe.extract(); @@ -58,7 +59,7 @@ public void testGoodPassword() throws IOException { @Test public void testTextExtractionDoesNotRaise() throws IOException { - PDDocument pdf_document = 
PDDocument.load(new File("src/test/resources/technology/tabula/rotated_page.pdf")); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/rotated_page.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { PageIterator pi = oe.extract(); @@ -70,7 +71,7 @@ public void testTextExtractionDoesNotRaise() throws IOException { @Test public void testShouldDetectRulings() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf")); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { PageIterator pi = oe.extract(); @@ -85,7 +86,7 @@ public void testShouldDetectRulings() throws IOException { @Test public void testDontThrowNPEInShfill() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/labor.pdf")); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/labor.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { PageIterator pi = oe.extract(); @@ -101,7 +102,7 @@ public void testDontThrowNPEInShfill() throws IOException { @Test public void testExtractOnePage() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); assertEquals(2, pdf_document.getNumberOfPages()); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { @@ -114,7 +115,7 @@ public void testExtractOnePage() throws IOException { @Test(expected = IndexOutOfBoundsException.class) public void testExtractWrongPageNumber() throws IOException { - PDDocument pdf_document = PDDocument.load(new 
File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); assertEquals(2, pdf_document.getNumberOfPages()); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { @@ -124,7 +125,7 @@ public void testExtractWrongPageNumber() throws IOException { @Test public void testTextElementsContainedInPage() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf")); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { Page page = oe.extractPage(1); @@ -137,7 +138,7 @@ public void testTextElementsContainedInPage() throws IOException { } @Test public void testDoNotNPEInPointComparator() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/npe_issue_206.pdf")); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/npe_issue_206.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { Page p = oe.extractPage(1); diff --git a/src/test/java/technology/tabula/TestProjectionProfile.java b/src/test/java/technology/tabula/TestProjectionProfile.java index e7af882f..e6d93b39 100644 --- a/src/test/java/technology/tabula/TestProjectionProfile.java +++ b/src/test/java/technology/tabula/TestProjectionProfile.java @@ -8,6 +8,7 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.junit.Before; import org.junit.Test; @@ -20,9 +21,10 @@ public class TestProjectionProfile { public void setUpProjectionProfile() { PDPage pdPage = new PDPage(); PDDocument pdDocument = new PDDocument(); - - TextElement textElement = new 
TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f); - TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f); + + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + TextElement textElement = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f); + TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f); List textList = new ArrayList<>(); textList.add(textElement); textList.add(textElement2); diff --git a/src/test/java/technology/tabula/TestTableDetection.java b/src/test/java/technology/tabula/TestTableDetection.java index 6e58f6a4..80d21350 100644 --- a/src/test/java/technology/tabula/TestTableDetection.java +++ b/src/test/java/technology/tabula/TestTableDetection.java @@ -11,6 +11,7 @@ import static org.junit.Assert.*; import com.google.gson.Gson; +import org.apache.pdfbox.Loader; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -162,7 +163,7 @@ public void testDetectionOfTables() throws Exception { NodeList tables = regionDocument.getElementsByTagName("table"); // tabula extractors - PDDocument pdfDocument = PDDocument.load(this.pdf); + PDDocument pdfDocument = Loader.loadPDF(this.pdf); ObjectExtractor extractor = new ObjectExtractor(pdfDocument); // parse expected tables from the ground truth dataset diff --git a/src/test/java/technology/tabula/TestTextElement.java b/src/test/java/technology/tabula/TestTextElement.java index feaaa5e6..3db1ca31 100644 --- a/src/test/java/technology/tabula/TestTextElement.java +++ b/src/test/java/technology/tabula/TestTextElement.java @@ -3,205 +3,208 @@ import java.util.ArrayList; import java.util.List; +import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.junit.Assert; import org.junit.Test; public class TestTextElement { - - - @Test - public void createTextElement() 
{ - - TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f); - - Assert.assertNotNull(textElement); - Assert.assertEquals("A", textElement.getText()); - Assert.assertEquals(1f, textElement.getFontSize(), 0); - Assert.assertEquals(15f, textElement.getLeft(), 0); - Assert.assertEquals(5f, textElement.getTop(), 0); - Assert.assertEquals(10f, textElement.getWidth(), 0); - Assert.assertEquals(20f, textElement.getHeight(), 0); - Assert.assertEquals(PDType1Font.HELVETICA, textElement.getFont()); - Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0); - Assert.assertEquals(0f, textElement.getDirection(), 0); - - - } - - @Test - public void createTextElementWithDirection() { - - TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f); - - Assert.assertNotNull(textElement); - Assert.assertEquals("A", textElement.getText()); - Assert.assertEquals(1f, textElement.getFontSize(), 0); - Assert.assertEquals(15f, textElement.getLeft(), 0); - Assert.assertEquals(5f, textElement.getTop(), 0); - Assert.assertEquals(10f, textElement.getWidth(), 0); - Assert.assertEquals(20f, textElement.getHeight(), 0); - Assert.assertEquals(PDType1Font.HELVETICA, textElement.getFont()); - Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0); - Assert.assertEquals(6f, textElement.getDirection(), 0); - - - } - - @Test - public void mergeFourElementsIntoFourWords() { - - List elements = new ArrayList<>(); - elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); - elements.add(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); - elements.add(new TextElement(60f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList<>(); - expectedWords.add(new TextChunk(new 
TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f))); - expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f))); - expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f))); - expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f))); - - Assert.assertEquals(expectedWords, words); - - } - - @Test - public void mergeFourElementsIntoOneWord() { - - List elements = new ArrayList<>(); - elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); - elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); - elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList<>(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); - textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); - textChunk.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - expectedWords.add(textChunk); - - Assert.assertEquals(expectedWords, words); - - } - - @Test - public void mergeElementsShouldBeIdempotent() { - /* - * a bug in TextElement.merge_words would delete the first TextElement in the array - * it was called with. 
Discussion here: https://github.com/tabulapdf/tabula-java/issues/78 - */ - - List elements = new ArrayList<>(); - elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); - elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); - elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - List words2 = TextElement.mergeWords(elements); - Assert.assertEquals(words, words2); - } - - @Test - public void mergeElementsWithSkippingRules() { - - List elements = new ArrayList<>(); - elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 17f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); - elements.add(new TextElement(0.001f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, " ", 1f, 6f)); - elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); - elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.TIMES_ROMAN, 10f, "D", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList<>(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); - textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); - textChunk.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.TIMES_ROMAN, 10f, "D", 1f, 6f)); - expectedWords.add(textChunk); - - Assert.assertEquals(expectedWords, words); - - } - - @Test - public void mergeTenElementsIntoTwoWords() { - - List elements = new ArrayList<>(); - elements.add(new TextElement(0f, 0f, 10f, 
20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); - elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); - elements.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 60f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f)); - elements.add(new TextElement(0f, 70f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f)); - elements.add(new TextElement(0f, 80f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f)); - elements.add(new TextElement(0f, 90f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - elements.add(new TextElement(0f, 100f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList<>(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); - textChunk.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); - textChunk.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, PDType1Font.HELVETICA, 1f, " ", 1f)); //Check why width=10.5? 
- expectedWords.add(textChunk); - TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f)); - textChunk2.add(new TextElement(0f, 70f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f)); - textChunk2.add(new TextElement(0f, 80f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f)); - textChunk2.add(new TextElement(0f, 90f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - textChunk2.add(new TextElement(0f, 100f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - expectedWords.add(textChunk2); - - Assert.assertEquals(2, words.size()); - Assert.assertEquals(expectedWords, words); - - } - - @Test - public void mergeTenElementsIntoTwoLines() { - - List elements = new ArrayList<>(); - elements.add(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); - elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); - elements.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(20f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f)); - elements.add(new TextElement(20f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f)); - elements.add(new TextElement(20f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f)); - elements.add(new TextElement(20f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - elements.add(new TextElement(20f, 40f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList<>(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); - textChunk.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); - textChunk.add(new 
TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - expectedWords.add(textChunk); - TextChunk textChunk2 = new TextChunk(new TextElement(20f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f)); - textChunk2.add(new TextElement(20f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f)); - textChunk2.add(new TextElement(20f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f)); - textChunk2.add(new TextElement(20f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - textChunk2.add(new TextElement(20f, 40f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - expectedWords.add(textChunk2); - - Assert.assertEquals(2, words.size()); - Assert.assertEquals(expectedWords, words); - - } - - + + + @Test + public void createTextElement() { + + TextElement textElement = new TextElement(5f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f); + + Assert.assertNotNull(textElement); + Assert.assertEquals("A", textElement.getText()); + Assert.assertEquals(1f, textElement.getFontSize(), 0); + Assert.assertEquals(15f, textElement.getLeft(), 0); + Assert.assertEquals(5f, textElement.getTop(), 0); + Assert.assertEquals(10f, textElement.getWidth(), 0); + Assert.assertEquals(20f, textElement.getHeight(), 0); + Assert.assertEquals(new PDType1Font(Standard14Fonts.FontName.HELVETICA), textElement.getFont()); + Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0); + Assert.assertEquals(0f, textElement.getDirection(), 0); + + + } + + @Test + public void createTextElementWithDirection() { + + TextElement textElement = new TextElement(5f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f); + + Assert.assertNotNull(textElement); + Assert.assertEquals("A", textElement.getText()); + Assert.assertEquals(1f, textElement.getFontSize(), 0); + Assert.assertEquals(15f, textElement.getLeft(), 0); + Assert.assertEquals(5f, textElement.getTop(), 0); + Assert.assertEquals(10f, textElement.getWidth(), 
0); + Assert.assertEquals(20f, textElement.getHeight(), 0); + Assert.assertEquals(new PDType1Font(Standard14Fonts.FontName.HELVETICA), textElement.getFont()); + Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0); + Assert.assertEquals(6f, textElement.getDirection(), 0); + + + } + + @Test + public void mergeFourElementsIntoFourWords() { + + List elements = new ArrayList<>(); + elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); + elements.add(new TextElement(20f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)); + elements.add(new TextElement(40f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)); + elements.add(new TextElement(60f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)); + + List words = TextElement.mergeWords(elements); + + List expectedWords = new ArrayList<>(); + expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f))); + expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f))); + expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f))); + expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f))); + + Assert.assertEquals(expectedWords, words); + + } + + @Test + public void mergeFourElementsIntoOneWord() { + + List elements = new ArrayList<>(); + elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)); + elements.add(new TextElement(0f, 35f, 10f, 20f, new 
PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)); + elements.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)); + + List words = TextElement.mergeWords(elements); + + List expectedWords = new ArrayList<>(); + TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); + textChunk.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)); + textChunk.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)); + textChunk.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)); + expectedWords.add(textChunk); + + Assert.assertEquals(expectedWords, words); + + } + + @Test + public void mergeElementsShouldBeIdempotent() { + /* + * a bug in TextElement.merge_words would delete the first TextElement in the array + * it was called with. 
Discussion here: https://github.com/tabulapdf/tabula-java/issues/78 + */ + + List elements = new ArrayList<>(); + elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)); + elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)); + elements.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)); + + List words = TextElement.mergeWords(elements); + List words2 = TextElement.mergeWords(elements); + Assert.assertEquals(words, words2); + } + + @Test + public void mergeElementsWithSkippingRules() { + + List elements = new ArrayList<>(); + elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 17f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)); + elements.add(new TextElement(0.001f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, " ", 1f, 6f)); + elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)); + PDFont TIMES_ROMAN = new PDType1Font(Standard14Fonts.FontName.TIMES_ROMAN); + elements.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f)); + + List words = TextElement.mergeWords(elements); + + List expectedWords = new ArrayList<>(); + TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); + textChunk.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)); + textChunk.add(new 
TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)); + textChunk.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f)); + expectedWords.add(textChunk); + + Assert.assertEquals(expectedWords, words); + + } + + @Test + public void mergeTenElementsIntoTwoWords() { + + List elements = new ArrayList<>(); + elements.add(new TextElement(0f, 0f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "H", 1f, 6f)); + elements.add(new TextElement(0f, 10f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f)); + elements.add(new TextElement(0f, 20f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "L", 1f, 6f)); + elements.add(new TextElement(0f, 30f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 60f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "M", 1f, 6f)); + elements.add(new TextElement(0f, 70f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "U", 1f, 6f)); + elements.add(new TextElement(0f, 80f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "N", 1f, 6f)); + elements.add(new TextElement(0f, 90f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)); + elements.add(new TextElement(0f, 100f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f)); + + List words = TextElement.mergeWords(elements); + + List expectedWords = new ArrayList<>(); + TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "H", 1f, 6f)); + textChunk.add(new TextElement(0f, 10f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f)); + textChunk.add(new TextElement(0f, 20f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "L", 1f, 6f)); + textChunk.add(new TextElement(0f, 30f, 10f, 
20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); + textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, " ", 1f)); //Check why width=10.5? + expectedWords.add(textChunk); + TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "M", 1f, 6f)); + textChunk2.add(new TextElement(0f, 70f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "U", 1f, 6f)); + textChunk2.add(new TextElement(0f, 80f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "N", 1f, 6f)); + textChunk2.add(new TextElement(0f, 90f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)); + textChunk2.add(new TextElement(0f, 100f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f)); + expectedWords.add(textChunk2); + + Assert.assertEquals(2, words.size()); + Assert.assertEquals(expectedWords, words); + + } + + @Test + public void mergeTenElementsIntoTwoLines() { + + List elements = new ArrayList<>(); + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f)); + elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f)); + elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f)); + elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(20f, 0f, 10f, 20f, font, 1f, "M", 1f, 6f)); + elements.add(new TextElement(20f, 10f, 10f, 20f, font, 1f, "U", 1f, 6f)); + elements.add(new TextElement(20f, 20f, 10f, 20f, font, 1f, "N", 1f, 6f)); + elements.add(new TextElement(20f, 30f, 10f, 20f, font, 1f, "D", 1f, 6f)); + elements.add(new TextElement(20f, 40f, 10f, 20f, font, 1f, "O", 1f, 6f)); + + List words = TextElement.mergeWords(elements); + + List expectedWords = new ArrayList<>(); + TextChunk textChunk = new 
TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f)); + textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f)); + textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f)); + textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f)); + expectedWords.add(textChunk); + TextChunk textChunk2 = new TextChunk(new TextElement(20f, 0f, 10f, 20f, font, 1f, "M", 1f, 6f)); + textChunk2.add(new TextElement(20f, 10f, 10f, 20f, font, 1f, "U", 1f, 6f)); + textChunk2.add(new TextElement(20f, 20f, 10f, 20f, font, 1f, "N", 1f, 6f)); + textChunk2.add(new TextElement(20f, 30f, 10f, 20f, font, 1f, "D", 1f, 6f)); + textChunk2.add(new TextElement(20f, 40f, 10f, 20f, font, 1f, "O", 1f, 6f)); + expectedWords.add(textChunk2); + + Assert.assertEquals(2, words.size()); + Assert.assertEquals(expectedWords, words); + + } + } diff --git a/src/test/java/technology/tabula/TestUtils.java b/src/test/java/technology/tabula/TestUtils.java index e68411df..cb85cb7b 100644 --- a/src/test/java/technology/tabula/TestUtils.java +++ b/src/test/java/technology/tabula/TestUtils.java @@ -12,6 +12,7 @@ import java.util.Collections; import java.util.List; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.rendering.ImageType; import org.apache.commons.cli.ParseException; import org.apache.pdfbox.pdmodel.PDDocument; @@ -122,7 +123,7 @@ public void testQuickSortLongList() { @Test public void testJPEG2000DoesNotRaise() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/jpeg2000.pdf")); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/jpeg2000.pdf")); PDPage page = pdf_document.getPage(0); Utils.pageConvertToImage(pdf_document, page, 360, ImageType.RGB); } diff --git a/src/test/java/technology/tabula/UtilsForTesting.java b/src/test/java/technology/tabula/UtilsForTesting.java index 3ee8efde..8d3c91cf 100644 --- 
a/src/test/java/technology/tabula/UtilsForTesting.java +++ b/src/test/java/technology/tabula/UtilsForTesting.java @@ -7,6 +7,7 @@ import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVPrinter; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.Assert; @@ -23,11 +24,9 @@ public static Page getAreaFromPage(String path, int page, float top, float left, public static Page getPage(String path, int pageNumber) throws IOException { ObjectExtractor oe = null; try { - PDDocument document = PDDocument - .load(new File(path)); + PDDocument document = Loader.loadPDF(new File(path)); oe = new ObjectExtractor(document); - Page page = oe.extract(pageNumber); - return page; + return oe.extract(pageNumber); } finally { if (oe != null) oe.close(); From d0241fb5ff9182d7980c3ccd572cc8bb2dba9357 Mon Sep 17 00:00:00 2001 From: young Date: Wed, 6 Mar 2024 14:59:35 +0800 Subject: [PATCH 176/200] remove useless variable --- .../tabula/detectors/SpreadsheetDetectionAlgorithm.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java index 243cc3bf..43136ba5 100644 --- a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java @@ -20,8 +20,6 @@ public class SpreadsheetDetectionAlgorithm implements DetectionAlgorithm { public List detect(Page page) { List cells = SpreadsheetExtractionAlgorithm.findCells(page.getHorizontalRulings(), page.getVerticalRulings()); - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List tables = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells); // we want tables to be returned from top to bottom on the page From 63de16a4e102b44ea370919625221561dc783e75 Mon Sep 17 
00:00:00 2001 From: young Date: Thu, 11 Apr 2024 14:09:16 +0800 Subject: [PATCH 177/200] exclude junit-jupiter from pdfbox --- pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pom.xml b/pom.xml index 6c71b426..f60528d0 100644 --- a/pom.xml +++ b/pom.xml @@ -263,6 +263,12 @@ org.apache.pdfbox pdfbox 3.0.1 + + + org.junit.jupiter + junit-jupiter + + From e0ee0728ca398023ab67f59626a55525de0355b0 Mon Sep 17 00:00:00 2001 From: young Date: Thu, 11 Apr 2024 14:31:10 +0800 Subject: [PATCH 178/200] update pdfbox to 3.0.2 --- pom.xml | 341 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 168 insertions(+), 173 deletions(-) diff --git a/pom.xml b/pom.xml index f60528d0..52943fbf 100644 --- a/pom.xml +++ b/pom.xml @@ -1,4 +1,5 @@ - + 4.0.0 technology.tabula tabula @@ -33,16 +34,16 @@ - - snapshots - https://repository.apache.org/content/repositories/snapshots/ - - false - - - true - - + + snapshots + https://repository.apache.org/content/repositories/snapshots/ + + false + + + true + + @@ -109,20 +110,20 @@ - org.apache.maven.plugins - maven-javadoc-plugin - 3.3.1 - - 8 - - - - attach-javadocs - - jar - - - + org.apache.maven.plugins + maven-javadoc-plugin + 3.3.1 + + 8 + + + + attach-javadocs + + jar + + + org.apache.maven.plugins @@ -159,172 +160,166 @@ technology.tabula.CommandLineApp - - - jar-with-dependencies - + + + jar-with-dependencies + - - - org.apache.maven.plugins - maven-surefire-plugin - 2.22.2 - - - -Xms1024m -Xmx2048m - - - - - org.apache.maven.plugins - maven-eclipse-plugin - 2.10 - - true - true - - - - - - - - release - - + org.apache.maven.plugins - maven-javadoc-plugin - 3.3.1 + maven-surefire-plugin + 2.22.2 - 8 + + -Xms1024m -Xmx2048m - - - attach-javadocs - - jar - - - - - org.apache.maven.plugins - maven-source-plugin - 3.2.1 - - - attach-sources - - jar-no-fork - - - - - - org.apache.maven.plugins - maven-gpg-plugin - 1.6 - - - sign-artifacts - verify - - sign - - - - - - - - + + + org.apache.maven.plugins + 
maven-eclipse-plugin + 2.10 + + true + true + + + + + + + + release + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.3.1 + + 8 + + + + attach-javadocs + + jar + + + + + + org.apache.maven.plugins + maven-source-plugin + 3.2.1 + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-gpg-plugin + 1.6 + + + sign-artifacts + verify + + sign + + + + + + + + - - - org.locationtech.jts - jts-core - 1.18.1 - + + + org.locationtech.jts + jts-core + 1.18.1 + - - org.slf4j - slf4j-api - 1.7.35 - + + org.slf4j + slf4j-api + 1.7.35 + - - org.slf4j - slf4j-simple - 1.7.32 - + + org.slf4j + slf4j-simple + 1.7.32 + - - org.apache.pdfbox - pdfbox - 3.0.1 - - - org.junit.jupiter - junit-jupiter - - - + + org.apache.pdfbox + pdfbox + 3.0.2 + - - org.bouncycastle - bcprov-jdk15on - 1.70 - + + org.bouncycastle + bcprov-jdk15on + 1.70 + - - org.bouncycastle - bcmail-jdk15on - 1.70 - + + org.bouncycastle + bcmail-jdk15on + 1.70 + - - junit - junit - 4.13.2 - test - + + junit + junit + 4.13.2 + test + - - commons-cli - commons-cli - 1.4 - + + commons-cli + commons-cli + 1.4 + - - org.apache.commons - commons-csv - 1.9.0 - + + org.apache.commons + commons-csv + 1.9.0 + - - com.google.code.gson - gson - 2.9.0 - + + com.google.code.gson + gson + 2.9.0 + - - com.github.jai-imageio - jai-imageio-core - 1.4.0 - + + com.github.jai-imageio + jai-imageio-core + 1.4.0 + - - com.github.jai-imageio - jai-imageio-jpeg2000 - 1.4.0 - + + com.github.jai-imageio + jai-imageio-jpeg2000 + 1.4.0 + - - org.apache.pdfbox - jbig2-imageio - 3.0.4 - - + + org.apache.pdfbox + jbig2-imageio + 3.0.4 + + From 20b1053a24402a1e3a587ee90211661027d66484 Mon Sep 17 00:00:00 2001 From: young Date: Mon, 29 Apr 2024 17:40:46 +0800 Subject: [PATCH 179/200] fix: oom for removeText --- .../detectors/NurminenDetectionAlgorithm.java | 86 ++++++++++++------- .../technology/tabula/TestTableDetection.java | 53 ++++++------ 2 files changed, 79 insertions(+), 60 deletions(-) diff --git 
a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java index 9a377854..86639f66 100644 --- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java @@ -1,14 +1,8 @@ package technology.tabula.detectors; -import java.awt.geom.Line2D; -import java.awt.geom.Point2D; -import java.awt.image.BufferedImage; -import java.awt.image.Raster; -import java.io.IOException; -import java.io.OutputStream; -import java.util.*; - +import org.apache.pdfbox.contentstream.PDContentStream; import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdfwriter.ContentStreamWriter; @@ -16,16 +10,17 @@ import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.rendering.ImageType; - -import technology.tabula.Line; -import technology.tabula.Page; -import technology.tabula.Rectangle; -import technology.tabula.Ruling; -import technology.tabula.TextChunk; -import technology.tabula.TextElement; -import technology.tabula.Utils; +import technology.tabula.*; import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; +import java.awt.geom.Line2D; +import java.awt.geom.Point2D; +import java.awt.image.BufferedImage; +import java.awt.image.Raster; +import java.io.IOException; +import java.io.OutputStream; +import java.util.*; + /** * Created by matt on 2015-12-17. *

@@ -799,25 +794,10 @@ private List getVerticalRulings(BufferedImage image) { return verticalRulings; } - - // taken from http://www.docjar.com/html/api/org/apache/pdfbox/examples/util/RemoveAllText.java.html private PDDocument removeText(PDPage page) throws IOException { PDFStreamParser parser = new PDFStreamParser(page); parser.parse(); - List newTokens = new ArrayList<>(); - while (page.hasContents()) { - Object token = parser.parseNextToken(); - if (token instanceof Operator) { - Operator op = (Operator) token; - if ("TJ".equals(op.getName()) || "Tj".equals(op.getName())) { - //remove the one argument to this operator - newTokens.remove(newTokens.size() - 1); - continue; - } - } - newTokens.add(token); - } PDDocument document = new PDDocument(); PDPage newPage = document.importPage(page); @@ -826,9 +806,51 @@ private PDDocument removeText(PDPage page) throws IOException { PDStream newContents = new PDStream(document); OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE); ContentStreamWriter writer = new ContentStreamWriter(out); - writer.writeTokens(newTokens); + List tokensWithoutText = createTokensWithoutText(page); + writer.writeTokens(tokensWithoutText); out.close(); newPage.setContents(newContents); return document; } + + + /** + * @param contentStream contentStream + * @return newTokens + * @throws IOException When parseNextToken on Error + * @see ... 
+ */ + private static List createTokensWithoutText(PDContentStream contentStream) throws IOException { + PDFStreamParser parser = new PDFStreamParser(contentStream); + Object token = parser.parseNextToken(); + List newTokens = new ArrayList<>(); + while (token != null) { + if (token instanceof Operator) { + Operator op = (Operator) token; + String opName = op.getName(); + if (OperatorName.SHOW_TEXT_ADJUSTED.equals(opName) + || OperatorName.SHOW_TEXT.equals(opName) + || OperatorName.SHOW_TEXT_LINE.equals(opName)) { + // remove the argument to this operator + newTokens.remove(newTokens.size() - 1); + + token = parser.parseNextToken(); + continue; + } else if (OperatorName.SHOW_TEXT_LINE_AND_SPACE.equals(opName)) { + // remove the 3 arguments to this operator + newTokens.remove(newTokens.size() - 1); + newTokens.remove(newTokens.size() - 1); + newTokens.remove(newTokens.size() - 1); + + token = parser.parseNextToken(); + continue; + } + } + newTokens.add(token); + token = parser.parseNextToken(); + } + return newTokens; + } + + } diff --git a/src/test/java/technology/tabula/TestTableDetection.java b/src/test/java/technology/tabula/TestTableDetection.java index 80d21350..c13ff201 100644 --- a/src/test/java/technology/tabula/TestTableDetection.java +++ b/src/test/java/technology/tabula/TestTableDetection.java @@ -1,29 +1,29 @@ package technology.tabula; -import java.io.File; -import java.io.FileWriter; -import java.io.FilenameFilter; -import java.io.IOException; -import java.util.*; -import java.util.logging.Level; -import java.util.logging.Logger; - -import static org.junit.Assert.*; - import com.google.gson.Gson; import org.apache.pdfbox.Loader; +import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.w3c.dom.*; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; +import 
technology.tabula.detectors.NurminenDetectionAlgorithm; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.*; +import java.util.logging.Level; +import java.util.logging.Logger; -import org.apache.pdfbox.pdmodel.PDDocument; -import technology.tabula.detectors.NurminenDetectionAlgorithm; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; /** * Created by matt on 2015-12-14. @@ -111,15 +111,10 @@ public static Collection data() { String directoryName = "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-" + regionCode + "/"; File dir = new File(directoryName); - File[] pdfs = dir.listFiles(new FilenameFilter() { - @Override - public boolean accept(File dir, String name) { - return name.toLowerCase().endsWith(".pdf"); - } - }); + File[] pdfs = dir.listFiles((dir1, name) -> name.toLowerCase().endsWith(".pdf")); for (File pdf : pdfs) { - data.add(new Object[] {pdf}); + data.add(new Object[]{pdf}); } } @@ -163,6 +158,8 @@ public void testDetectionOfTables() throws Exception { NodeList tables = regionDocument.getElementsByTagName("table"); // tabula extractors + + PDDocument pdfDocument = Loader.loadPDF(this.pdf); ObjectExtractor extractor = new ObjectExtractor(pdfDocument); @@ -171,7 +168,7 @@ public void testDetectionOfTables() throws Exception { int numExpectedTables = 0; - for (int i=0; i tablesOnPage = detectionAlgorithm.detect(page); - if (tablesOnPage.size() > 0) { - detectedTables.put(new Integer(page.getPageNumber()), tablesOnPage); + if (!tablesOnPage.isEmpty()) { + detectedTables.put(page.getPageNumber(), tablesOnPage); } } @@ -267,7 +264,7 @@ public void testDetectionOfTables() throws Exception { System.out.println(totalErroneouslyDetectedTables + " tables incorrectly detected"); - if(this.status.isFirstRun()) { + if (this.status.isFirstRun()) { // make the baseline 
this.status.expectedFailure = failed; this.status.numCorrectlyDetectedTables = this.numCorrectlyDetectedTables; @@ -293,14 +290,14 @@ private List comparePages(Integer page, List detected, List detectedIterator = detected.iterator(); detectedIterator.hasNext();) { + for (Iterator detectedIterator = detected.iterator(); detectedIterator.hasNext(); ) { Rectangle detectedTable = detectedIterator.next(); - for (int i=0; i Date: Mon, 29 Apr 2024 17:51:40 +0800 Subject: [PATCH 180/200] fix: unit test --- .../technology/tabula/TestTextElement.java | 109 +++++++++--------- 1 file changed, 57 insertions(+), 52 deletions(-) diff --git a/src/test/java/technology/tabula/TestTextElement.java b/src/test/java/technology/tabula/TestTextElement.java index 3db1ca31..ee0fbf3d 100644 --- a/src/test/java/technology/tabula/TestTextElement.java +++ b/src/test/java/technology/tabula/TestTextElement.java @@ -1,14 +1,14 @@ package technology.tabula; -import java.util.ArrayList; -import java.util.List; - import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.junit.Assert; import org.junit.Test; +import java.util.ArrayList; +import java.util.List; + public class TestTextElement { @@ -24,7 +24,7 @@ public void createTextElement() { Assert.assertEquals(5f, textElement.getTop(), 0); Assert.assertEquals(10f, textElement.getWidth(), 0); Assert.assertEquals(20f, textElement.getHeight(), 0); - Assert.assertEquals(new PDType1Font(Standard14Fonts.FontName.HELVETICA), textElement.getFont()); + Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName()); Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0); Assert.assertEquals(0f, textElement.getDirection(), 0); @@ -43,7 +43,7 @@ public void createTextElementWithDirection() { Assert.assertEquals(5f, textElement.getTop(), 0); Assert.assertEquals(10f, textElement.getWidth(), 0); Assert.assertEquals(20f, 
textElement.getHeight(), 0); - Assert.assertEquals(new PDType1Font(Standard14Fonts.FontName.HELVETICA), textElement.getFont()); + Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName()); Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0); Assert.assertEquals(6f, textElement.getDirection(), 0); @@ -54,18 +54,19 @@ public void createTextElementWithDirection() { public void mergeFourElementsIntoFourWords() { List elements = new ArrayList<>(); - elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); - elements.add(new TextElement(20f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)); - elements.add(new TextElement(40f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)); - elements.add(new TextElement(60f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)); + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f)); + elements.add(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f)); + elements.add(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f)); List words = TextElement.mergeWords(elements); List expectedWords = new ArrayList<>(); - expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f))); - expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f))); - expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f))); - expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 
1f, "D", 1f, 6f))); + expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f))); + expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f))); + expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f))); + expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f))); Assert.assertEquals(expectedWords, words); @@ -75,18 +76,19 @@ public void mergeFourElementsIntoFourWords() { public void mergeFourElementsIntoOneWord() { List elements = new ArrayList<>(); - elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)); - elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)); - elements.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)); + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); + elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); + elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f)); List words = TextElement.mergeWords(elements); List expectedWords = new ArrayList<>(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); - textChunk.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)); - textChunk.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)); - textChunk.add(new TextElement(0f, 45f, 10f, 20f, new 
PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)); + TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); + textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); + textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); + textChunk.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f)); expectedWords.add(textChunk); Assert.assertEquals(expectedWords, words); @@ -101,10 +103,11 @@ public void mergeElementsShouldBeIdempotent() { */ List elements = new ArrayList<>(); - elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)); - elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)); - elements.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)); + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); + elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); + elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f)); List words = TextElement.mergeWords(elements); List words2 = TextElement.mergeWords(elements); @@ -115,20 +118,21 @@ public void mergeElementsShouldBeIdempotent() { public void mergeElementsWithSkippingRules() { List elements = new ArrayList<>(); - elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 17f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 25f, 10f, 20f, new 
PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)); - elements.add(new TextElement(0.001f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, " ", 1f, 6f)); - elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)); + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 17f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); + elements.add(new TextElement(0.001f, 25f, 10f, 20f, font, 1f, " ", 1f, 6f)); + elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); PDFont TIMES_ROMAN = new PDType1Font(Standard14Fonts.FontName.TIMES_ROMAN); elements.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f)); List words = TextElement.mergeWords(elements); List expectedWords = new ArrayList<>(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); - textChunk.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)); - textChunk.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)); + TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); + textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); + textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); textChunk.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f)); expectedWords.add(textChunk); @@ -140,30 +144,31 @@ public void mergeElementsWithSkippingRules() { public void mergeTenElementsIntoTwoWords() { List elements = new ArrayList<>(); - elements.add(new TextElement(0f, 0f, 10f, 20f, new 
PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "H", 1f, 6f)); - elements.add(new TextElement(0f, 10f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f)); - elements.add(new TextElement(0f, 20f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "L", 1f, 6f)); - elements.add(new TextElement(0f, 30f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 60f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "M", 1f, 6f)); - elements.add(new TextElement(0f, 70f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "U", 1f, 6f)); - elements.add(new TextElement(0f, 80f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "N", 1f, 6f)); - elements.add(new TextElement(0f, 90f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)); - elements.add(new TextElement(0f, 100f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f)); + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f)); + elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f)); + elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f)); + elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f)); + elements.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f)); + elements.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f)); + elements.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f)); + elements.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f)); List words = TextElement.mergeWords(elements); List expectedWords = new ArrayList<>(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, new 
PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "H", 1f, 6f)); - textChunk.add(new TextElement(0f, 10f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f)); - textChunk.add(new TextElement(0f, 20f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "L", 1f, 6f)); - textChunk.add(new TextElement(0f, 30f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)); - textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, " ", 1f)); //Check why width=10.5? + TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f)); + textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f)); + textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f)); + textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f)); + textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, font, 1f, " ", 1f)); //Check why width=10.5? 
expectedWords.add(textChunk); - TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "M", 1f, 6f)); - textChunk2.add(new TextElement(0f, 70f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "U", 1f, 6f)); - textChunk2.add(new TextElement(0f, 80f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "N", 1f, 6f)); - textChunk2.add(new TextElement(0f, 90f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)); - textChunk2.add(new TextElement(0f, 100f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f)); + TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f)); + textChunk2.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f)); + textChunk2.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f)); + textChunk2.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f)); + textChunk2.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f)); expectedWords.add(textChunk2); Assert.assertEquals(2, words.size()); From 6d59cddd5e4523d74aa03739be5992d35372fdd3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Feb 2023 21:12:28 +0000 Subject: [PATCH 181/200] Bump maven-compiler-plugin from 3.8.1 to 3.11.0 Bumps [maven-compiler-plugin](https://github.com/apache/maven-compiler-plugin) from 3.8.1 to 3.11.0. - [Release notes](https://github.com/apache/maven-compiler-plugin/releases) - [Commits](https://github.com/apache/maven-compiler-plugin/compare/maven-compiler-plugin-3.8.1...maven-compiler-plugin-3.11.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-compiler-plugin dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 52943fbf..b3344e12 100644 --- a/pom.xml +++ b/pom.xml @@ -147,7 +147,7 @@ maven-compiler-plugin - 3.8.1 + 3.11.0 1.8 1.8 From 2bdeb954675cb2ad05431210d3f06db74a490fe9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 21:29:09 +0000 Subject: [PATCH 182/200] Bump org.apache.maven.plugins:maven-gpg-plugin from 1.6 to 3.2.4 Bumps [org.apache.maven.plugins:maven-gpg-plugin](https://github.com/apache/maven-gpg-plugin) from 1.6 to 3.2.4. - [Release notes](https://github.com/apache/maven-gpg-plugin/releases) - [Commits](https://github.com/apache/maven-gpg-plugin/compare/maven-gpg-plugin-1.6...maven-gpg-plugin-3.2.4) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-gpg-plugin dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index b3344e12..7f30e7a4 100644 --- a/pom.xml +++ b/pom.xml @@ -128,7 +128,7 @@ org.apache.maven.plugins maven-gpg-plugin - 1.6 + 3.2.4 sign-artifacts @@ -225,7 +225,7 @@ org.apache.maven.plugins maven-gpg-plugin - 1.6 + 3.2.4 sign-artifacts From c831cf6ac36c5315b96ff6a49212bb67908ce48e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 21:28:58 +0000 Subject: [PATCH 183/200] Bump commons-cli:commons-cli from 1.4 to 1.8.0 Bumps commons-cli:commons-cli from 1.4 to 1.8.0. --- updated-dependencies: - dependency-name: commons-cli:commons-cli dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7f30e7a4..eb362e89 100644 --- a/pom.xml +++ b/pom.xml @@ -288,7 +288,7 @@ commons-cli commons-cli - 1.4 + 1.8.0 From 9dc64f867a01e69e6e929feaa5a909c02b9bd3e9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 21:28:56 +0000 Subject: [PATCH 184/200] Bump org.slf4j:slf4j-api from 1.7.35 to 2.0.13 Bumps org.slf4j:slf4j-api from 1.7.35 to 2.0.13. --- updated-dependencies: - dependency-name: org.slf4j:slf4j-api dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index eb362e89..adf29ce5 100644 --- a/pom.xml +++ b/pom.xml @@ -251,7 +251,7 @@ org.slf4j slf4j-api - 1.7.35 + 2.0.13 From 3f7445380ec4f48dfc545dd6d33e89d4c501af55 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 21:28:55 +0000 Subject: [PATCH 185/200] Bump org.slf4j:slf4j-simple from 1.7.32 to 2.0.13 Bumps org.slf4j:slf4j-simple from 1.7.32 to 2.0.13. --- updated-dependencies: - dependency-name: org.slf4j:slf4j-simple dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index adf29ce5..8e0736c4 100644 --- a/pom.xml +++ b/pom.xml @@ -257,7 +257,7 @@ org.slf4j slf4j-simple - 1.7.32 + 2.0.13 From 2ef079f2a14dc6d66c68c5ce8d03853eea7436f4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 Jun 2022 21:32:54 +0000 Subject: [PATCH 186/200] Bump jts-core from 1.18.1 to 1.19.0 Bumps jts-core from 1.18.1 to 1.19.0. 
--- updated-dependencies: - dependency-name: org.locationtech.jts:jts-core dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8e0736c4..19bf0133 100644 --- a/pom.xml +++ b/pom.xml @@ -245,7 +245,7 @@ org.locationtech.jts jts-core - 1.18.1 + 1.19.0 From c1e4e326eddc1a2dfe59febf24a569d11bde5cfb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 15 Feb 2023 21:59:29 +0000 Subject: [PATCH 187/200] Bump maven-javadoc-plugin from 3.3.1 to 3.5.0 Bumps [maven-javadoc-plugin](https://github.com/apache/maven-javadoc-plugin) from 3.3.1 to 3.5.0. - [Release notes](https://github.com/apache/maven-javadoc-plugin/releases) - [Commits](https://github.com/apache/maven-javadoc-plugin/compare/maven-javadoc-plugin-3.3.1...maven-javadoc-plugin-3.5.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-javadoc-plugin dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pom.xml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pom.xml b/pom.xml index 19bf0133..749fa0b7 100644 --- a/pom.xml +++ b/pom.xml @@ -74,7 +74,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.3.1 + 3.5.0 true @@ -110,20 +110,20 @@ - org.apache.maven.plugins - maven-javadoc-plugin - 3.3.1 - - 8 - - - - attach-javadocs - - jar - - - + org.apache.maven.plugins + maven-javadoc-plugin + 3.5.0 + + 8 + + + + attach-javadocs + + jar + + + org.apache.maven.plugins From 5761334b86f58723e761b4941f2950d7b6e53d82 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 21:29:03 +0000 Subject: [PATCH 188/200] Bump org.sonatype.plugins:nexus-staging-maven-plugin from 1.6.8 to 1.7.0 Bumps org.sonatype.plugins:nexus-staging-maven-plugin from 1.6.8 to 1.7.0. --- updated-dependencies: - dependency-name: org.sonatype.plugins:nexus-staging-maven-plugin dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 749fa0b7..6a66162f 100644 --- a/pom.xml +++ b/pom.xml @@ -87,7 +87,7 @@ org.sonatype.plugins nexus-staging-maven-plugin - 1.6.8 + 1.7.0 true ossrh From ab7c4bd54bd20ca03c2bfad400c5cc6c26e34d59 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 21:28:53 +0000 Subject: [PATCH 189/200] Bump org.apache.maven.plugins:maven-source-plugin from 3.2.1 to 3.3.1 Bumps [org.apache.maven.plugins:maven-source-plugin](https://github.com/apache/maven-source-plugin) from 3.2.1 to 3.3.1. 
- [Release notes](https://github.com/apache/maven-source-plugin/releases) - [Commits](https://github.com/apache/maven-source-plugin/compare/maven-source-plugin-3.2.1...maven-source-plugin-3.3.1) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-source-plugin dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 6a66162f..77cda400 100644 --- a/pom.xml +++ b/pom.xml @@ -99,7 +99,7 @@ org.apache.maven.plugins maven-source-plugin - 3.2.1 + 3.3.1 attach-sources @@ -212,7 +212,7 @@ org.apache.maven.plugins maven-source-plugin - 3.2.1 + 3.3.1 attach-sources From ebe8e30dedfd6f7553046bbe6bbd3640b121d3dd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 17 Jul 2024 21:36:59 +0000 Subject: [PATCH 190/200] Bump org.apache.commons:commons-csv from 1.9.0 to 1.11.0 Bumps [org.apache.commons:commons-csv](https://github.com/apache/commons-csv) from 1.9.0 to 1.11.0. - [Changelog](https://github.com/apache/commons-csv/blob/master/RELEASE-NOTES.txt) - [Commits](https://github.com/apache/commons-csv/compare/rel/commons-csv-1.9.0...rel/commons-csv-1.11.0) --- updated-dependencies: - dependency-name: org.apache.commons:commons-csv dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 77cda400..a45e6089 100644 --- a/pom.xml +++ b/pom.xml @@ -294,7 +294,7 @@ org.apache.commons commons-csv - 1.9.0 + 1.11.0 From db3f6dfd74801c824efd2a25dc26b4a3cb8d7922 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 17 Jul 2024 21:36:45 +0000 Subject: [PATCH 191/200] Bump org.apache.maven.plugins:maven-compiler-plugin Bumps [org.apache.maven.plugins:maven-compiler-plugin](https://github.com/apache/maven-compiler-plugin) from 3.11.0 to 3.13.0. - [Release notes](https://github.com/apache/maven-compiler-plugin/releases) - [Commits](https://github.com/apache/maven-compiler-plugin/compare/maven-compiler-plugin-3.11.0...maven-compiler-plugin-3.13.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-compiler-plugin dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a45e6089..b73d8b1e 100644 --- a/pom.xml +++ b/pom.xml @@ -147,7 +147,7 @@ maven-compiler-plugin - 3.11.0 + 3.13.0 1.8 1.8 From fd3a32c579f672ba17c5f1231985e980c4e3ec4e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 17 Jul 2024 21:36:48 +0000 Subject: [PATCH 192/200] Bump com.google.code.gson:gson from 2.9.0 to 2.11.0 Bumps [com.google.code.gson:gson](https://github.com/google/gson) from 2.9.0 to 2.11.0. - [Release notes](https://github.com/google/gson/releases) - [Changelog](https://github.com/google/gson/blob/main/CHANGELOG.md) - [Commits](https://github.com/google/gson/compare/gson-parent-2.9.0...gson-parent-2.11.0) --- updated-dependencies: - dependency-name: com.google.code.gson:gson dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b73d8b1e..394ea68f 100644 --- a/pom.xml +++ b/pom.xml @@ -300,7 +300,7 @@ com.google.code.gson gson - 2.9.0 + 2.11.0 From 097559d0a185ca1dda25d7b7ff103e884848c70c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 17 Jul 2024 21:36:52 +0000 Subject: [PATCH 193/200] Bump org.apache.maven.plugins:maven-javadoc-plugin from 3.3.1 to 3.7.0 Bumps [org.apache.maven.plugins:maven-javadoc-plugin](https://github.com/apache/maven-javadoc-plugin) from 3.3.1 to 3.7.0. - [Release notes](https://github.com/apache/maven-javadoc-plugin/releases) - [Commits](https://github.com/apache/maven-javadoc-plugin/compare/maven-javadoc-plugin-3.3.1...maven-javadoc-plugin-3.7.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-javadoc-plugin dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 394ea68f..a4871012 100644 --- a/pom.xml +++ b/pom.xml @@ -74,7 +74,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.5.0 + 3.7.0 true @@ -112,7 +112,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.5.0 + 3.7.0 8 @@ -196,7 +196,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.3.1 + 3.7.0 8 From bde6d765cfab25d53ff885de33a4556fc41bb9d7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 17 Jul 2024 21:36:55 +0000 Subject: [PATCH 194/200] Bump org.apache.maven.plugins:maven-surefire-plugin from 2.22.2 to 3.3.1 Bumps [org.apache.maven.plugins:maven-surefire-plugin](https://github.com/apache/maven-surefire) from 2.22.2 to 3.3.1. 
- [Release notes](https://github.com/apache/maven-surefire/releases) - [Commits](https://github.com/apache/maven-surefire/compare/surefire-2.22.2...surefire-3.3.1) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-surefire-plugin dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a4871012..8fd27509 100644 --- a/pom.xml +++ b/pom.xml @@ -169,7 +169,7 @@ org.apache.maven.plugins maven-surefire-plugin - 2.22.2 + 3.3.1 -Xms1024m -Xmx2048m From 0c73e698b979a74cac0e917718b2c5dfd098dacc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 21:03:37 +0000 Subject: [PATCH 195/200] Bump org.apache.maven.plugins:maven-javadoc-plugin from 3.7.0 to 3.8.0 Bumps [org.apache.maven.plugins:maven-javadoc-plugin](https://github.com/apache/maven-javadoc-plugin) from 3.7.0 to 3.8.0. - [Release notes](https://github.com/apache/maven-javadoc-plugin/releases) - [Commits](https://github.com/apache/maven-javadoc-plugin/compare/maven-javadoc-plugin-3.7.0...maven-javadoc-plugin-3.8.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-javadoc-plugin dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 8fd27509..d0b40101 100644 --- a/pom.xml +++ b/pom.xml @@ -74,7 +74,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.7.0 + 3.8.0 true @@ -112,7 +112,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.7.0 + 3.8.0 8 @@ -196,7 +196,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.7.0 + 3.8.0 8 From 818c9a2f5a5ea8dc72d3efa775f192381e84b8c1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 9 Aug 2024 21:53:01 +0000 Subject: [PATCH 196/200] Bump org.apache.pdfbox:pdfbox from 3.0.2 to 3.0.3 Bumps org.apache.pdfbox:pdfbox from 3.0.2 to 3.0.3. --- updated-dependencies: - dependency-name: org.apache.pdfbox:pdfbox dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d0b40101..a963e35a 100644 --- a/pom.xml +++ b/pom.xml @@ -263,7 +263,7 @@ org.apache.pdfbox pdfbox - 3.0.2 + 3.0.3 From 5d91f1d733c4895d31854a641c152220f8c5f341 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 30 Aug 2024 21:39:59 +0000 Subject: [PATCH 197/200] Bump org.locationtech.jts:jts-core from 1.19.0 to 1.20.0 Bumps org.locationtech.jts:jts-core from 1.19.0 to 1.20.0. --- updated-dependencies: - dependency-name: org.locationtech.jts:jts-core dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a963e35a..49057e90 100644 --- a/pom.xml +++ b/pom.xml @@ -245,7 +245,7 @@ org.locationtech.jts jts-core - 1.19.0 + 1.20.0 From 971ae765e84f09ed83f5808b66f764590146e923 Mon Sep 17 00:00:00 2001 From: Kyle Lacy Date: Thu, 20 Feb 2025 15:29:09 -0800 Subject: [PATCH 198/200] Upgrade BouncyCastle dependencies --- pom.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 49057e90..8b7b3b2d 100644 --- a/pom.xml +++ b/pom.xml @@ -268,14 +268,14 @@ org.bouncycastle - bcprov-jdk15on - 1.70 + bcprov-jdk18on + 1.80 org.bouncycastle - bcmail-jdk15on - 1.70 + bcmail-jdk18on + 1.80 From 88154e2c15967cc4c2a2606a8da25d47b9b916c3 Mon Sep 17 00:00:00 2001 From: Tilman Hausherr Date: Wed, 19 Mar 2025 15:36:11 +0100 Subject: [PATCH 199/200] Update PDFBox --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8b7b3b2d..211d0d4d 100644 --- a/pom.xml +++ b/pom.xml @@ -263,7 +263,7 @@ org.apache.pdfbox pdfbox - 3.0.3 + 3.0.4 From 2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f Mon Sep 17 00:00:00 2001 From: Tilman Hausherr Date: Wed, 19 Mar 2025 15:38:16 +0100 Subject: [PATCH 200/200] Adjust test The test needs to be adjusted because the updated PDFBox now supports the /ActualText feature of PDF during text extraction, which changes the extracted text. 
--- src/test/java/technology/tabula/TestBasicExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/technology/tabula/TestBasicExtractor.java b/src/test/java/technology/tabula/TestBasicExtractor.java index d120546f..b56fd6ea 100644 --- a/src/test/java/technology/tabula/TestBasicExtractor.java +++ b/src/test/java/technology/tabula/TestBasicExtractor.java @@ -203,7 +203,7 @@ public void testCheckSqueezeDoesntBreak() throws IOException { List> rows = table.getRows(); List firstRow = rows.get(0); List lastRow = rows.get(rows.size() - 1); - assertTrue(firstRow.get(0).getText().equals("Violent crime . . . . . . . . . . . . . . . . . .")); + assertTrue(firstRow.get(0).getText().equals("Violent crime. . . . . . . . . . . . . . . . . .")); assertTrue(lastRow.get(lastRow.size() - 1).getText().equals("(X)")); page.getPDDoc().close(); }