diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..a217b347 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +version: 2 +updates: +- package-ecosystem: maven + directory: "/" + schedule: + interval: daily + open-pull-requests-limit: 10 diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml new file mode 100644 index 00000000..5cc1031a --- /dev/null +++ b/.github/workflows/tests-windows.yml @@ -0,0 +1,23 @@ +name: Java CI (Windows) + +on: [push] + +jobs: + build: + runs-on: windows-latest + + steps: + # https://github.com/actions/checkout/issues/135#issuecomment-602171132 + - name: Set git to use LF + run: | + git config --global core.autocrlf false + git config --global core.eol lf + - uses: actions/checkout@v3 + - name: Set up JDK 11 + uses: actions/setup-java@v3 + with: + java-version: '11' + distribution: 'adopt' + cache: maven + - name: Build with Maven + run: mvn --batch-mode test diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..da2d019b --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,18 @@ +name: Java CI + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up JDK 11 + uses: actions/setup-java@v3 + with: + java-version: '11' + distribution: 'adopt' + cache: maven + - name: Build with Maven + run: mvn --batch-mode test diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 46e41e94..00000000 --- a/.travis.yml +++ /dev/null @@ -1,10 +0,0 @@ -language: java -install: mvn install -DskipTests=true -Dmaven.javadoc.skip=true -Dgpg.skip=true -B -V -script: mvn test -Dgpg.skip=true -jdk: - - openjdk7 - - oraclejdk8 -sudo: false - - - diff --git a/README.md b/README.md index cd9717c6..db7b0023 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,42 @@ -tabula-java [![Build 
Status](https://travis-ci.org/tabulapdf/tabula-java.svg?branch=master)](https://travis-ci.org/tabulapdf/tabula-java) [![Join the chat at https://gitter.im/tabulapdf/tabula-java](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/tabulapdf/tabula-java?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +tabula-java [![Build Status](https://github.com/tabulapdf/tabula-java/actions/workflows/tests.yml/badge.svg)](https://github.com/tabulapdf/tabula-java/actions/workflows/tests.yml) =========== `tabula-java` is a library for extracting tables from PDF files — it is the table extraction engine that powers [Tabula](http://tabula.technology/) ([repo](http://github.com/tabulapdf/tabula)). You can use `tabula-java` as a command-line tool to programmatically extract tables from PDFs. -(This is the new version of the extraction engine; the previous code can be found at [`tabula-extractor`](http://github.com/tabulapdf/tabula-extractor).) - -© 2014-2016 Manuel Aristarán. Available under MIT License. See [`LICENSE`](LICENSE). +© 2014-2020 Manuel Aristarán. Available under MIT License. See [`LICENSE`](LICENSE). ## Download Download a version of the tabula-java's jar, with all dependencies included, that works on Mac, Windows and Linux from our [releases page](../../releases). -## Usage Examples +## Commandline Usage Examples `tabula-java` provides a command line application: ``` -$ java -jar target/tabula-1.0.1-jar-with-dependencies.jar --help -usage: tabula [-a ] [-b ] [-c ] [-d] [-f - ] [-g] [-h] [-i] [-l] [-n] [-o ] [-p ] [-r] - [-s ] [-t] [-u] [-v] +$ java -jar target/tabula-1.0.5-jar-with-dependencies.jar --help +usage: tabula [-a ] [-b ] [-c ] [-f ] + [-g] [-h] [-i] [-l] [-n] [-o ] [-p ] [-r] [-s + ] [-t] [-u] [-v] Tabula helps you extract tables from PDFs - -a,--area Portion of the page to analyze - (top,left,bottom,right). Example: --area - 269.875,12.75,790.5,561. Default is entire - page + -a,--area -a/--area = Portion of the page to analyze. 
+ Example: --area 269.875,12.75,790.5,561. + Accepts top,left,bottom,right i.e. y1,x1,y2,x2 + where all values are in points relative to the + top left corner. If all values are between + 0-100 (inclusive) and preceded by '%', input + will be taken as % of actual height or width + of the page. Example: --area %0,0,100,50. To + specify multiple areas, -a option should be + repeated. Default is entire page -b,--batch Convert all .pdfs in the provided directory. -c,--columns X coordinates of column boundaries. Example - --columns 10.1,20.2,30.3 - -d,--debug Print detected table areas instead of - processing. + --columns 10.1,20.2,30.3. If all values are + between 0-100 (inclusive) and preceded by '%', + input will be taken as % of actual width of + the page. Example: --columns %25,50,80.6 -f,--format Output format: (CSV,TSV,JSON). Default: CSV -g,--guess Guess the portion of the page to analyze per page. @@ -64,17 +69,59 @@ Tabula helps you extract tables from PDFs -v,--version Print version and exit. ``` -It also includes a debugging tool, run `java -cp ./target/tabula-1.0.1-jar-with-dependencies.jar technology.tabula.debug.Debug -h` for the available options. +It also includes a debugging tool, run `java -cp ./target/tabula-1.0.5-jar-with-dependencies.jar technology.tabula.debug.Debug -h` for the available options. You can also integrate `tabula-java` with any JVM language. For Java examples, see the [`tests`](src/test/java/technology/tabula/) folder. 
JVM start-up time is a lot of the cost of the `tabula` command, so if you're trying to extract many tables from PDFs, you have a few options for speeding it up: + - the -b option, which allows you to convert all pdfs in a given directory - the [drip](https://github.com/ninjudd/drip) utility - the [Ruby](http://github.com/tabulapdf/tabula-extractor), [Python](https://github.com/chezou/tabula-py), [R](https://github.com/leeper/tabulizer), and [Node.js](https://github.com/ezodude/tabula-js) bindings - writing your own program in any JVM language (Java, JRuby, Scala) that imports tabula-java. - waiting for us to implement an API/server-style system (it's on the [roadmap](https://github.com/tabulapdf/tabula-api)) +## API Usage Examples + +A simple Java code example which extracts all rows and cells from all tables of all pages of a PDF document: + +```java +InputStream in = this.getClass().getResourceAsStream("my.pdf"); +try (PDDocument document = PDDocument.load(in)) { + SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); + PageIterator pi = new ObjectExtractor(document).extract(); + while (pi.hasNext()) { + // iterate over the pages of the document + Page page = pi.next(); + List table = sea.extract(page); + // iterate over the tables of the page + for(Table tables: table) { + List> rows = tables.getRows(); + // iterate over the rows of the table + for (List cells : rows) { + // print all column-cells of the row plus linefeed + for (RectangularTextContainer content : cells) { + // Note: Cell.getText() uses \r to concat text chunks + String text = content.getText().replace("\r", " "); + System.out.print(text + "|"); + } + System.out.println(); + } + } + } +} +``` + + +For more detailed information, check the Javadoc. 
+The Javadoc API documentation can be generated (see also the '_Building from Source_' section) via + +``` +mvn javadoc:javadoc +``` + +which writes the HTML files to the `target/site/apidocs/` directory. + ## Building from Source Clone this repo and run: @@ -96,7 +143,7 @@ You can help by: ### Backers -You can also support our continued work on `tabula-java` with a one-time or monthly donation [on OpenCollective](https://opencollective.com/tabulapdf#support). Organizations who use `tabula-java` can also [sponsor the project](https://opencollective.com/tabulapdf#support) for acknolwedgement on [our official site](http://tabula.technology/) and this README. +You can also support our continued work on `tabula-java` with a one-time or monthly donation [on OpenCollective](https://opencollective.com/tabulapdf#support). Organizations who use `tabula-java` can also [sponsor the project](https://opencollective.com/tabulapdf#support) for acknowledgement on [our official site](http://tabula.technology/) and this README. Special thanks to the following users and organizations for generously supporting Tabula with donations and grants: @@ -107,5 +154,5 @@ Special thanks to the following users and organizations for generously supportin -The John S. and James L. +The John S. and James L. 
Knight Foundation The Shuttleworth Foundation diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index f60e8fd5..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,19 +0,0 @@ -version: '{build}' -install: - - ps: | - Add-Type -AssemblyName System.IO.Compression.FileSystem - if (!(Test-Path -Path "C:\maven" )) { - (new-object System.Net.WebClient).DownloadFile( - 'http://www.us.apache.org/dist/maven/maven-3/3.5.0/binaries/apache-maven-3.5.0-bin.zip', - 'C:\maven-bin.zip' - ) - [System.IO.Compression.ZipFile]::ExtractToDirectory("C:\maven-bin.zip", "C:\maven") - } - - cmd: SET PATH=C:\maven\apache-maven-3.2.5\bin;%JAVA_HOME%\bin;%PATH% - - cmd: SET MAVEN_OPTS=-XX:MaxPermSize=2g -Xmx4g - - cmd: SET JAVA_OPTS=-XX:MaxPermSize=2g -Xmx4g -test_script: - - mvn clean install --batch-mode -cache: - - C:\maven\ - - C:\Users\appveyor\.m2 diff --git a/jbang-catalog.json b/jbang-catalog.json new file mode 100644 index 00000000..b7f71347 --- /dev/null +++ b/jbang-catalog.json @@ -0,0 +1,8 @@ +{ + "catalogs": {}, + "aliases": { + "tabula": { + "script-ref": "https://github.com/tabulapdf/tabula-java/releases/download/v1.0.4/tabula-1.0.4-jar-with-dependencies.jar" + } + } +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 2145aa06..211d0d4d 100644 --- a/pom.xml +++ b/pom.xml @@ -1,8 +1,9 @@ - + 4.0.0 technology.tabula tabula - 1.0.2-SNAPSHOT + 1.0.6-SNAPSHOT Tabula Extract tables from PDF files http://github.com/tabulapdf/tabula-java @@ -32,21 +33,26 @@ + + + snapshots + https://repository.apache.org/content/repositories/snapshots/ + + false + + + true + + + + scm:git:git@github.com:tabulapdf/tabula-java.git scm:git:git@github.com:tabulapdf/tabula-java.git git@github.com:tabulapdf/tabula-java.git - tabula-1.0.0-SNAPSHOT + v1.0.2 - - - sonatype - Sonatype repository - https://oss.sonatype.org/content/repositories/snapshots/ - - - UTF-8 UTF-8 @@ -68,7 +74,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 2.10.3 + 3.8.0 true @@ -81,7 +87,7 @@ 
org.sonatype.plugins nexus-staging-maven-plugin - 1.6.3 + 1.7.0 true ossrh @@ -93,7 +99,7 @@ org.apache.maven.plugins maven-source-plugin - 2.2.1 + 3.3.1 attach-sources @@ -103,11 +109,13 @@ - org.apache.maven.plugins maven-javadoc-plugin - 2.9.1 + 3.8.0 + + 8 + attach-javadocs @@ -120,7 +128,7 @@ org.apache.maven.plugins maven-gpg-plugin - 1.5 + 3.2.4 sign-artifacts @@ -128,15 +136,21 @@ sign + + + --pinentry-mode + loopback + + maven-compiler-plugin - 3.1 + 3.13.0 - 1.7 - 1.7 + 1.8 + 1.8 @@ -146,152 +160,166 @@ technology.tabula.CommandLineApp - - - jar-with-dependencies - + + + jar-with-dependencies + - - - org.apache.maven.plugins - maven-surefire-plugin + + + org.apache.maven.plugins + maven-surefire-plugin + 3.3.1 -Xms1024m -Xmx2048m - - - - - - - release - - + + org.apache.maven.plugins - maven-javadoc-plugin - 2.9.1 - - - attach-javadocs - - jar - - - + maven-eclipse-plugin + 2.10 + + true + true + - - org.apache.maven.plugins - maven-source-plugin - 2.2.1 - - - attach-sources - - jar-no-fork - - - - - - org.apache.maven.plugins - maven-gpg-plugin - 1.5 - - - sign-artifacts - verify - - sign - - - - - - - - + + + + + + release + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.8.0 + + 8 + + + + attach-javadocs + + jar + + + + + + org.apache.maven.plugins + maven-source-plugin + 3.3.1 + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-gpg-plugin + 3.2.4 + + + sign-artifacts + verify + + sign + + + + + + + + - - - net.sf.jsi - jsi - 1.1.0-SNAPSHOT - + + + org.locationtech.jts + jts-core + 1.20.0 + - - org.slf4j - slf4j-api - 1.7.25 - + + org.slf4j + slf4j-api + 2.0.13 + - - org.slf4j - slf4j-simple - 1.7.25 - + + org.slf4j + slf4j-simple + 2.0.13 + - - org.apache.pdfbox - pdfbox - 2.0.7 - + + org.apache.pdfbox + pdfbox + 3.0.4 + - - org.bouncycastle - bcprov-jdk15on - 1.56 - + + org.bouncycastle + bcprov-jdk18on + 1.80 + - - org.bouncycastle - bcmail-jdk15on - 1.56 - + + org.bouncycastle + bcmail-jdk18on + 1.80 + - - 
junit - junit - 4.11 - test - + + junit + junit + 4.13.2 + test + - - commons-cli - commons-cli - 1.4 - + + commons-cli + commons-cli + 1.8.0 + - - org.apache.commons - commons-csv - 1.4 - + + org.apache.commons + commons-csv + 1.11.0 + - - com.google.code.gson - gson - 2.8.0 - + + com.google.code.gson + gson + 2.11.0 + - - com.github.jai-imageio - jai-imageio-core - 1.3.1 - + + com.github.jai-imageio + jai-imageio-core + 1.4.0 + - - com.github.jai-imageio - jai-imageio-jpeg2000 - 1.3.0 - + + com.github.jai-imageio + jai-imageio-jpeg2000 + 1.4.0 + - - com.levigo.jbig2 - levigo-jbig2-imageio - 2.0 - - + + org.apache.pdfbox + jbig2-imageio + 3.0.4 + + diff --git a/src/main/java/technology/tabula/Cell.java b/src/main/java/technology/tabula/Cell.java index b7e568db..d02c8c50 100644 --- a/src/main/java/technology/tabula/Cell.java +++ b/src/main/java/technology/tabula/Cell.java @@ -1,75 +1,62 @@ package technology.tabula; import java.awt.geom.Point2D; -import java.util.ArrayList; import java.util.Collections; -import java.util.List; @SuppressWarnings("serial") public class Cell extends RectangularTextContainer { - private boolean spanning; - private boolean placeholder; - private List textElements; - - public Cell(float top, float left, float width, float height) { - super(top, left, width, height); - this.setPlaceholder(false); - this.setSpanning(false); - this.setTextElements(new ArrayList()); - } - - public Cell(Point2D topLeft, Point2D bottomRight) { - super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY())); - this.setPlaceholder(false); - this.setSpanning(false); - this.setTextElements(new ArrayList()); - } - - @Override - public String getText(boolean useLineReturns) { - if (this.textElements.size() == 0) { - return ""; - } - StringBuilder sb = new StringBuilder(); - Collections.sort(this.textElements); - double curTop = this.textElements.get(0).getTop(); - for (TextChunk 
tc: this.textElements) { - if (useLineReturns && tc.getTop() > curTop) { - sb.append('\r'); - } - sb.append(tc.getText()); - curTop = tc.getTop(); - } - return sb.toString().trim(); - } - - public String getText() { - return getText(true); - } - - public boolean isSpanning() { - return spanning; - } - - public void setSpanning(boolean spanning) { - this.spanning = spanning; - } - - public boolean isPlaceholder() { - return placeholder; - } - - public void setPlaceholder(boolean placeholder) { - this.placeholder = placeholder; - } - - - public List getTextElements() { - return textElements; - } - - public void setTextElements(List textElements) { - this.textElements = textElements; - } + public Cell(float top, float left, float width, float height) { + super(top, left, width, height); + this.setPlaceholder(false); + this.setSpanning(false); + } + + public Cell(Point2D topLeft, Point2D bottomRight) { + super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY())); + this.setPlaceholder(false); + this.setSpanning(false); + } + + private boolean spanning; + private boolean placeholder; + + @Override + public String getText(boolean useLineReturns) { + if (this.textElements.size() == 0) { + return ""; + } + StringBuilder sb = new StringBuilder(); + this.textElements.sort(Rectangle.ILL_DEFINED_ORDER); + double curTop = this.textElements.get(0).getTop(); + for (TextChunk tc : this.textElements) { + if (useLineReturns && tc.getTop() > curTop) { + sb.append('\r'); + } + sb.append(tc.getText()); + curTop = tc.getTop(); + } + return sb.toString().trim(); + } + + @Override + public String getText() { + return getText(true); + } + + public boolean isSpanning() { + return spanning; + } + + public void setSpanning(boolean spanning) { + this.spanning = spanning; + } + + public boolean isPlaceholder() { + return placeholder; + } + + public void setPlaceholder(boolean placeholder) { + 
this.placeholder = placeholder; + } } diff --git a/src/main/java/technology/tabula/CohenSutherlandClipping.java b/src/main/java/technology/tabula/CohenSutherlandClipping.java index 5e170ad8..db9153e9 100644 --- a/src/main/java/technology/tabula/CohenSutherlandClipping.java +++ b/src/main/java/technology/tabula/CohenSutherlandClipping.java @@ -18,122 +18,124 @@ * Implements the well known Cohen Sutherland line * clipping algorithm (line against clip rectangle). */ -public final class CohenSutherlandClipping -{ +public final class CohenSutherlandClipping { + private double xMin; private double yMin; private double xMax; private double yMax; + private static final int INSIDE = 0; + private static final int LEFT = 1; + private static final int RIGHT = 2; + private static final int BOTTOM = 4; + private static final int TOP = 8; + + private final static float MINIMUM_DELTA = 0.01f; + /** - * Creates a Cohen Sutherland clipper with clip rect (0, 0, 0, 0). + * Creates a Cohen Sutherland clipper with clip window (0, 0, 0, 0). */ - public CohenSutherlandClipping() { - } + public CohenSutherlandClipping() {} /** - * Creates a Cohen Sutherland clipper with the given clip rectangle. - * @param clip the clip rectangle to use + * Creates a Cohen Sutherland clipper with the given clip window. + * @param clipWindow the clip window to use. */ - public CohenSutherlandClipping(Rectangle2D clip) { - setClip(clip); + public CohenSutherlandClipping(Rectangle2D clipWindow) { + setClip(clipWindow); } /** * Sets the clip rectangle. - * @param clip the clip rectangle + * @param clipWindow the clip window. 
*/ - public void setClip(Rectangle2D clip) { - xMin = clip.getX(); - xMax = xMin + clip.getWidth(); - yMin = clip.getY(); - yMax = yMin + clip.getHeight(); - } - - private static final int INSIDE = 0; - private static final int LEFT = 1; - private static final int RIGHT = 2; - private static final int BOTTOM = 4; - private static final int TOP = 8; - - private final int regionCode(double x, double y) { - int code = x < xMin - ? LEFT - : x > xMax - ? RIGHT - : INSIDE; - if (y < yMin) code |= BOTTOM; - else if (y > yMax) code |= TOP; - return code; + public void setClip(Rectangle2D clipWindow) { + xMin = clipWindow.getX(); + xMax = xMin + clipWindow.getWidth(); + yMin = clipWindow.getY(); + yMax = yMin + clipWindow.getHeight(); } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // /** - * Clips a given line against the clip rectangle. + * Clips a given line against the clip window. * The modification (if needed) is done in place. - * @param line the line to clip + * @param line the line to clip. * @return true if line is clipped, false if line is - * totally outside the clip rect. + * totally outside the clip window. */ public boolean clip(Line2D.Float line) { + Point point1 = new Point(line.getX1(), line.getY1()); + Point point2 = new Point(line.getX2(), line.getY2()); + Point outsidePoint = new Point(0d, 0d); - double p1x = line.getX1(); - double p1y = line.getY1(); - double p2x = line.getX2(); - double p2y = line.getY2(); + boolean lineIsVertical = (point1.x == point2.x); + double lineSlope = lineIsVertical ? 0d : (point2.y-point1.y)/(point2.x-point1.x); - double qx = 0d; - double qy = 0d; + while (point1.region != INSIDE || point2.region != INSIDE) { + if ((point1.region & point2.region) != 0) return false; - boolean vertical = p1x == p2x; + outsidePoint.region = (point1.region == INSIDE) ? point2.region : point1.region; - double slope = vertical - ? 
0d - : (p2y-p1y)/(p2x-p1x); - - int c1 = regionCode(p1x, p1y); - int c2 = regionCode(p2x, p2y); - - while (c1 != INSIDE || c2 != INSIDE) { - - if ((c1 & c2) != INSIDE) - return false; - - int c = c1 == INSIDE ? c2 : c1; - - if ((c & LEFT) != INSIDE) { - qx = xMin; - qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y; + if ((outsidePoint.region & LEFT) != 0) { + outsidePoint.x = xMin; + outsidePoint.y = delta(outsidePoint.x, point1.x)*lineSlope + point1.y; } - else if ((c & RIGHT) != INSIDE) { - qx = xMax; - qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y; + else if ((outsidePoint.region & RIGHT) != 0) { + outsidePoint.x = xMax; + outsidePoint.y = delta(outsidePoint.x, point1.x)*lineSlope + point1.y; } - else if ((c & BOTTOM) != INSIDE) { - qy = yMin; - qx = vertical - ? p1x - : (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x; + else if ((outsidePoint.region & BOTTOM) != 0) { + outsidePoint.y = yMin; + outsidePoint.x = lineIsVertical + ? point1.x + : delta(outsidePoint.y, point1.y)/lineSlope + point1.x; } - else if ((c & TOP) != INSIDE) { - qy = yMax; - qx = vertical - ? p1x - : (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x; + else if ((outsidePoint.region & TOP) != 0) { + outsidePoint.y = yMax; + outsidePoint.x = lineIsVertical + ? point1.x + : delta(outsidePoint.y, point1.y)/lineSlope + point1.x; } - if (c == c1) { - p1x = qx; - p1y = qy; - c1 = regionCode(p1x, p1y); + if (outsidePoint.isInTheSameRegionAs(point1)) { + point1.setPositionAndRegion(outsidePoint.x, outsidePoint.y); } else { - p2x = qx; - p2y = qy; - c2 = regionCode(p2x, p2y); + point2.setPositionAndRegion(outsidePoint.x, outsidePoint.y); } } - line.setLine(p1x, p1y, p2x, p2y); + line.setLine(point1.x, point1.y, point2.x, point2.y); return true; } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + private static double delta(double value1, double value2) { + return (Math.abs(value1 - value2) < MINIMUM_DELTA) ? 
0 : (value1 - value2); + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + class Point { + double x, y; + int region; + + Point(double x, double y) { + setPositionAndRegion(x, y); + } + + void setPositionAndRegion(double x, double y) { + this.x = x; this.y = y; + region = (x < xMin) ? LEFT : (x > xMax) ? RIGHT : INSIDE; + if (y < yMin) + region |= BOTTOM; + else if (y > yMax) + region |= TOP; + } + + boolean isInTheSameRegionAs(Point otherPoint) { + return this.region == otherPoint.region; + } + } + } -// end of file \ No newline at end of file diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 21df07b5..1b422303 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -15,6 +15,7 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.DefaultParser; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import technology.tabula.detectors.DetectionAlgorithm; @@ -29,12 +30,17 @@ public class CommandLineApp { - private static String VERSION = "1.0.2"; - private static String VERSION_STRING = String.format("tabula %s (c) 2012-2017 Manuel Aristarán", VERSION); + private static String VERSION = "1.0.6-SNAPSHOT"; + private static String VERSION_STRING = String.format("tabula %s (c) 2012-2020 Manuel Aristarán", VERSION); private static String BANNER = "\nTabula helps you extract tables from PDFs\n\n"; + private static final int RELATIVE_AREA_CALCULATION_MODE = 0; + private static final int ABSOLUTE_AREA_CALCULATION_MODE = 1; + + private Appendable defaultOutput; - private Rectangle pageArea; + + private List> pageAreas; private List pages; private OutputFormat outputFormat; private String password; @@ -42,7 +48,7 @@ public class CommandLineApp { public CommandLineApp(Appendable defaultOutput, CommandLine line) 
throws ParseException { this.defaultOutput = defaultOutput; - this.pageArea = CommandLineApp.whichArea(line); + this.pageAreas = CommandLineApp.whichAreas(line); this.pages = CommandLineApp.whichPages(line); this.outputFormat = CommandLineApp.whichOutputFormat(line); this.tableExtractor = CommandLineApp.createExtractor(line); @@ -109,13 +115,17 @@ public boolean accept(File dir, String name) { }); for (File pdfFile : pdfs) { - File outputFile = new File(getOutputFilename(pdfFile)); + File outputFile = new File(getOutputFilename(pdfFile)); + try { extractFileInto(pdfFile, outputFile); + } catch (ParseException e) { + System.err.println("Caught exception while processing file: " + pdfFile.toString()); + throw e; + } } } public void extractFileTables(CommandLine line, File pdfFile) throws ParseException { - Appendable outFile = this.defaultOutput; if (!line.hasOption('o')) { extractFile(pdfFile, this.defaultOutput); return; @@ -149,18 +159,32 @@ public void extractFileInto(File pdfFile, File outputFile) throws ParseException private void extractFile(File pdfFile, Appendable outFile) throws ParseException { PDDocument pdfDocument = null; try { - pdfDocument = this.password == null ? PDDocument.load(pdfFile) : PDDocument.load(pdfFile, this.password); + pdfDocument = this.password == null ? Loader.loadPDF(pdfFile) : Loader.loadPDF(pdfFile,password); PageIterator pageIterator = getPageIterator(pdfDocument); - List
tables = new ArrayList
(); + List
tables = new ArrayList<>(); while (pageIterator.hasNext()) { Page page = pageIterator.next(); - if (pageArea != null) { - page = page.getArea(pageArea); + if (tableExtractor.verticalRulingPositions != null) { + for (Float verticalRulingPosition : tableExtractor.verticalRulingPositions) { + page.addRuling(new Ruling(0, verticalRulingPosition, 0.0f, (float) page.getHeight())); + } } - tables.addAll(tableExtractor.extractTables(page)); + if (pageAreas != null) { + for (Pair areaPair : pageAreas) { + Rectangle area = areaPair.getRight(); + if (areaPair.getLeft() == RELATIVE_AREA_CALCULATION_MODE) { + area = new Rectangle((float) (area.getTop() / 100 * page.getHeight()), + (float) (area.getLeft() / 100 * page.getWidth()), (float) (area.getWidth() / 100 * page.getWidth()), + (float) (area.getHeight() / 100 * page.getHeight())); + } + tables.addAll(tableExtractor.extractTables(page.getArea(area))); + } + } else { + tables.addAll(tableExtractor.extractTables(page)); + } } writeTables(tables, outFile); } catch (IOException e) { @@ -200,16 +224,28 @@ private static OutputFormat whichOutputFormat(CommandLine line) throws ParseExce } } - private static Rectangle whichArea(CommandLine line) throws ParseException { + private static List> whichAreas(CommandLine line) throws ParseException { if (!line.hasOption('a')) { return null; } - List f = parseFloatList(line.getOptionValue('a')); - if (f.size() != 4) { - throw new ParseException("area parameters must be top,left,bottom,right"); + String[] optionValues = line.getOptionValues('a'); + + List> areaList = new ArrayList>(); + for (String optionValue : optionValues) { + int areaCalculationMode = ABSOLUTE_AREA_CALCULATION_MODE; + int startIndex = 0; + if (optionValue.startsWith("%")) { + startIndex = 1; + areaCalculationMode = RELATIVE_AREA_CALCULATION_MODE; + } + List f = parseFloatList(optionValue.substring(startIndex)); + if (f.size() != 4) { + throw new ParseException("area parameters must be top,left,bottom,right optionally 
preceded by %"); + } + areaList.add(new Pair(areaCalculationMode, new Rectangle(f.get(0), f.get(1), f.get(3) - f.get(1), f.get(2) - f.get(0)))); } - return new Rectangle(f.get(0), f.get(1), f.get(3) - f.get(1), f.get(2) - f.get(0)); + return areaList; } private static List whichPages(CommandLine line) throws ParseException { @@ -224,7 +260,7 @@ private static ExtractionMethod whichExtractionMethod(CommandLine line) { } // -n/--no-spreadsheet [deprecated; use -t] or -c/--columns or -g/--guess or -t/--stream - if (line.hasOption('n') || line.hasOption('c') || line.hasOption('g') || line.hasOption('t')) { + if (line.hasOption('n') || line.hasOption('c') || line.hasOption('t')) { return ExtractionMethod.BASIC; } return ExtractionMethod.DECIDE; @@ -237,8 +273,14 @@ private static TableExtractor createExtractor(CommandLine line) throws ParseExce extractor.setUseLineReturns(line.hasOption('u')); if (line.hasOption('c')) { - extractor.setVerticalRulingPositions(parseFloatList(line.getOptionValue('c'))); + String optionString = line.getOptionValue('c'); + if (optionString.startsWith("%")) { + extractor.setVerticalRulingPositionsRelative(true); + optionString = optionString.substring(1); + } + extractor.setVerticalRulingPositions(parseFloatList(optionString)); } + return extractor; } @@ -246,10 +288,10 @@ private static TableExtractor createExtractor(CommandLine line) throws ParseExce public static List parseFloatList(String option) throws ParseException { String[] f = option.split(","); - List rv = new ArrayList(); + List rv = new ArrayList<>(); try { - for (int i = 0; i < f.length; i++) { - rv.add(Float.parseFloat(f[i])); + for (final String element : f) { + rv.add(Float.parseFloat(element)); } return rv; } catch (NumberFormatException e) { @@ -262,7 +304,6 @@ private static void printHelp() { formatter.printHelp("tabula", BANNER, buildOptions(), "", true); } - @SuppressWarnings("static-access") public static Options buildOptions() { Options o = new Options(); @@ -275,7 
+316,7 @@ public static Options buildOptions() { o.addOption("t", "stream", false, "Force PDF to be extracted using stream-mode extraction (if there are no ruling lines separating each cell)"); o.addOption("i", "silent", false, "Suppress all stderr output."); o.addOption("u", "use-line-returns", false, "Use embedded line returns in cells. (Only in spreadsheet mode.)"); - o.addOption("d", "debug", false, "Print detected table areas instead of processing."); + // o.addOption("d", "debug", false, "Print detected table areas instead of processing."); o.addOption(Option.builder("b") .longOpt("batch") .desc("Convert all .pdfs in the provided directory.") @@ -302,13 +343,18 @@ public static Options buildOptions() { .build()); o.addOption(Option.builder("c") .longOpt("columns") - .desc("X coordinates of column boundaries. Example --columns 10.1,20.2,30.3") + .desc("X coordinates of column boundaries. Example --columns 10.1,20.2,30.3. " + + "If all values are between 0-100 (inclusive) and preceded by '%', input will be taken as % of actual width of the page. " + + "Example: --columns %25,50,80.6") .hasArg() .argName("COLUMNS") .build()); o.addOption(Option.builder("a") .longOpt("area") - .desc("Portion of the page to analyze (top,left,bottom,right). Example: --area 269.875,12.75,790.5,561. Default is entire page") + .desc("-a/--area = Portion of the page to analyze. Example: --area 269.875,12.75,790.5,561. " + + "Accepts top,left,bottom,right i.e. y1,x1,y2,x2 where all values are in points relative to the top left corner. " + + "If all values are between 0-100 (inclusive) and preceded by '%', input will be taken as % of actual height or width of the page. " + + "Example: --area %0,0,100,50. To specify multiple areas, -a option should be repeated. 
Default is entire page") .hasArg() .argName("AREA") .build()); @@ -327,7 +373,10 @@ private static class TableExtractor { private boolean useLineReturns = false; private BasicExtractionAlgorithm basicExtractor = new BasicExtractionAlgorithm(); private SpreadsheetExtractionAlgorithm spreadsheetExtractor = new SpreadsheetExtractionAlgorithm(); + + private boolean verticalRulingPositionsRelative = false; private List verticalRulingPositions = null; + private ExtractionMethod method = ExtractionMethod.BASIC; public TableExtractor() { @@ -337,6 +386,10 @@ public void setVerticalRulingPositions(List positions) { this.verticalRulingPositions = positions; } + public void setVerticalRulingPositionsRelative(boolean relative) { + this.verticalRulingPositionsRelative = relative; + } + public void setGuess(boolean guess) { this.guess = guess; } @@ -362,7 +415,7 @@ public List
extractTables(Page page) { case SPREADSHEET: return extractTablesSpreadsheet(page); default: - return new ArrayList
(); + return new ArrayList<>(); } } @@ -372,7 +425,7 @@ public List
extractTablesBasic(Page page) { // currently we only have a detector that uses spreadsheets to find table areas DetectionAlgorithm detector = new NurminenDetectionAlgorithm(); List guesses = detector.detect(page); - List
tables = new ArrayList
(); + List
tables = new ArrayList<>(); for (Rectangle guessRect : guesses) { Page guess = page.getArea(guessRect); @@ -382,14 +435,27 @@ public List
extractTablesBasic(Page page) { } if (verticalRulingPositions != null) { - return basicExtractor.extract(page, verticalRulingPositions); + List absoluteRulingPositions; + + if (this.verticalRulingPositionsRelative) { + // convert relative to absolute + absoluteRulingPositions = new ArrayList<>(verticalRulingPositions.size()); + for (float relative : this.verticalRulingPositions) { + float absolute = (float) (relative / 100.0 * page.getWidth()); + absoluteRulingPositions.add(absolute); + } + } else { + absoluteRulingPositions = this.verticalRulingPositions; + } + return basicExtractor.extract(page, absoluteRulingPositions); } + return basicExtractor.extract(page); } public List
extractTablesSpreadsheet(Page page) { // TODO add useLineReturns - return (List
) spreadsheetExtractor.extract(page); + return spreadsheetExtractor.extract(page); } } diff --git a/src/main/java/technology/tabula/HasText.java b/src/main/java/technology/tabula/HasText.java index 6f375dbc..1a9bda99 100644 --- a/src/main/java/technology/tabula/HasText.java +++ b/src/main/java/technology/tabula/HasText.java @@ -1,7 +1,8 @@ package technology.tabula; public interface HasText { - - String getText(); + + String getText(); + String getText(boolean useLineReturns); } diff --git a/src/main/java/technology/tabula/Line.java b/src/main/java/technology/tabula/Line.java index ed2f6895..31d10529 100644 --- a/src/main/java/technology/tabula/Line.java +++ b/src/main/java/technology/tabula/Line.java @@ -8,7 +8,7 @@ @SuppressWarnings("serial") public class Line extends Rectangle { - List textChunks = new ArrayList(); + List textChunks = new ArrayList<>(); public static final Character[] WHITE_SPACE_CHARS = { ' ', '\t', '\r', '\n', '\f' }; @@ -52,7 +52,7 @@ public void addTextChunk(TextChunk textChunk) { public String toString() { StringBuilder sb = new StringBuilder(); String s = super.toString(); - sb.append(s.substring(0, s.length() - 1)); + sb.append(s, 0, s.length() - 1); sb.append(",chunks="); for (TextChunk te: this.textChunks) { sb.append("'" + te.getText() + "', "); diff --git a/src/main/java/technology/tabula/ObjectExtractor.java b/src/main/java/technology/tabula/ObjectExtractor.java index 2b97a5a8..9f3f6a03 100644 --- a/src/main/java/technology/tabula/ObjectExtractor.java +++ b/src/main/java/technology/tabula/ObjectExtractor.java @@ -5,63 +5,69 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; -public class ObjectExtractor { +public class ObjectExtractor implements java.io.Closeable { private final PDDocument pdfDocument; - public ObjectExtractor(PDDocument pdfDocument) throws IOException { + public ObjectExtractor(PDDocument pdfDocument) { this.pdfDocument = pdfDocument; } + // - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - // protected Page extractPage(Integer pageNumber) throws IOException { - - if (pageNumber > this.pdfDocument.getNumberOfPages() || pageNumber < 1) { - throw new java.lang.IndexOutOfBoundsException( - "Page number does not exist"); + if (pageNumber > pdfDocument.getNumberOfPages() || pageNumber < 1) { + throw new java.lang.IndexOutOfBoundsException("Page number does not exist."); } + PDPage page = pdfDocument.getPage(pageNumber - 1); - PDPage p = this.pdfDocument.getPage(pageNumber - 1); - - ObjectExtractorStreamEngine se = new ObjectExtractorStreamEngine(p); - se.processPage(p); - - - TextStripper pdfTextStripper = new TextStripper(this.pdfDocument, pageNumber); + ObjectExtractorStreamEngine streamEngine = new ObjectExtractorStreamEngine(page); + streamEngine.processPage(page); - pdfTextStripper.process(); + TextStripper textStripper = new TextStripper(pdfDocument, pageNumber); + textStripper.process(); - Utils.sort(pdfTextStripper.textElements); + Utils.sort(textStripper.getTextElements(), Rectangle.ILL_DEFINED_ORDER); - float w, h; - int pageRotation = p.getRotation(); - if (Math.abs(pageRotation) == 90 || Math.abs(pageRotation) == 270) { - w = p.getCropBox().getHeight(); - h = p.getCropBox().getWidth(); + float width, height; + int rotation = page.getRotation(); + if (Math.abs(rotation) == 90 || Math.abs(rotation) == 270) { + width = page.getCropBox().getHeight(); + height = page.getCropBox().getWidth(); } else { - w = p.getCropBox().getWidth(); - h = p.getCropBox().getHeight(); + width = page.getCropBox().getWidth(); + height = page.getCropBox().getHeight(); } - return new Page(0, 0, w, h, pageRotation, pageNumber, p, pdfTextStripper.textElements, - se.rulings, pdfTextStripper.minCharWidth, pdfTextStripper.minCharHeight, pdfTextStripper.spatialIndex); + return Page.Builder.newInstance() + .withPageDims(PageDims.of(0, 0, width, height)) + .withRotation(rotation) + .withNumber(pageNumber) + .withPdPage(page) + 
.withPdDocument(pdfDocument) + .withRulings(streamEngine.rulings) + .withTextElements(textStripper.getTextElements()) + .withMinCharWidth(textStripper.getMinCharWidth()) + .withMinCharHeight(textStripper.getMinCharHeight()) + .withIndex(textStripper.getSpatialIndex()) + .build(); } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // public PageIterator extract(Iterable pages) { return new PageIterator(this, pages); } public PageIterator extract() { - return extract(Utils.range(1, this.pdfDocument.getNumberOfPages() + 1)); + return extract(Utils.range(1, pdfDocument.getNumberOfPages() + 1)); } public Page extract(int pageNumber) { return extract(Utils.range(pageNumber, pageNumber + 1)).next(); } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // public void close() throws IOException { - this.pdfDocument.close(); + pdfDocument.close(); } - - - + } diff --git a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java index 1538cfa6..9907eca1 100644 --- a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java +++ b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java @@ -7,7 +7,6 @@ import java.awt.geom.PathIterator; import java.awt.geom.Point2D; import java.awt.geom.Rectangle2D; -import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; import java.util.List; @@ -17,85 +16,77 @@ import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.graphics.image.PDImage; -import org.apache.pdfbox.util.Matrix; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -class ObjectExtractorStreamEngine extends PDFGraphicsStreamEngine { +import static java.awt.geom.PathIterator.*; - private static final String NBSP = "\u00A0"; +class ObjectExtractorStreamEngine extends PDFGraphicsStreamEngine { protected List rulings; private 
AffineTransform pageTransform; - private boolean debugClippingPaths; private boolean extractRulingLines = true; - private Logger log; + private Logger logger; private int clipWindingRule = -1; private GeneralPath currentPath = new GeneralPath(); - public List clippingPaths; - private Matrix translateMatrix; + private static final float RULING_MINIMUM_LENGTH = 0.01f; protected ObjectExtractorStreamEngine(PDPage page) { super(page); + logger = LoggerFactory.getLogger(ObjectExtractorStreamEngine.class); + rulings = new ArrayList<>(); - this.log = LoggerFactory.getLogger(ObjectExtractorStreamEngine.class); - - this.rulings = new ArrayList(); - this.pageTransform = null; + // Calculate page transform: + pageTransform = new AffineTransform(); + PDRectangle pageCropBox = getPage().getCropBox(); + int rotationAngleInDegrees = getPage().getRotation(); - // calculate page transform - PDRectangle cb = this.getPage().getCropBox(); - int rotation = this.getPage().getRotation(); - - this.pageTransform = new AffineTransform(); - - if (Math.abs(rotation) == 90 || Math.abs(rotation) == 270) { - this.pageTransform = AffineTransform.getRotateInstance(rotation * (Math.PI / 180.0), 0, 0); - this.pageTransform.concatenate(AffineTransform.getScaleInstance(1, -1)); + if (Math.abs(rotationAngleInDegrees) == 90 || Math.abs(rotationAngleInDegrees) == 270) { + double rotationAngleInRadians = rotationAngleInDegrees * (Math.PI / 180.0); + pageTransform = AffineTransform.getRotateInstance(rotationAngleInRadians, 0, 0); } else { - this.pageTransform.concatenate(AffineTransform.getTranslateInstance(0, cb.getHeight())); - this.pageTransform.concatenate(AffineTransform.getScaleInstance(1, -1)); + double deltaX = 0; + double deltaY = pageCropBox.getHeight(); + pageTransform.concatenate(AffineTransform.getTranslateInstance(deltaX, deltaY)); } - this.pageTransform.translate(-cb.getLowerLeftX(), -cb.getLowerLeftY()); + pageTransform.concatenate(AffineTransform.getScaleInstance(1, -1)); + 
pageTransform.translate(-pageCropBox.getLowerLeftX(), -pageCropBox.getLowerLeftY()); } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // @Override - public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException { + public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) { currentPath.moveTo((float) p0.getX(), (float) p0.getY()); currentPath.lineTo((float) p1.getX(), (float) p1.getY()); currentPath.lineTo((float) p2.getX(), (float) p2.getY()); currentPath.lineTo((float) p3.getX(), (float) p3.getY()); - currentPath.closePath(); } @Override - public void clip(int windingRule) throws IOException { - // the clipping path will not be updated until the succeeding painting - // operator is called + public void clip(int windingRule) { + // The clipping path will not be updated until the succeeding painting + // operator is called. clipWindingRule = windingRule; } @Override - public void closePath() throws IOException { + public void closePath() { currentPath.closePath(); } @Override - public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException { + public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) { currentPath.curveTo(x1, y1, x2, y2, x3, y3); } @Override - public void drawImage(PDImage arg0) throws IOException { - // TODO Auto-generated method stub - - } + public void drawImage(PDImage arg0) {} @Override - public void endPath() throws IOException { + public void endPath() { if (clipWindingRule != -1) { currentPath.setWindingRule(clipWindingRule); getGraphicsState().intersectClippingPath(currentPath); @@ -105,170 +96,176 @@ public void endPath() throws IOException { } @Override - public void fillAndStrokePath(int arg0) throws IOException { + public void fillAndStrokePath(int arg0) { strokeOrFillPath(true); } @Override - public void fillPath(int arg0) throws IOException { + public void fillPath(int arg0) { 
strokeOrFillPath(true); } @Override - public Point2D getCurrentPoint() throws IOException { + public Point2D getCurrentPoint() { return currentPath.getCurrentPoint(); } @Override - public void lineTo(float x, float y) throws IOException { + public void lineTo(float x, float y) { currentPath.lineTo(x, y); } @Override - public void moveTo(float x, float y) throws IOException { + public void moveTo(float x, float y) { currentPath.moveTo(x, y); } @Override - public void shadingFill(COSName arg0) throws IOException { - // TODO Auto-generated method stub - - } + public void shadingFill(COSName arg0) {} + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // @Override - public void strokePath() throws IOException { + public void strokePath() { strokeOrFillPath(false); } private void strokeOrFillPath(boolean isFill) { - GeneralPath path = this.currentPath; - - if (!this.extractRulingLines) { - this.currentPath.reset(); + if (!extractRulingLines) { + currentPath.reset(); return; } - PathIterator pi = path.getPathIterator(this.getPageTransform()); - float[] c = new float[6]; - int currentSegment; - - // skip paths whose first operation is not a MOVETO - // or contains operations other than LINETO, MOVETO or CLOSE - if ((pi.currentSegment(c) != PathIterator.SEG_MOVETO)) { - path.reset(); - return; - } - pi.next(); - while (!pi.isDone()) { - currentSegment = pi.currentSegment(c); - if (currentSegment != PathIterator.SEG_LINETO && currentSegment != PathIterator.SEG_CLOSE - && currentSegment != PathIterator.SEG_MOVETO) { - path.reset(); - return; - } - pi.next(); - } + boolean didNotPassedTheFilter = filterPathBySegmentType(); + if (didNotPassedTheFilter) return; // TODO: how to implement color filter? 
- // skip the first path operation and save it as the starting position - float[] first = new float[6]; - pi = path.getPathIterator(this.getPageTransform()); - pi.currentSegment(first); - // last move - Point2D.Float start_pos = new Point2D.Float(Utils.round(first[0], 2), Utils.round(first[1], 2)); - Point2D.Float last_move = start_pos; - Point2D.Float end_pos = null; + // Skip the first path operation and save it as the starting point. + PathIterator pathIterator = currentPath.getPathIterator(getPageTransform()); + + float[] coordinates = new float[6]; + int currentSegment; + + Point2D.Float startPoint = getStartPoint(pathIterator); + Point2D.Float last_move = startPoint; + Point2D.Float endPoint = null; Line2D.Float line; - PointComparator pc = new PointComparator(); - while (!pi.isDone()) { - pi.next(); - // This can be the last segment, when pi.isDone, but we need to - // process it - // otherwise us-017.pdf fails the last value. + PointComparator pointComparator = new PointComparator(); + + while (!pathIterator.isDone()) { + pathIterator.next(); + // This can be the last segment, when pathIterator.isDone, but we need to + // process it otherwise us-017.pdf fails the last value. try { - currentSegment = pi.currentSegment(c); + currentSegment = pathIterator.currentSegment(coordinates); } catch (IndexOutOfBoundsException ex) { continue; } switch (currentSegment) { - case PathIterator.SEG_LINETO: - end_pos = new Point2D.Float(c[0], c[1]); - - line = pc.compare(start_pos, end_pos) == -1 ? 
new Line2D.Float(start_pos, end_pos) - : new Line2D.Float(end_pos, start_pos); - - if (line.intersects(this.currentClippingPath())) { - Ruling r = new Ruling(line.getP1(), line.getP2()).intersect(this.currentClippingPath()); - - if (r.length() > 0.01) { - this.rulings.add(r); - } + case SEG_LINETO: + endPoint = new Point2D.Float(coordinates[0], coordinates[1]); + if (startPoint == null || endPoint == null) { + break; } + line = getLineBetween(startPoint, endPoint, pointComparator); + verifyLineIntersectsClipping(line); break; - case PathIterator.SEG_MOVETO: - last_move = new Point2D.Float(c[0], c[1]); - end_pos = last_move; + case SEG_MOVETO: + last_move = new Point2D.Float(coordinates[0], coordinates[1]); + endPoint = last_move; break; - case PathIterator.SEG_CLOSE: - // according to PathIterator docs: - // "the preceding subpath should be closed by appending a line - // segment - // back to the point corresponding to the most recent + case SEG_CLOSE: + // According to PathIterator docs: + // "The preceding sub-path should be closed by appending a line + // segment back to the point corresponding to the most recent // SEG_MOVETO." - line = pc.compare(end_pos, last_move) == -1 ? 
new Line2D.Float(end_pos, last_move) - : new Line2D.Float(last_move, end_pos); - - if (line.intersects(this.currentClippingPath())) { - Ruling r = new Ruling(line.getP1(), line.getP2()).intersect(this.currentClippingPath()); - - if (r.length() > 0.01) { - this.rulings.add(r); - } + if (startPoint == null || endPoint == null) { + break; } + line = getLineBetween(endPoint, last_move, pointComparator); + verifyLineIntersectsClipping(line); break; } - start_pos = end_pos; + startPoint = endPoint; } - path.reset(); + currentPath.reset(); } - public AffineTransform getPageTransform() { - return this.pageTransform; + private boolean filterPathBySegmentType() { + PathIterator pathIterator = currentPath.getPathIterator(pageTransform); + float[] coordinates = new float[6]; + int currentSegmentType = pathIterator.currentSegment(coordinates); + if (currentSegmentType != SEG_MOVETO) { + currentPath.reset(); + return true; + } + pathIterator.next(); + while (!pathIterator.isDone()) { + currentSegmentType = pathIterator.currentSegment(coordinates); + if (currentSegmentType != SEG_LINETO && currentSegmentType != SEG_CLOSE && currentSegmentType != SEG_MOVETO) { + currentPath.reset(); + return true; + } + pathIterator.next(); + } + return false; } - public Rectangle2D currentClippingPath() { - Shape clippingPath = this.getGraphicsState().getCurrentClippingPath(); - Shape transformedClippingPath = this.getPageTransform().createTransformedShape(clippingPath); + private Point2D.Float getStartPoint(PathIterator pathIterator) { + float[] startPointCoordinates = new float[6]; + pathIterator.currentSegment(startPointCoordinates); + float x = Utils.round(startPointCoordinates[0], 2); + float y = Utils.round(startPointCoordinates[1], 2); + return new Point2D.Float(x, y); + } - return transformedClippingPath.getBounds2D(); + private Line2D.Float getLineBetween(Point2D.Float pointA, Point2D.Float pointB, PointComparator pointComparator) { + if (pointComparator.compare(pointA, pointB) == -1) { 
+ return new Line2D.Float(pointA, pointB); + } + return new Line2D.Float(pointB, pointA); + } + + private void verifyLineIntersectsClipping(Line2D.Float line) { + Rectangle2D currentClippingPath = currentClippingPath(); + if (line.intersects(currentClippingPath)) { + Ruling ruling = new Ruling(line.getP1(), line.getP2()).intersect(currentClippingPath); + if (ruling.length() > RULING_MINIMUM_LENGTH) { + rulings.add(ruling); + } + } } - public boolean isDebugClippingPaths() { - return debugClippingPaths; + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + public AffineTransform getPageTransform() { + return pageTransform; } - public void setDebugClippingPaths(boolean debugClippingPaths) { - this.debugClippingPaths = debugClippingPaths; + public Rectangle2D currentClippingPath() { + Shape currentClippingPath = getGraphicsState().getCurrentClippingPath(); + Shape transformedClippingPath = getPageTransform().createTransformedShape(currentClippingPath); + return transformedClippingPath.getBounds2D(); } + // TODO: repeated in SpreadsheetExtractionAlgorithm. 
class PointComparator implements Comparator { @Override - public int compare(Point2D o1, Point2D o2) { - float o1X = Utils.round(o1.getX(), 2); - float o1Y = Utils.round(o1.getY(), 2); - float o2X = Utils.round(o2.getX(), 2); - float o2Y = Utils.round(o2.getY(), 2); + public int compare(Point2D p1, Point2D p2) { + float p1X = Utils.round(p1.getX(), 2); + float p1Y = Utils.round(p1.getY(), 2); + float p2X = Utils.round(p2.getX(), 2); + float p2Y = Utils.round(p2.getY(), 2); - if (o1Y > o2Y) + if (p1Y > p2Y) return 1; - if (o1Y < o2Y) + if (p1Y < p2Y) return -1; - if (o1X > o2X) + if (p1X > p2X) return 1; - if (o1X < o2X) + if (p1X < p2X) return -1; return 0; } } + } diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index 8177921b..ed74d14a 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -6,132 +6,216 @@ import java.util.Comparator; import java.util.List; +import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; +import static java.lang.Float.compare; +import static java.util.Collections.min; + @SuppressWarnings("serial") // TODO: this class should probably be called "PageArea" or something like that public class Page extends Rectangle { + private int number; private Integer rotation; - private int pageNumber; - private List texts; - private List rulings, cleanRulings = null, verticalRulingLines = null, horizontalRulingLines = null; private float minCharWidth; private float minCharHeight; - private RectangleSpatialIndex spatial_index; + + private List textElements; + + // TODO: Create a class for 'List ' that encapsulates all of these lists and their behaviors? 
+ private List rulings, + cleanRulings = null, + verticalRulingLines = null, + horizontalRulingLines = null; + private PDPage pdPage; + private PDDocument pdDoc; - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage) { - super(top, left, width, height); + private RectangleSpatialIndex spatialIndex; + + private static final float DEFAULT_MIN_CHAR_LENGTH = 7; + + private Page( + PageDims pageDims, + int rotation, + int number, + PDPage pdPage, + PDDocument doc, + List characters, + List rulings, + float minCharWidth, + float minCharHeight, + RectangleSpatialIndex index + ) { + super(pageDims.getTop(), pageDims.getLeft(), pageDims.getWidth(), pageDims.getHeight()); this.rotation = rotation; - this.pageNumber = page_number; + this.number = number; this.pdPage = pdPage; + this.pdDoc = doc; + this.textElements = characters; + this.rulings = rulings; + this.minCharWidth = minCharWidth; + this.minCharHeight = minCharHeight; + this.spatialIndex = index; } - - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, - List characters, List rulings) { - this(top, left, width, height, rotation, page_number, pdPage); - this.texts = characters; - this.rulings = rulings; + /** + * + * @deprecated use {@link Builder} instead + */ + @Deprecated + public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc) { + super(top, left, width, height); + this.rotation = rotation; + this.number = number; + this.pdPage = pdPage; + this.pdDoc = doc; + } + + /** + * + * @deprecated use {@link Builder} instead + */ + public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, + List characters, List rulings) { + this(top, left, width, height, rotation, number, pdPage, doc); + this.textElements = characters; + this.rulings = rulings; } + /** + * + * @deprecated use {@link 
Builder} instead + */ + public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, + ObjectExtractorStreamEngine streamEngine, TextStripper textStripper) { + this(top, left, width, height, rotation, number, pdPage, doc, textStripper.getTextElements(), streamEngine.rulings); + this.minCharWidth = textStripper.getMinCharWidth(); + this.minCharHeight = textStripper.getMinCharHeight(); + this.spatialIndex = textStripper.getSpatialIndex(); + } - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, - List characters, List rulings, - float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { - this(top, left, width, height, rotation, page_number, pdPage, characters, rulings); - this.minCharHeight = minCharHeight; - this.minCharWidth = minCharWidth; - this.spatial_index = index; + + /** + * + * @deprecated use {@link Builder} instead + */ + public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, + List characters, List rulings, + float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { + this(top, left, width, height, rotation, number, pdPage, doc, characters, rulings); + this.minCharHeight = minCharHeight; + this.minCharWidth = minCharWidth; + this.spatialIndex = index; } - + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // public Page getArea(Rectangle area) { - List t = getText(area); - float min_char_width = 7; - float min_char_height = 7; - - if(t.size() > 0){ - min_char_width = Collections.min(t, new Comparator() { - @Override - public int compare(TextElement te1, TextElement te2) { - return java.lang.Float.compare(te1.width, te2.width); - }}).width; - min_char_height = Collections.min(t, new Comparator() { - @Override - public int compare(TextElement te1, TextElement te2) { - return java.lang.Float.compare(te1.height, te2.height); - 
}}).height; - } - Page rv = new Page( - (float) area.getTop(), - (float) area.getLeft(), - (float) area.getWidth(), - (float) area.getHeight(), - rotation, - pageNumber, - pdPage, - t, - Ruling.cropRulingsToArea(getRulings(), area), - min_char_width, - min_char_height, - spatial_index); - - rv.addRuling(new Ruling( - new Point2D.Double(rv.getLeft(), - rv.getTop()), - new Point2D.Double(rv.getRight(), - rv.getTop()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getRight(), - rv.getTop()), - new Point2D.Double(rv.getRight(), - rv.getBottom()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getRight(), - rv.getBottom()), - new Point2D.Double(rv.getLeft(), - rv.getBottom()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getLeft(), - rv.getBottom()), - new Point2D.Double(rv.getLeft(), - rv.getTop()))); - - return rv; - } - - public Page getArea(float top, float left, float bottom, float right) { - Rectangle area = new Rectangle(top, left, right - left, bottom - top); - return this.getArea(area); + List areaTextElements = getText(area); + + float minimumCharWidth = getMinimumCharWidthFrom(areaTextElements); + float minimumCharHeight = getMinimumCharHeightFrom(areaTextElements); + + final Page page = Page.Builder.newInstance() + .withPageDims(PageDims.of(area.getTop(), area.getLeft(), (float) area.getWidth(), (float) area.getHeight())) + .withRotation(rotation) + .withNumber(number) + .withPdPage(pdPage) + .withPdDocument(pdDoc) + .withTextElements(areaTextElements) + .withRulings(Ruling.cropRulingsToArea(getRulings(), area)) + .withMinCharWidth(minimumCharWidth) + .withMinCharHeight(minimumCharHeight) + .withIndex(spatialIndex) + .build(); + + addBorderRulingsTo(page); + + return page; } - - public List getText() { - return texts; + + private float getMinimumCharWidthFrom(List areaTextElements) { + if (!areaTextElements.isEmpty()) { + return min(areaTextElements, (te1, te2) -> compare(te1.width, te2.width)).width; + } + return 
DEFAULT_MIN_CHAR_LENGTH; } - - public List getText(Rectangle area) { - return this.spatial_index.contains(area); + + private float getMinimumCharHeightFrom(List areaTextElements) { + if (!areaTextElements.isEmpty()) { + return min(areaTextElements, (te1, te2) -> compare(te1.height, te2.height)).height; + } + return DEFAULT_MIN_CHAR_LENGTH; } - - public List getText(float top, float left, float bottom, float right) { - return this.getText(new Rectangle(top, left, right - left, bottom - top)); + + private void addBorderRulingsTo(Page page) { + Point2D.Double leftTop = new Point2D.Double(page.getLeft(), page.getTop()), + rightTop = new Point2D.Double(page.getRight(), page.getTop()), + rightBottom = new Point2D.Double(page.getRight(), page.getBottom()), + leftBottom = new Point2D.Double(page.getLeft(), page.getBottom()); + page.addRuling(new Ruling(leftTop, rightTop)); + page.addRuling(new Ruling(rightTop, rightBottom)); + page.addRuling(new Ruling(rightBottom, leftBottom)); + page.addRuling(new Ruling(leftBottom, leftTop)); + } + + public Page getArea(float top, float left, float bottom, float right) { + Rectangle area = new Rectangle(top, left, right - left, bottom - top); + return getArea(area); } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // public Integer getRotation() { return rotation; } public int getPageNumber() { - return pageNumber; + return number; + } + + /** + * @deprecated with no replacement + */ + @Deprecated + public float getMinCharWidth() { + return minCharWidth; + } + + /** + * @deprecated with no replacement + */ + @Deprecated + public float getMinCharHeight() { + return minCharHeight; + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + public List getText() { + return textElements; + } + + public List getText(Rectangle area) { + return spatialIndex.contains(area); + } + + /** + * @deprecated use {@linkplain #getText(Rectangle)} instead + */ + @Deprecated + public List 
getText(float top, float left, float bottom, float right) { + return getText(new Rectangle(top, left, right - left, bottom - top)); } + /** + * @deprecated use {@linkplain #getText()} instead + */ + @Deprecated public List getTexts() { - return texts; + return textElements; } - + /** * Returns the minimum bounding box that contains all the TextElements on this Page */ @@ -139,99 +223,194 @@ public Rectangle getTextBounds() { List texts = this.getText(); if (!texts.isEmpty()) { return Utils.bounds(texts); - } - else { + } else { return new Rectangle(); } - } + /** + * @deprecated with no replacement + */ + @Deprecated + public boolean hasText() { + return textElements.size() > 0; + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // public List getRulings() { - if (this.cleanRulings != null) { - return this.cleanRulings; - } - - if (this.rulings == null || this.rulings.isEmpty()) { - this.verticalRulingLines = new ArrayList(); - this.horizontalRulingLines = new ArrayList(); - return new ArrayList(); - } - - Utils.snapPoints(this.rulings, this.minCharWidth, this.minCharHeight); - - List vrs = new ArrayList(); - for (Ruling vr: this.rulings) { - if (vr.vertical()) { - vrs.add(vr); + if (cleanRulings != null) { + return cleanRulings; + } + + if (rulings == null || rulings.isEmpty()) { + verticalRulingLines = new ArrayList<>(); + horizontalRulingLines = new ArrayList<>(); + return new ArrayList<>(); + } + + // TODO: Move as a static method to the Ruling class? + Utils.snapPoints(rulings, minCharWidth, minCharHeight); + + verticalRulingLines = getCollapsedVerticalRulings(); + horizontalRulingLines = getCollapsedHorizontalRulings(); + + cleanRulings = new ArrayList<>(verticalRulingLines); + cleanRulings.addAll(horizontalRulingLines); + + return cleanRulings; + } + + // TODO: Create a class for 'List ' and encapsulate these behaviors within it? 
+ private List getCollapsedVerticalRulings() { + List verticalRulings = new ArrayList<>(); + for (Ruling ruling : rulings) { + if (ruling.vertical()) { + verticalRulings.add(ruling); } } - this.verticalRulingLines = Ruling.collapseOrientedRulings(vrs); - - List hrs = new ArrayList(); - for (Ruling hr: this.rulings) { - if (hr.horizontal()) { - hrs.add(hr); + return Ruling.collapseOrientedRulings(verticalRulings); + } + + private List getCollapsedHorizontalRulings() { + List horizontalRulings = new ArrayList<>(); + for (Ruling ruling : rulings) { + if (ruling.horizontal()) { + horizontalRulings.add(ruling); } } - this.horizontalRulingLines = Ruling.collapseOrientedRulings(hrs); - - this.cleanRulings = new ArrayList(this.verticalRulingLines); - this.cleanRulings.addAll(this.horizontalRulingLines); - - return this.cleanRulings; - + return Ruling.collapseOrientedRulings(horizontalRulings); } - + public List getVerticalRulings() { - if (this.verticalRulingLines != null) { - return this.verticalRulingLines; + if (verticalRulingLines != null) { + return verticalRulingLines; } - this.getRulings(); - return this.verticalRulingLines; + getRulings(); + return verticalRulingLines; } - + public List getHorizontalRulings() { - if (this.horizontalRulingLines != null) { - return this.horizontalRulingLines; + if (horizontalRulingLines != null) { + return horizontalRulingLines; } - this.getRulings(); - return this.horizontalRulingLines; + getRulings(); + return horizontalRulingLines; } - - public void addRuling(Ruling r) { - if (r.oblique()) { - throw new UnsupportedOperationException("Can't add an oblique ruling"); + + public void addRuling(Ruling ruling) { + if (ruling.oblique()) { + throw new UnsupportedOperationException("Can't add an oblique ruling."); } - this.rulings.add(r); - // clear caches - this.verticalRulingLines = null; - this.horizontalRulingLines = null; - this.cleanRulings = null; + rulings.add(ruling); + // Clear caches: + verticalRulingLines = null; + 
horizontalRulingLines = null; + cleanRulings = null; } - + public List getUnprocessedRulings() { - return this.rulings; + return rulings; } - public float getMinCharWidth() { - return minCharWidth; + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + public PDPage getPDPage() { + return pdPage; } - public float getMinCharHeight() { - return minCharHeight; + public PDDocument getPDDoc() { + return pdDoc; } - public PDPage getPDPage() { - return pdPage; - } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + /** + * @deprecated with no replacement + */ + @Deprecated public RectangleSpatialIndex getSpatialIndex() { - return this.spatial_index; + return spatialIndex; } - - public boolean hasText() { - return this.texts.size() > 0; + + public static class Builder { + private PageDims pageDims; + private int rotation; + private int number; + private PDPage pdPage; + private PDDocument pdDocument; + private List textElements; + private List rulings; + private float minCharWidth; + private float minCharHeight; + private RectangleSpatialIndex index; + + private Builder() {} + + public static Builder newInstance() { + return new Builder(); + } + + public Builder withPageDims(PageDims pageDims) { + this.pageDims = pageDims; + + return this; + } + + public Builder withRotation(int rotation) { + this.rotation = rotation; + + return this; + } + + public Builder withNumber(int number) { + this.number = number; + + return this; + } + + public Builder withPdPage(PDPage pdPage) { + this.pdPage = pdPage; + + return this; + } + + public Builder withPdDocument(PDDocument pdDocument) { + this.pdDocument = pdDocument; + + return this; + } + + public Builder withTextElements(List textElements) { + this.textElements = textElements; + + return this; + } + + public Builder withRulings(List rulings) { + this.rulings = rulings; + + return this; + } + + public Builder withMinCharWidth(float minCharWidth) { + this.minCharWidth = 
minCharWidth; + + return this; + } + + public Builder withMinCharHeight(float minCharHeight) { + this.minCharHeight = minCharHeight; + + return this; + } + + public Builder withIndex(RectangleSpatialIndex index) { + this.index = index; + + return this; + } + + public Page build() { + return new Page(pageDims, rotation, number, pdPage, pdDocument, textElements, rulings, minCharWidth, minCharHeight, index); + } } - - } diff --git a/src/main/java/technology/tabula/PageDims.java b/src/main/java/technology/tabula/PageDims.java new file mode 100644 index 00000000..1598d125 --- /dev/null +++ b/src/main/java/technology/tabula/PageDims.java @@ -0,0 +1,35 @@ +package technology.tabula; + +public class PageDims { + private final float top; + private final float left; + private final float width; + private final float height; + + private PageDims(final float top, final float left, final float width, final float height) { + this.top = top; + this.left = left; + this.width = width; + this.height = height; + } + + public static PageDims of(final float top, final float left, final float width, final float height) { + return new PageDims(top, left, width, height); + } + + public float getTop() { + return top; + } + + public float getLeft() { + return left; + } + + public float getWidth() { + return width; + } + + public float getHeight() { + return height; + } +} diff --git a/src/main/java/technology/tabula/PageIterator.java b/src/main/java/technology/tabula/PageIterator.java index 5fec2a77..052ed54a 100644 --- a/src/main/java/technology/tabula/PageIterator.java +++ b/src/main/java/technology/tabula/PageIterator.java @@ -5,39 +5,39 @@ public class PageIterator implements Iterator { - private ObjectExtractor oe; + private ObjectExtractor objectExtractor; private Iterator pageIndexIterator; - - public PageIterator(ObjectExtractor oe, Iterable pages) { + + public PageIterator(ObjectExtractor objectExtractor, Iterable pages) { super(); - this.oe = oe; + this.objectExtractor = 
objectExtractor; this.pageIndexIterator = pages.iterator(); } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // @Override public boolean hasNext() { - return this.pageIndexIterator.hasNext(); + return pageIndexIterator.hasNext(); } @Override public Page next() { - Page page = null; + Page nextPage = null; if (!this.hasNext()) { throw new IllegalStateException(); } try { - page = oe.extractPage(this.pageIndexIterator.next()); + nextPage = objectExtractor.extractPage(pageIndexIterator.next()); } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); } - return page; + return nextPage; } + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // @Override public void remove() { throw new UnsupportedOperationException(); - } -} \ No newline at end of file +} diff --git a/src/main/java/technology/tabula/Pair.java b/src/main/java/technology/tabula/Pair.java new file mode 100644 index 00000000..d54cbbe5 --- /dev/null +++ b/src/main/java/technology/tabula/Pair.java @@ -0,0 +1,19 @@ +package technology.tabula; + +public class Pair { + private final L left; + private final R right; + + public Pair(L left, R right) { + this.left = left; + this.right = right; + } + + public L getLeft() { + return this.left; + } + + public R getRight() { + return this.right; + } +} diff --git a/src/main/java/technology/tabula/ProjectionProfile.java b/src/main/java/technology/tabula/ProjectionProfile.java index d80f18b0..39ab9e41 100644 --- a/src/main/java/technology/tabula/ProjectionProfile.java +++ b/src/main/java/technology/tabula/ProjectionProfile.java @@ -73,7 +73,7 @@ public float[] getHorizontalProjection() { public float[] findVerticalSeparators(float minColumnWidth) { boolean foundNarrower = false; - List verticalSeparators = new ArrayList(); + List verticalSeparators = new ArrayList<>(); for (Ruling r: area.getVerticalRulings()) { if (r.length() / this.textBounds.getHeight() >= 0.95) { 
verticalSeparators.add(toFixed(r.getPosition() - this.areaLeft)); @@ -105,7 +105,7 @@ public float[] findVerticalSeparators(float minColumnWidth) { public float[] findHorizontalSeparators(float minRowHeight) { boolean foundShorter = false; - List horizontalSeparators = new ArrayList(); + List horizontalSeparators = new ArrayList<>(); for (Ruling r: area.getHorizontalRulings()) { System.out.println(r.length() / this.textBounds.getWidth()); if (r.length() / this.textBounds.getWidth() >= 0.95) { @@ -136,7 +136,7 @@ public float[] findHorizontalSeparators(float minRowHeight) { } private static List findSeparatorsFromProjection(float[] derivative) { - List separators = new ArrayList(); + List separators = new ArrayList<>(); Integer lastNeg = null; float s; boolean positiveSlope = false; @@ -167,7 +167,7 @@ public static float[] smooth(float[] data, int kernelSize) { + kernelSize / 2, data.length); j++) { s += data[j]; } - rv[i] = (float) Math.floor(s / (float) kernelSize); + rv[i] = (float) Math.floor(s / kernelSize); } } return rv; @@ -213,7 +213,7 @@ private static int toFixed(double value) { } private static double toDouble(int value) { - return (double) value / Math.pow(10, DECIMAL_PLACES); + return value / Math.pow(10, DECIMAL_PLACES); } } diff --git a/src/main/java/technology/tabula/QuickSort.java b/src/main/java/technology/tabula/QuickSort.java index 21d26dd5..03388a15 100644 --- a/src/main/java/technology/tabula/QuickSort.java +++ b/src/main/java/technology/tabula/QuickSort.java @@ -16,94 +16,97 @@ */ package technology.tabula; +import java.util.ArrayList; import java.util.Comparator; import java.util.List; +import java.util.RandomAccess; import java.util.Stack; /** - * see http://de.wikipedia.org/wiki/Quicksort. + * An implementation of Quicksort. 
+ * + * @see wikipedia * * @author UWe Pachler */ -public class QuickSort -{ - - private QuickSort() - { - } - - private static final Comparator objComp = new Comparator() - { - public int compare(Comparable object1, Comparable object2) - { - return object1.compareTo(object2); - } - }; +public final class QuickSort { + + private QuickSort() { + // utility + } + + /** + * Sorts the given list according to natural order. + */ + public static > void sort(List list) { + sort(list, QuickSort.naturalOrder()); // JAVA_8 replace with Comparator.naturalOrder() (and cleanup) + } + + /** + * Sorts the given list using the given comparator. + */ + public static void sort(List list, Comparator comparator) { + if (list instanceof RandomAccess) { + quicksort(list, comparator); + } else { + List copy = new ArrayList<>(list); + quicksort(copy, comparator); + list.clear(); + list.addAll(copy); + } + } - /** - * Sorts the given list using the given comparator. - */ - public static void sort(List list, Comparator cmp) - { - quicksort(list, cmp); - } + private static void quicksort(List list, Comparator cmp) { + Stack stack = new Stack<>(); + stack.push(0); + stack.push(list.size()); + while (!stack.isEmpty()) { + int right = stack.pop(); + int left = stack.pop(); + + if (right - left < 2) continue; + int p = left + ((right - left) / 2); + p = partition(list, cmp, p, left, right); - /** - * Sorts the given list using compareTo as comparator. 
- */ - public static void sort(List list) - { - sort(list, (Comparator) objComp); - } + stack.push(p + 1); + stack.push(right); - private static void quicksort(List list, Comparator cmp) - { - Stack stack = new Stack(); - stack.push(0); - stack.push(list.size()); - while (!stack.isEmpty()) { - int right = stack.pop(); - int left = stack.pop(); - if (right - left < 2) continue; - int p = left + ((right-left)/2); - p = partition(list, cmp, p, left, right); - - stack.push(p+1); - stack.push(right); + stack.push(left); + stack.push(p); + } + } - stack.push(left); - stack.push(p); + private static int partition(List list, Comparator cmp, int p, int start, int end) { + int l = start; + int h = end - 2; + T piv = list.get(p); + swap(list, p, end - 1); - } - } - - private static int partition(List list, Comparator cmp, int p, int start, int end) { - int l = start; - int h = end - 2; - T piv = list.get(p); - swap(list,p,end-1); + while (l < h) { + if (cmp.compare(list.get(l), piv) <= 0) l++; + else if (cmp.compare(piv, list.get(h)) <= 0) h--; + else swap(list, l, h); + } + int idx = h; + if (cmp.compare(list.get(h), piv) < 0) idx++; + swap(list, end - 1, idx); + return idx; + } - while (l < h) { - if (cmp.compare(list.get(l), piv) <= 0) { - l++; - } else if (cmp.compare(piv, list.get(h)) <= 0) { - h--; - } else { - swap(list,l,h); - } - } - int idx = h; - if (cmp.compare(list.get(h), piv) < 0) idx++; - swap(list,end-1,idx); - return idx; - } - + private static void swap(List list, int i, int j) { + T tmp = list.get(i); + list.set(i, list.get(j)); + list.set(j, tmp); + } - private static void swap(List list, int i, int j) - { - T tmp = list.get(i); - list.set(i, list.get(j)); - list.set(j, tmp); - } + @SuppressWarnings({ "rawtypes", "unchecked" }) + private static final Comparator NATURAL_ORDER = new Comparator() { + @Override public int compare(Object l, Object r) { return ((Comparable) l).compareTo(r); } + }; + + @SuppressWarnings("unchecked") + private static > Comparator 
naturalOrder() { + return NATURAL_ORDER; + } } diff --git a/src/main/java/technology/tabula/Rectangle.java b/src/main/java/technology/tabula/Rectangle.java index 41b79374..b96fcd77 100644 --- a/src/main/java/technology/tabula/Rectangle.java +++ b/src/main/java/technology/tabula/Rectangle.java @@ -2,171 +2,177 @@ import java.awt.geom.Point2D; import java.awt.geom.Rectangle2D; +import java.util.Comparator; import java.util.List; +import java.util.Locale; @SuppressWarnings("serial") -public class Rectangle extends Rectangle2D.Float implements Comparable { - - protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f; - - public Rectangle() { - super(); - } - - public Rectangle(float top, float left, float width, float height) { - super(); - this.setRect(left, top, width, height); - } - - @Override - public int compareTo(Rectangle other) { - double thisBottom = this.getBottom(); - double otherBottom = other.getBottom(); - int rv; - - if (this.equals(other)) return 0; - - if (this.verticalOverlap(other) > VERTICAL_COMPARISON_THRESHOLD) { - rv = java.lang.Double.compare(this.getX(), other.getX()); - } - else { - rv = java.lang.Double.compare(thisBottom, otherBottom); - } - return rv; - } - - // I'm bad at Java and need this for fancy sorting in technology.tabula.TextChunk. 
- public int isLtrDominant(){ - return 0; - } - - - public float getArea() { - return this.width * this.height; - } - - public float verticalOverlap(Rectangle other) { - return (float) Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); - } - - public boolean verticallyOverlaps(Rectangle other) { - return verticalOverlap(other) > 0; - } - - public float horizontalOverlap(Rectangle other) { - return (float) Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); - } - - public boolean horizontallyOverlaps(Rectangle other) { - return horizontalOverlap(other) > 0; - } - - public float verticalOverlapRatio(Rectangle other) { - float rv = 0, - delta = (float) Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop()); - - if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom() && other.getBottom() <= this.getBottom()) { - rv = (float) ((other.getBottom() - this.getTop()) / delta); - } - else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom() && this.getBottom() <= other.getBottom()) { - rv = (float) ((this.getBottom() - other.getTop()) / delta); - } - else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom() && other.getBottom() <= this.getBottom()) { - rv = (float) ((other.getBottom() - other.getTop()) / delta); - } - else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom() && this.getBottom() <= other.getBottom()) { - rv = (float) ((this.getBottom() - this.getTop()) / delta); - } - - return rv; - - } - - public float overlapRatio(Rectangle other) { - double intersectionWidth = Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); - double intersectionHeight = Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); - double intersectionArea = Math.max(0, intersectionWidth * 
intersectionHeight); - double unionArea = this.getArea() + other.getArea() - intersectionArea; - - return (float) (intersectionArea / unionArea); - } - - public Rectangle merge(Rectangle other) { - this.setRect(this.createUnion(other)); - return this; - } - - public float getTop() { - return (float) this.getMinY(); - } - - public void setTop(float top) { - float deltaHeight = top - this.y; - this.setRect(this.x, top, this.width, this.height - deltaHeight); - } - - public float getRight() { - return (float) this.getMaxX(); - } - - public void setRight(float right) { - this.setRect(this.x, this.y, right - this.x, this.height); - } - - public float getLeft() { - return (float) this.getMinX(); - } - - public void setLeft(float left) { - float deltaWidth = left - this.x; - this.setRect(left, this.y, this.width - deltaWidth, this.height); - } - - public float getBottom() { - return (float) this.getMaxY(); - } - - public void setBottom(float bottom) { - this.setRect(this.x, this.y, this.width, bottom - this.y); - } - - public Point2D[] getPoints() { - return new Point2D[] { - new Point2D.Float((float) this.getLeft(), (float) this.getTop()), - new Point2D.Float((float) this.getRight(), (float) this.getTop()), - new Point2D.Float((float) this.getRight(), (float) this.getBottom()), - new Point2D.Float((float) this.getLeft(), (float) this.getBottom()) - }; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - String s = super.toString(); - sb.append(s.substring(0, s.length() - 1)); - sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight())); - return sb.toString(); - } - - - /** - * @param rectangles - * @return minimum bounding box that contains all the rectangles - */ - public static Rectangle boundingBoxOf(List rectangles) { - float minx = java.lang.Float.MAX_VALUE; - float miny = java.lang.Float.MAX_VALUE; - float maxx = java.lang.Float.MIN_VALUE; - float maxy = java.lang.Float.MIN_VALUE; - - for (Rectangle r: 
rectangles) { - minx = (float) Math.min(r.getMinX(), minx); - miny = (float) Math.min(r.getMinY(), miny); - maxx = (float) Math.max(r.getMaxX(), maxx); - maxy = (float) Math.max(r.getMaxY(), maxy); - } - return new Rectangle(miny, minx, maxx - minx, maxy - miny); - } - +public class Rectangle extends Rectangle2D.Float { + + /** + * Ill-defined comparator, from when Rectangle was Comparable. + * + * @see PR 116 + * @deprecated with no replacement + */ + @Deprecated + public static final Comparator ILL_DEFINED_ORDER = new Comparator() { + @Override public int compare(Rectangle o1, Rectangle o2) { + if (o1.equals(o2)) return 0; + if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) { + return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1 + ? - java.lang.Double.compare(o1.getX(), o2.getX()) + : java.lang.Double.compare(o1.getX(), o2.getX()); + } else { + return java.lang.Float.compare(o1.getBottom(), o2.getBottom()); + } + } + }; + + protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f; + + public Rectangle() { + super(); + } + + public Rectangle(float top, float left, float width, float height) { + super(); + this.setRect(left, top, width, height); + } + + public int compareTo(Rectangle other) { + return ILL_DEFINED_ORDER.compare(this, other); + } + + // I'm bad at Java and need this for fancy sorting in + // technology.tabula.TextChunk. 
+ public int isLtrDominant() { + return 0; + } + + public float getArea() { + return this.width * this.height; + } + + public float verticalOverlap(Rectangle other) { + return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); + } + + public boolean verticallyOverlaps(Rectangle other) { + return verticalOverlap(other) > 0; + } + + public float horizontalOverlap(Rectangle other) { + return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); + } + + public boolean horizontallyOverlaps(Rectangle other) { + return horizontalOverlap(other) > 0; + } + + public float verticalOverlapRatio(Rectangle other) { + float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop()); + + if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom() + && other.getBottom() <= this.getBottom()) { + rv = (other.getBottom() - this.getTop()) / delta; + } else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom() + && this.getBottom() <= other.getBottom()) { + rv = (this.getBottom() - other.getTop()) / delta; + } else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom() + && other.getBottom() <= this.getBottom()) { + rv = (other.getBottom() - other.getTop()) / delta; + } else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom() + && this.getBottom() <= other.getBottom()) { + rv = (this.getBottom() - this.getTop()) / delta; + } + + return rv; + + } + + public float overlapRatio(Rectangle other) { + double intersectionWidth = Math.max(0, + Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); + double intersectionHeight = Math.max(0, + Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); + double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight); + double unionArea = this.getArea() + 
other.getArea() - intersectionArea; + + return (float) (intersectionArea / unionArea); + } + + public Rectangle merge(Rectangle other) { + this.setRect(this.createUnion(other)); + return this; + } + + public float getTop() { + return (float) this.getMinY(); + } + + public void setTop(float top) { + float deltaHeight = top - this.y; + this.setRect(this.x, top, this.width, this.height - deltaHeight); + } + + public float getRight() { + return (float) this.getMaxX(); + } + + public void setRight(float right) { + this.setRect(this.x, this.y, right - this.x, this.height); + } + + public float getLeft() { + return (float) this.getMinX(); + } + + public void setLeft(float left) { + float deltaWidth = left - this.x; + this.setRect(left, this.y, this.width - deltaWidth, this.height); + } + + public float getBottom() { + return (float) this.getMaxY(); + } + + public void setBottom(float bottom) { + this.setRect(this.x, this.y, this.width, bottom - this.y); + } + + public Point2D[] getPoints() { + return new Point2D[] { new Point2D.Float(this.getLeft(), this.getTop()), + new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()), + new Point2D.Float(this.getLeft(), this.getBottom()) }; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + String s = super.toString(); + sb.append(s.substring(0, s.length() - 1)); + sb.append(String.format(Locale.US, ",bottom=%f,right=%f]", this.getBottom(), this.getRight())); + return sb.toString(); + } + + /** + * @param rectangles + * @return minimum bounding box that contains all the rectangles + */ + public static Rectangle boundingBoxOf(List rectangles) { + float minx = java.lang.Float.MAX_VALUE; + float miny = java.lang.Float.MAX_VALUE; + float maxx = java.lang.Float.MIN_VALUE; + float maxy = java.lang.Float.MIN_VALUE; + + for (Rectangle r : rectangles) { + minx = (float) Math.min(r.getMinX(), minx); + miny = (float) Math.min(r.getMinY(), miny); + maxx = 
(float) Math.max(r.getMaxX(), maxx); + maxy = (float) Math.max(r.getMaxY(), maxy); + } + return new Rectangle(miny, minx, maxx - minx, maxy - miny); + } } diff --git a/src/main/java/technology/tabula/RectangleSpatialIndex.java b/src/main/java/technology/tabula/RectangleSpatialIndex.java index 498106db..0e942545 100644 --- a/src/main/java/technology/tabula/RectangleSpatialIndex.java +++ b/src/main/java/technology/tabula/RectangleSpatialIndex.java @@ -1,79 +1,39 @@ package technology.tabula; -import gnu.trove.procedure.TIntProcedure; - import java.util.ArrayList; import java.util.List; -import net.sf.jsi.SpatialIndex; -import net.sf.jsi.rtree.RTree; +import org.locationtech.jts.geom.Envelope; +import org.locationtech.jts.index.strtree.STRtree; -class RectangleSpatialIndex { +public class RectangleSpatialIndex { - class SaveToListProcedure implements TIntProcedure { - private List ids = new ArrayList(); - - public boolean execute(int id) { - ids.add(id); - return true; - } - private List getIds() { - return ids; - } - } + private final STRtree si = new STRtree(); + private final List rectangles = new ArrayList<>(); - private final SpatialIndex si; - private final List rectangles; - private Rectangle bounds = null; - - public RectangleSpatialIndex() { - si = new RTree(); - si.init(null); - rectangles = new ArrayList(); - } - public void add(T te) { rectangles.add(te); - if (bounds == null) { - bounds = new Rectangle(); - bounds.setRect(te); - } - else { - bounds.merge(te); - } - si.add(rectangleToSpatialIndexRectangle(te), rectangles.size() - 1); + si.insert(new Envelope(te.getLeft(), te.getRight(), te.getBottom(), te.getTop()), te); } public List contains(Rectangle r) { - SaveToListProcedure proc = new SaveToListProcedure(); - si.contains(rectangleToSpatialIndexRectangle(r), proc); - ArrayList rv = new ArrayList(); - for (int i : proc.getIds()) { - rv.add(rectangles.get(i)); + List intersection = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), 
r.getBottom())); + List rv = new ArrayList(); + + for (T ir: intersection) { + if (r.contains(ir)) { + rv.add(ir); + } } - Utils.sort(rv); + + Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER); return rv; } public List intersects(Rectangle r) { - SaveToListProcedure proc = new SaveToListProcedure(); - si.intersects(rectangleToSpatialIndexRectangle(r), proc); - ArrayList rv = new ArrayList(); - for (int i : proc.getIds()) { - rv.add(rectangles.get(i)); - } - Utils.sort(rv); - return rv; - } - - private net.sf.jsi.Rectangle rectangleToSpatialIndexRectangle(Rectangle r) { - return new net.sf.jsi.Rectangle((float) r.getX(), - (float) r.getY(), - (float) (r.getX() + r.getWidth()), - (float) (r.getY() + r.getHeight())); + return si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); } - /** * Minimum bounding box of all the Rectangles contained on this RectangleSpatialIndex @@ -81,7 +41,7 @@ private net.sf.jsi.Rectangle rectangleToSpatialIndexRectangle(Rectangle r) { * @return a Rectangle */ public Rectangle getBounds() { - return bounds; + return Rectangle.boundingBoxOf(rectangles); } } diff --git a/src/main/java/technology/tabula/RectangularTextContainer.java b/src/main/java/technology/tabula/RectangularTextContainer.java index f9e0036f..934b5f13 100644 --- a/src/main/java/technology/tabula/RectangularTextContainer.java +++ b/src/main/java/technology/tabula/RectangularTextContainer.java @@ -1,35 +1,51 @@ package technology.tabula; +import java.util.ArrayList; import java.util.List; @SuppressWarnings("serial") -public abstract class RectangularTextContainer extends Rectangle { - - public RectangularTextContainer(float top, float left, float width, float height) { - super(top, left, width, height); - } - - public String toString() { - StringBuilder sb = new StringBuilder(); - String s = super.toString(); - sb.append(s.substring(0, s.length() - 1)); - sb.append(String.format(",text=%s]", this.getText() == null ? 
"null" : "\"" + this.getText() + "\"")); - return sb.toString(); - } - - public RectangularTextContainer merge(RectangularTextContainer other) { - if (this.compareTo(other) < 0) { - this.getTextElements().addAll(other.getTextElements()); - - } - else { - this.getTextElements().addAll(0, other.getTextElements()); - } - super.merge(other); - return this; - } - - public abstract String getText(); - public abstract String getText(boolean useLineReturns); - public abstract List getTextElements(); +public class RectangularTextContainer extends Rectangle implements HasText { + + protected List textElements = new ArrayList<>(); + + protected RectangularTextContainer(float top, float left, float width, float height) { + super(top, left, width, height); + } + + public RectangularTextContainer merge(RectangularTextContainer other) { + if (compareTo(other) < 0) { + this.getTextElements().addAll(other.getTextElements()); + } else { + this.getTextElements().addAll(0, other.getTextElements()); + } + super.merge(other); + return this; + } + + public List getTextElements() { + return textElements; + } + + public void setTextElements(List textElements) { + this.textElements = textElements; + } + + @Override + public String getText() { + throw new UnsupportedOperationException(); + } + + @Override + public String getText(boolean useLineReturns) { + throw new UnsupportedOperationException(); + } + + @Override public String toString() { + StringBuilder sb = new StringBuilder(); + String s = super.toString(); + sb.append(s.substring(0, s.length() - 1)); + sb.append(String.format(",text=%s]", this.getText() == null ? 
"null" : "\"" + this.getText() + "\"")); + return sb.toString(); + } + } diff --git a/src/main/java/technology/tabula/Ruling.java b/src/main/java/technology/tabula/Ruling.java index 8eb16b5e..213ce87f 100644 --- a/src/main/java/technology/tabula/Ruling.java +++ b/src/main/java/technology/tabula/Ruling.java @@ -8,6 +8,7 @@ import java.util.Comparator; import java.util.Formatter; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.TreeMap; @@ -39,9 +40,6 @@ public void normalize() { else if (Utils.within(angle, 90, 1) || Utils.within(angle, 270, 1)) { // almost vertical this.setLine(this.x1, this.y1, this.x1, this.y2); } -// else { -// System.out.println("oblique: " + this + " ("+ this.getAngle() + ")"); -// } } public boolean vertical() { @@ -230,11 +228,6 @@ public boolean equals(Object other) { return this.getP1().equals(o.getP1()) && this.getP2().equals(o.getP2()); } - @Override - public int hashCode() { - return super.hashCode(); - } - public float getTop() { return this.y1; } @@ -291,13 +284,13 @@ public double getAngle() { public String toString() { StringBuilder sb = new StringBuilder(); Formatter formatter = new Formatter(sb); - String rv = formatter.format("%s[x1=%f y1=%f x2=%f y2=%f]", this.getClass().toString(), this.x1, this.y1, this.x2, this.y2).toString(); + String rv = formatter.format(Locale.US, "%s[x1=%f y1=%f x2=%f y2=%f]", this.getClass().toString(), this.x1, this.y1, this.x2, this.y2).toString(); formatter.close(); return rv; } public static List cropRulingsToArea(List rulings, Rectangle2D area) { - ArrayList rv = new ArrayList(); + ArrayList rv = new ArrayList<>(); for (Ruling r : rulings) { if (r.intersects(area)) { rv.add(r.intersect(area)); @@ -322,15 +315,15 @@ public SortObject(SOType type, float position, Ruling ruling) { } } - List sos = new ArrayList(); + List sos = new ArrayList<>(); - TreeMap tree = new TreeMap(new Comparator() { + TreeMap tree = new TreeMap<>(new Comparator() { @Override public int 
compare(Ruling o1, Ruling o2) { return java.lang.Double.compare(o1.getTop(), o2.getTop()); }}); - TreeMap rv = new TreeMap(new Comparator() { + TreeMap rv = new TreeMap<>(new Comparator() { @Override public int compare(Point2D o1, Point2D o2) { if (o1.getY() > o2.getY()) return 1; @@ -409,7 +402,7 @@ public static List collapseOrientedRulings(List lines) { } public static List collapseOrientedRulings(List lines, int expandAmount) { - ArrayList rv = new ArrayList(); + ArrayList rv = new ArrayList<>(); Collections.sort(lines, new Comparator() { @Override public int compare(Ruling a, Ruling b) { diff --git a/src/main/java/technology/tabula/Table.java b/src/main/java/technology/tabula/Table.java index eda11251..1e73bedf 100644 --- a/src/main/java/technology/tabula/Table.java +++ b/src/main/java/technology/tabula/Table.java @@ -8,139 +8,98 @@ @SuppressWarnings("serial") public class Table extends Rectangle { - - class CellPosition implements Comparable { - int row, col; - CellPosition(int row, int col) { - this.row = row; this.col = col; - } - - @Override - public boolean equals(Object other) { - if (this == other) - return true; - if (!(other instanceof CellPosition)) - return false; - return other != null && this.row == ((CellPosition) other).row && this.col == ((CellPosition) other).col; - } - - @Override - public int hashCode() { - return this.row * 100000 + this.col; - } - - @Override - public int compareTo(CellPosition other) { - int rv = 0; - if(this.row < other.row) { - rv = -1; - } - else if (this.row > other.row) { - rv = 1; - } - else if (this.col > other.col) { - rv = 1; - } - else if (this.col < other.col) { - rv = -1; - } - return rv; - } - } - - class CellContainer extends TreeMap { - - public int maxRow = 0, maxCol = 0; - - public RectangularTextContainer get(int row, int col) { - return this.get(new CellPosition(row, col)); - } - - public List getRow(int row) { - return new ArrayList(this.subMap(new CellPosition(row, 0), new CellPosition(row, 
maxRow+1)).values()); - } - - @Override - public RectangularTextContainer put(CellPosition cp, RectangularTextContainer value) { - this.maxRow = Math.max(maxRow, cp.row); - this.maxCol = Math.max(maxCol, cp.col); - if (this.containsKey(cp)) { // adding on an existing CellPosition, concatenate content and resize - value.merge(this.get(cp)); - } - super.put(cp, value); - return value; - } - - @Override - public RectangularTextContainer get(Object key) { - return this.containsKey(key) ? super.get(key) : TextChunk.EMPTY; - } - - public boolean containsKey(int row, int col) { - return this.containsKey(new CellPosition(row, col)); - } - - } - - public static final Table EMPTY = new Table(); - - CellContainer cellContainer = new CellContainer(); - Page page; - ExtractionAlgorithm extractionAlgorithm; - List> rows = null; - - public Table() { - super(); - } - - public Table(Page page, ExtractionAlgorithm extractionAlgorithm) { - this(); - this.page = page; - this.extractionAlgorithm = extractionAlgorithm; - } - - public void add(RectangularTextContainer tc, int i, int j) { - this.merge(tc); - this.cellContainer.put(new CellPosition(i, j), tc); - this.rows = null; // clear the memoized rows - } - - public List> getRows() { - if (this.rows != null) { - return this.rows; - } - - this.rows = new ArrayList>(); - for (int i = 0; i <= this.cellContainer.maxRow; i++) { - List lastRow = new ArrayList(); - this.rows.add(lastRow); - for (int j = 0; j <= this.cellContainer.maxCol; j++) { - lastRow.add(this.cellContainer.containsKey(i, j) ? 
this.cellContainer.get(i, j) : TextChunk.EMPTY); - } - } - return this.rows; - } - - public RectangularTextContainer getCell(int i, int j) { - return this.cellContainer.get(i, j); - } - - public List> getCols() { - return Utils.transpose(this.getRows()); - } - - public void setExtractionAlgorithm(ExtractionAlgorithm extractionAlgorithm) { - this.extractionAlgorithm = extractionAlgorithm; - } - - public ExtractionAlgorithm getExtractionAlgorithm() { - return extractionAlgorithm; - } - - public List getCells() { - return (List) new ArrayList(this.cellContainer.values()); - } - - + + public static final Table empty() { return new Table(""); } + + private Table(String extractionMethod) { + this.extractionMethod = extractionMethod; + } + + public Table(ExtractionAlgorithm extractionAlgorithm) { + this(extractionAlgorithm.toString()); + } + + private final String extractionMethod; + + private int rowCount = 0; + private int colCount = 0; + private int pageNumber = 0; + + /* visible for testing */ final TreeMap cells = new TreeMap<>(); + + public int getRowCount() { return rowCount; } + public int getColCount() { return colCount; } + public int getPageNumber() { return pageNumber; } + public void setPageNumber(int pageNumber) { this.pageNumber = pageNumber; } + + public String getExtractionMethod() { return extractionMethod; } + + public void add(RectangularTextContainer chunk, int row, int col) { + this.merge(chunk); + + rowCount = Math.max(rowCount, row + 1); + colCount = Math.max(colCount, col + 1); + + CellPosition cp = new CellPosition(row, col); + + RectangularTextContainer old = cells.get(cp); + if (old != null) chunk.merge(old); + cells.put(cp, chunk); + + this.memoizedRows = null; + } + + private List> memoizedRows = null; + + public List> getRows() { + if (this.memoizedRows == null) this.memoizedRows = computeRows(); + return this.memoizedRows; + } + + private List> computeRows() { + List> rows = new ArrayList<>(); + for (int i = 0; i < rowCount; i++) { + List 
lastRow = new ArrayList<>(); + rows.add(lastRow); + for (int j = 0; j < colCount; j++) { + RectangularTextContainer cell = cells.get(new CellPosition(i,j)); // JAVA_8 use getOrDefault() + lastRow.add(cell != null ? cell : TextChunk.EMPTY); + } + } + return rows; + } + + public RectangularTextContainer getCell(int i, int j) { + RectangularTextContainer cell = cells.get(new CellPosition(i,j)); // JAVA_8 use getOrDefault() + return cell != null ? cell : TextChunk.EMPTY; + } + +} + +class CellPosition implements Comparable { + + CellPosition(int row, int col) { + this.row = row; + this.col = col; + } + + final int row, col; + + @Override public int hashCode() { + return row + 101 * col; + } + + @Override public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; + CellPosition other = (CellPosition) obj; + return row == other.row && col == other.col; + } + + @Override public int compareTo(CellPosition other) { + int rowdiff = row - other.row; + return rowdiff != 0 ? 
rowdiff : col - other.col; + } } diff --git a/src/main/java/technology/tabula/TableWithRulingLines.java b/src/main/java/technology/tabula/TableWithRulingLines.java index 54de67e2..cde0ce72 100644 --- a/src/main/java/technology/tabula/TableWithRulingLines.java +++ b/src/main/java/technology/tabula/TableWithRulingLines.java @@ -6,25 +6,21 @@ import java.util.Iterator; import java.util.List; +import technology.tabula.extractors.ExtractionAlgorithm; + @SuppressWarnings("serial") public class TableWithRulingLines extends Table { List verticalRulings, horizontalRulings; - RectangleSpatialIndex si = new RectangleSpatialIndex(); + RectangleSpatialIndex si = new RectangleSpatialIndex<>(); - public TableWithRulingLines() { - super(); - } - - public TableWithRulingLines(Rectangle area, Page page, List cells, - List horizontalRulings, - List verticalRulings) { - this(); + public TableWithRulingLines(Rectangle area, List cells, List horizontalRulings, List verticalRulings, ExtractionAlgorithm extractionAlgorithm, int pageNumber) { + super(extractionAlgorithm); this.setRect(area); - this.page = page; this.verticalRulings = verticalRulings; this.horizontalRulings = horizontalRulings; this.addCells(cells); + this.setPageNumber(pageNumber); } private void addCells(List cells) { @@ -61,7 +57,7 @@ private void addCells(List cells) { private static List> rowsOfCells(List cells) { Cell c; float lastTop; - List> rv = new ArrayList>(); + List> rv = new ArrayList<>(); List lastRow; if (cells.isEmpty()) { @@ -78,19 +74,19 @@ public int compare(Cell arg0, Cell arg1) { Iterator iter = cells.iterator(); c = iter.next(); - lastTop = (float) c.getTop(); - lastRow = new ArrayList(); + lastTop = c.getTop(); + lastRow = new ArrayList<>(); lastRow.add(c); rv.add(lastRow); while (iter.hasNext()) { c = iter.next(); if (!Utils.feq(c.getTop(), lastTop)) { - lastRow = new ArrayList(); + lastRow = new ArrayList<>(); rv.add(lastRow); } lastRow.add(c); - lastTop = (float) c.getTop(); + lastTop = 
c.getTop(); } return rv; } diff --git a/src/main/java/technology/tabula/TextChunk.java b/src/main/java/technology/tabula/TextChunk.java index 046f10d9..9f5adbd5 100644 --- a/src/main/java/technology/tabula/TextChunk.java +++ b/src/main/java/technology/tabula/TextChunk.java @@ -8,9 +8,9 @@ import java.text.Normalizer; @SuppressWarnings("serial") -public class TextChunk extends RectangularTextContainer implements HasText { +public class TextChunk extends RectangularTextContainer { public static final TextChunk EMPTY = new TextChunk(0, 0, 0, 0); - List textElements = new ArrayList(); +// List textElements = new ArrayList<>(); public TextChunk(float top, float left, float width, float height) { super(top, left, width, height); @@ -37,7 +37,7 @@ private enum DirectionalityOptions { private static HashMap directionalities; static { - directionalities = new HashMap(); + directionalities = new HashMap<>(); // BCT = bidirectional character type directionalities.put(java.lang.Character.DIRECTIONALITY_ARABIC_NUMBER, DirectionalityOptions.LTR); // Weak BCT "AN" in the Unicode specification. directionalities.put(java.lang.Character.DIRECTIONALITY_BOUNDARY_NEUTRAL, DirectionalityOptions.NONE); // Weak BCT "BN" in the Unicode specification. 
@@ -73,8 +73,8 @@ public TextChunk groupByDirectionality(Boolean isLtrDominant) { throw new IllegalArgumentException(); } - ArrayList> chunks = new ArrayList>(); - ArrayList buff = new ArrayList(); + ArrayList> chunks = new ArrayList<>(); + ArrayList buff = new ArrayList<>(); DirectionalityOptions buffDirectionality = DirectionalityOptions.NONE; // the directionality of the characters in buff; for (TextElement te : this.getTextElements()) { @@ -107,7 +107,7 @@ public TextChunk groupByDirectionality(Boolean isLtrDominant) { // and start a new one buffDirectionality = directionalities.get(Character.getDirectionality(te.getText().charAt(0))); - buff = new ArrayList(); + buff = new ArrayList<>(); buff.add(te); } } @@ -116,7 +116,7 @@ public TextChunk groupByDirectionality(Boolean isLtrDominant) { Collections.reverse(buff); } chunks.add(buff); - ArrayList everything = new ArrayList(); + ArrayList everything = new ArrayList<>(); if (!isLtrDominant) { Collections.reverse(chunks); } @@ -126,34 +126,7 @@ public TextChunk groupByDirectionality(Boolean isLtrDominant) { return new TextChunk(everything); } - @Override - /* - We're comparing based on ordering in the logical ordering of text here. - Assuming identical Y-axis positions, if TextChunk A has a lower X-axis - than TextChunk B, then A is "before" it -- iff this is LTR text. Otherwise, - it is A is after B. 
- */ - public int compareTo(Rectangle other) { - double thisBottom = this.getBottom(); - double otherBottom = other.getBottom(); - int rv; - - if (this.equals(other)) return 0; - - if (this.verticalOverlap(other) > VERTICAL_COMPARISON_THRESHOLD) { - rv = java.lang.Double.compare(this.getX(), other.getX()); - - // reverse the ordering if both TextChunks are RTL - if (this.isLtrDominant() == -1 && other.isLtrDominant() == -1) { - rv = -1 * rv; - } - } else { - rv = java.lang.Double.compare(thisBottom, otherBottom); - } - return rv; - } - - public int isLtrDominant() { + @Override public int isLtrDominant() { int ltrCnt = 0; int rtlCnt = 0; for (int i = 0; i < this.getTextElements().size(); i++) { @@ -186,16 +159,13 @@ public void add(TextElement textElement) { this.merge(textElement); } - public void add(List textElements) { - for (TextElement te : textElements) { + public void add(List elements) { + for (TextElement te : elements) { this.add(te); } } - public List getTextElements() { - return textElements; - } - + @Override public String getText() { if (this.textElements.size() == 0) { return ""; @@ -210,11 +180,9 @@ public String getText() { @Override public String getText(boolean useLineReturns) { - // TODO Auto-generated method stub - return null; + return getText(); } - /** * Returns true if text contained in this TextChunk is the same repeated character */ @@ -241,11 +209,10 @@ public TextChunk[] splitAt(int i) { throw new IllegalArgumentException(); } - TextChunk[] rv = new TextChunk[]{ - new TextChunk(this.getTextElements().subList(0, i)), - new TextChunk(this.getTextElements().subList(i, this.getTextElements().size())) - }; - return rv; + return new TextChunk[]{ + new TextChunk(this.getTextElements().subList(0, i)), + new TextChunk(this.getTextElements().subList(i, this.getTextElements().size())) + }; } /** @@ -258,7 +225,7 @@ public List squeeze(Character c, int minRunLength) { Character currentChar, lastChar = null; int subSequenceLength = 0, 
subSequenceStart = 0; TextChunk[] t; - List rv = new ArrayList(); + List rv = new ArrayList<>(); for (int i = 0; i < this.getTextElements().size(); i++) { TextElement textElement = this.getTextElements().get(i); @@ -325,11 +292,8 @@ public boolean equals(Object obj) { return false; TextChunk other = (TextChunk) obj; if (textElements == null) { - if (other.textElements != null) - return false; - } else if (!textElements.equals(other.textElements)) - return false; - return true; + return other.textElements == null; + } else return textElements.equals(other.textElements); } public static boolean allSameChar(List textChunks) { @@ -358,7 +322,7 @@ public static boolean allSameChar(List textChunks) { } public static List groupByLines(List textChunks) { - List lines = new ArrayList(); + List lines = new ArrayList<>(); if (textChunks.size() == 0) { return lines; @@ -387,7 +351,7 @@ public static List groupByLines(List textChunks) { lines.remove(lines.size() - 1); } - List rv = new ArrayList(lines.size()); + List rv = new ArrayList<>(lines.size()); for (Line line : lines) { rv.add(Line.removeRepeatedCharacters(line, ' ', 3)); diff --git a/src/main/java/technology/tabula/TextElement.java b/src/main/java/technology/tabula/TextElement.java index 6e232854..a0f24fa0 100644 --- a/src/main/java/technology/tabula/TextElement.java +++ b/src/main/java/technology/tabula/TextElement.java @@ -30,10 +30,16 @@ public TextElement(float y, float x, float width, float height, this.dir = dir; } + @Override public String getText() { return text; } + @Override + public String getText(boolean useLineReturns) { + return text; + } + public float getDirection() { return dir; } @@ -50,7 +56,7 @@ public float getFontSize() { return fontSize; } - public String toString() { + @Override public String toString() { StringBuilder sb = new StringBuilder(); String s = super.toString(); sb.append(s.substring(0, s.length() - 1)); @@ -110,7 +116,7 @@ public static List mergeWords(List textElements) { */ public 
static List mergeWords(List textElements, List verticalRulings) { - List textChunks = new ArrayList(); + List textChunks = new ArrayList<>(); if (textElements.isEmpty()) { return textChunks; @@ -120,15 +126,15 @@ public static List mergeWords(List textElements, List copyOfTextElements = new ArrayList(textElements); + List copyOfTextElements = new ArrayList<>(textElements); textChunks.add(new TextChunk(copyOfTextElements.remove(0))); TextChunk firstTC = textChunks.get(0); float previousAveCharWidth = (float) firstTC.getWidth(); - float endOfLastTextX = (float) firstTC.getRight(); - float maxYForLine = (float) firstTC.getBottom(); + float endOfLastTextX = firstTC.getRight(); + float maxYForLine = firstTC.getBottom(); float maxHeightForLine = (float) firstTC.getHeight(); - float minYTopForLine = (float) firstTC.getTop(); + float minYTopForLine = firstTC.getTop(); float lastWordSpacing = -1; float wordSpacing, deltaSpace, averageCharWidth, deltaCharWidth; float expectedStartOfNextWordX, dist; @@ -202,7 +208,7 @@ public static List mergeWords(List textElements, List mergeWords(List textElements, List mergeWords(List textElements, List mergeWords(List textElements, List mergeWords(List textElements, List textChunksSeparatedByDirectionality = new ArrayList(); + List textChunksSeparatedByDirectionality = new ArrayList<>(); // count up characters by directionality for (TextChunk chunk : textChunks) { // choose the dominant direction diff --git a/src/main/java/technology/tabula/TextStripper.java b/src/main/java/technology/tabula/TextStripper.java index 3e1c82a1..557fa439 100644 --- a/src/main/java/technology/tabula/TextStripper.java +++ b/src/main/java/technology/tabula/TextStripper.java @@ -1,6 +1,10 @@ package technology.tabula; +import org.apache.fontbox.util.BoundingBox; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDFontDescriptor; +import org.apache.pdfbox.pdmodel.font.PDType3Font; 
import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.TextPosition; @@ -9,12 +13,18 @@ import java.util.List; public class TextStripper extends PDFTextStripper { + private static final String NBSP = "\u00A0"; - private PDDocument document; - public ArrayList textElements; - public RectangleSpatialIndex spatialIndex; - public float minCharWidth = Float.MAX_VALUE; - public float minCharHeight = Float.MAX_VALUE; + private static final float AVG_HEIGHT_MULT_THRESHOLD = 6.0f; + private static final float MAX_BLANK_FONT_SIZE = 40.0f; + private static final float MIN_BLANK_FONT_SIZE = 2.0f; + private final PDDocument document; + private final ArrayList textElements; + private final RectangleSpatialIndex spatialIndex; + private float minCharWidth = Float.MAX_VALUE; + private float minCharHeight = Float.MAX_VALUE; + private float totalHeight = 0.0f; + private int countHeight = 0; public TextStripper(PDDocument document, int pageNumber) throws IOException { super(); @@ -55,7 +65,7 @@ protected void writeString(String string, List textPositions) thro TextElement te = new TextElement(Utils.round(textPosition.getYDirAdj() - h, 2), Utils.round(textPosition.getXDirAdj(), 2), Utils.round(textPosition.getWidthDirAdj(), 2), - Utils.round(textPosition.getHeightDir(), 2), textPosition.getFont(), textPosition.getFontSize(), c, + Utils.round(textPosition.getHeightDir(), 2), textPosition.getFont(), textPosition.getFontSizeInPt(), c, // workaround a possible bug in PDFBox: // https://issues.apache.org/jira/browse/PDFBOX-1755 wos, textPosition.getDir()); @@ -63,11 +73,78 @@ protected void writeString(String string, List textPositions) thro this.minCharWidth = (float) Math.min(this.minCharWidth, te.getWidth()); this.minCharHeight = (float) Math.min(this.minCharHeight, te.getHeight()); + countHeight++; + totalHeight += te.getHeight(); + float avgHeight = totalHeight / countHeight; + + //We have an issue where tall blank cells throw off the row height calculation + 
//Introspect a blank cell a bit here to see if it should be thrown away + if ((te.getText() == null || te.getText().trim().equals(""))) { + //if the cell height is more than AVG_HEIGHT_MULT_THRESHOLDxaverage, throw it away + if (avgHeight > 0 + && te.getHeight() >= (avgHeight * AVG_HEIGHT_MULT_THRESHOLD)) { + continue; + } + + //if the font size is outside of reasonable ranges, throw it away + if (textPosition.getFontSizeInPt() > MAX_BLANK_FONT_SIZE || textPosition.getFontSizeInPt() < MIN_BLANK_FONT_SIZE) { + continue; + } + } + this.spatialIndex.add(te); this.textElements.add(te); } } + @Override + protected float computeFontHeight(PDFont font) throws IOException + { + BoundingBox bbox = font.getBoundingBox(); + if (bbox.getLowerLeftY() < Short.MIN_VALUE) + { + // PDFBOX-2158 and PDFBOX-3130 + // files by Salmat eSolutions / ClibPDF Library + bbox.setLowerLeftY(- (bbox.getLowerLeftY() + 65536)); + } + // 1/2 the bbox is used as the height todo: why? + float glyphHeight = bbox.getHeight() / 2; + + // sometimes the bbox has very high values, but CapHeight is OK + PDFontDescriptor fontDescriptor = font.getFontDescriptor(); + if (fontDescriptor != null) + { + float capHeight = fontDescriptor.getCapHeight(); + if (Float.compare(capHeight, 0) != 0 && + (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = capHeight; + } + // PDFBOX-3464, PDFBOX-448: + // sometimes even CapHeight has very high value, but Ascent and Descent are ok + float ascent = fontDescriptor.getAscent(); + float descent = fontDescriptor.getDescent(); + if (ascent > 0 && descent < 0 && + ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = (ascent - descent) / 2; + } + } + + // transformPoint from glyph space -> text space + float height; + if (font instanceof PDType3Font) + { + height = font.getFontMatrix().transformPoint(0, glyphHeight).y; + } + else + { + height = glyphHeight / 1000; + } + + return height; + } + private 
boolean isPrintable(String s) { Character c; Character.UnicodeBlock block; @@ -79,4 +156,20 @@ private boolean isPrintable(String s) { } return printable; } -} \ No newline at end of file + + public List getTextElements() { + return this.textElements; + } + + public RectangleSpatialIndex getSpatialIndex() { + return spatialIndex; + } + + public float getMinCharWidth() { + return minCharWidth; + } + + public float getMinCharHeight() { + return minCharHeight; + } +} diff --git a/src/main/java/technology/tabula/Utils.java b/src/main/java/technology/tabula/Utils.java index 1f04a4c2..00814429 100644 --- a/src/main/java/technology/tabula/Utils.java +++ b/src/main/java/technology/tabula/Utils.java @@ -105,10 +105,10 @@ public static String join(String glue, String... s) { } public static List> transpose(List> table) { - List> ret = new ArrayList>(); + List> ret = new ArrayList<>(); final int N = table.get(0).size(); for (int i = 0; i < N; i++) { - List col = new ArrayList(); + List col = new ArrayList<>(); for (List row : table) { col.add(row.get(i)); } @@ -117,17 +117,19 @@ public static List> transpose(List> table) { return ret; } - /** - * Wrap Collections.sort so we can fallback to a non-stable quicksort - * if we're running on JDK7+ - */ - public static > void sort(List list) { - if (useQuickSort) { - QuickSort.sort(list); - } else { - Collections.sort(list); - } - } + /** + * Wrap Collections.sort so we can fallback to a non-stable quicksort if we're + * running on JDK7+ + */ + public static > void sort(List list) { + if (useQuickSort) QuickSort.sort(list); + else Collections.sort(list); + } + + public static void sort(List list, Comparator comparator) { + if (useQuickSort) QuickSort.sort(list, comparator); + else Collections.sort(list, comparator); + } private static boolean useCustomQuickSort() { // taken from PDFBOX: @@ -160,7 +162,7 @@ public static List parsePagesOption(String pagesSpec) throws ParseExcep return null; } - List rv = new ArrayList(); + List rv = 
new ArrayList<>(); String[] ranges = pagesSpec.split(","); for (int i = 0; i < ranges.length; i++) { @@ -188,8 +190,8 @@ public static List parsePagesOption(String pagesSpec) throws ParseExcep public static void snapPoints(List rulings, float xThreshold, float yThreshold) { // collect points and keep a Line -> p1,p2 map - Map linesToPoints = new HashMap(); - List points = new ArrayList(); + Map linesToPoints = new HashMap<>(); + List points = new ArrayList<>(); for (Line2D.Float r : rulings) { Point2D p1 = r.getP1(); Point2D p2 = r.getP2(); @@ -206,15 +208,15 @@ public int compare(Point2D arg0, Point2D arg1) { } }); - List> groupedPoints = new ArrayList>(); - groupedPoints.add(new ArrayList(Arrays.asList(new Point2D[]{points.get(0)}))); + List> groupedPoints = new ArrayList<>(); + groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{points.get(0)}))); for (Point2D p : points.subList(1, points.size() - 1)) { List last = groupedPoints.get(groupedPoints.size() - 1); if (Math.abs(p.getX() - last.get(0).getX()) < xThreshold) { groupedPoints.get(groupedPoints.size() - 1).add(p); } else { - groupedPoints.add(new ArrayList(Arrays.asList(new Point2D[]{p}))); + groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{p}))); } } @@ -238,15 +240,15 @@ public int compare(Point2D arg0, Point2D arg1) { } }); - groupedPoints = new ArrayList>(); - groupedPoints.add(new ArrayList(Arrays.asList(new Point2D[]{points.get(0)}))); + groupedPoints = new ArrayList<>(); + groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{points.get(0)}))); for (Point2D p : points.subList(1, points.size() - 1)) { List last = groupedPoints.get(groupedPoints.size() - 1); if (Math.abs(p.getY() - last.get(0).getY()) < yThreshold) { groupedPoints.get(groupedPoints.size() - 1).add(p); } else { - groupedPoints.add(new ArrayList(Arrays.asList(new Point2D[]{p}))); + groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{p}))); } } @@ -269,18 +271,18 @@ public int compare(Point2D arg0, 
Point2D arg1) { } } - public static BufferedImage pageConvertToImage(PDPage page, int dpi, ImageType imageType) throws IOException { - // Yeah, this sucks. But PDFBox 2 wants PDFRenderers to have - // a reference to a PDDocument (unnecessarily, IMHO) - - PDDocument document = new PDDocument(); - document.addPage(page); - - PDFRenderer renderer = new PDFRenderer(document); - - document.close(); - - return renderer.renderImageWithDPI(0, dpi, imageType); - } + public static BufferedImage pageConvertToImage(PDPage page, int dpi, ImageType imageType) throws IOException { + try (PDDocument document = new PDDocument()) { + document.addPage(page); + PDFRenderer renderer = new PDFRenderer(document); + document.close(); + return renderer.renderImageWithDPI(0, dpi, imageType); + } + } + + public static BufferedImage pageConvertToImage(PDDocument doc, PDPage page, int dpi, ImageType imageType) throws IOException { + PDFRenderer renderer = new PDFRenderer(doc); + return renderer.renderImageWithDPI(doc.getPages().indexOf(page), dpi, imageType); + } } diff --git a/src/main/java/technology/tabula/debug/Debug.java b/src/main/java/technology/tabula/debug/Debug.java index 0c62784d..d6d257ce 100644 --- a/src/main/java/technology/tabula/debug/Debug.java +++ b/src/main/java/technology/tabula/debug/Debug.java @@ -16,6 +16,7 @@ import java.util.List; import org.apache.commons.cli.*; +import org.apache.pdfbox.Loader; import technology.tabula.Cell; import technology.tabula.CommandLineApp; import technology.tabula.Line; @@ -59,7 +60,7 @@ private static void debugNonCleanRulings(Graphics2D g, Page page) { private static void debugRulings(Graphics2D g, Page page) { // draw detected lines - List rulings = new ArrayList(page.getHorizontalRulings()); + List rulings = new ArrayList<>(page.getHorizontalRulings()); rulings.addAll(page.getVerticalRulings()); drawShapes(g, rulings); } @@ -70,8 +71,8 @@ private static void debugColumns(Graphics2D g, Page page) { List columns = 
BasicExtractionAlgorithm.columnPositions(lines); int i = 0; for (float p : columns) { - Ruling r = new Ruling(new Point2D.Float(p, (float) page.getTop()), - new Point2D.Float(p, (float) page.getBottom())); + Ruling r = new Ruling(new Point2D.Float(p, page.getTop()), + new Point2D.Float(p, page.getBottom())); g.setColor(COLORS[(i++) % 5]); drawShape(g, r); } @@ -171,7 +172,7 @@ private static void debugProjectionProfile(Graphics2D g, Page page) { g.setStroke(new BasicStroke(1f)); float[] seps = profile.findVerticalSeparators(horizSmoothKernel * 2.5f); for (int i = 0; i < seps.length; i++) { - float x = (float) (page.getLeft() + seps[i]); + float x = page.getLeft() + seps[i]; g.draw(new Line2D.Double(x, page.getTop(), x, page.getBottom())); } @@ -199,7 +200,7 @@ private static void debugProjectionProfile(Graphics2D g, Page page) { g.setStroke(new BasicStroke(1.5f)); seps = profile.findHorizontalSeparators(vertSmoothKernel); for (int i = 0; i < seps.length; i++) { - float y = (float) (page.getTop() + seps[i]); + float y = page.getTop() + seps[i]; g.draw(new Line2D.Double(page.getLeft(), y, page.getRight(), y)); } @@ -215,7 +216,7 @@ public static void renderPage(String pdfPath, String outPath, int pageNumber, Re boolean drawColumns, boolean drawCharacters, boolean drawArea, boolean drawCells, boolean drawUnprocessedRulings, boolean drawProjectionProfile, boolean drawClippingPaths, boolean drawDetectedTables) throws IOException { - PDDocument document = PDDocument.load(new File(pdfPath)); + PDDocument document = Loader.loadPDF(new File(pdfPath)); ObjectExtractor oe = new ObjectExtractor(document); @@ -225,9 +226,9 @@ public static void renderPage(String pdfPath, String outPath, int pageNumber, Re page = page.getArea(area); } - PDPage p = (PDPage) document.getPage(pageNumber); + PDPage p = document.getPage(pageNumber); - BufferedImage image = Utils.pageConvertToImage(p, 72, ImageType.RGB); + BufferedImage image = Utils.pageConvertToImage(document, p, 72, ImageType.RGB); 
Graphics2D g = (Graphics2D) image.getGraphics(); @@ -276,7 +277,6 @@ public static void renderPage(String pdfPath, String outPath, int pageNumber, Re ImageIO.write(image, "jpg", new File(outPath)); } - @SuppressWarnings("static-access") private static Options buildOptions() { Options o = new Options(); @@ -314,7 +314,7 @@ public static void main(String[] args) throws IOException { try { // parse the command line arguments CommandLine line = parser.parse(buildOptions(), args); - List pages = new ArrayList(); + List pages = new ArrayList<>(); if (line.hasOption('p')) { pages = Utils.parsePagesOption(line.getOptionValue('p')); } else { @@ -350,10 +350,10 @@ public static void main(String[] args) throws IOException { if (pages == null) { // user specified all pages - PDDocument document = PDDocument.load(pdfFile); + PDDocument document = Loader.loadPDF(pdfFile); int numPages = document.getNumberOfPages(); - pages = new ArrayList(numPages); + pages = new ArrayList<>(numPages); for (int i = 1; i <= numPages; i++) { pages.add(i); diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java index 69f50c9d..86639f66 100644 --- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java @@ -1,22 +1,8 @@ package technology.tabula.detectors; -import java.awt.geom.Line2D; -import java.awt.geom.Point2D; -import java.awt.image.BufferedImage; -import java.awt.image.Raster; -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; - +import org.apache.pdfbox.contentstream.PDContentStream; import org.apache.pdfbox.contentstream.operator.Operator; +import 
org.apache.pdfbox.contentstream.operator.OperatorName; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdfwriter.ContentStreamWriter; @@ -24,16 +10,17 @@ import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.rendering.ImageType; - -import technology.tabula.Line; -import technology.tabula.Page; -import technology.tabula.Rectangle; -import technology.tabula.Ruling; -import technology.tabula.TextChunk; -import technology.tabula.TextElement; -import technology.tabula.Utils; +import technology.tabula.*; import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; +import java.awt.geom.Line2D; +import java.awt.geom.Point2D; +import java.awt.image.BufferedImage; +import java.awt.image.Raster; +import java.io.IOException; +import java.io.OutputStream; +import java.util.*; + /** * Created by matt on 2015-12-17. *

@@ -106,9 +93,9 @@ public List detect(Page page) { BufferedImage image; PDPage pdfPage = page.getPDPage(); try { - image = Utils.pageConvertToImage(pdfPage, 144, ImageType.GRAY); + image = Utils.pageConvertToImage(page.getPDDoc(), pdfPage, 144, ImageType.GRAY); } catch (IOException e) { - return new ArrayList(); + return new ArrayList<>(); } List horizontalRulings = this.getHorizontalRulings(image); @@ -117,9 +104,10 @@ public List detect(Page page) { PDDocument removeTextDocument = null; try { removeTextDocument = this.removeText(pdfPage); - image = Utils.pageConvertToImage(pdfPage, 144, ImageType.GRAY); + pdfPage = removeTextDocument.getPage(0); + image = Utils.pageConvertToImage(removeTextDocument, pdfPage, 144, ImageType.GRAY); } catch (Exception e) { - return new ArrayList(); + return new ArrayList<>(); } finally { if (removeTextDocument != null) { try { @@ -133,10 +121,10 @@ public List detect(Page page) { List verticalRulings = this.getVerticalRulings(image); - List allEdges = new ArrayList(horizontalRulings); + List allEdges = new ArrayList<>(horizontalRulings); allEdges.addAll(verticalRulings); - List tableAreas = new ArrayList(); + List tableAreas = new ArrayList<>(); // if we found some edges, try to find some tables based on them if (allEdges.size() > 0) { @@ -289,7 +277,7 @@ public List detect(Page page) { } while (foundTable); // create a set of our current tables that will eliminate duplicate tables - Set tableSet = new TreeSet(new Comparator() { + Set tableSet = new TreeSet<>(new Comparator() { @Override public int compare(Rectangle o1, Rectangle o2) { if (o1.equals(o2)) { @@ -317,7 +305,7 @@ public int compare(Rectangle o1, Rectangle o2) { tableSet.addAll(tableAreas); - return new ArrayList(tableSet); + return new ArrayList<>(tableSet); } private Rectangle getTableFromText(List lines, @@ -517,144 +505,115 @@ private TextEdges getTextEdges(List lines) { // get all text edges (lines that align with the left, middle and right of chunks of text) that 
extend // uninterrupted over at least REQUIRED_TEXT_LINES_FOR_EDGE lines of text - List leftTextEdges = new ArrayList(); - List midTextEdges = new ArrayList(); - List rightTextEdges = new ArrayList(); + List leftTextEdges = new ArrayList<>(); + List midTextEdges = new ArrayList<>(); + List rightTextEdges = new ArrayList<>(); + + Map> currLeftEdges = new HashMap<>(); + Map> currMidEdges = new HashMap<>(); + Map> currRightEdges = new HashMap<>(); - Map> currLeftEdges = new HashMap>(); - Map> currMidEdges = new HashMap>(); - Map> currRightEdges = new HashMap>(); + int numOfLines = lines.size(); for (Line textRow : lines) { for (TextChunk text : textRow.getTextElements()) { - Integer left = new Integer((int) Math.floor(text.getLeft())); - Integer right = new Integer((int) Math.floor(text.getRight())); - Integer mid = new Integer(left + ((right - left) / 2)); + Integer left = (int) Math.floor(text.getLeft()); + Integer right = (int) Math.floor(text.getRight()); + Integer mid = left + ((right - left) / 2); // first put this chunk into any edge buckets it belongs to - List leftEdge = currLeftEdges.get(left); - if (leftEdge == null) { - leftEdge = new ArrayList(); - currLeftEdges.put(left, leftEdge); - } + List leftEdge = currLeftEdges.computeIfAbsent(left, k -> new ArrayList<>()); leftEdge.add(text); - List midEdge = currMidEdges.get(mid); - if (midEdge == null) { - midEdge = new ArrayList(); - currMidEdges.put(mid, midEdge); - } + List midEdge = currMidEdges.computeIfAbsent(mid, k -> new ArrayList<>()); midEdge.add(text); - List rightEdge = currRightEdges.get(right); - if (rightEdge == null) { - rightEdge = new ArrayList(); - currRightEdges.put(right, rightEdge); - } + List rightEdge = currRightEdges.computeIfAbsent(right, k -> new ArrayList<>()); rightEdge.add(text); // now see if this text chunk blows up any other edges - for (Iterator>> iterator = currLeftEdges.entrySet().iterator(); iterator.hasNext(); ) { - Map.Entry> entry = iterator.next(); - Integer key = 
entry.getKey(); - if (key > left && key < right) { - iterator.remove(); - List edgeChunks = entry.getValue(); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); - - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + leftTextEdges.addAll( + calculateExtendedEdges(numOfLines, currLeftEdges, left, right) + ); - leftTextEdges.add(edge); - } - } - } - - for (Iterator>> iterator = currMidEdges.entrySet().iterator(); iterator.hasNext(); ) { - Map.Entry> entry = iterator.next(); - Integer key = entry.getKey(); - if (key > left && key < right && Math.abs(key - mid) > 2) { - iterator.remove(); - List edgeChunks = entry.getValue(); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); - - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + midTextEdges.addAll( + calculateExtendedEdges(numOfLines, currMidEdges, left, right, mid, 2) + ); - midTextEdges.add(edge); - } - } - } - - for (Iterator>> iterator = currRightEdges.entrySet().iterator(); iterator.hasNext(); ) { - Map.Entry> entry = iterator.next(); - Integer key = entry.getKey(); - if (key > left && key < right) { - iterator.remove(); - List edgeChunks = entry.getValue(); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); - - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); - - rightTextEdges.add(edge); - } - } - } + rightTextEdges.addAll( + calculateExtendedEdges(numOfLines, currRightEdges, left, right) + ); } } // 
add the leftovers - for (Integer key : currLeftEdges.keySet()) { - List edgeChunks = currLeftEdges.get(key); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); + leftTextEdges.addAll( + calculateLeftoverEdges(numOfLines, currLeftEdges) + ); - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + midTextEdges.addAll( + calculateLeftoverEdges(numOfLines, currMidEdges) + ); - leftTextEdges.add(edge); - } - } + rightTextEdges.addAll( + calculateLeftoverEdges(numOfLines, currRightEdges) + ); - for (Integer key : currMidEdges.keySet()) { - List edgeChunks = currMidEdges.get(key); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); + return new TextEdges(leftTextEdges, midTextEdges, rightTextEdges); + } - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + private Set calculateLeftoverEdges(int numOfLines, Map> currDirectedEdges) { + Set leftoverEdges = new HashSet<>(); + for (Integer key : currDirectedEdges.keySet()) { + List edgeChunks = currDirectedEdges.get(key); + if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { + TextEdge edge = getEdgeFromChunks(numOfLines, key, edgeChunks); - midTextEdges.add(edge); + leftoverEdges.add(edge); } } + return leftoverEdges; + } - for (Integer key : currRightEdges.keySet()) { - List edgeChunks = currRightEdges.get(key); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); + private TextEdge getEdgeFromChunks(int numOfLines, Integer key, List edgeChunks) { + TextChunk first = edgeChunks.get(0); + TextChunk last = 
edgeChunks.get(edgeChunks.size() - 1); + + TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); + edge.intersectingTextRowCount = Math.min(edgeChunks.size(), numOfLines); + return edge; + } - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); - rightTextEdges.add(edge); + private Collection calculateExtendedEdges(Integer numOfLines, Map> currDirectedEdges, Integer left, Integer right) { + return calculateExtendedEdges(numOfLines, currDirectedEdges, left, right, null, null); + } + + private Collection calculateExtendedEdges(Integer numOfLines, Map> currDirectedEdges, Integer left, Integer right, Integer mid, Integer minDistToMid) { + Set extendedEdges = new HashSet<>(); + Iterator>> edgeIterator = currDirectedEdges.entrySet().iterator(); + while (edgeIterator.hasNext()) { + Map.Entry> entry = edgeIterator.next(); + Integer key = entry.getKey(); + + // if mid and minDistToMid are set, we calculate if the distance to mid is actually above, + // otherwise we ignore it + boolean hasMinDistToMid = mid == null || minDistToMid == null || Math.abs(key - mid) > minDistToMid; + + if (key > left && key < right && hasMinDistToMid) { + edgeIterator.remove(); + List edgeChunks = entry.getValue(); + if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { + TextEdge edge = getEdgeFromChunks(numOfLines, key, edgeChunks); + extendedEdges.add(edge); + } } } - - return new TextEdges(leftTextEdges, midTextEdges, rightTextEdges); + return extendedEdges; } private List getTableAreasFromCells(List cells) { - List> cellGroups = new ArrayList>(); + List> cellGroups = new ArrayList<>(); for (Rectangle cell : cells) { boolean addedToGroup = false; @@ -664,9 +623,9 @@ private List getTableAreasFromCells(List cells) Point2D[] groupCellCorners = groupCell.getPoints(); Point2D[] candidateCorners = cell.getPoints(); - for (int i = 0; i < candidateCorners.length; i++) { - for 
(int j = 0; j < groupCellCorners.length; j++) { - if (candidateCorners[i].distance(groupCellCorners[j]) < CELL_CORNER_DISTANCE_MAXIMUM) { + for (Point2D candidateCorner : candidateCorners) { + for (Point2D groupCellCorner : groupCellCorners) { + if (candidateCorner.distance(groupCellCorner) < CELL_CORNER_DISTANCE_MAXIMUM) { cellGroup.add(cell); addedToGroup = true; break cellCheck; @@ -677,14 +636,14 @@ private List getTableAreasFromCells(List cells) } if (!addedToGroup) { - ArrayList cellGroup = new ArrayList(); + ArrayList cellGroup = new ArrayList<>(); cellGroup.add(cell); cellGroups.add(cellGroup); } } // create table areas based on cell group - List tableAreas = new ArrayList(); + List tableAreas = new ArrayList<>(); for (List cellGroup : cellGroups) { // less than four cells should not make a table if (cellGroup.size() < REQUIRED_CELLS_FOR_TABLE) { @@ -713,7 +672,7 @@ private List getHorizontalRulings(BufferedImage image) { // get all horizontal edges, which we'll define as a change in grayscale colour // along a straight line of a certain length - ArrayList horizontalRulings = new ArrayList(); + ArrayList horizontalRulings = new ArrayList<>(); Raster r = image.getRaster(); int width = r.getWidth(); @@ -776,7 +735,7 @@ private List getVerticalRulings(BufferedImage image) { // get all vertical edges, which we'll define as a change in grayscale colour // along a straight line of a certain length - ArrayList verticalRulings = new ArrayList(); + ArrayList verticalRulings = new ArrayList<>(); Raster r = image.getRaster(); int width = r.getWidth(); @@ -835,37 +794,63 @@ private List getVerticalRulings(BufferedImage image) { return verticalRulings; } - - // taken from http://www.docjar.com/html/api/org/apache/pdfbox/examples/util/RemoveAllText.java.html private PDDocument removeText(PDPage page) throws IOException { PDFStreamParser parser = new PDFStreamParser(page); parser.parse(); - List tokens = parser.getTokens(); - List newTokens = new ArrayList(); - for 
(Object token : tokens) { - if (token instanceof Operator) { - Operator op = (Operator) token; - if (op.getName().equals("TJ") || op.getName().equals("Tj")) { - //remove the one argument to this operator - newTokens.remove(newTokens.size() - 1); - continue; - } - } - newTokens.add(token); - } PDDocument document = new PDDocument(); - document.addPage(page); + PDPage newPage = document.importPage(page); + newPage.setResources(page.getResources()); PDStream newContents = new PDStream(document); OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE); ContentStreamWriter writer = new ContentStreamWriter(out); - writer.writeTokens(newTokens); + List tokensWithoutText = createTokensWithoutText(page); + writer.writeTokens(tokensWithoutText); out.close(); - page.setContents(newContents); - + newPage.setContents(newContents); return document; + } + + + /** + * @param contentStream contentStream + * @return newTokens + * @throws IOException When parseNextToken on Error + * @see ... 
+ */ + private static List createTokensWithoutText(PDContentStream contentStream) throws IOException { + PDFStreamParser parser = new PDFStreamParser(contentStream); + Object token = parser.parseNextToken(); + List newTokens = new ArrayList<>(); + while (token != null) { + if (token instanceof Operator) { + Operator op = (Operator) token; + String opName = op.getName(); + if (OperatorName.SHOW_TEXT_ADJUSTED.equals(opName) + || OperatorName.SHOW_TEXT.equals(opName) + || OperatorName.SHOW_TEXT_LINE.equals(opName)) { + // remove the argument to this operator + newTokens.remove(newTokens.size() - 1); + + token = parser.parseNextToken(); + continue; + } else if (OperatorName.SHOW_TEXT_LINE_AND_SPACE.equals(opName)) { + // remove the 3 arguments to this operator + newTokens.remove(newTokens.size() - 1); + newTokens.remove(newTokens.size() - 1); + newTokens.remove(newTokens.size() - 1); + token = parser.parseNextToken(); + continue; + } + } + newTokens.add(token); + token = parser.parseNextToken(); + } + return newTokens; } + + } diff --git a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java index 06f966e1..43136ba5 100644 --- a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java @@ -20,12 +20,10 @@ public class SpreadsheetDetectionAlgorithm implements DetectionAlgorithm { public List detect(Page page) { List cells = SpreadsheetExtractionAlgorithm.findCells(page.getHorizontalRulings(), page.getVerticalRulings()); - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List tables = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells); // we want tables to be returned from top to bottom on the page - Collections.sort(tables); + Collections.sort(tables, Rectangle.ILL_DEFINED_ORDER); return tables; } diff --git 
a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java index 3b22f44e..ed2e78e3 100644 --- a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java @@ -26,9 +26,9 @@ public BasicExtractionAlgorithm(List verticalRulings) { } public List

extract(Page page, List verticalRulingPositions) { - List verticalRulings = new ArrayList(verticalRulingPositions.size()); + List verticalRulings = new ArrayList<>(verticalRulingPositions.size()); for (Float p: verticalRulingPositions) { - verticalRulings.add(new Ruling((float) page.getTop(), (float) p, 0.0f, (float) page.getHeight())); + verticalRulings.add(new Ruling(page.getTop(), p, 0.0f, (float) page.getHeight())); } this.verticalRulings = verticalRulings; return this.extract(page); @@ -40,7 +40,7 @@ public List
extract(Page page) { List textElements = page.getText(); if (textElements.size() == 0) { - return Arrays.asList(new Table[] { Table.EMPTY }); + return Arrays.asList(new Table[] { Table.empty() }); } List textChunks = this.verticalRulings == null ? TextElement.mergeWords(page.getText()) : TextElement.mergeWords(page.getText(), this.verticalRulings); @@ -54,7 +54,7 @@ public int compare(Ruling arg0, Ruling arg1) { return Double.compare(arg0.getLeft(), arg1.getLeft()); } }); - columns = new ArrayList(this.verticalRulings.size()); + columns = new ArrayList<>(this.verticalRulings.size()); for (Ruling vr: this.verticalRulings) { columns.add(vr.getLeft()); } @@ -63,8 +63,10 @@ public int compare(Ruling arg0, Ruling arg1) { columns = columnPositions(lines); } - Table table = new Table(page, this); - + Table table = new Table(this); + table.setRect(page.getLeft(), page.getTop(), page.getWidth(), page.getHeight()); + table.setPageNumber(page.getPageNumber()); + for (int i = 0; i < lines.size(); i++) { Line line = lines.get(i); List elements = line.getTextElements(); @@ -73,7 +75,7 @@ public int compare(Ruling arg0, Ruling arg1) { @Override public int compare(TextChunk o1, TextChunk o2) { - return new java.lang.Float(o1.getLeft()).compareTo(o2.getLeft()); + return Float.compare(o1.getLeft(), o2.getLeft()); } }); @@ -109,7 +111,7 @@ public String toString() { */ public static List columnPositions(List lines) { - List regions = new ArrayList(); + List regions = new ArrayList<>(); for (TextChunk tc: lines.get(0).getTextElements()) { if (tc.isSameChar(Line.WHITE_SPACE_CHARS)) { continue; @@ -120,7 +122,7 @@ public static List columnPositions(List lines) { } for (Line l: lines.subList(1, lines.size())) { - List lineTextElements = new ArrayList(); + List lineTextElements = new ArrayList<>(); for (TextChunk tc: l.getTextElements()) { if (!tc.isSameChar(Line.WHITE_SPACE_CHARS)) { lineTextElements.add(tc); @@ -129,7 +131,7 @@ public static List columnPositions(List lines) { for 
(Rectangle cr: regions) { - List overlaps = new ArrayList(); + List overlaps = new ArrayList<>(); for (TextChunk te: lineTextElements) { if (cr.horizontallyOverlaps(te)) { overlaps.add(te); @@ -150,9 +152,9 @@ public static List columnPositions(List lines) { } } - List rv = new ArrayList(); + List rv = new ArrayList<>(); for (Rectangle r: regions) { - rv.add((float) r.getRight()); + rv.add(r.getRight()); } Collections.sort(rv); diff --git a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java index f5052e69..5b4af3d5 100644 --- a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java @@ -1,23 +1,9 @@ package technology.tabula.extractors; -import java.awt.geom.Point2D; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import technology.tabula.*; -import technology.tabula.Cell; -import technology.tabula.Page; -import technology.tabula.Rectangle; -import technology.tabula.Ruling; -import technology.tabula.Table; -import technology.tabula.TableWithRulingLines; -import technology.tabula.TextElement; -import technology.tabula.Utils; +import java.awt.geom.Point2D; +import java.util.*; /** * @author manuel @@ -27,70 +13,41 @@ public class SpreadsheetExtractionAlgorithm implements ExtractionAlgorithm { private static final float MAGIC_HEURISTIC_NUMBER = 0.65f; - private static final Comparator POINT_COMPARATOR = new Comparator() { - @Override - public int compare(Point2D arg0, Point2D arg1) { - int rv = 0; - float arg0X = Utils.round(arg0.getX(), 2); - float arg0Y = Utils.round(arg0.getY(), 2); - float arg1X = Utils.round(arg1.getX(), 2); - float arg1Y = Utils.round(arg1.getY(), 2); - - - if (arg0Y > 
arg1Y) { - rv = 1; - } - else if (arg0Y < arg1Y) { - rv = -1; - } - else if (arg0X > arg1X) { - rv = 1; - } - else if (arg0X < arg1X) { - rv = -1; - } - return rv; + private static final Comparator Y_FIRST_POINT_COMPARATOR = (point1, point2) -> { + int compareY = compareRounded(point1.getY(), point2.getY()); + if (compareY == 0) { + return compareRounded(point1.getX(), point2.getX()); } + return compareY; }; - private static final Comparator X_FIRST_POINT_COMPARATOR = new Comparator() { - @Override - public int compare(Point2D arg0, Point2D arg1) { - int rv = 0; - float arg0X = Utils.round(arg0.getX(), 2); - float arg0Y = Utils.round(arg0.getY(), 2); - float arg1X = Utils.round(arg1.getX(), 2); - float arg1Y = Utils.round(arg1.getY(), 2); - - if (arg0X > arg1X) { - rv = 1; - } - else if (arg0X < arg1X) { - rv = -1; - } - else if (arg0Y > arg1Y) { - rv = 1; - } - else if (arg0Y < arg1Y) { - rv = -1; - } - return rv; + private static final Comparator X_FIRST_POINT_COMPARATOR = (point1, point2) -> { + int compareX = compareRounded(point1.getX(), point2.getX()); + if (compareX == 0) { + return compareRounded(point1.getY(), point2.getY()); } + return compareX; }; + private static int compareRounded(double d1, double d2) { + float d1Rounded = Utils.round(d1, 2); + float d2Rounded = Utils.round(d2, 2); + + return Float.compare(d1Rounded, d2Rounded); + } @Override - public List extract(Page page) { + public List
extract(Page page) { return extract(page, page.getRulings()); } /** * Extract a list of Table from page using rulings as separators */ - public List extract(Page page, List rulings) { + public List
extract(Page page, List rulings) { // split rulings into horizontal and vertical - List horizontalR = new ArrayList(), - verticalR = new ArrayList(); + List horizontalR = new ArrayList<>(); + List verticalR = new ArrayList<>(); for (Ruling r: rulings) { if (r.horizontal()) { @@ -106,10 +63,10 @@ else if (r.vertical()) { List cells = findCells(horizontalR, verticalR); List spreadsheetAreas = findSpreadsheetsFromCells(cells); - List spreadsheets = new ArrayList(); + List
spreadsheets = new ArrayList<>(); for (Rectangle area: spreadsheetAreas) { - List overlappingCells = new ArrayList(); + List overlappingCells = new ArrayList<>(); for (Cell c: cells) { if (c.intersects(area)) { @@ -118,27 +75,23 @@ else if (r.vertical()) { } } - List horizontalOverlappingRulings = new ArrayList(); + List horizontalOverlappingRulings = new ArrayList<>(); for (Ruling hr: horizontalR) { if (area.intersectsLine(hr)) { horizontalOverlappingRulings.add(hr); } } - List verticalOverlappingRulings = new ArrayList(); + List verticalOverlappingRulings = new ArrayList<>(); for (Ruling vr: verticalR) { if (area.intersectsLine(vr)) { verticalOverlappingRulings.add(vr); } } - TableWithRulingLines t = new TableWithRulingLines(area, page, overlappingCells, - horizontalOverlappingRulings, verticalOverlappingRulings); - - t.setExtractionAlgorithm(this); - + TableWithRulingLines t = new TableWithRulingLines(area, overlappingCells, horizontalOverlappingRulings, verticalOverlappingRulings, this, page.getPageNumber()); spreadsheets.add(t); } - Utils.sort(spreadsheets); + Utils.sort(spreadsheets, Rectangle.ILL_DEFINED_ORDER); return spreadsheets; } @@ -146,7 +99,7 @@ public boolean isTabular(Page page) { // if there's no text at all on the page, it's not a table // (we won't be able to do anything with it though) - if(page.getText().isEmpty()){ + if (page.getText().isEmpty()){ return false; } @@ -155,42 +108,39 @@ public boolean isTabular(Page page) { Page minimalRegion = page.getArea(Utils.bounds(page.getText())); List tables = new SpreadsheetExtractionAlgorithm().extract(minimalRegion); - if (tables.size() == 0) { + if (tables.isEmpty()) { return false; } Table table = tables.get(0); - int rowsDefinedByLines = table.getRows().size(); - int colsDefinedByLines = table.getCols().size(); + int rowsDefinedByLines = table.getRowCount(); + int colsDefinedByLines = table.getColCount(); tables = new BasicExtractionAlgorithm().extract(minimalRegion); - if (tables.size() == 0) { - 
// TODO WHAT DO WE DO HERE? + if (tables.isEmpty()) { + return false; } table = tables.get(0); - int rowsDefinedWithoutLines = table.getRows().size(); - int colsDefinedWithoutLines = table.getCols().size(); + int rowsDefinedWithoutLines = table.getRowCount(); + int colsDefinedWithoutLines = table.getColCount(); - float ratio = (((float) colsDefinedByLines / colsDefinedWithoutLines) + ((float) rowsDefinedByLines / rowsDefinedWithoutLines)) / 2.0f; + float ratio = (((float) colsDefinedByLines / colsDefinedWithoutLines) + + ((float) rowsDefinedByLines / rowsDefinedWithoutLines)) / 2.0f; - return ratio > MAGIC_HEURISTIC_NUMBER && ratio < (1/MAGIC_HEURISTIC_NUMBER); + return ratio > MAGIC_HEURISTIC_NUMBER && ratio < (1 / MAGIC_HEURISTIC_NUMBER); } public static List findCells(List horizontalRulingLines, List verticalRulingLines) { - List cellsFound = new ArrayList(); + List cellsFound = new ArrayList<>(); Map intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines); - List intersectionPointsList = new ArrayList(intersectionPoints.keySet()); - Collections.sort(intersectionPointsList, POINT_COMPARATOR); - boolean doBreak = false; + List intersectionPointsList = new ArrayList<>(intersectionPoints.keySet()); + intersectionPointsList.sort(Y_FIRST_POINT_COMPARATOR); for (int i = 0; i < intersectionPointsList.size(); i++) { Point2D topLeft = intersectionPointsList.get(i); Ruling[] hv = intersectionPoints.get(topLeft); - doBreak = false; - - // CrossingPointsDirectlyBelow( topLeft ); - List xPoints = new ArrayList(); - // CrossingPointsDirectlyToTheRight( topLeft ); - List yPoints = new ArrayList(); + + List xPoints = new ArrayList<>(); + List yPoints = new ArrayList<>(); for (Point2D p: intersectionPointsList.subList(i, intersectionPointsList.size())) { if (p.getX() == topLeft.getX() && p.getY() > topLeft.getY()) { @@ -202,7 +152,6 @@ public static List findCells(List horizontalRulingLines, List findCells(List horizontalRulingLines, List 
findCells(List horizontalRulingLines, List findSpreadsheetsFromCells(List cells) { // via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon - List rectangles = new ArrayList(); - Set pointSet = new HashSet(); - Map edgesH = new HashMap(); - Map edgesV = new HashMap(); + List rectangles = new ArrayList<>(); + Set pointSet = new HashSet<>(); + Map edgesH = new HashMap<>(); + Map edgesV = new HashMap<>(); int i = 0; - cells = new ArrayList(new HashSet(cells)); + cells = new ArrayList<>(new HashSet<>(cells)); - Utils.sort(cells); + Utils.sort(cells, Rectangle.ILL_DEFINED_ORDER); for (Rectangle cell: cells) { for(Point2D pt: cell.getPoints()) { @@ -256,11 +204,11 @@ public static List findSpreadsheetsFromCells(List pointsSortX = new ArrayList(pointSet); - Collections.sort(pointsSortX, X_FIRST_POINT_COMPARATOR); + List pointsSortX = new ArrayList<>(pointSet); + pointsSortX.sort(X_FIRST_POINT_COMPARATOR); // Y first sort - List pointsSortY = new ArrayList(pointSet); - Collections.sort(pointsSortY, POINT_COMPARATOR); + List pointsSortY = new ArrayList<>(pointSet); + pointsSortY.sort(Y_FIRST_POINT_COMPARATOR); while (i < pointSet.size()) { float currY = (float) pointsSortY.get(i).getY(); @@ -282,10 +230,10 @@ public static List findSpreadsheetsFromCells(List> polygons = new ArrayList>(); + List> polygons = new ArrayList<>(); Point2D nextVertex; while (!edgesH.isEmpty()) { - ArrayList polygon = new ArrayList(); + ArrayList polygon = new ArrayList<>(); Point2D first = edgesH.keySet().iterator().next(); polygon.add(new PolygonVertex(first, Direction.HORIZONTAL)); edgesH.remove(first); @@ -296,16 +244,15 @@ public static List findSpreadsheetsFromCells(List> { + + public static final RectangularTextContainerSerializer INSTANCE = new RectangularTextContainerSerializer(); + + private RectangularTextContainerSerializer() {} + + @Override + public JsonElement serialize(RectangularTextContainer textContainer, Type type, 
JsonSerializationContext context) { + JsonObject json = new JsonObject(); + json.addProperty("top", textContainer.getTop()); + json.addProperty("left", textContainer.getLeft()); + json.addProperty("width", textContainer.getWidth()); + json.addProperty("height", textContainer.getHeight()); + json.addProperty("text", textContainer.getText()); + return json; + } + +} \ No newline at end of file diff --git a/src/main/java/technology/tabula/json/RulingSerializer.java b/src/main/java/technology/tabula/json/RulingSerializer.java deleted file mode 100644 index 8a3fe297..00000000 --- a/src/main/java/technology/tabula/json/RulingSerializer.java +++ /dev/null @@ -1,23 +0,0 @@ -package technology.tabula.json; - -import java.lang.reflect.Type; - -import technology.tabula.Ruling; - -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import com.google.gson.JsonSerializationContext; -import com.google.gson.JsonSerializer; - -public class RulingSerializer implements JsonSerializer { - - @Override - public JsonElement serialize(Ruling arg0, Type arg1, - JsonSerializationContext arg2) { - - JsonObject object = new JsonObject(); - - return null; - } - -} diff --git a/src/main/java/technology/tabula/json/TableSerializer.java b/src/main/java/technology/tabula/json/TableSerializer.java index 970c7310..0caaf0e5 100644 --- a/src/main/java/technology/tabula/json/TableSerializer.java +++ b/src/main/java/technology/tabula/json/TableSerializer.java @@ -12,33 +12,35 @@ import com.google.gson.JsonSerializationContext; import com.google.gson.JsonSerializer; -public class TableSerializer implements JsonSerializer
{ +public final class TableSerializer implements JsonSerializer
{ + + public static final TableSerializer INSTANCE = new TableSerializer(); + + private TableSerializer() {} @Override - public JsonElement serialize(Table table, Type type, - JsonSerializationContext context) { - - JsonObject object = new JsonObject(); - if( table.getExtractionAlgorithm() == null){ - object.addProperty("extraction_method", ""); - }else{ - object.addProperty("extraction_method", (table.getExtractionAlgorithm()).toString()); - } - object.addProperty("top", table.getTop()); - object.addProperty("left", table.getLeft()); - object.addProperty("width", table.getWidth()); - object.addProperty("height", table.getHeight()); - - JsonArray jsonDataArray = new JsonArray(); - for (List row: table.getRows()) { - JsonArray jsonRowArray = new JsonArray(); - for (RectangularTextContainer textChunk: row) { - jsonRowArray.add(context.serialize(textChunk)); - } - jsonDataArray.add(jsonRowArray); + public JsonElement serialize(Table table, Type type, JsonSerializationContext context) { + JsonObject json = new JsonObject(); + JsonArray data = new JsonArray(); + + json.addProperty("extraction_method", table.getExtractionMethod()); + json.addProperty("page_number", table.getPageNumber()); + json.addProperty("top", table.getTop()); + json.addProperty("left", table.getLeft()); + json.addProperty("width", table.getWidth()); + json.addProperty("height", table.getHeight()); + json.addProperty("right", table.getRight()); + json.addProperty("bottom", table.getBottom()); + json.add("data", data); + + for (List tableRow : table.getRows()) { + JsonArray jsonRow = new JsonArray(); + for (RectangularTextContainer textChunk : tableRow) + jsonRow.add(context.serialize(textChunk)); + data.add(jsonRow); } - object.add("data", jsonDataArray); - - return object; + + return json; } + } diff --git a/src/main/java/technology/tabula/json/TextChunkSerializer.java b/src/main/java/technology/tabula/json/TextChunkSerializer.java deleted file mode 100644 index 5f4252c1..00000000 --- 
a/src/main/java/technology/tabula/json/TextChunkSerializer.java +++ /dev/null @@ -1,27 +0,0 @@ -package technology.tabula.json; - -import java.lang.reflect.Type; - -import technology.tabula.RectangularTextContainer; - -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import com.google.gson.JsonSerializationContext; -import com.google.gson.JsonSerializer; - -public class TextChunkSerializer implements JsonSerializer { - - @Override - public JsonElement serialize(RectangularTextContainer textChunk, Type arg1, - JsonSerializationContext context) { - JsonObject object = new JsonObject(); - - object.addProperty("top", textChunk.getTop()); - object.addProperty("left", textChunk.getLeft()); - object.addProperty("width", textChunk.getWidth()); - object.addProperty("height", textChunk.getHeight()); - object.addProperty("text", textChunk.getText()); - - return object; - } -} \ No newline at end of file diff --git a/src/main/java/technology/tabula/writers/CSVWriter.java b/src/main/java/technology/tabula/writers/CSVWriter.java index 16382585..682397b8 100644 --- a/src/main/java/technology/tabula/writers/CSVWriter.java +++ b/src/main/java/technology/tabula/writers/CSVWriter.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import org.apache.commons.csv.CSVPrinter; @@ -11,47 +12,36 @@ import technology.tabula.Table; public class CSVWriter implements Writer { - - CSVPrinter printer; - private boolean useLineReturns = true; - -// public CSVWriter() { -// super(); -// } -// -// public CSVWriter(boolean useLineReturns) { -// super(); -// this.useLineReturns = useLineReturns; -// } - - void createWriter(Appendable out) { - try { - this.printer = new CSVPrinter(out, CSVFormat.EXCEL); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } + + private final CSVFormat format; + + public CSVWriter() { + this(CSVFormat.EXCEL); + } + + protected 
CSVWriter(CSVFormat format) { + this.format = format; } - + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // @Override public void write(Appendable out, Table table) throws IOException { - this.createWriter(out); - for (List row: table.getRows()) { - List cells = new ArrayList(row.size()); - for (RectangularTextContainer tc: row) { - cells.add(tc.getText()); + write(out, Collections.singletonList(table)); + } + + @Override + public void write(Appendable out, List
tables) throws IOException { + try (CSVPrinter printer = new CSVPrinter(out, format)) { + for (Table table : tables) { + for (List row : table.getRows()) { + List cells = new ArrayList<>(row.size()); + for (RectangularTextContainer cell : row) + cells.add(cell.getText()); + printer.printRecord(cells); + } } - this.printer.printRecord(cells); + printer.flush(); } - printer.flush(); } - @Override - public void write(Appendable out, List
tables) throws IOException { - for (Table table : tables) { - write(out, table); - } - - } - } diff --git a/src/main/java/technology/tabula/writers/JSONWriter.java b/src/main/java/technology/tabula/writers/JSONWriter.java index cf9a843b..bb566f2d 100644 --- a/src/main/java/technology/tabula/writers/JSONWriter.java +++ b/src/main/java/technology/tabula/writers/JSONWriter.java @@ -1,63 +1,61 @@ package technology.tabula.writers; -import java.io.IOException; -import java.lang.reflect.Modifier; -import java.util.List; +import com.google.gson.ExclusionStrategy; +import com.google.gson.FieldAttributes; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonArray; import technology.tabula.Cell; import technology.tabula.RectangularTextContainer; import technology.tabula.Table; import technology.tabula.TextChunk; +import technology.tabula.json.RectangularTextContainerSerializer; import technology.tabula.json.TableSerializer; -import technology.tabula.json.TextChunkSerializer; -import com.google.gson.ExclusionStrategy; -import com.google.gson.FieldAttributes; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import com.google.gson.JsonArray; +import java.io.IOException; +import java.util.List; -public class JSONWriter implements Writer { +import static java.lang.reflect.Modifier.PUBLIC; - class TableSerializerExclusionStrategy implements ExclusionStrategy { +public class JSONWriter implements Writer { + private static final ExclusionStrategy ALL_CLASSES_SKIPPING_NON_PUBLIC_FIELDS = new ExclusionStrategy() { @Override - public boolean shouldSkipClass(Class arg0) { + public boolean shouldSkipClass(Class c) { return false; } @Override - public boolean shouldSkipField(FieldAttributes fa) { - return !fa.hasModifier(Modifier.PUBLIC); + public boolean shouldSkipField(FieldAttributes fieldAttributes) { + return !fieldAttributes.hasModifier(PUBLIC); } - } - - - final Gson gson; - - public JSONWriter() { - gson = new 
GsonBuilder() - .addSerializationExclusionStrategy(new TableSerializerExclusionStrategy()) - .registerTypeAdapter(Table.class, new TableSerializer()) - .registerTypeAdapter(RectangularTextContainer.class, new TextChunkSerializer()) - .registerTypeAdapter(Cell.class, new TextChunkSerializer()) - .registerTypeAdapter(TextChunk.class, new TextChunkSerializer()) - .create(); - } + }; + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // @Override public void write(Appendable out, Table table) throws IOException { - - out.append(gson.toJson(table, Table.class)); + out.append(gson().toJson(table, Table.class)); } + @Override public void write(Appendable out, List
tables) throws IOException { + Gson gson = gson(); + JsonArray jsonElements = new JsonArray(); + for (Table table : tables) + jsonElements.add(gson.toJsonTree(table, Table.class)); + out.append(gson.toJson(jsonElements)); + } - JsonArray array = new JsonArray(); - for (Table table : tables) { - array.add(gson.toJsonTree(table, Table.class)); - } - out.append(gson.toJson(array)); - + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + private static Gson gson() { + return new GsonBuilder() + .addSerializationExclusionStrategy(ALL_CLASSES_SKIPPING_NON_PUBLIC_FIELDS) + .registerTypeAdapter(Table.class, TableSerializer.INSTANCE) + .registerTypeAdapter(RectangularTextContainer.class, RectangularTextContainerSerializer.INSTANCE) + .registerTypeAdapter(Cell.class, RectangularTextContainerSerializer.INSTANCE) + .registerTypeAdapter(TextChunk.class, RectangularTextContainerSerializer.INSTANCE) + .create(); } + } diff --git a/src/main/java/technology/tabula/writers/TSVWriter.java b/src/main/java/technology/tabula/writers/TSVWriter.java index 225ba980..115d0347 100644 --- a/src/main/java/technology/tabula/writers/TSVWriter.java +++ b/src/main/java/technology/tabula/writers/TSVWriter.java @@ -1,20 +1,11 @@ package technology.tabula.writers; -import java.io.IOException; - import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVPrinter; public class TSVWriter extends CSVWriter { - - @Override - void createWriter(Appendable out) { - try { - this.printer = new CSVPrinter(out, CSVFormat.TDF); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } + + public TSVWriter() { + super(CSVFormat.TDF); } - + } diff --git a/src/main/java/technology/tabula/writers/Writer.java b/src/main/java/technology/tabula/writers/Writer.java index 78f4faa4..99b708c6 100644 --- a/src/main/java/technology/tabula/writers/Writer.java +++ b/src/main/java/technology/tabula/writers/Writer.java @@ -6,6 +6,9 @@ import 
technology.tabula.Table; public interface Writer { + void write(Appendable out, Table table) throws IOException; + void write(Appendable out, List
tables) throws IOException; + } diff --git a/src/test/java/technology/tabula/TableTest.java b/src/test/java/technology/tabula/TableTest.java new file mode 100644 index 00000000..c574a553 --- /dev/null +++ b/src/test/java/technology/tabula/TableTest.java @@ -0,0 +1,45 @@ +package technology.tabula; + +import static org.junit.Assert.*; + +import org.junit.Test; + +public class TableTest { + + @Test public void testEmpty() { + Table empty = Table.empty(); + + assertEquals(TextChunk.EMPTY, empty.getCell(0, 0)); + assertEquals(TextChunk.EMPTY, empty.getCell(1, 1)); + + assertEquals(0, empty.getRowCount()); + assertEquals(0, empty.getColCount()); + + assertEquals("", empty.getExtractionMethod()); + + assertEquals(0, empty.getTop(), 0); + assertEquals(0, empty.getRight(), 0); + assertEquals(0, empty.getBottom(), 0); + assertEquals(0, empty.getLeft(), 0); + + assertEquals(0, empty.getArea(), 0); + } + + @Test public void testRowColCounts() { + Table table = Table.empty(); + + assertEquals(0, table.getRowCount()); + assertEquals(0, table.getColCount()); + + table.add(TextChunk.EMPTY, 0, 0); + + assertEquals(1, table.getRowCount()); + assertEquals(1, table.getColCount()); + + table.add(TextChunk.EMPTY, 9, 9); + + assertEquals(10, table.getRowCount()); + assertEquals(10, table.getColCount()); + } + +} diff --git a/src/test/java/technology/tabula/TestBasicExtractor.java b/src/test/java/technology/tabula/TestBasicExtractor.java index da40db78..b56fd6ea 100644 --- a/src/test/java/technology/tabula/TestBasicExtractor.java +++ b/src/test/java/technology/tabula/TestBasicExtractor.java @@ -13,181 +13,143 @@ import org.apache.commons.csv.CSVRecord; import org.junit.Test; -import technology.tabula.Page; -import technology.tabula.Ruling; -import technology.tabula.Table; import technology.tabula.extractors.BasicExtractionAlgorithm; import technology.tabula.writers.CSVWriter; -import technology.tabula.UtilsForTesting; public class TestBasicExtractor { - private static final String[][] 
EXPECTED_CORRECT_COLUMNS = { - {"", "", "Involvement of pupils in", ""}, - {"", "Preperation and", "Production of", "Presentation an"}, - {"", "planing", "materials", "evaluation"}, - {"Knowledge and awareness of different cultures", "0,2885", - "0,3974", "0,3904"}, - {"Foreign language competence", "0,3057", "0,4184", "0,3899"}, - {"Social skills and abilities", "0,3416", "0,3369", "0,4303"}, - {"Acquaintance of special knowledge", "0,2569", "0,2909", - "0,3557"}, - {"Self competence", "0,3791", "0,3320", "0,4617"}}; - - private static final String[][] EXPECTED_COLUMN_RECOGNITION = { - {"ABDALA de MATARAZZO, Norma Amanda", - "Frente Cívico por Santiago", "Santiago del Estero", - "AFIRMATIVO"}, - {"ALBRIEU, Oscar Edmundo Nicolas", - "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, - {"ALONSO, María Luz", "Frente para la Victoria - PJ", - "La Pampa", "AFIRMATIVO"}, - {"ARENA, Celia Isabel", "Frente para la Victoria - PJ", - "Santa Fe", "AFIRMATIVO"}, - {"ARREGUI, Andrés Roberto", "Frente para la Victoria - PJ", - "Buenos Aires", "AFIRMATIVO"}, - {"AVOSCAN, Herman Horacio", "Frente para la Victoria - PJ", - "Rio Negro", "AFIRMATIVO"}, - {"BALCEDO, María Ester", "Frente para la Victoria - PJ", - "Buenos Aires", "AFIRMATIVO"}, - {"BARRANDEGUY, Raúl Enrique", "Frente para la Victoria - PJ", - "Entre Ríos", "AFIRMATIVO"}, - {"BASTERRA, Luis Eugenio", "Frente para la Victoria - PJ", - "Formosa", "AFIRMATIVO"}, - {"BEDANO, Nora Esther", "Frente para la Victoria - PJ", - "Córdoba", "AFIRMATIVO"}, - {"BERNAL, María Eugenia", "Frente para la Victoria - PJ", - "Jujuy", "AFIRMATIVO"}, - {"BERTONE, Rosana Andrea", "Frente para la Victoria - PJ", - "Tierra del Fuego", "AFIRMATIVO"}, - {"BIANCHI, María del Carmen", "Frente para la Victoria - PJ", - "Cdad. Aut. Bs. As.", "AFIRMATIVO"}, - {"BIDEGAIN, Gloria Mercedes", "Frente para la Victoria - PJ", - "Buenos Aires", "AFIRMATIVO"}, - {"BRAWER, Mara", "Frente para la Victoria - PJ", - "Cdad. Aut. Bs. 
As.", "AFIRMATIVO"}, - {"BRILLO, José Ricardo", "Movimiento Popular Neuquino", - "Neuquén", "AFIRMATIVO"}, - {"BROMBERG, Isaac Benjamín", "Frente para la Victoria - PJ", - "Tucumán", "AFIRMATIVO"}, - {"BRUE, Daniel Agustín", "Frente Cívico por Santiago", - "Santiago del Estero", "AFIRMATIVO"}, - {"CALCAGNO, Eric", "Frente para la Victoria - PJ", - "Buenos Aires", "AFIRMATIVO"}, - {"CARLOTTO, Remo Gerardo", "Frente para la Victoria - PJ", - "Buenos Aires", "AFIRMATIVO"}, - {"CARMONA, Guillermo Ramón", "Frente para la Victoria - PJ", - "Mendoza", "AFIRMATIVO"}, - {"CATALAN MAGNI, Julio César", "Frente para la Victoria - PJ", - "Tierra del Fuego", "AFIRMATIVO"}, - {"CEJAS, Jorge Alberto", "Frente para la Victoria - PJ", - "Rio Negro", "AFIRMATIVO"}, - {"CHIENO, María Elena", "Frente para la Victoria - PJ", - "Corrientes", "AFIRMATIVO"}, - {"CIAMPINI, José Alberto", "Frente para la Victoria - PJ", - "Neuquén", "AFIRMATIVO"}, - {"CIGOGNA, Luis Francisco Jorge", - "Frente para la Victoria - PJ", "Buenos Aires", - "AFIRMATIVO"}, - {"CLERI, Marcos", "Frente para la Victoria - PJ", "Santa Fe", - "AFIRMATIVO"}, - {"COMELLI, Alicia Marcela", "Movimiento Popular Neuquino", - "Neuquén", "AFIRMATIVO"}, - {"CONTI, Diana Beatriz", "Frente para la Victoria - PJ", - "Buenos Aires", "AFIRMATIVO"}, - {"CORDOBA, Stella Maris", "Frente para la Victoria - PJ", - "Tucumán", "AFIRMATIVO"}, - {"CURRILEN, Oscar Rubén", "Frente para la Victoria - PJ", - "Chubut", "AFIRMATIVO"}}; - - private static final String[][] EXPECTED_COLUMN_EXTRACTION2 = { - {"", "Austria", "77", "1", "78"}, - {"", "Belgium", "159", "2", "161"}, - {"", "Bulgaria", "52", "0", "52"}, - {"", "Croatia", "144", "0", "144"}, - {"", "Cyprus", "43", "2", "45"}, - {"", "Czech Republic", "78", "0", "78"}, - {"", "Denmark", "151", "2", "153"}, - {"", "Estonia", "46", "0", "46"}, - {"", "Finland", "201", "1", "202"}, - {"", "France", "428", "7", "435"}, - {"", "Germany", "646", "21", "667"}, - {"", "Greece", "113", "2", "115"}, - 
{"", "Hungary", "187", "0", "187"}, - {"", "Iceland", "18", "0", "18"}, - {"", "Ireland", "213", "4", "217"}, - {"", "Israel", "25", "0", "25"}, - {"", "Italy", "627", "12", "639"}, - {"", "Latvia", "7", "0", "7"}, - {"", "Lithuania", "94", "1", "95"}, - {"", "Luxembourg", "22", "0", "22"}, - {"", "Malta", "18", "0", "18"}, - {"", "Netherlands", "104", "1", "105"}, - {"", "Norway", "195", "0", "195"}, - {"", "Poland", "120", "1", "121"}, - {"", "Portugal", "532", "3", "535"}, - {"", "Romania", "110", "0", "110"}, - {"", "Slovakia", "176", "0", "176"}, - {"", "Slovenia", "56", "0", "56"}, - {"", "Spain", "614", "3", "617"}, - {"", "Sweden", "122", "3", "125"}, - {"", "Switzerland", "64", "0", "64"}, - {"", "Turkey", "96", "0", "96"}, - {"", "United Kingdom", "572", "14", "586"} - }; - - private static final String[][] EXPECTED_TABLE_EXTRACTION = { - {"AANONSEN, DEBORAH, A", "", "STATEN ISLAND, NY", "MEALS", "$85.00"}, - {"TOTAL", "", "", "", "$85.00"}, - {"AARON, CAREN, T", "", "RICHMOND, VA", "EDUCATIONAL ITEMS", "$78.80"}, - {"AARON, CAREN, T", "", "RICHMOND, VA", "MEALS", "$392.45"}, - {"TOTAL", "", "", "", "$471.25"}, - {"AARON, JOHN", "", "CLARKSVILLE, TN", "MEALS", "$20.39"}, - {"TOTAL", "", "", "", "$20.39"}, - {"AARON, JOSHUA, N", "", "WEST GROVE, PA", "MEALS", "$310.33"}, - {"", "REGIONAL PULMONARY & SLEEP", "", "", ""}, - {"AARON, JOSHUA, N", "", "WEST GROVE, PA", "SPEAKING FEES", "$4,700.00"}, - {"", "MEDICINE", "", "", ""}, - {"TOTAL", "", "", "", "$5,010.33"}, - {"AARON, MAUREEN, M", "", "MARTINSVILLE, VA", "MEALS", "$193.67"}, - {"TOTAL", "", "", "", "$193.67"}, - {"AARON, MICHAEL, L", "", "WEST ISLIP, NY", "MEALS", "$19.50"}, - {"TOTAL", "", "", "", "$19.50"}, - {"AARON, MICHAEL, R", "", "BROOKLYN, NY", "MEALS", "$65.92"} - }; - - private static final String[][] EXPECTED_EMPTY_TABLE = { - {""} - }; + private static final String EU_002_PDF = "src/test/resources/technology/tabula/eu-002.pdf"; + private static final String[][] EU_002_EXPECTED = { + {"", 
"", "Involvement of pupils in", ""}, + {"", "Preperation and", "Production of", "Presentation an"}, + {"", "planing", "materials", "evaluation"}, + {"Knowledge and awareness of different cultures", "0,2885", "0,3974", "0,3904"}, + {"Foreign language competence", "0,3057", "0,4184", "0,3899"}, + {"Social skills and abilities", "0,3416", "0,3369", "0,4303"}, + {"Acquaintance of special knowledge", "0,2569", "0,2909", "0,3557"}, + {"Self competence", "0,3791", "0,3320", "0,4617"} + }; + + private static final String ARGENTINA_DIPUTADOS_VOTING_RECORD_PDF = "src/test/resources/technology/tabula/argentina_diputados_voting_record.pdf"; + private static final String[][] ARGENTINA_DIPUTADOS_VOTING_RECORD_EXPECTED = { + {"ABDALA de MATARAZZO, Norma Amanda", "Frente Cívico por Santiago", "Santiago del Estero", "AFIRMATIVO"}, + {"ALBRIEU, Oscar Edmundo Nicolas", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, + {"ALONSO, María Luz", "Frente para la Victoria - PJ", "La Pampa", "AFIRMATIVO"}, + {"ARENA, Celia Isabel", "Frente para la Victoria - PJ", "Santa Fe", "AFIRMATIVO"}, + {"ARREGUI, Andrés Roberto", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"AVOSCAN, Herman Horacio", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, + {"BALCEDO, María Ester", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"BARRANDEGUY, Raúl Enrique", "Frente para la Victoria - PJ", "Entre Ríos", "AFIRMATIVO"}, + {"BASTERRA, Luis Eugenio", "Frente para la Victoria - PJ", "Formosa", "AFIRMATIVO"}, + {"BEDANO, Nora Esther", "Frente para la Victoria - PJ", "Córdoba", "AFIRMATIVO"}, + {"BERNAL, María Eugenia", "Frente para la Victoria - PJ", "Jujuy", "AFIRMATIVO"}, + {"BERTONE, Rosana Andrea", "Frente para la Victoria - PJ", "Tierra del Fuego", "AFIRMATIVO"}, + {"BIANCHI, María del Carmen", "Frente para la Victoria - PJ", "Cdad. Aut. Bs. 
As.", "AFIRMATIVO"}, + {"BIDEGAIN, Gloria Mercedes", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"BRAWER, Mara", "Frente para la Victoria - PJ", "Cdad. Aut. Bs. As.", "AFIRMATIVO"}, + {"BRILLO, José Ricardo", "Movimiento Popular Neuquino", "Neuquén", "AFIRMATIVO"}, + {"BROMBERG, Isaac Benjamín", "Frente para la Victoria - PJ", "Tucumán", "AFIRMATIVO"}, + {"BRUE, Daniel Agustín", "Frente Cívico por Santiago", "Santiago del Estero", "AFIRMATIVO"}, + {"CALCAGNO, Eric", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"CARLOTTO, Remo Gerardo", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"CARMONA, Guillermo Ramón", "Frente para la Victoria - PJ", "Mendoza", "AFIRMATIVO"}, + {"CATALAN MAGNI, Julio César", "Frente para la Victoria - PJ", "Tierra del Fuego", "AFIRMATIVO"}, + {"CEJAS, Jorge Alberto", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, + {"CHIENO, María Elena", "Frente para la Victoria - PJ", "Corrientes", "AFIRMATIVO"}, + {"CIAMPINI, José Alberto", "Frente para la Victoria - PJ", "Neuquén", "AFIRMATIVO"}, + {"CIGOGNA, Luis Francisco Jorge", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"CLERI, Marcos", "Frente para la Victoria - PJ", "Santa Fe", "AFIRMATIVO"}, + {"COMELLI, Alicia Marcela", "Movimiento Popular Neuquino", "Neuquén", "AFIRMATIVO"}, + {"CONTI, Diana Beatriz", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"CORDOBA, Stella Maris", "Frente para la Victoria - PJ", "Tucumán", "AFIRMATIVO"}, + {"CURRILEN, Oscar Rubén", "Frente para la Victoria - PJ", "Chubut", "AFIRMATIVO"} + }; + + private static final String EU_017_PDF = "src/test/resources/technology/tabula/eu-017.pdf"; + private static final String[][] EU_017_EXPECTED = { + {"", "Austria", "77", "1", "78"}, + {"", "Belgium", "159", "2", "161"}, + {"", "Bulgaria", "52", "0", "52"}, + {"", "Croatia", "144", "0", "144"}, + {"", "Cyprus", "43", "2", "45"}, + {"", "Czech Republic", "78", "0", 
"78"}, + {"", "Denmark", "151", "2", "153"}, + {"", "Estonia", "46", "0", "46"}, + {"", "Finland", "201", "1", "202"}, + {"", "France", "428", "7", "435"}, + {"", "Germany", "646", "21", "667"}, + {"", "Greece", "113", "2", "115"}, + {"", "Hungary", "187", "0", "187"}, + {"", "Iceland", "18", "0", "18"}, + {"", "Ireland", "213", "4", "217"}, + {"", "Israel", "25", "0", "25"}, + {"", "Italy", "627", "12", "639"}, + {"", "Latvia", "7", "0", "7"}, + {"", "Lithuania", "94", "1", "95"}, + {"", "Luxembourg", "22", "0", "22"}, + {"", "Malta", "18", "0", "18"}, + {"", "Netherlands", "104", "1", "105"}, + {"", "Norway", "195", "0", "195"}, + {"", "Poland", "120", "1", "121"}, + {"", "Portugal", "532", "3", "535"}, + {"", "Romania", "110", "0", "110"}, + {"", "Slovakia", "176", "0", "176"}, + {"", "Slovenia", "56", "0", "56"}, + {"", "Spain", "614", "3", "617"}, + {"", "Sweden", "122", "3", "125"}, + {"", "Switzerland", "64", "0", "64"}, + {"", "Turkey", "96", "0", "96"}, + {"", "United Kingdom", "572", "14", "586"} + }; + + private static final String FRX_2012_DISCLOSURE_PDF = "src/test/resources/technology/tabula/frx_2012_disclosure.pdf"; + private static final String[][] FRX_2012_DISCLOSURE_EXPECTED = { + {"AANONSEN, DEBORAH, A", "", "STATEN ISLAND, NY", "MEALS", "$85.00"}, + {"TOTAL", "", "", "", "$85.00"}, + {"AARON, CAREN, T", "", "RICHMOND, VA", "EDUCATIONAL ITEMS", "$78.80"}, + {"AARON, CAREN, T", "", "RICHMOND, VA", "MEALS", "$392.45"}, + {"TOTAL", "", "", "", "$471.25"}, + {"AARON, JOHN", "", "CLARKSVILLE, TN", "MEALS", "$20.39"}, + {"TOTAL", "", "", "", "$20.39"}, + {"AARON, JOSHUA, N", "", "WEST GROVE, PA", "MEALS", "$310.33"}, + {"", "REGIONAL PULMONARY & SLEEP", "", "", ""}, + {"AARON, JOSHUA, N", "", "WEST GROVE, PA", "SPEAKING FEES", "$4,700.00"}, + {"", "MEDICINE", "", "", ""}, + {"TOTAL", "", "", "", "$5,010.33"}, + {"AARON, MAUREEN, M", "", "MARTINSVILLE, VA", "MEALS", "$193.67"}, + {"TOTAL", "", "", "", "$193.67"}, + {"AARON, MICHAEL, L", "", "WEST ISLIP, 
NY", "MEALS", "$19.50"}, + {"TOTAL", "", "", "", "$19.50"}, + {"AARON, MICHAEL, R", "", "BROOKLYN, NY", "MEALS", "$65.92"} + }; + + private static final String[][] EXPECTED_EMPTY_TABLE = { /* actually empty! */ }; @Test public void testRemoveSequentialSpaces() throws IOException { - Page page = UtilsForTesting.getAreaFromFirstPage( - "src/test/resources/technology/tabula/m27.pdf", 79.2f, - 28.28f, 103.04f, 732.6f); + Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/m27.pdf", 79.2f, 28.28f, 103.04f, 732.6f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); List firstRow = table.getRows().get(0); assertTrue(firstRow.get(1).getText().equals("ALLEGIANT AIR")); assertTrue(firstRow.get(2).getText().equals("ALLEGIANT AIR LLC")); + page.getPDDoc().close(); } @Test public void testColumnRecognition() throws IOException { - Page page = UtilsForTesting - .getAreaFromFirstPage( - "src/test/resources/technology/tabula/argentina_diputados_voting_record.pdf", - 269.875f, 12.75f, 790.5f, 561f); + Page page = UtilsForTesting.getAreaFromFirstPage(ARGENTINA_DIPUTADOS_VOTING_RECORD_PDF, 269.875f, 12.75f, 790.5f, 561f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); - assertArrayEquals(EXPECTED_COLUMN_RECOGNITION, UtilsForTesting.tableToArrayOfRows(table)); + assertArrayEquals(ARGENTINA_DIPUTADOS_VOTING_RECORD_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); + page.getPDDoc().close(); } @Test public void testVerticalRulingsPreventMergingOfColumns() throws IOException { - List rulings = new ArrayList(); + List rulings = new ArrayList<>(); Float[] rulingsVerticalPositions = {147f, 256f, 310f, 375f, 431f, 504f}; for (int i = 0; i < 6; i++) { rulings.add(new Ruling(255.57f, rulingsVerticalPositions[i], 0, 398.76f - 255.57f)); @@ -202,34 +164,34 @@ public void testVerticalRulingsPreventMergingOfColumns() throws IOException { 
assertTrue(sixthRow.get(0).getText().equals("VALSANGIACOMO BLANC")); assertTrue(sixthRow.get(1).getText().equals("OFERNANDO JORGE")); + page.getPDDoc().close(); } @Test public void testExtractColumnsCorrectly() throws IOException { - Page page = UtilsForTesting.getAreaFromPage( - "src/test/resources/technology/tabula/eu-002.pdf", 1, - 115.0f, 70.0f, 233.0f, 510.0f); + Page page = UtilsForTesting.getAreaFromPage(EU_002_PDF, 1, 115.0f, 70.0f, 233.0f, 510.0f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); - assertArrayEquals(EXPECTED_CORRECT_COLUMNS, UtilsForTesting.tableToArrayOfRows(table)); + assertArrayEquals(EU_002_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); + page.getPDDoc().close(); } @Test public void testExtractColumnsCorrectly2() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/eu-017.pdf", 3); + Page page = UtilsForTesting.getPage(EU_017_PDF, 3); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(page.getVerticalRulings()); Table table = bea.extract(page.getArea(299.625f, 148.44f, 711.875f, 452.32f)).get(0); - assertArrayEquals(EXPECTED_COLUMN_EXTRACTION2, UtilsForTesting.tableToArrayOfRows(table)); + assertArrayEquals(EU_017_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); + page.getPDDoc().close(); } @Test public void testExtractColumnsCorrectly3() throws IOException { - Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/frx_2012_disclosure.pdf", - 106.01f, 48.09f, 227.31f, 551.89f); + Page page = UtilsForTesting.getAreaFromFirstPage(FRX_2012_DISCLOSURE_PDF, 106.01f, 48.09f, 227.31f, 551.89f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); - - assertArrayEquals(EXPECTED_TABLE_EXTRACTION, UtilsForTesting.tableToArrayOfRows(table)); + assertArrayEquals(FRX_2012_DISCLOSURE_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); + 
page.getPDDoc().close(); } @Test @@ -241,8 +203,9 @@ public void testCheckSqueezeDoesntBreak() throws IOException { List> rows = table.getRows(); List firstRow = rows.get(0); List lastRow = rows.get(rows.size() - 1); - assertTrue(firstRow.get(0).getText().equals("Violent crime . . . . . . . . . . . . . . . . . .")); + assertTrue(firstRow.get(0).getText().equals("Violent crime. . . . . . . . . . . . . . . . . .")); assertTrue(lastRow.get(lastRow.size() - 1).getText().equals("(X)")); + page.getPDDoc().close(); } @Test @@ -254,7 +217,7 @@ public void testNaturalOrderOfRectangles() throws IOException { page.getVerticalRulings()); Table table = bea.extract(page).get(0); - List cells = table.getCells(); + List cells = new ArrayList<>(table.cells.values()); for (RectangularTextContainer rectangularTextContainer : cells) { System.out.println(rectangularTextContainer.getText()); } @@ -318,6 +281,8 @@ public void testNaturalOrderOfRectangles() throws IOException { assertEquals("DOD, and NIH", cells.get(38).getText()); assertEquals("and networks", cells.get(39).getText()); + page.getPDDoc().close(); + } @Test @@ -326,7 +291,7 @@ public void testNaturalOrderOfRectanglesOneMoreTime() throws IOException { Charset.forName("utf-8"), CSVFormat.DEFAULT); - List rectangles = new ArrayList(); + List rectangles = new ArrayList<>(); for (CSVRecord record : parse) { rectangles.add(new Rectangle(Float.parseFloat(record.get(0)), @@ -337,12 +302,12 @@ public void testNaturalOrderOfRectanglesOneMoreTime() throws IOException { //List rectangles = Arrays.asList(RECTANGLES_TEST_NATURAL_ORDER); - Utils.sort(rectangles); + Utils.sort(rectangles, Rectangle.ILL_DEFINED_ORDER); for (int i = 0; i < (rectangles.size() - 1); i++) { Rectangle rectangle = rectangles.get(i); Rectangle nextRectangle = rectangles.get(i + 1); - + assertTrue(rectangle.compareTo(nextRectangle) < 0); } } @@ -358,24 +323,24 @@ public void testRealLifeRTL2() throws IOException { StringBuilder sb = new StringBuilder(); (new 
CSVWriter()).write(sb, table); assertEquals(expectedCsv, sb.toString()); + page.getPDDoc().close(); } @Test public void testEmptyRegion() throws IOException { - Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/indictb1h_14.pdf", 1, - 0.0f, 0.0f, 80.82f, 100.9f); // an empty area + Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/indictb1h_14.pdf", 1, 0, 0, 80.82f, 100.9f); // an empty area BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); assertArrayEquals(EXPECTED_EMPTY_TABLE, UtilsForTesting.tableToArrayOfRows(table)); + page.getPDDoc().close(); } @Test public void testTableWithMultilineHeader() throws IOException { String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/us-020.csv"); - Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/us-020.pdf", 2, - 103.0f, 35.0f, 641.0f, 560.0f); + Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/us-020.pdf", 2, 103.0f, 35.0f, 641.0f, 560.0f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); diff --git a/src/test/java/technology/tabula/TestCell.java b/src/test/java/technology/tabula/TestCell.java index 92796920..2795565c 100644 --- a/src/test/java/technology/tabula/TestCell.java +++ b/src/test/java/technology/tabula/TestCell.java @@ -6,6 +6,7 @@ import java.util.ArrayList; import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.junit.Test; public class TestCell { @@ -31,9 +32,9 @@ public void testGetTextElements() { Cell cell = new Cell(0, 0, 0, 0); assertTrue(cell.getTextElements().isEmpty()); - TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 
10, "test", 5); TextChunk tChunk = new TextChunk(tElement); - List tList = new ArrayList(); + List tList = new ArrayList<>(); tList.add(tChunk); cell.setTextElements(tList); diff --git a/src/test/java/technology/tabula/TestCellPosition.java b/src/test/java/technology/tabula/TestCellPosition.java deleted file mode 100644 index 476168dd..00000000 --- a/src/test/java/technology/tabula/TestCellPosition.java +++ /dev/null @@ -1,45 +0,0 @@ -package technology.tabula; - -import static org.junit.Assert.*; - -import org.junit.Test; - -import technology.tabula.Table.CellPosition; - -public class TestCellPosition { - - @Test - public void testHashCode() { - Table table = new Table(); - CellPosition cellPosition = table.new CellPosition(5, 5); - - assertEquals(500005, cellPosition.hashCode()); - - } - - @Test - public void testEqualsObject() { - Table table = new Table(); - CellPosition cellPosition1 = table.new CellPosition(5, 5); - - assertTrue(cellPosition1.equals(cellPosition1)); - } - - @Test - public void testNotEqualsObject() { - Table table = new Table(); - CellPosition cellPosition1 = table.new CellPosition(5, 5); - CellPosition cellPosition2 = table.new CellPosition(5, 6); - - assertFalse(cellPosition1.equals(cellPosition2)); - } - - @Test - public void testNotInstanceOfObject() { - Table table = new Table(); - CellPosition cellPosition = table.new CellPosition(5, 5); - - assertFalse(cellPosition.equals("test")); - } - -} diff --git a/src/test/java/technology/tabula/TestCohenSutherland.java b/src/test/java/technology/tabula/TestCohenSutherland.java new file mode 100644 index 00000000..2d747608 --- /dev/null +++ b/src/test/java/technology/tabula/TestCohenSutherland.java @@ -0,0 +1,102 @@ +package technology.tabula; + +import org.junit.Before; +import org.junit.Test; + +import java.awt.geom.Line2D; +import java.awt.geom.Rectangle2D; + +import static org.junit.Assert.*; + +public class TestCohenSutherland { + + private Rectangle2D clipWindow; + private 
CohenSutherlandClipping algorithm; + private static final double DELTA = 0.001; + + @Before + public void set() { + clipWindow = new Rectangle(10, 10, 50, 50); + algorithm = new CohenSutherlandClipping(clipWindow); + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + // TODO: How to parameterize the tests? + @Test + public void theLineIsCompletelyInside() { + Line2D.Float line = new Line2D.Float(20, 20, 30, 30); + assertTrue(algorithm.clip(line)); + assertEquals(20, line.x1, DELTA); + assertEquals(20, line.y1, DELTA); + assertEquals(30, line.x2, DELTA); + assertEquals(30, line.y2, DELTA); + } + + @Test + public void theLineIsCompletelyOnTheLeft() { + float x1 = 3, y1 = 13, x2 = 6, y2 = 16; + Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); + assertFalse(algorithm.clip(line)); + assertEquals(x1, line.x1, DELTA); + assertEquals(y1, line.y1, DELTA); + assertEquals(x2, line.x2, DELTA); + assertEquals(y2, line.y2, DELTA); + } + + @Test + public void theLineIsCompletelyOnTheUp() { + float x1 = 15, y1 = 5, x2 = 25, y2 = 2; + Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); + assertFalse(algorithm.clip(line)); + assertEquals(x1, line.x1, DELTA); + assertEquals(y1, line.y1, DELTA); + assertEquals(x2, line.x2, DELTA); + assertEquals(y2, line.y2, DELTA); + } + + @Test + public void theLineIsCompletelyOnTheRight() { + float x1 = 65, y1 = 15, x2 = 70, y2 = 20; + Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); + assertFalse(algorithm.clip(line)); + assertEquals(x1, line.x1, DELTA); + assertEquals(y1, line.y1, DELTA); + assertEquals(x2, line.x2, DELTA); + assertEquals(y2, line.y2, DELTA); + } + + @Test + public void theLineIsCompletelyOnTheBottom() { + float x1 = 15, y1 = 65, x2 = 25, y2 = 70; + Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); + assertFalse(algorithm.clip(line)); + assertEquals(x1, line.x1, DELTA); + assertEquals(y1, line.y1, DELTA); + assertEquals(x2, line.x2, DELTA); + assertEquals(y2, line.y2, DELTA); + 
} + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // + @Test + public void lineCrossesTopLeftCorner() { + float x1 = 5, y1 = 25, x2 = 25, y2 = 5; + Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); + assertTrue(algorithm.clip(line)); + assertEquals(10, line.x1, DELTA); + assertEquals(20, line.y1, DELTA); + assertEquals(20, line.x2, DELTA); + assertEquals(10, line.y2, DELTA); + } + + @Test + public void lineCrossesPartiallyTopLeftCorner() { + float x1 = 15, y1 = 15, x2 = 25, y2 = 5; + Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); + assertTrue(algorithm.clip(line)); + assertEquals(x1, line.x1, DELTA); + assertEquals(y1, line.y1, DELTA); + assertEquals(20, line.x2, DELTA); + assertEquals(10, line.y2, DELTA); + } + +} diff --git a/src/test/java/technology/tabula/TestCommandLineApp.java b/src/test/java/technology/tabula/TestCommandLineApp.java index fd79e9c2..5a4e3af5 100644 --- a/src/test/java/technology/tabula/TestCommandLineApp.java +++ b/src/test/java/technology/tabula/TestCommandLineApp.java @@ -2,20 +2,23 @@ import static org.junit.Assert.*; +import java.io.File; import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.FileSystem; -import java.nio.file.FileSystems; +import java.nio.file.*; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.ParseException; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; public class TestCommandLineApp { + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + private String csvFromCommandLineArgs(String[] args) throws ParseException { CommandLineParser parser = new DefaultParser(); CommandLine cmd = parser.parse(CommandLineApp.buildOptions(), args); @@ -68,13 +71,15 @@ public void testExtractSpreadsheetWithAreaAndNewFile() throws ParseException, IO String expectedCsv = 
UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv"); + File newFile = folder.newFile(); this.csvFromCommandLineArgs(new String[]{ "src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf", "-p", "1", "-a", "150.56,58.9,654.7,536.12", "-f", - "CSV", "-o", "outputFile" + "CSV", "-o", newFile.getAbsolutePath() }); - //assertEquals(expectedCsv,); + + assertArrayEquals(expectedCsv.getBytes(), Files.readAllBytes(Paths.get(newFile.getAbsolutePath()))); } @@ -143,6 +148,72 @@ public void testEncryptedWrongPassword() throws ParseException { }); } + @Test + public void testExtractWithMultiplePercentArea() throws ParseException, IOException { + + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/MultiColumn.csv"); + + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/MultiColumn.pdf", + "-p", "1", "-a", + "%0,0,100,50", "-a", + "%0,50,100,100", "-f", + "CSV" + })); + } + + @Test + public void testExtractWithMultipleAbsoluteArea() throws ParseException, IOException { + + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/MultiColumn.csv"); + + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/MultiColumn.pdf", + "-p", "1", "-a", + "0,0,451,212", "-a", + "0,212,451,425", "-f", + "CSV" + })); + } + + @Test + public void testExtractWithPercentAndAbsoluteArea() throws ParseException, IOException { + + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/MultiColumn.csv"); + + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/MultiColumn.pdf", + "-p", "1", "-a", + "%0,0,100,50", "-a", + "0,212,451,425", "-f", + "CSV" + })); + } + @Test + public void testLatticeModeWithColumnOption() throws ParseException, IOException { + + String expectedCsv 
= UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/AnimalSounds.csv"); + + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/AnimalSounds.pdf", + "-p", "1", "-c", + "59,218,331,551", + "-r", "-f", "CSV" + })); + } + + @Test + public void testLatticeModeWithColumnAndMultipleAreasOption() throws ParseException, IOException { + + String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/AnimalSounds1.json"); + String resultJson = this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/AnimalSounds1.pdf", + "-p", "1", "-c", "57,136,197,296,314,391,457,553", + "-a", "%0,0,100,50", "-a", "%0,50,100,100", + "-r", "-f", "JSON" + }); + assertEquals(expectedJson, resultJson); + } } diff --git a/src/test/java/technology/tabula/TestLine.java b/src/test/java/technology/tabula/TestLine.java index 9748415a..f7a6a88d 100644 --- a/src/test/java/technology/tabula/TestLine.java +++ b/src/test/java/technology/tabula/TestLine.java @@ -6,6 +6,7 @@ import java.util.List; import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.junit.Test; public class TestLine { @@ -14,9 +15,9 @@ public class TestLine { public void testSetTextElements() { Line line = new Line(); - TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); TextChunk tChunk = new TextChunk(tElement); - List tList = new ArrayList(); + List tList = new ArrayList<>(); tList.add(tChunk); line.setTextElements(tList); @@ -28,7 +29,7 @@ public void testSetTextElements() { public void testAddTextChunkIntTextChunk() { Line line = new Line(); - TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextElement tElement = new TextElement(0, 
0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); TextChunk tChunk = new TextChunk(tElement); line.addTextChunk(3, tChunk); @@ -39,7 +40,7 @@ public void testAddTextChunkIntTextChunk() { public void testLessThanAddTextChunkIntTextChunk() { Line line = new Line(); - TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); TextChunk tChunk = new TextChunk(tElement); line.addTextChunk(0, tChunk); line.addTextChunk(0, tChunk); @@ -51,7 +52,7 @@ public void testLessThanAddTextChunkIntTextChunk() { public void testErrorAddTextChunkIntTextChunk() { Line line = new Line(); - TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextElement tElement = new TextElement(0, 0, 0, 0,new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); TextChunk tChunk = new TextChunk(tElement); line.addTextChunk(-1, tChunk); } @@ -60,7 +61,7 @@ public void testErrorAddTextChunkIntTextChunk() { public void testToString() { Line line = new Line(); - TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); TextChunk tChunk = new TextChunk(tElement); line.addTextChunk(0, tChunk); line.addTextChunk(0, tChunk); diff --git a/src/test/java/technology/tabula/TestObjectExtractor.java b/src/test/java/technology/tabula/TestObjectExtractor.java index d7ac5a69..69864c61 100644 --- a/src/test/java/technology/tabula/TestObjectExtractor.java +++ b/src/test/java/technology/tabula/TestObjectExtractor.java @@ -7,6 +7,7 @@ import java.util.ArrayList; import java.util.List; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.Test; @@ -21,125 +22,129 @@ 
public void testWrongPasswordRaisesException() throws IOException { @Test(expected = IOException.class) public void testEmptyOnEncryptedFileRaisesException() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - oe.extract().next(); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf")); + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + oe.extract().next(); + } } @Test public void testCanReadPDFWithOwnerEncryption() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - PageIterator pi = oe.extract(); - int i = 0; - while (pi.hasNext()) { + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + PageIterator pi = oe.extract(); + int i = 0; + while (pi.hasNext()) { i++; pi.next(); + } + assertEquals(2, i); } - assertEquals(2, i); } @Test public void testGoodPassword() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword"); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - List pages = new ArrayList<>(); - PageIterator pi = oe.extract(); - while (pi.hasNext()) { + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword"); + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + List pages = new ArrayList<>(); + PageIterator pi = oe.extract(); + while (pi.hasNext()) { pages.add(pi.next()); + } + assertEquals(1, pages.size()); } - assertEquals(1, pages.size()); } @Test public void testTextExtractionDoesNotRaise() throws IOException { - 
PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/rotated_page.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - PageIterator pi = oe.extract(); - - assertTrue(pi.hasNext()); - assertNotNull(pi.next()); - assertFalse(pi.hasNext()); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/rotated_page.pdf")); + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + PageIterator pi = oe.extract(); + assertTrue(pi.hasNext()); + assertNotNull(pi.next()); + assertFalse(pi.hasNext()); + } } @Test public void testShouldDetectRulings() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - PageIterator pi = oe.extract(); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf")); + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + PageIterator pi = oe.extract(); - Page page = pi.next(); - List rulings = page.getRulings(); + Page page = pi.next(); + List rulings = page.getRulings(); - for (Ruling r: rulings) { + for (Ruling r: rulings) { assertTrue(page.contains(r.getBounds())); + } } } @Test public void testDontThrowNPEInShfill() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/labor.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - PageIterator pi = oe.extract(); - assertTrue(pi.hasNext()); - try { + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/labor.pdf")); + + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + PageIterator pi = oe.extract(); + assertTrue(pi.hasNext()); + try { Page p = pi.next(); assertNotNull(p); - } catch (NullPointerException e) { + } catch (NullPointerException e) { fail("NPE in ObjectExtractor " + 
e.toString()); + } } } @Test public void testExtractOnePage() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); assertEquals(2, pdf_document.getNumberOfPages()); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - Page page = oe.extract(2); + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + Page page = oe.extract(2); - assertNotNull(page); + assertNotNull(page); + } } @Test(expected = IndexOutOfBoundsException.class) public void testExtractWrongPageNumber() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); assertEquals(2, pdf_document.getNumberOfPages()); - ObjectExtractor oe = new ObjectExtractor(pdf_document); - oe.extract(3); - + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + oe.extract(3); + } } @Test public void testTextElementsContainedInPage() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf")); - Page page = oe.extractPage(1); + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + Page page = oe.extractPage(1); - for (TextElement te: page.getText()) { + for (TextElement te: page.getText()) { assertTrue(page.contains(te)); + } } + } - - /* - @Test - public void testExtractWithoutExtractingRulings() throws IOException { - PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/should_detect_rulings.pdf"); - ObjectExtractor oe = new 
ObjectExtractor(pdf_document, null, false, false); - PageIterator pi = oe.extract(); - - assertTrue(pi.hasNext()); - Page page = pi.next(); - assertNotNull(page); - assertEquals(0, page.getRulings().size()); - assertFalse(pi.hasNext()); - } - */ + @Test public void testDoNotNPEInPointComparator() throws IOException { + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/npe_issue_206.pdf")); + + try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { + Page p = oe.extractPage(1); + assertNotNull(p); + } catch (NullPointerException e) { + fail("NPE in ObjectExtractor " + e.toString()); + } + } } diff --git a/src/test/java/technology/tabula/TestProjectionProfile.java b/src/test/java/technology/tabula/TestProjectionProfile.java index 4bdd83af..e6d93b39 100644 --- a/src/test/java/technology/tabula/TestProjectionProfile.java +++ b/src/test/java/technology/tabula/TestProjectionProfile.java @@ -5,8 +5,10 @@ import java.util.ArrayList; import java.util.List; +import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.junit.Before; import org.junit.Test; @@ -18,21 +20,30 @@ public class TestProjectionProfile { @Before public void setUpProjectionProfile() { PDPage pdPage = new PDPage(); - - TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f); - TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f); - List textList = new ArrayList(); + PDDocument pdDocument = new PDDocument(); + + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + TextElement textElement = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f); + TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f); + List textList = new ArrayList<>(); textList.add(textElement); 
textList.add(textElement2); Ruling ruling = new Ruling(0, 0, 10, 10); - List rulingList = new ArrayList(); + List rulingList = new ArrayList<>(); rulingList.add(ruling); - - page = new Page(0, 0, 1, 1, 0, 1, pdPage, textList, rulingList); - - List rectangles = new ArrayList(); + page = Page.Builder.newInstance() + .withPageDims(PageDims.of(0, 0, 1, 1)) + .withRotation(0) + .withNumber(1) + .withPdPage(pdPage) + .withPdDocument(pdDocument) + .withTextElements(textList) + .withRulings(rulingList) + .build(); + + List rectangles = new ArrayList<>(); rectangles.add(new Rectangle(0f, 0f, 500f, 5f)); pProfile = new ProjectionProfile(page, rectangles, 5, 5); @@ -65,7 +76,7 @@ public void testFindHorizontalSeparators() { @Test public void testSmooth() { float[] data = {0, 1, 2}; - float[] rv = pProfile.smooth(data, 3); + float[] rv = ProjectionProfile.smooth(data, 3); assertEquals(1f, rv[2], 1e-5); } @@ -73,7 +84,7 @@ public void testSmooth() { @Test public void testFilter() { float[] data = {0, 1, 2}; - float[] rv = pProfile.filter(data, 3); + float[] rv = ProjectionProfile.filter(data, 3); assertEquals(3f, rv[1], 1e-5); } @@ -81,7 +92,7 @@ public void testFilter() { @Test public void testGetAutocorrelation() { float[] projection = {0, 1, 2}; - float[] rv = pProfile.getAutocorrelation(projection); + float[] rv = ProjectionProfile.getAutocorrelation(projection); assertEquals(0f, rv[0], 1e-5); assertTrue(rv.length == 2); diff --git a/src/test/java/technology/tabula/TestRectangle.java b/src/test/java/technology/tabula/TestRectangle.java index c763fcbc..7fa66f7a 100644 --- a/src/test/java/technology/tabula/TestRectangle.java +++ b/src/test/java/technology/tabula/TestRectangle.java @@ -1,239 +1,291 @@ package technology.tabula; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; import java.awt.geom.Point2D; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import 
java.util.List; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; public class TestRectangle { - - + + @Test public void testCompareEqualsRectangles() { Rectangle first = new Rectangle(); Rectangle second = new Rectangle(); - + assertTrue(first.equals(second)); assertTrue(second.equals(first)); } - + @Test public void testCompareAlignedHorizontalRectangle() { Rectangle lower = new Rectangle(0f, 10f, 10f, 10f); Rectangle upper = new Rectangle(0f,20f, 10f, 10f); - + assertTrue(lower.compareTo(upper) < 0); } - + @Test public void testCompareAlignedVerticalRectangle() { Rectangle lower = new Rectangle(10f, 0f, 10f, 10f); Rectangle upper = new Rectangle(20f,0f, 10f, 10f); - + assertTrue(lower.compareTo(upper) < 0); } - + @Test public void testCompareVerticalOverlapRectangle() { Rectangle lower = new Rectangle(5f, 0f, 10f, 10f); Rectangle upper = new Rectangle(0f, 10f, 10f, 10f); - + assertTrue(lower.compareTo(upper) < 0); } - + @Test public void testCompareVerticalOverlapLessThresholdRectangle() { Rectangle lower = new Rectangle(0f, 10f, 10f, 10f); Rectangle upper = new Rectangle(9.8f, 0f, 10f, 10f); - + assertTrue(lower.compareTo(upper) < 0); } - - - + + + @Test public void testQuickSortOneUpperThanOther() { - + Rectangle lower = new Rectangle(175.72f, 72.72f, 1.67f, 1.52f); //, (Comma after AARON) Rectangle upper = new Rectangle(169.21f, 161.16f, 4.33f, 4.31f); // R (REGIONAL PULMONARY) - + assertTrue(lower.compareTo(upper) > 0); - + } - + @Test public void testQuickSortRectangleList() { - + //Testing wrong sorting // Expected: AARON, JOSHUA, N // but was: AARON JOSHUA N , , - Rectangle first = new Rectangle(172.92999267578125f, 51.47999954223633f, 4.0f, 4.309999942779541f); //A + Rectangle first = new Rectangle(172.92999267578125f, 51.47999954223633f, 4.0f, 4.309999942779541f); //A Rectangle second = new Rectangle(175.72000122070312f, 72.72000122070312f, 1.6699999570846558f, 1.5199999809265137f); //, Rectangle third = new 
Rectangle(172.92999267578125f, 96.36000061035156f, 4.0f, 4.309999942779541f); //A Rectangle fourth = new Rectangle(175.72000122070312f, 100.31999969482422f, 1.6699999570846558f, 1.5199999809265137f); //, Rectangle fifth = new Rectangle(172.92999267578125f, 103.68000030517578f, 4.329999923706055f, 4.309999942779541f); //N Rectangle sixth = new Rectangle(169.2100067138672f, 161.16000366210938f, 4.329999923706055f, 4.309999942779541f); //R - - List expectedList = new ArrayList(); + + List expectedList = new ArrayList<>(); expectedList.add(first); expectedList.add(sixth); expectedList.add(second); expectedList.add(third); expectedList.add(fourth); expectedList.add(fifth); - List toSortList = new ArrayList(); + List toSortList = new ArrayList<>(); toSortList.add(sixth); toSortList.add(second); toSortList.add(third); toSortList.add(fifth); toSortList.add(first); toSortList.add(fourth); - - Collections.sort(toSortList); - + + Collections.sort(toSortList, Rectangle.ILL_DEFINED_ORDER); + assertEquals(expectedList, toSortList); } - + @Test public void testGetVerticalOverlapShouldReturnZero() { - + Rectangle lower = new Rectangle(10f, 0f, 10f, 10f); Rectangle upper = new Rectangle(20f,0f, 10f, 10f); - + float overlap = lower.verticalOverlap(upper); - + assertEquals(0f, overlap, 0); assertTrue(!lower.verticallyOverlaps(upper)); assertEquals(0f, lower.verticalOverlapRatio(upper), 0); assertEquals(0f, lower.overlapRatio(upper), 0); - + } - + @Test public void testGetVerticalOverlapShouldReturnMoreThanZero() { - + Rectangle lower = new Rectangle(15f, 10f, 10f, 10f); Rectangle upper = new Rectangle(20f, 0f, 10f, 10f); - + float overlap = lower.verticalOverlap(upper); - + assertEquals(5f, overlap, 0); assertTrue(lower.verticallyOverlaps(upper)); assertEquals(0.5f, lower.verticalOverlapRatio(upper), 0); assertEquals(0f, lower.overlapRatio(upper), 0); - + } - + @Test public void testGetHorizontalOverlapShouldReturnZero() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); 
Rectangle two = new Rectangle(10f, 10f, 10f, 10f); - + assertTrue(!one.horizontallyOverlaps(two)); assertEquals(0f, one.overlapRatio(two), 0); - + } - + @Test public void testGetHorizontalOverlapShouldReturnMoreThanZero() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(10f, 5f, 10f, 10f); - + assertTrue(one.horizontallyOverlaps(two)); assertEquals(5f, one.horizontalOverlap(two), 0); assertEquals(0f, one.overlapRatio(two), 0); - + } - + @Test public void testGetOverlapShouldReturnMoreThanZero() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(5f, 5f, 10f, 10f); - + assertTrue(one.horizontallyOverlaps(two)); assertTrue(one.verticallyOverlaps(two)); assertEquals(5f, one.horizontalOverlap(two), 0); assertEquals(5f, one.verticalOverlap(two), 0); assertEquals((25f/175), one.overlapRatio(two), 0); - + } - + @Test public void testMergeNoOverlappingRectangles() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(0f, 10f, 10f, 10f); - + one.merge(two); - + assertEquals(20f, one.getWidth(), 0); assertEquals(10f, one.getHeight(), 0); assertEquals(0f, one.getLeft(), 0); assertEquals(0f, one.getTop(), 0); assertEquals(10f, one.getBottom(), 0); assertEquals(20f * 10f, one.getArea(), 0); - + } - + @Test public void testMergeOverlappingRectangles() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(5f, 5f, 10f, 10f); - + one.merge(two); - + assertEquals(15f, one.getWidth(), 0); assertEquals(15f, one.getHeight(), 0); assertEquals(0f, one.getLeft(), 0); assertEquals(0f, one.getTop(), 0); - + } - + @Test public void testRectangleGetPoints() { - + Rectangle one = new Rectangle(10f, 20f, 30f, 40f); - + Point2D[] points = one.getPoints(); - - Point2D[] expectedPoints = new Point2D[]{ + + Point2D[] expectedPoints = new Point2D[]{ new Point2D.Float(20f, 10f), new Point2D.Float(50f, 10f), new Point2D.Float(50f, 50f), new Point2D.Float(20f, 50f) - + }; - 
+ Assert.assertArrayEquals(expectedPoints, points); - + } - + @Test public void testGetBoundingBox() { - - List rectangles = new ArrayList(); + + List rectangles = new ArrayList<>(); rectangles.add(new Rectangle(0f, 0f, 10f, 10f)); rectangles.add(new Rectangle(20f, 30f, 10f, 10f)); - + Rectangle boundingBoxOf = Rectangle.boundingBoxOf(rectangles); - + assertEquals(new Rectangle(0f, 0f, 40f, 30f), boundingBoxOf); - - - - + + + + } - - + + @Test + public void testTransitiveComparison1() { + // +-------+ + // | | + // | A | +-------+ + // | | | | + // +-------+ | B | +-------+ + // | | | | + // +-------+ | C | + // | | + // +-------+ + Rectangle a = new Rectangle(0,0,2,2); + Rectangle b = new Rectangle(1,1,2,2); + Rectangle c = new Rectangle(2,2,2,2); + assertTrue(a.compareTo(b) < 0); + assertTrue(b.compareTo(c) < 0); + assertTrue(a.compareTo(c) < 0); + } + + @Test @Ignore + public void testTransitiveComparison2() { + // +-------+ + // | | + // +-------+ | C | + // | | | | + // +-------+ | B | +-------+ + // | | | | + // | A | +-------+ + // | | + // +-------+ + Rectangle a = new Rectangle(2,0,2,2); + Rectangle b = new Rectangle(1,1,2,2); + Rectangle c = new Rectangle(0,2,2,2); + assertTrue(a.compareTo(b) < 0); + assertTrue(b.compareTo(c) < 0); + assertTrue(a.compareTo(c) < 0); + } + + @Test @Ignore + public void testWellDefinedComparison1() { + Rectangle a = new Rectangle(2,0,2,2); + Rectangle b = new Rectangle(1,1,2,2); + Rectangle c = new Rectangle(0,2,2,2); + List l1 = new ArrayList<>(Arrays.asList(b, a, c)); + List l2 = new ArrayList<>(Arrays.asList(c, b, a)); + QuickSort.sort(l1, Rectangle.ILL_DEFINED_ORDER); + QuickSort.sort(l2, Rectangle.ILL_DEFINED_ORDER); + assertEquals(l1.get(0), l2.get(0)); + assertEquals(l1.get(1), l2.get(1)); + assertEquals(l1.get(2), l2.get(2)); + } + } diff --git a/src/test/java/technology/tabula/TestRectangleSpatialIndex.java b/src/test/java/technology/tabula/TestRectangleSpatialIndex.java index 1c05daf1..46eb1ea3 100644 --- 
a/src/test/java/technology/tabula/TestRectangleSpatialIndex.java +++ b/src/test/java/technology/tabula/TestRectangleSpatialIndex.java @@ -11,7 +11,7 @@ public void testIntersects() { Rectangle r = new Rectangle(0, 0, 0, 0); - RectangleSpatialIndex rSpatialIndex = new RectangleSpatialIndex(); + RectangleSpatialIndex rSpatialIndex = new RectangleSpatialIndex<>(); rSpatialIndex.add(r); assertTrue(rSpatialIndex.intersects(r).size() > 0); diff --git a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java b/src/test/java/technology/tabula/TestSpreadsheetExtractor.java index d63da204..f8bd4074 100644 --- a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java +++ b/src/test/java/technology/tabula/TestSpreadsheetExtractor.java @@ -129,9 +129,9 @@ public class TestSpreadsheetExtractor { @Test public void testLinesToCells() { List cells = SpreadsheetExtractionAlgorithm.findCells(Arrays.asList(HORIZONTAL_RULING_LINES), Arrays.asList(VERTICAL_RULING_LINES)); - Collections.sort(cells); + Collections.sort(cells, Rectangle.ILL_DEFINED_ORDER); List expected = Arrays.asList(EXPECTED_CELLS); - Collections.sort(expected); + Collections.sort(expected, Rectangle.ILL_DEFINED_ORDER); assertTrue(cells.equals(expected)); } @@ -163,7 +163,7 @@ public void testFindSpreadsheetsFromCells() throws IOException { Charset.forName("utf-8"), CSVFormat.DEFAULT); - List cells = new ArrayList(); + List cells = new ArrayList<>(); for (CSVRecord record : parse) { cells.add(new Cell(Float.parseFloat(record.get(0)), @@ -173,11 +173,10 @@ public void testFindSpreadsheetsFromCells() throws IOException { } - SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); List expected = Arrays.asList(EXPECTED_RECTANGLES); - Collections.sort(expected); - List foundRectangles = se.findSpreadsheetsFromCells(cells); - Collections.sort(foundRectangles); + Collections.sort(expected, Rectangle.ILL_DEFINED_ORDER); + List foundRectangles = 
SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells); + Collections.sort(foundRectangles, Rectangle.ILL_DEFINED_ORDER); assertTrue(foundRectangles.equals(expected)); } @@ -190,6 +189,7 @@ public void testSpreadsheetExtraction() throws IOException { 269.875f, 12.75f, 790.5f, 561f); SpreadsheetExtractionAlgorithm.findCells(page.getHorizontalRulings(), page.getVerticalRulings()); + page.getPDDoc().close(); } @Test @@ -198,14 +198,14 @@ public void testSpanningCells() throws IOException { .getPage("src/test/resources/technology/tabula/spanning_cells.pdf", 1); String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/spanning_cells.json"); SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); - List tables = se.extract(page); + List
tables = se.extract(page); assertEquals(2, tables.size()); StringBuilder sb = new StringBuilder(); - (new JSONWriter()).write(sb, (List
) tables); + (new JSONWriter()).write(sb, tables); assertEquals(expectedJson, sb.toString()); - + page.getPDDoc().close(); } @Test @@ -214,14 +214,14 @@ public void testSpanningCellsToCsv() throws IOException { .getPage("src/test/resources/technology/tabula/spanning_cells.pdf", 1); String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spanning_cells.csv"); SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); - List tables = se.extract(page); + List
tables = se.extract(page); assertEquals(2, tables.size()); StringBuilder sb = new StringBuilder(); - (new CSVWriter()).write(sb, (List
) tables); + (new CSVWriter()).write(sb, tables); assertEquals(expectedCsv, sb.toString()); - + page.getPDDoc().close(); } @Test @@ -230,6 +230,7 @@ public void testIncompleteGrid() throws IOException { SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); List tables = se.extract(page); assertEquals(2, tables.size()); + page.getPDDoc().close(); } @Test @@ -245,6 +246,7 @@ public void testNaturalOrderOfRectanglesDoesNotBreakContract() throws IOExceptio String expected = "Project,Agency,Institution\r\nNanotechnology and its publics,NSF,Pennsylvania State University\r\n\"Public information and deliberation in nanoscience and\rnanotechnology policy (SGER)\",Interagency,\"North Carolina State\rUniversity\"\r\n\"Social and ethical research and education in agrifood\rnanotechnology (NIRT)\",NSF,Michigan State University\r\n\"From laboratory to society: developing an informed\rapproach to nanoscale science and engineering (NIRT)\",NSF,University of South Carolina\r\nDatabase and innovation timeline for nanotechnology,NSF,UCLA\r\nSocial and ethical dimensions of nanotechnology,NSF,University of Virginia\r\n\"Undergraduate exploration of nanoscience,\rapplications and societal implications (NUE)\",NSF,\"Michigan Technological\rUniversity\"\r\n\"Ethics and belief inside the development of\rnanotechnology (CAREER)\",NSF,University of Virginia\r\n\"All centers, NNIN and NCN have a societal\rimplications components\",\"NSF, DOE,\rDOD, and NIH\",\"All nanotechnology centers\rand networks\"\r\n"; assertEquals(expected, result); + page.getPDDoc().close(); } @Test @@ -256,8 +258,7 @@ public void testMergeLinesCloseToEachOther() throws IOException { assertEquals(expectedRulings[i], rulings.get(i).getLeft(), 0.1); } assertEquals(6, rulings.size()); - - + page.getPDDoc().close(); } @Test @@ -275,6 +276,7 @@ public void testSpreadsheetWithNoBoundingFrameShouldBeSpreadsheet() throws IOExc (new CSVWriter()).write(sb, tables.get(0)); assertEquals(expectedCsv, 
sb.toString()); + page.getPDDoc().close(); } @@ -334,7 +336,7 @@ public void testExtractSpreadsheetWithinAnArea() throws IOException { for (int i = 0; i < parsedResult.size(); i++) { assertEquals(parsedResult.get(i).size(), parsedExpected.get(i).size()); } - + page.getPDDoc().close(); } @Test @@ -354,7 +356,8 @@ public void testDontRaiseSortException() throws IOException { 446.0f, 97.0f, 685.0f, 520.0f); page.getText(); SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - Table table = bea.extract(page).get(0); + bea.extract(page).get(0); + page.getPDDoc().close(); } @Test @@ -364,8 +367,9 @@ public void testShouldDetectASingleSpreadsheet() throws IOException { 1, 68.08f, 16.44f, 680.85f, 597.84f); SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) bea.extract(page); + List
tables = bea.extract(page); assertEquals(1, tables.size()); + page.getPDDoc().close(); } @Test @@ -373,7 +377,7 @@ public void testExtractTableWithExternallyDefinedRulings() throws IOException { Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/us-007.pdf", 1); SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) bea.extract(page, + List
tables = bea.extract(page, Arrays.asList(EXTERNALLY_DEFINED_RULINGS)); assertEquals(1, tables.size()); Table table = tables.get(0); @@ -396,6 +400,7 @@ public void testExtractTableWithExternallyDefinedRulings() throws IOException { assertEquals("3,700.00", table.getRows().get(7).get(1).getText()); assertEquals("Daily or Miscellaneous\r(each day of the payroll period)", table.getRows().get(8).get(0).getText()); assertEquals("14.23", table.getRows().get(8).get(1).getText()); + page.getPDDoc().close(); } @@ -404,13 +409,14 @@ public void testAnotherExtractTableWithExternallyDefinedRulings() throws IOExcep Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/us-024.pdf", 1); SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) bea.extract(page, + List
tables = bea.extract(page, Arrays.asList(EXTERNALLY_DEFINED_RULINGS2)); assertEquals(1, tables.size()); Table table = tables.get(0); assertEquals("Total Supply", table.getRows().get(4).get(0).getText()); assertEquals("6.6", table.getRows().get(6).get(2).getText()); + page.getPDDoc().close(); } @Test @@ -419,10 +425,11 @@ public void testSpreadsheetsSortedByTopAndRight() throws IOException { 1); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) sea.extract(page); + List
tables = sea.extract(page); for (int i = 1; i < tables.size(); i++) { assert (tables.get(i - 1).getTop() <= tables.get(i).getTop()); } + page.getPDDoc().close(); } @Test @@ -431,10 +438,11 @@ public void testDontStackOverflowQuicksort() throws IOException { 1); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) sea.extract(page); + List
tables = sea.extract(page); for (int i = 1; i < tables.size(); i++) { assert (tables.get(i - 1).getTop() <= tables.get(i).getTop()); } + page.getPDDoc().close(); } @Test @@ -442,7 +450,7 @@ public void testRTL() throws IOException { Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/arabic.pdf", 1); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) sea.extract(page); + List
tables = sea.extract(page); // assertEquals(1, tables.size()); Table table = tables.get(0); @@ -465,6 +473,7 @@ public void testRTL() throws IOException { // which is not currently possible because of the two problems listed above // assertEquals("مرحباً", table.getRows().get(0).get(0).getText()); // really ought to be ً, but this is forgiveable for now + page.getPDDoc().close(); } @@ -473,7 +482,7 @@ public void testRealLifeRTL() throws IOException { Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/mednine.pdf", 1); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) sea.extract(page); + List
tables = sea.extract(page); // assertEquals(1, tables.size()); Table table = tables.get(0); @@ -498,6 +507,7 @@ public void testRealLifeRTL() throws IOException { // these (commented-out) tests reflect the theoretical correct answer, // which is not currently possible because of the two problems listed above // assertEquals("مرحباً", table.getRows().get(0).get(0).getText()); // really ought to be ً, but this is forgiveable for now + page.getPDDoc().close(); } @@ -510,6 +520,7 @@ public void testExtractColumnsCorrectly3() throws IOException { Table table = sea.extract(page).get(0); assertEquals("REGIONAL PULMONARY & SLEEP\rMEDICINE", table.getRows().get(8).get(1).getText()); + page.getPDDoc().close(); } @@ -522,7 +533,7 @@ public void testSpreadsheetExtractionIssue656() throws IOException { String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/Publication_of_award_of_Bids_for_Transport_Sector__August_2016.csv"); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) sea.extract(page); + List
tables = sea.extract(page); assertEquals(1, tables.size()); Table table = tables.get(0); @@ -530,6 +541,7 @@ public void testSpreadsheetExtractionIssue656() throws IOException { (new CSVWriter()).write(sb, table); String result = sb.toString(); assertEquals(expectedCsv, result); + page.getPDDoc().close(); } } diff --git a/src/test/java/technology/tabula/TestTableDetection.java b/src/test/java/technology/tabula/TestTableDetection.java index 33e8cff9..c13ff201 100644 --- a/src/test/java/technology/tabula/TestTableDetection.java +++ b/src/test/java/technology/tabula/TestTableDetection.java @@ -1,28 +1,29 @@ package technology.tabula; -import java.io.File; -import java.io.FileWriter; -import java.io.FilenameFilter; -import java.io.IOException; -import java.util.*; -import java.util.logging.Level; -import java.util.logging.Logger; - -import static org.junit.Assert.*; - import com.google.gson.Gson; +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.w3c.dom.*; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; +import technology.tabula.detectors.NurminenDetectionAlgorithm; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.*; +import java.util.logging.Level; +import java.util.logging.Logger; -import org.apache.pdfbox.pdmodel.PDDocument; -import technology.tabula.detectors.NurminenDetectionAlgorithm; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; /** * Created by matt on 2015-12-14. 
@@ -47,10 +48,6 @@ private static final class TestStatus { private transient boolean firstRun; private transient String pdfFilename; - public TestStatus() { - this(null); - } - public TestStatus(String pdfFilename) { this.numExpectedTables = 0; this.numCorrectlyDetectedTables = 0; @@ -74,12 +71,12 @@ public static TestStatus load(String pdfFilename) { } public void save() { - try { - FileWriter w = new FileWriter(jsonFilename(this.pdfFilename)); + try (FileWriter w = new FileWriter(jsonFilename(this.pdfFilename))) { Gson gson = new Gson(); w.write(gson.toJson(this)); w.close(); } catch (Exception e) { + throw new Error(e); } } @@ -108,21 +105,16 @@ public static void enableLogging() { public static Collection data() { String[] regionCodes = {"eu", "us"}; - ArrayList data = new ArrayList(); + ArrayList data = new ArrayList<>(); for (String regionCode : regionCodes) { String directoryName = "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-" + regionCode + "/"; File dir = new File(directoryName); - File[] pdfs = dir.listFiles(new FilenameFilter() { - @Override - public boolean accept(File dir, String name) { - return name.toLowerCase().endsWith(".pdf"); - } - }); + File[] pdfs = dir.listFiles((dir1, name) -> name.toLowerCase().endsWith(".pdf")); for (File pdf : pdfs) { - data.add(new Object[] {pdf}); + data.add(new Object[]{pdf}); } } @@ -144,6 +136,7 @@ public TestTableDetection(File pdf) { try { this.builder = factory.newDocumentBuilder(); } catch (Exception e) { + // ignored } } @@ -165,15 +158,17 @@ public void testDetectionOfTables() throws Exception { NodeList tables = regionDocument.getElementsByTagName("table"); // tabula extractors - PDDocument pdfDocument = PDDocument.load(this.pdf); + + + PDDocument pdfDocument = Loader.loadPDF(this.pdf); ObjectExtractor extractor = new ObjectExtractor(pdfDocument); // parse expected tables from the ground truth dataset - Map> expectedTables = new HashMap>(); + Map> expectedTables = new 
HashMap<>(); int numExpectedTables = 0; - for (int i=0; i pageTables = expectedTables.get(page); if (pageTables == null) { - pageTables = new ArrayList(); + pageTables = new ArrayList<>(); expectedTables.put(page, pageTables); } @@ -197,7 +192,7 @@ public void testDetectionOfTables() throws Exception { // do some extra work to extract the page with tabula and get the dimensions from there Page extractedPage = extractor.extractPage(page); - float top = (float)extractedPage.getHeight() - y2; + float top = (float) extractedPage.getHeight() - y2; float left = x1; float width = x2 - x1; float height = y2 - y1; @@ -207,7 +202,7 @@ public void testDetectionOfTables() throws Exception { } // now find tables detected by tabula-java - Map> detectedTables = new HashMap>(); + Map> detectedTables = new HashMap<>(); // the algorithm we're going to be testing NurminenDetectionAlgorithm detectionAlgorithm = new NurminenDetectionAlgorithm(); @@ -216,15 +211,15 @@ public void testDetectionOfTables() throws Exception { while (pages.hasNext()) { Page page = pages.next(); List tablesOnPage = detectionAlgorithm.detect(page); - if (tablesOnPage.size() > 0) { - detectedTables.put(new Integer(page.getPageNumber()), tablesOnPage); + if (!tablesOnPage.isEmpty()) { + detectedTables.put(page.getPageNumber(), tablesOnPage); } } // now compare System.out.println("Testing " + this.pdf.getName()); - List errors = new ArrayList(); + List errors = new ArrayList<>(); this.status.numExpectedTables = numExpectedTables; totalExpectedTables += numExpectedTables; @@ -269,7 +264,7 @@ public void testDetectionOfTables() throws Exception { System.out.println(totalErroneouslyDetectedTables + " tables incorrectly detected"); - if(this.status.isFirstRun()) { + if (this.status.isFirstRun()) { // make the baseline this.status.expectedFailure = failed; this.status.numCorrectlyDetectedTables = this.numCorrectlyDetectedTables; @@ -289,20 +284,20 @@ public void testDetectionOfTables() throws Exception { } private 
List comparePages(Integer page, List detected, List expected) { - ArrayList errors = new ArrayList(); + ArrayList errors = new ArrayList<>(); // go through the detected tables and try to match them with expected tables // from http://www.orsigiorgio.net/wp-content/papercite-data/pdf/gho*12.pdf (comparing regions): // for other (e.g.“black-box”) algorithms, bounding boxes and content are used. A region is correct if it // contains the minimal bounding box of the ground truth without intersecting additional content. - for (Iterator detectedIterator = detected.iterator(); detectedIterator.hasNext();) { + for (Iterator detectedIterator = detected.iterator(); detectedIterator.hasNext(); ) { Rectangle detectedTable = detectedIterator.next(); - for (int i=0; i elements = new ArrayList(); - elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); - elements.add(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); - elements.add(new TextElement(60f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList(); - expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f))); - expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f))); - expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f))); - expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f))); - - Assert.assertEquals(expectedWords, words); - - } - - @Test - public void mergeFourElementsIntoOneWord() { - - List elements = new ArrayList(); - elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 25f, 10f, 20f, 
PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); - elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); - elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); - textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); - textChunk.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - expectedWords.add(textChunk); - - Assert.assertEquals(expectedWords, words); - - } - - @Test - public void mergeElementsShouldBeIdempotent() { - /* - * a bug in TextElement.merge_words would delete the first TextElement in the array - * it was called with. Discussion here: https://github.com/tabulapdf/tabula-java/issues/78 - */ - - List elements = new ArrayList(); - elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); - elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); - elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - List words2 = TextElement.mergeWords(elements); - Assert.assertEquals(words, words2); - } - - @Test - public void mergeElementsWithSkippingRules() { - - List elements = new ArrayList(); - elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 17f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); - elements.add(new 
TextElement(0.001f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, " ", 1f, 6f)); - elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); - elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.TIMES_ROMAN, 10f, "D", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); - textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); - textChunk.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.TIMES_ROMAN, 10f, "D", 1f, 6f)); - expectedWords.add(textChunk); - - Assert.assertEquals(expectedWords, words); - - } - - @Test - public void mergeTenElementsIntoTwoWords() { - - List elements = new ArrayList(); - elements.add(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); - elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); - elements.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 60f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f)); - elements.add(new TextElement(0f, 70f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f)); - elements.add(new TextElement(0f, 80f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f)); - elements.add(new TextElement(0f, 90f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - elements.add(new TextElement(0f, 100f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); - textChunk.add(new 
TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); - textChunk.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, PDType1Font.HELVETICA, 1f, " ", 1f)); //Check why width=10.5? - expectedWords.add(textChunk); - TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f)); - textChunk2.add(new TextElement(0f, 70f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f)); - textChunk2.add(new TextElement(0f, 80f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f)); - textChunk2.add(new TextElement(0f, 90f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - textChunk2.add(new TextElement(0f, 100f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - expectedWords.add(textChunk2); - - Assert.assertEquals(2, words.size()); - Assert.assertEquals(expectedWords, words); - - } - - @Test - public void mergeTenElementsIntoTwoLines() { - - List elements = new ArrayList(); - elements.add(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); - elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); - elements.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - elements.add(new TextElement(20f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f)); - elements.add(new TextElement(20f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f)); - elements.add(new TextElement(20f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f)); - elements.add(new TextElement(20f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - elements.add(new TextElement(20f, 40f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - - List words = 
TextElement.mergeWords(elements); - - List expectedWords = new ArrayList(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); - textChunk.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); - textChunk.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); - expectedWords.add(textChunk); - TextChunk textChunk2 = new TextChunk(new TextElement(20f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f)); - textChunk2.add(new TextElement(20f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f)); - textChunk2.add(new TextElement(20f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f)); - textChunk2.add(new TextElement(20f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); - textChunk2.add(new TextElement(20f, 40f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); - expectedWords.add(textChunk2); - - Assert.assertEquals(2, words.size()); - Assert.assertEquals(expectedWords, words); - - } - - + + + @Test + public void createTextElement() { + + TextElement textElement = new TextElement(5f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f); + + Assert.assertNotNull(textElement); + Assert.assertEquals("A", textElement.getText()); + Assert.assertEquals(1f, textElement.getFontSize(), 0); + Assert.assertEquals(15f, textElement.getLeft(), 0); + Assert.assertEquals(5f, textElement.getTop(), 0); + Assert.assertEquals(10f, textElement.getWidth(), 0); + Assert.assertEquals(20f, textElement.getHeight(), 0); + Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName()); + Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0); + Assert.assertEquals(0f, textElement.getDirection(), 0); + + + } + + @Test + public void createTextElementWithDirection() { + + TextElement textElement = new 
TextElement(5f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f); + + Assert.assertNotNull(textElement); + Assert.assertEquals("A", textElement.getText()); + Assert.assertEquals(1f, textElement.getFontSize(), 0); + Assert.assertEquals(15f, textElement.getLeft(), 0); + Assert.assertEquals(5f, textElement.getTop(), 0); + Assert.assertEquals(10f, textElement.getWidth(), 0); + Assert.assertEquals(20f, textElement.getHeight(), 0); + Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName()); + Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0); + Assert.assertEquals(6f, textElement.getDirection(), 0); + + + } + + @Test + public void mergeFourElementsIntoFourWords() { + + List elements = new ArrayList<>(); + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f)); + elements.add(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f)); + elements.add(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f)); + + List words = TextElement.mergeWords(elements); + + List expectedWords = new ArrayList<>(); + expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f))); + expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f))); + expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f))); + expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f))); + + Assert.assertEquals(expectedWords, words); + + } + + @Test + public void mergeFourElementsIntoOneWord() { + + List elements = new ArrayList<>(); + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 25f, 10f, 20f, 
font, 1f, "B", 1f, 6f)); + elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); + elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f)); + + List words = TextElement.mergeWords(elements); + + List expectedWords = new ArrayList<>(); + TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); + textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); + textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); + textChunk.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f)); + expectedWords.add(textChunk); + + Assert.assertEquals(expectedWords, words); + + } + + @Test + public void mergeElementsShouldBeIdempotent() { + /* + * a bug in TextElement.merge_words would delete the first TextElement in the array + * it was called with. Discussion here: https://github.com/tabulapdf/tabula-java/issues/78 + */ + + List elements = new ArrayList<>(); + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); + elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); + elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f)); + + List words = TextElement.mergeWords(elements); + List words2 = TextElement.mergeWords(elements); + Assert.assertEquals(words, words2); + } + + @Test + public void mergeElementsWithSkippingRules() { + + List elements = new ArrayList<>(); + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 17f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); + elements.add(new TextElement(0.001f, 25f, 10f, 20f, font, 1f, " ", 1f, 6f)); + elements.add(new TextElement(0f, 35f, 
10f, 20f, font, 1f, "C", 1f, 6f)); + PDFont TIMES_ROMAN = new PDType1Font(Standard14Fonts.FontName.TIMES_ROMAN); + elements.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f)); + + List words = TextElement.mergeWords(elements); + + List expectedWords = new ArrayList<>(); + TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); + textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); + textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); + textChunk.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f)); + expectedWords.add(textChunk); + + Assert.assertEquals(expectedWords, words); + + } + + @Test + public void mergeTenElementsIntoTwoWords() { + + List elements = new ArrayList<>(); + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f)); + elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f)); + elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f)); + elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f)); + elements.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f)); + elements.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f)); + elements.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f)); + elements.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f)); + + List words = TextElement.mergeWords(elements); + + List expectedWords = new ArrayList<>(); + TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f)); + textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f)); + textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f)); + textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f)); + textChunk.add(new 
TextElement(0f, 30f, 10.5f, 20f, font, 1f, " ", 1f)); //Check why width=10.5? + expectedWords.add(textChunk); + TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f)); + textChunk2.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f)); + textChunk2.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f)); + textChunk2.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f)); + textChunk2.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f)); + expectedWords.add(textChunk2); + + Assert.assertEquals(2, words.size()); + Assert.assertEquals(expectedWords, words); + + } + + @Test + public void mergeTenElementsIntoTwoLines() { + + List elements = new ArrayList<>(); + PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f)); + elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f)); + elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f)); + elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f)); + elements.add(new TextElement(20f, 0f, 10f, 20f, font, 1f, "M", 1f, 6f)); + elements.add(new TextElement(20f, 10f, 10f, 20f, font, 1f, "U", 1f, 6f)); + elements.add(new TextElement(20f, 20f, 10f, 20f, font, 1f, "N", 1f, 6f)); + elements.add(new TextElement(20f, 30f, 10f, 20f, font, 1f, "D", 1f, 6f)); + elements.add(new TextElement(20f, 40f, 10f, 20f, font, 1f, "O", 1f, 6f)); + + List words = TextElement.mergeWords(elements); + + List expectedWords = new ArrayList<>(); + TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f)); + textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f)); + textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f)); + textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f)); + expectedWords.add(textChunk); + TextChunk textChunk2 = new TextChunk(new 
TextElement(20f, 0f, 10f, 20f, font, 1f, "M", 1f, 6f)); + textChunk2.add(new TextElement(20f, 10f, 10f, 20f, font, 1f, "U", 1f, 6f)); + textChunk2.add(new TextElement(20f, 20f, 10f, 20f, font, 1f, "N", 1f, 6f)); + textChunk2.add(new TextElement(20f, 30f, 10f, 20f, font, 1f, "D", 1f, 6f)); + textChunk2.add(new TextElement(20f, 40f, 10f, 20f, font, 1f, "O", 1f, 6f)); + expectedWords.add(textChunk2); + + Assert.assertEquals(2, words.size()); + Assert.assertEquals(expectedWords, words); + + } + } diff --git a/src/test/java/technology/tabula/TestUtils.java b/src/test/java/technology/tabula/TestUtils.java index 75146565..cb85cb7b 100644 --- a/src/test/java/technology/tabula/TestUtils.java +++ b/src/test/java/technology/tabula/TestUtils.java @@ -12,6 +12,7 @@ import java.util.Collections; import java.util.List; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.rendering.ImageType; import org.apache.commons.cli.ParseException; import org.apache.pdfbox.pdmodel.PDDocument; @@ -48,7 +49,7 @@ public void testBoundsOfOneEmptyRectangleAndAnotherNonEmpty() { @Test public void testBoundsOfOneRectangle() { - ArrayList shapes = new ArrayList(); + ArrayList shapes = new ArrayList<>(); shapes.add(new Rectangle(0, 0, 20, 40)); Rectangle r = Utils.bounds(shapes); assertEquals(r, shapes.get(0)); @@ -82,7 +83,7 @@ public void testAnotherExceptionInParsePages() throws ParseException { @Test public void testQuickSortEmptyList() { - List numbers = new ArrayList(); + List numbers = new ArrayList<>(); QuickSort.sort(numbers); assertEquals(Collections.emptyList(), numbers); @@ -107,8 +108,8 @@ public void testQuickSortShortList() { @Test public void testQuickSortLongList() { - List numbers = new ArrayList(); - List expectedNumbers = new ArrayList(); + List numbers = new ArrayList<>(); + List expectedNumbers = new ArrayList<>(); for(int i = 0; i <= 12000; i++){ numbers.add(12000 - i); @@ -122,9 +123,9 @@ public void testQuickSortLongList() { @Test public void testJPEG2000DoesNotRaise() 
throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/jpeg2000.pdf")); + PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/jpeg2000.pdf")); PDPage page = pdf_document.getPage(0); - Utils.pageConvertToImage(page, 360, ImageType.RGB); + Utils.pageConvertToImage(pdf_document, page, 360, ImageType.RGB); } } diff --git a/src/test/java/technology/tabula/TestWriters.java b/src/test/java/technology/tabula/TestWriters.java index 63b3dcce..961d57af 100644 --- a/src/test/java/technology/tabula/TestWriters.java +++ b/src/test/java/technology/tabula/TestWriters.java @@ -31,7 +31,7 @@ private List
getTables() throws IOException { Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/twotables.pdf", 1); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - return (List
) sea.extract(page); + return sea.extract(page); } @Test diff --git a/src/test/java/technology/tabula/UtilsForTesting.java b/src/test/java/technology/tabula/UtilsForTesting.java index 524b497e..8d3c91cf 100644 --- a/src/test/java/technology/tabula/UtilsForTesting.java +++ b/src/test/java/technology/tabula/UtilsForTesting.java @@ -7,7 +7,9 @@ import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVPrinter; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; +import org.junit.Assert; public class UtilsForTesting { @@ -22,11 +24,9 @@ public static Page getAreaFromPage(String path, int page, float top, float left, public static Page getPage(String path, int pageNumber) throws IOException { ObjectExtractor oe = null; try { - PDDocument document = PDDocument - .load(new File(path)); + PDDocument document = Loader.loadPDF(new File(path)); oe = new ObjectExtractor(document); - Page page = oe.extract(pageNumber); - return page; + return oe.extract(pageNumber); } finally { if (oe != null) oe.close(); @@ -36,7 +36,7 @@ public static Page getPage(String path, int pageNumber) throws IOException { public static String[][] tableToArrayOfRows(Table table) { List> tableRows = table.getRows(); - int maxColCount = -Integer.MAX_VALUE; + int maxColCount = 0; for (int i = 0; i < tableRows.size(); i++) { List row = tableRows.get(i); @@ -44,6 +44,9 @@ public static String[][] tableToArrayOfRows(Table table) { maxColCount = row.size(); } } + + Assert.assertEquals(maxColCount, table.getColCount()); + String[][] rv = new String[tableRows.size()][maxColCount]; for (int i = 0; i < tableRows.size(); i++) { @@ -57,14 +60,14 @@ public static String[][] tableToArrayOfRows(Table table) { } public static String loadJson(String path) throws IOException { - - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8")); - StringBuilder stringBuilder = new StringBuilder(); - 
String line = null; - - while ((line = reader.readLine()) != null) { - stringBuilder.append(line); - } + + StringBuilder stringBuilder = new StringBuilder(); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"))) { + String line = null; + while ((line = reader.readLine()) != null) { + stringBuilder.append(line); + } + } return stringBuilder.toString(); diff --git a/src/test/resources/technology/tabula/AnimalSounds.pdf b/src/test/resources/technology/tabula/AnimalSounds.pdf new file mode 100644 index 00000000..ad6c78cf Binary files /dev/null and b/src/test/resources/technology/tabula/AnimalSounds.pdf differ diff --git a/src/test/resources/technology/tabula/AnimalSounds1.pdf b/src/test/resources/technology/tabula/AnimalSounds1.pdf new file mode 100644 index 00000000..8883d101 Binary files /dev/null and b/src/test/resources/technology/tabula/AnimalSounds1.pdf differ diff --git a/src/test/resources/technology/tabula/MultiColumn.pdf b/src/test/resources/technology/tabula/MultiColumn.pdf new file mode 100644 index 00000000..197df402 Binary files /dev/null and b/src/test/resources/technology/tabula/MultiColumn.pdf differ diff --git a/src/test/resources/technology/tabula/csv/AnimalSounds.csv b/src/test/resources/technology/tabula/csv/AnimalSounds.csv new file mode 100644 index 00000000..bae0c105 --- /dev/null +++ b/src/test/resources/technology/tabula/csv/AnimalSounds.csv @@ -0,0 +1,8 @@ +Cat,Says,Meow +"Parastratiosphecomyiastratiosph +ecomyioides",Says,bzzzzzzz +Fox,Says,"Ring- +dingdingdingdingeringedingGer +ing- +dingdingdingdingeringedingGer +ing-dingdingdingdingeringeding" \ No newline at end of file diff --git a/src/test/resources/technology/tabula/csv/MultiColumn.csv b/src/test/resources/technology/tabula/csv/MultiColumn.csv new file mode 100644 index 00000000..f4f2e726 --- /dev/null +++ b/src/test/resources/technology/tabula/csv/MultiColumn.csv @@ -0,0 +1,44 @@ +1,100,200 +2,101,201 +3,102,202 +4,103,203 
+5,104,204 +6,105,205 +7,106,206 +8,107,207 +9,108,208 +10,109,209 +11,110,210 +12,111,211 +13,112,212 +14,113,213 +15,114,214 +16,115,215 +17,116,216 +18,117,217 +19,118,218 +20,119,219 +21,120,220 +22,121,221 +23,122,222 +24,123,223 +25,124,224 +26,125,225 +27,126,226 +28,127,227 +29,128,228 +30,129,229 +31,130,230 +32,131,231 +33,132,232 +34,133,233 +35,134,234 +36,135,235 +37,136,236 +38,137,237 +39,138,238 +40,139,239 +41,140,240 +42,141,241 +43,142,242 +44,143,243 \ No newline at end of file diff --git a/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_no_guessing.csv b/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_no_guessing.csv index cd546d5a..de63c5c0 100644 --- a/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_no_guessing.csv +++ b/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_no_guessing.csv @@ -1,65 +1,65 @@ -"E-PRTR pollutants and their thresholds" +E-PRTR pollutants and their thresholds "" -"A facility has to report data under E-PRTR if it fulfils the following criteria:" -"• the facility falls under at least one of the 65 E-PRTR economic activities. The" -"activities are also reported using a statistical classification of economic activities" -"(NACE rev 2)" -"• the facility has a capacity exceeding at least one of the E-PRTR capacity" -"thresholds" -"• the facility releases pollutants or transfers waste off-site which exceed specific" -"thresholds set out in Article 5 of the E-PRTR Regulation. These thresholds for" +A facility has to report data under E-PRTR if it fulfils the following criteria: +• the facility falls under at least one of the 65 E-PRTR economic activities. 
The +activities are also reported using a statistical classification of economic activities +(NACE rev 2) +• the facility has a capacity exceeding at least one of the E-PRTR capacity +thresholds +• the facility releases pollutants or transfers waste off-site which exceed specific +thresholds set out in Article 5 of the E-PRTR Regulation. These thresholds for "releases of pollutants are specified for each media - air, water and land - in Annex" -"II of the E-PRTR Regulation." +II of the E-PRTR Regulation. "" -"In the following tables you will find the 91 E-PRTR pollutants and their thresholds broken" -"down by the 7 groups used in all the searches of the E-PRTR website." +In the following tables you will find the 91 E-PRTR pollutants and their thresholds broken +down by the 7 groups used in all the searches of the E-PRTR website. "" "" -"Greenhouse gases" +Greenhouse gases "" -"THRESHOLD FOR RELEASES" -"to air to water to land" -"kg/year kg/year kg/year" -"Carbon dioxide (CO2) 100 million - -" -"Hydro-fluorocarbons (HFCs) 100 - -" -"Methane (CH4) 100 000 - -" -"Nitrous oxide (N2O) 10 000 - -" -"Perfluorocarbons (PFCs) 100 - -" -"Sulphur hexafluoride (SF6) 50 - -" +THRESHOLD FOR RELEASES +to air to water to land +kg/year kg/year kg/year +Carbon dioxide (CO2) 100 million - - +Hydro-fluorocarbons (HFCs) 100 - - +Methane (CH4) 100 000 - - +Nitrous oxide (N2O) 10 000 - - +Perfluorocarbons (PFCs) 100 - - +Sulphur hexafluoride (SF6) 50 - - "" -"Other gases" +Other gases "" -"THRESHOLD FOR RELEASES" -"to air to water to land" -"kg/year kg/year kg/year" -"Ammonia (NH3) 10 000 - -" -"Carbon monoxide (CO) 500 000 - -" -"Chlorine and inorganic compounds" -"(as HCl)" -"10 000 - -" -"Chlorofluorocarbons (CFCs) 1 - -" -"Flourine and inorganic compounds" -"(as HF)" -"5 000 - -" -"Halons 1 - -" -"Hydrochlorofluorocarbons (HCFCs) 1 - -" -"Hydrogen Cyanide (HCN) 200 - -" -"Nitrogen oxides (NOx/NO2) 100 000 - -" -"Non-methane volatile organic" -"compounds (NMVOC)" -"100 000 - -" 
-"Sulphur oxides (SOx/SO2) 150 000 - -" +THRESHOLD FOR RELEASES +to air to water to land +kg/year kg/year kg/year +Ammonia (NH3) 10 000 - - +Carbon monoxide (CO) 500 000 - - +Chlorine and inorganic compounds +10 000 - - +(as HCl) +Chlorofluorocarbons (CFCs) 1 - - +Flourine and inorganic compounds +5 000 - - +(as HF) +Halons 1 - - +Hydrochlorofluorocarbons (HCFCs) 1 - - +Hydrogen Cyanide (HCN) 200 - - +Nitrogen oxides (NOx/NO2) 100 000 - - +Non-methane volatile organic +100 000 - - +compounds (NMVOC) +Sulphur oxides (SOx/SO2) 150 000 - - "" -"Heavy metals" +Heavy metals "" -"THRESHOLD FOR RELEASES" -"to air to water to land" -"kg/year kg/year kg/year" -"Arsenic and compounds (as As) 20 5 5" -"Cadmium and compounds (as Cd) 10 5 5" -"Chromium and compounds (as Cr) 100 50 50" -"Copper and compounds (as Cu) 100 50 50" -"Lead and compounds (as Pb) 200 20 20" -"Mercury and compounds (as Hg) 10 1 1" -"Nickel and compounds (as Ni) 50 20 20" -"Zinc and compounds (as Zn) 200 100 100" +THRESHOLD FOR RELEASES +to air to water to land +kg/year kg/year kg/year +Arsenic and compounds (as As) 20 5 5 +Cadmium and compounds (as Cd) 10 5 5 +Chromium and compounds (as Cr) 100 50 50 +Copper and compounds (as Cu) 100 50 50 +Lead and compounds (as Pb) 200 20 20 +Mercury and compounds (as Hg) 10 1 1 +Nickel and compounds (as Ni) 50 20 20 +Zinc and compounds (as Zn) 200 100 100 diff --git a/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_with_guessing.csv b/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_with_guessing.csv index 40a51d00..e683abd3 100644 --- a/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_with_guessing.csv +++ b/src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_with_guessing.csv @@ -1,40 +1,40 @@ -"",,"THRESHOLD FOR RELEASES", -"","to air","to water","to land" -"","kg/year","kg/year","kg/year" -"Carbon dioxide (CO2)","100 million","-","-" -"Hydro-fluorocarbons 
(HFCs)","100","-","-" -"Methane (CH4)","100 000","-","-" -"Nitrous oxide (N2O)","10 000","-","-" -"Perfluorocarbons (PFCs)","100","-","-" -"Sulphur hexafluoride (SF6)","50","-","-" -"","THRESHOLD FOR RELEASES" -"","to air to water to land" -"","kg/year kg/year kg/year" -"Ammonia (NH3)","10 000 - -" -"Carbon monoxide (CO)","500 000 - -" -"Chlorine and inorganic compounds", -"(as HCl)", -"","10 000 - -" -"Chlorofluorocarbons (CFCs)","1 - -" -"Flourine and inorganic compounds", -"(as HF)", -"","5 000 - -" -"Halons","1 - -" -"Hydrochlorofluorocarbons (HCFCs)","1 - -" -"Hydrogen Cyanide (HCN)","200 - -" -"Nitrogen oxides (NOx/NO2)","100 000 - -" -"Non-methane volatile organic", -"compounds (NMVOC)", -"","100 000 - -" -"Sulphur oxides (SOx/SO2)","150 000 - -" -"","THRESHOLD FOR RELEASES" -"","to air to water to land" -"","kg/year kg/year kg/year" -"Arsenic and compounds (as As)","20 5 5" -"Cadmium and compounds (as Cd)","10 5 5" -"Chromium and compounds (as Cr)","100 50 50" -"Copper and compounds (as Cu)","100 50 50" -"Lead and compounds (as Pb)","200 20 20" -"Mercury and compounds (as Hg)","10 1 1" -"Nickel and compounds (as Ni)","50 20 20" -"Zinc and compounds (as Zn)","200 100 100" +"",,THRESHOLD FOR RELEASES, +"",to air,to water,to land +"",kg/year,kg/year,kg/year +Carbon dioxide (CO2),100 million,-,- +Hydro-fluorocarbons (HFCs),100,-,- +Methane (CH4),100 000,-,- +Nitrous oxide (N2O),10 000,-,- +Perfluorocarbons (PFCs),100,-,- +Sulphur hexafluoride (SF6),50,-,- +"",THRESHOLD FOR RELEASES +"",to air to water to land +"",kg/year kg/year kg/year +Ammonia (NH3),10 000 - - +Carbon monoxide (CO),500 000 - - +Chlorine and inorganic compounds, +"",10 000 - - +(as HCl), +Chlorofluorocarbons (CFCs),1 - - +Flourine and inorganic compounds, +"",5 000 - - +(as HF), +Halons,1 - - +Hydrochlorofluorocarbons (HCFCs),1 - - +Hydrogen Cyanide (HCN),200 - - +Nitrogen oxides (NOx/NO2),100 000 - - +Non-methane volatile organic, +"",100 000 - - +compounds (NMVOC), +Sulphur oxides 
(SOx/SO2),150 000 - - +"",THRESHOLD FOR RELEASES +"",to air to water to land +"",kg/year kg/year kg/year +Arsenic and compounds (as As),20 5 5 +Cadmium and compounds (as Cd),10 5 5 +Chromium and compounds (as Cr),100 50 50 +Copper and compounds (as Cu),100 50 50 +Lead and compounds (as Pb),200 20 20 +Mercury and compounds (as Hg),10 1 1 +Nickel and compounds (as Ni),50 20 20 +Zinc and compounds (as Zn),200 100 100 diff --git a/src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv b/src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv index b5e1fd0b..513e26e7 100644 --- a/src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv +++ b/src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv @@ -1,31 +1,28 @@ -"",HARVEST,VARIATION,,, -"","11/12 -(a)",12/13,Percentage,Absolute, -"",,"May/2013 -(b)","Jun/2013 -(c)",(c/a),(c-a) -COTTON,"1.393,4","886,7","894,9","(35,8)","( 498,5)" -TOTAL PEANUT,"93,9","100,6","100,2","6,7","6,3" -PEANUT 1ST CROP,"82,1","86,3","86,2","5,0","4,1" -PEANUT 2ND CROP,"11,8","14,3","14,0","18,6","2,2" -RICE,"2.426,7","2.389,7","2.396,0","(1,3)","( 30,7)" -TOTAL BEANS,"3.262,1","2.952,7","3.026,9","(7,2)","( 235,2)" -BEANS 1ST CROP,"1.241,4","1.122,6","1.122,9","(9,5)","( 118,5)" -BEANS 2ND CROP,"1.394,6","1.275,4","1.271,7","(8,8)","( 122,9)" -BEANS 3RD CROP,"626,1","554,7","632,3","1,0","6,3" -SUNFLOWER,"74,5","60,4","68,9","(7,5)","( 5,6)" -CASTOR BEAN,"128,2","87,5","87,4","(31,8)","( 40,8)" -TOTAL CORN,"15.178,1","15.686,2","15.817,4","4,2","639,3" -CORN 1ST CROP,"7.558,5","6.879,2","6.864,7","(9,2)","( 693,8)" -CORN 2ND CROP,"7.619,6","8.807,0","8.952,7","17,5","1.333,1" -SOYBEAN,"25.042,2","27.715,2","27.715,5","10,7","2.673,3" -SORGHUM,"786,9","836,4","836,4","6,3","49,5" -SUBTOTAL,"48.386,0","50.715,4","50.943,6","5,3","2.557,7" -OAT,"153,0","168,7","168,7","10,3","15,7" -CANOLA,"42,4","43,8","43,8","3,3","1,4" -RYE,"2,3","2,3","2,3",-,- 
-BARLEY,"88,4","102,8","102,8","16,3","14,4" -WHEAT,"2.166,2","1.895,4","1.895,4","(12,5)","( 270,8)" -TRITICALE,"46,9","48,0","48,0","2,3","1,1" -SUBTOTAL,"2.499,2","2.261,0","2.261,0","(9,5)","( 238,2)" -BRAZIL,"50.885,2","5 2.976,4","5 3.204,6","4,6","2.319,5" \ No newline at end of file +"",HARVEST,VARIATION,,, +"","11/12 (a)",12/13,Percentage,Absolute, +"","May/2013 (b)","Jun/2013 (c)",(c/a),(c-a), +COTTON,"1.393,4","886,7","894,9","(35,8)","( 498,5)" +TOTAL PEANUT,"93,9","100,6","100,2","6,7","6,3" +PEANUT 1ST CROP,"82,1","86,3","86,2","5,0","4,1" +PEANUT 2ND CROP,"11,8","14,3","14,0","18,6","2,2" +RICE,"2.426,7","2.389,7","2.396,0","(1,3)","( 30,7)" +TOTAL BEANS,"3.262,1","2.952,7","3.026,9","(7,2)","( 235,2)" +BEANS 1ST CROP,"1.241,4","1.122,6","1.122,9","(9,5)","( 118,5)" +BEANS 2ND CROP,"1.394,6","1.275,4","1.271,7","(8,8)","( 122,9)" +BEANS 3RD CROP,"626,1","554,7","632,3","1,0","6,3" +SUNFLOWER,"74,5","60,4","68,9","(7,5)","( 5,6)" +CASTOR BEAN,"128,2","87,5","87,4","(31,8)","( 40,8)" +TOTAL CORN,"15.178,1","15.686,2","15.817,4","4,2","639,3" +CORN 1ST CROP,"7.558,5","6.879,2","6.864,7","(9,2)","( 693,8)" +CORN 2ND CROP,"7.619,6","8.807,0","8.952,7","17,5","1.333,1" +SOYBEAN,"25.042,2","27.715,2","27.715,5","10,7","2.673,3" +SORGHUM,"786,9","836,4","836,4","6,3","49,5" +SUBTOTAL,"48.386,0","50.715,4","50.943,6","5,3","2.557,7" +OAT,"153,0","168,7","168,7","10,3","15,7" +CANOLA,"42,4","43,8","43,8","3,3","1,4" +RYE,"2,3","2,3","2,3",-,- +BARLEY,"88,4","102,8","102,8","16,3","14,4" +WHEAT,"2.166,2","1.895,4","1.895,4","(12,5)","( 270,8)" +TRITICALE,"46,9","48,0","48,0","2,3","1,1" +SUBTOTAL,"2.499,2","2.261,0","2.261,0","(9,5)","( 238,2)" +BRAZIL,"50.885,2","5 2.976,4","5 3.204,6","4,6","2.319,5" diff --git a/src/test/resources/technology/tabula/json/AnimalSounds1.json b/src/test/resources/technology/tabula/json/AnimalSounds1.json new file mode 100644 index 00000000..8511b786 --- /dev/null +++ b/src/test/resources/technology/tabula/json/AnimalSounds1.json 
@@ -0,0 +1 @@ +[{"extraction_method":"lattice","page_number":1,"top":0.006499578,"left":56.8,"width":241.1999969482422,"height":315.36407470703125,"right":298.0,"bottom":315.37057,"data":[[{"top":0.006499578,"left":56.8,"width":79.19999694824219,"height":95.31405639648438,"text":"Animal"},{"top":0.006499578,"left":136.0,"width":61.0,"height":95.31405639648438,"text":"Action"},{"top":0.006499578,"left":197.0,"width":101.0,"height":95.31405639648438,"text":"Result"}],[{"top":95.32056,"left":56.8,"width":79.19999694824219,"height":23.050010681152344,"text":"Cat"},{"top":95.32056,"left":136.0,"width":61.0,"height":23.050010681152344,"text":"Says"},{"top":95.32056,"left":197.0,"width":101.0,"height":23.050010681152344,"text":"Meow"}],[{"top":118.37057,"left":56.8,"width":79.19999694824219,"height":63.99999237060547,"text":"Parastratiosph\recomyiastratio\rsphecomyioid\res"},{"top":118.37057,"left":136.0,"width":61.0,"height":63.99999237060547,"text":"Says"},{"top":118.37057,"left":197.0,"width":101.0,"height":63.99999237060547,"text":"bzzzzzzz"}],[{"top":182.37056,"left":56.8,"width":79.19999694824219,"height":133.00001525878906,"text":"Fox"},{"top":182.37056,"left":136.0,"width":61.0,"height":133.00001525878906,"text":"Says"},{"top":182.37056,"left":197.0,"width":101.0,"height":133.00001525878906,"text":"Ring-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding"}]]},{"extraction_method":"lattice","page_number":1,"top":0.006499578,"left":313.35715,"width":241.55941772460938,"height":259.2640380859375,"right":554.91656,"bottom":259.27054,"data":[[{"top":0.006499578,"left":313.35715,"width":77.64285278320312,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":391.0,"width":66.0,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":457.0,"width":97.91656494140625,"height":72.26405334472656,"text":""}],[{"top":72.27055,"left":313.35715,"width":77.64285278320312,"height":23.050003051757812,"text":"An
imal"},{"top":72.27055,"left":391.0,"width":66.0,"height":23.050003051757812,"text":"Action"},{"top":72.27055,"left":457.0,"width":97.91656494140625,"height":23.050003051757812,"text":"Result"}],[{"top":95.32056,"left":313.35715,"width":77.64285278320312,"height":35.94999694824219,"text":"Dogs/wolves/\rMore dogs"},{"top":95.32056,"left":391.0,"width":66.0,"height":35.94999694824219,"text":"Says"},{"top":95.32056,"left":457.0,"width":97.91656494140625,"height":35.94999694824219,"text":"Bow-wow/\rruff-ruff"}],[{"top":131.27055,"left":313.35715,"width":77.64285278320312,"height":36.40000915527344,"text":"Donkey"},{"top":131.27055,"left":391.0,"width":66.0,"height":36.40000915527344,"text":"Says"},{"top":131.27055,"left":457.0,"width":97.91656494140625,"height":36.40000915527344,"text":"Hee-Haw Hee-\rHaw"}],[{"top":167.67056,"left":313.35715,"width":77.64285278320312,"height":91.5999755859375,"text":"Fox"},{"top":167.67056,"left":391.0,"width":66.0,"height":91.5999755859375,"text":"Says"},{"top":167.67056,"left":457.0,"width":97.91656494140625,"height":91.5999755859375,"text":"Wa-pa-pa-pa-\rpa-pa-pow\rWa-pa-pa-pa-\rpa-pow\rWa-pa-pa-pa-\rpa-pa-pow"}]]}] diff --git a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json index 3a2eced9..fb2d478e 100644 --- a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json +++ b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json @@ -1 +1 @@ -{"extraction_method":"stream","top":0.0,"left":0.0,"width":549.0399780273438,"height":782.0400390625,"data":[[{"top":279.87,"left":28.56,"width":175.21029663085938,"height":6.449999809265137,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":279.87,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por 
Santiago"},{"top":280.59,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":279.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":296.31,"left":28.56,"width":141.71029663085938,"height":6.449999809265137,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":296.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":297.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":296.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":312.87,"left":28.56,"width":83.75028228759766,"height":6.449999809265137,"text":"ALONSO, María Luz"},{"top":312.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":313.59,"left":397.56,"width":42.250274658203125,"height":6.449999809265137,"text":"La Pampa"},{"top":312.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":329.31,"left":28.56,"width":84.25028228759766,"height":6.449999809265137,"text":"ARENA, Celia Isabel"},{"top":329.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":330.15,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":329.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":345.87,"left":28.56,"width":110.29029846191406,"height":6.449999809265137,"text":"ARREGUI, Andrés Roberto"},{"top":345.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":346.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos 
Aires"},{"top":345.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":362.31,"left":28.56,"width":115.69029235839844,"height":6.449999809265137,"text":"AVOSCAN, Herman Horacio"},{"top":362.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":363.15,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":362.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":378.87,"left":28.56,"width":95.6902847290039,"height":6.449999809265137,"text":"BALCEDO, María Ester"},{"top":378.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":379.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":378.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":395.31,"left":28.56,"width":127.69029235839844,"height":6.449999809265137,"text":"BARRANDEGUY, Raúl Enrique"},{"top":395.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":396.15,"left":397.56,"width":43.6702880859375,"height":6.449999809265137,"text":"Entre Ríos"},{"top":395.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":411.87,"left":28.56,"width":106.69029235839844,"height":6.449999809265137,"text":"BASTERRA, Luis Eugenio"},{"top":411.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":412.59,"left":397.56,"width":36.73028564453125,"height":6.449999809265137,"text":"Formosa"},{"top":411.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":428.31,"left":28.56,"width":92.21028137207031,"height":6.449999809265137,"text":"BEDANO, Nora 
Esther"},{"top":428.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":429.15,"left":397.56,"width":35.6602783203125,"height":6.449999809265137,"text":"Córdoba"},{"top":428.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":444.87,"left":28.56,"width":100.69029235839844,"height":6.449999809265137,"text":"BERNAL, María Eugenia"},{"top":444.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":445.59,"left":397.56,"width":22.200286865234375,"height":6.449999809265137,"text":"Jujuy"},{"top":444.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":461.31,"left":28.56,"width":112.21029663085938,"height":6.449999809265137,"text":"BERTONE, Rosana Andrea"},{"top":461.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":462.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra del Fuego"},{"top":461.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":477.87,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"BIANCHI, María del Carmen"},{"top":477.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":478.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. 
As."},{"top":477.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":494.31,"left":28.56,"width":115.19029235839844,"height":6.449999809265137,"text":"BIDEGAIN, Gloria Mercedes"},{"top":494.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":495.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":494.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":510.87,"left":28.56,"width":66.25028228759766,"height":6.449999809265137,"text":"BRAWER, Mara"},{"top":510.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":511.59,"left":397.56,"width":74.6702880859375,"height":6.449999809265137,"text":"Cdad. Aut. Bs. As."},{"top":510.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":527.31,"left":28.56,"width":90.73028564453125,"height":6.449999809265137,"text":"BRILLO, José Ricardo"},{"top":527.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular Neuquino"},{"top":528.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":527.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":543.87,"left":28.56,"width":120.73028564453125,"height":6.449999809265137,"text":"BROMBERG, Isaac Benjamín"},{"top":543.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":544.59,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":543.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":560.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"BRUE, Daniel 
Agustín"},{"top":560.31,"left":213.72,"width":108.25027465820312,"height":6.449999809265137,"text":"Frente Cívico por Santiago"},{"top":561.15,"left":397.56,"width":79.69027709960938,"height":6.449999809265137,"text":"Santiago del Estero"},{"top":560.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":576.87,"left":28.56,"width":72.23028564453125,"height":6.449999809265137,"text":"CALCAGNO, Eric"},{"top":576.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":577.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":576.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":593.31,"left":28.56,"width":114.73028564453125,"height":6.449999809265137,"text":"CARLOTTO, Remo Gerardo"},{"top":593.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":594.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":593.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":609.87,"left":28.56,"width":122.1702880859375,"height":6.449999809265137,"text":"CARMONA, Guillermo Ramón"},{"top":609.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":610.59,"left":397.56,"width":38.1602783203125,"height":6.449999809265137,"text":"Mendoza"},{"top":609.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":626.31,"left":28.56,"width":124.73028564453125,"height":6.449999809265137,"text":"CATALAN MAGNI, Julio César"},{"top":626.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":627.15,"left":397.56,"width":67.21026611328125,"height":6.449999809265137,"text":"Tierra 
del Fuego"},{"top":626.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":642.87,"left":28.56,"width":88.6902847290039,"height":6.449999809265137,"text":"CEJAS, Jorge Alberto"},{"top":642.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":643.59,"left":397.56,"width":41.6602783203125,"height":6.449999809265137,"text":"Rio Negro"},{"top":642.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":659.31,"left":28.56,"width":89.7702865600586,"height":6.449999809265137,"text":"CHIENO, María Elena"},{"top":659.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":660.15,"left":397.56,"width":42.72027587890625,"height":6.449999809265137,"text":"Corrientes"},{"top":659.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":675.87,"left":28.56,"width":96.25028228759766,"height":6.449999809265137,"text":"CIAMPINI, José Alberto"},{"top":675.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":676.59,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":675.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":692.31,"left":28.56,"width":131.77029418945312,"height":6.449999809265137,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":692.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":693.15,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":692.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":708.87,"left":28.56,"width":62.15028381347656,"height":6.449999809265137,"text":"CLERI, 
Marcos"},{"top":708.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":709.59,"left":397.56,"width":37.690277099609375,"height":6.449999809265137,"text":"Santa Fe"},{"top":708.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":725.31,"left":28.56,"width":101.77029418945312,"height":6.449999809265137,"text":"COMELLI, Alicia Marcela"},{"top":725.31,"left":213.72,"width":121.81027221679688,"height":6.449999809265137,"text":"Movimiento Popular Neuquino"},{"top":726.15,"left":397.56,"width":37.700286865234375,"height":6.449999809265137,"text":"Neuquén"},{"top":725.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":741.87,"left":28.56,"width":88.1902847290039,"height":6.449999809265137,"text":"CONTI, Diana Beatriz"},{"top":741.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":742.59,"left":397.56,"width":54.71026611328125,"height":6.449999809265137,"text":"Buenos Aires"},{"top":741.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":758.31,"left":28.56,"width":98.75028228759766,"height":6.449999809265137,"text":"CORDOBA, Stella Maris"},{"top":758.31,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - PJ"},{"top":759.15,"left":397.56,"width":38.6602783203125,"height":6.449999809265137,"text":"Tucumán"},{"top":758.31,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}],[{"top":774.87,"left":28.56,"width":105.25028991699219,"height":6.449999809265137,"text":"CURRILEN, Oscar Rubén"},{"top":774.87,"left":213.72,"width":108.23028564453125,"height":6.449999809265137,"text":"Frente para la Victoria - 
PJ"},{"top":775.59,"left":397.56,"width":30.2802734375,"height":6.449999809265137,"text":"Chubut"},{"top":774.87,"left":494.04,"width":54.999969482421875,"height":6.449999809265137,"text":"AFIRMATIVO"}]]} \ No newline at end of file +{"extraction_method":"stream","page_number":1,"top":269.875,"left":12.75,"width":548.25,"height":520.625,"right":561.0,"bottom":790.5,"data":[[{"top":281.82,"left":28.56,"width":175.21029663085938,"height":4.5,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":281.82,"left":213.72,"width":108.25027465820312,"height":4.5,"text":"Frente Cívico por Santiago"},{"top":282.54,"left":397.56,"width":79.69027709960938,"height":4.5,"text":"Santiago del Estero"},{"top":281.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":298.26,"left":28.56,"width":141.71029663085938,"height":4.5,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":298.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":299.1,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio Negro"},{"top":298.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":314.82,"left":28.56,"width":83.75028228759766,"height":4.5,"text":"ALONSO, María Luz"},{"top":314.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":315.54,"left":397.56,"width":42.250274658203125,"height":4.5,"text":"La Pampa"},{"top":314.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":331.26,"left":28.56,"width":84.25028228759766,"height":4.5,"text":"ARENA, Celia Isabel"},{"top":331.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":332.1,"left":397.56,"width":37.690277099609375,"height":4.5,"text":"Santa 
Fe"},{"top":331.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":347.82,"left":28.56,"width":110.29029846191406,"height":4.5,"text":"ARREGUI, Andrés Roberto"},{"top":347.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":348.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":347.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":364.26,"left":28.56,"width":115.69029235839844,"height":4.5,"text":"AVOSCAN, Herman Horacio"},{"top":364.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":365.1,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio Negro"},{"top":364.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":380.82,"left":28.56,"width":95.6902847290039,"height":4.5,"text":"BALCEDO, María Ester"},{"top":380.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":381.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":380.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":397.26,"left":28.56,"width":127.69029235839844,"height":4.5,"text":"BARRANDEGUY, Raúl Enrique"},{"top":397.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":398.1,"left":397.56,"width":43.6702880859375,"height":4.5,"text":"Entre Ríos"},{"top":397.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":413.82,"left":28.56,"width":106.69029235839844,"height":4.5,"text":"BASTERRA, Luis Eugenio"},{"top":413.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - 
PJ"},{"top":414.54,"left":397.56,"width":36.73028564453125,"height":4.5,"text":"Formosa"},{"top":413.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":430.26,"left":28.56,"width":92.21028137207031,"height":4.5,"text":"BEDANO, Nora Esther"},{"top":430.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":431.1,"left":397.56,"width":35.6602783203125,"height":4.5,"text":"Córdoba"},{"top":430.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":446.82,"left":28.56,"width":100.69029235839844,"height":4.5,"text":"BERNAL, María Eugenia"},{"top":446.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":447.54,"left":397.56,"width":22.200286865234375,"height":4.5,"text":"Jujuy"},{"top":446.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":463.26,"left":28.56,"width":112.21029663085938,"height":4.5,"text":"BERTONE, Rosana Andrea"},{"top":463.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":464.1,"left":397.56,"width":67.21026611328125,"height":4.5,"text":"Tierra del Fuego"},{"top":463.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":479.82,"left":28.56,"width":114.73028564453125,"height":4.5,"text":"BIANCHI, María del Carmen"},{"top":479.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":480.54,"left":397.56,"width":74.6702880859375,"height":4.5,"text":"Cdad. Aut. Bs. 
As."},{"top":479.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":496.26,"left":28.56,"width":115.19029235839844,"height":4.5,"text":"BIDEGAIN, Gloria Mercedes"},{"top":496.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":497.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":496.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":512.82,"left":28.56,"width":66.25028228759766,"height":4.5,"text":"BRAWER, Mara"},{"top":512.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":513.54,"left":397.56,"width":74.6702880859375,"height":4.5,"text":"Cdad. Aut. Bs. As."},{"top":512.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":529.26,"left":28.56,"width":90.73028564453125,"height":4.5,"text":"BRILLO, José Ricardo"},{"top":529.26,"left":213.72,"width":121.81027221679688,"height":4.5,"text":"Movimiento Popular Neuquino"},{"top":530.1,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":529.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":545.82,"left":28.56,"width":120.73028564453125,"height":4.5,"text":"BROMBERG, Isaac Benjamín"},{"top":545.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":546.54,"left":397.56,"width":38.6602783203125,"height":4.5,"text":"Tucumán"},{"top":545.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":562.26,"left":28.56,"width":89.7702865600586,"height":4.5,"text":"BRUE, Daniel Agustín"},{"top":562.26,"left":213.72,"width":108.25027465820312,"height":4.5,"text":"Frente Cívico por Santiago"},{"top":563.1,"left":397.56,"width":79.69027709960938,"height":4.5,"text":"Santiago del 
Estero"},{"top":562.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":578.82,"left":28.56,"width":72.23028564453125,"height":4.5,"text":"CALCAGNO, Eric"},{"top":578.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":579.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":578.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":595.26,"left":28.56,"width":114.73028564453125,"height":4.5,"text":"CARLOTTO, Remo Gerardo"},{"top":595.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":596.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":595.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":611.82,"left":28.56,"width":122.1702880859375,"height":4.5,"text":"CARMONA, Guillermo Ramón"},{"top":611.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":612.54,"left":397.56,"width":38.1602783203125,"height":4.5,"text":"Mendoza"},{"top":611.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":628.26,"left":28.56,"width":124.73028564453125,"height":4.5,"text":"CATALAN MAGNI, Julio César"},{"top":628.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":629.1,"left":397.56,"width":67.21026611328125,"height":4.5,"text":"Tierra del Fuego"},{"top":628.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":644.82,"left":28.56,"width":88.6902847290039,"height":4.5,"text":"CEJAS, Jorge Alberto"},{"top":644.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":645.54,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio 
Negro"},{"top":644.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":661.26,"left":28.56,"width":89.7702865600586,"height":4.5,"text":"CHIENO, María Elena"},{"top":661.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":662.1,"left":397.56,"width":42.72027587890625,"height":4.5,"text":"Corrientes"},{"top":661.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":677.82,"left":28.56,"width":96.25028228759766,"height":4.5,"text":"CIAMPINI, José Alberto"},{"top":677.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":678.54,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":677.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":694.26,"left":28.56,"width":131.77029418945312,"height":4.5,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":694.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":695.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":694.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":710.82,"left":28.56,"width":62.15028381347656,"height":4.5,"text":"CLERI, Marcos"},{"top":710.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":711.54,"left":397.56,"width":37.690277099609375,"height":4.5,"text":"Santa Fe"},{"top":710.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":727.26,"left":28.56,"width":101.77029418945312,"height":4.5,"text":"COMELLI, Alicia Marcela"},{"top":727.26,"left":213.72,"width":121.81027221679688,"height":4.5,"text":"Movimiento Popular 
Neuquino"},{"top":728.1,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":727.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":743.82,"left":28.56,"width":88.1902847290039,"height":4.5,"text":"CONTI, Diana Beatriz"},{"top":743.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":744.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":743.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":760.26,"left":28.56,"width":98.75028228759766,"height":4.5,"text":"CORDOBA, Stella Maris"},{"top":760.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":761.1,"left":397.56,"width":38.6602783203125,"height":4.5,"text":"Tucumán"},{"top":760.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":776.82,"left":28.56,"width":105.25028991699219,"height":4.5,"text":"CURRILEN, Oscar Rubén"},{"top":776.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":777.54,"left":397.56,"width":30.2802734375,"height":4.5,"text":"Chubut"},{"top":776.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}]]} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/json/schools.json b/src/test/resources/technology/tabula/json/schools.json index 1c9ed032..d251bb20 100644 --- a/src/test/resources/technology/tabula/json/schools.json +++ b/src/test/resources/technology/tabula/json/schools.json @@ -1,2 +1 @@ - -{"extraction_method":"lattice","top":54.315777,"left":16.97,"width":745.3303833007812,"height":483.9442443847656,"data":[[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":54.315777,"left":18.588728,"width":153.91128540039062,"height":8.744216918945312,"text":"Last 
Name"},{"top":54.315777,"left":172.50002,"width":69.71998596191406,"height":8.744216918945312,"text":"First Name"},{"top":54.315777,"left":242.22,"width":104.760009765625,"height":8.744216918945312,"text":"Address"},{"top":54.315777,"left":346.98,"width":70.80001831054688,"height":8.744216918945312,"text":"City"},{"top":54.315777,"left":417.78003,"width":20.87994384765625,"height":8.744216918945312,"text":"State"},{"top":54.315777,"left":438.65997,"width":49.91998291015625,"height":8.744216918945312,"text":"Zip"},{"top":54.315777,"left":488.57996,"width":78.47998046875,"height":8.744216918945312,"text":"Occupation"},{"top":54.315777,"left":567.05994,"width":106.4400634765625,"height":8.744216918945312,"text":"Employer"},{"top":54.315777,"left":673.5,"width":45.52001953125,"height":8.744216918945312,"text":"Date"},{"top":54.315777,"left":719.02,"width":43.28033447265625,"height":8.744216918945312,"text":"Amount"}],[{"top":63.059994,"left":16.97,"width":1.6187286376953125,"height":10.799991607666016,"text":""},{"top":63.059994,"left":18.588728,"width":153.91128540039062,"height":10.799991607666016,"text":"Lidstad"},{"top":63.059994,"left":172.50002,"width":69.71998596191406,"height":10.799991607666016,"text":"Dick \u0026 Peg"},{"top":63.059994,"left":242.22,"width":104.760009765625,"height":10.799991607666016,"text":"62 Mississippi River Blvd N"},{"top":63.059994,"left":346.98,"width":70.80001831054688,"height":10.799991607666016,"text":"Saint 
Paul"},{"top":63.059994,"left":417.78003,"width":20.87994384765625,"height":10.799991607666016,"text":"MN"},{"top":63.059994,"left":438.65997,"width":49.91998291015625,"height":10.799991607666016,"text":"55104"},{"top":63.059994,"left":488.57996,"width":78.47998046875,"height":10.799991607666016,"text":"retired"},{"top":63.059994,"left":567.05994,"width":106.4400634765625,"height":10.799991607666016,"text":""},{"top":63.059994,"left":673.5,"width":45.52001953125,"height":10.799991607666016,"text":"10/12/2012"},{"top":63.059994,"left":719.02,"width":43.28033447265625,"height":10.799991607666016,"text":"60.00"}],[{"top":73.859985,"left":16.97,"width":1.6187286376953125,"height":10.800025939941406,"text":""},{"top":73.859985,"left":18.588728,"width":153.91128540039062,"height":10.800025939941406,"text":"Strom"},{"top":73.859985,"left":172.50002,"width":69.71998596191406,"height":10.800025939941406,"text":"Pam"},{"top":73.859985,"left":242.22,"width":104.760009765625,"height":10.800025939941406,"text":"1229 Hague Ave"},{"top":73.859985,"left":346.98,"width":70.80001831054688,"height":10.800025939941406,"text":"St. 
Paul"},{"top":73.859985,"left":417.78003,"width":20.87994384765625,"height":10.800025939941406,"text":"MN"},{"top":73.859985,"left":438.65997,"width":49.91998291015625,"height":10.800025939941406,"text":"55104"},{"top":73.859985,"left":488.57996,"width":78.47998046875,"height":10.800025939941406,"text":""},{"top":73.859985,"left":567.05994,"width":106.4400634765625,"height":10.800025939941406,"text":""},{"top":73.859985,"left":673.5,"width":45.52001953125,"height":10.800025939941406,"text":"9/12/2012"},{"top":73.859985,"left":719.02,"width":43.28033447265625,"height":10.800025939941406,"text":"60.00"}],[{"top":84.66001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":84.66001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Seeba"},{"top":84.66001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Louise \u0026 Paul"},{"top":84.66001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"1399 Sheldon St"},{"top":84.66001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":84.66001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":84.66001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55108"},{"top":84.66001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"BOE"},{"top":84.66001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"City of Saint Paul"},{"top":84.66001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/12/2012"},{"top":84.66001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":95.46001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":95.46001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Schumacher / 
Bales"},{"top":95.46001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Douglas L. / Patricia"},{"top":95.46001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"948 County Rd. D W"},{"top":95.46001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":95.46001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":95.46001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55126"},{"top":95.46001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":""},{"top":95.46001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":""},{"top":95.46001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/13/2012"},{"top":95.46001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":106.26,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":106.26,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Abrams"},{"top":106.26,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Marjorie"},{"top":106.26,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"238 8th St east"},{"top":106.26,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"St 
Paul"},{"top":106.26,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":106.26,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55101"},{"top":106.26,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"8/8/2012"},{"top":106.26,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"75.00"}],[{"top":117.06,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":117.06,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Crouse / Schroeder"},{"top":117.06,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Abigail / Jonathan"},{"top":117.06,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1545 Branston St."},{"top":117.06,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":117.06,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":117.06,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55108"},{"top":117.06,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":""},{"top":117.06,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":117.06,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":117.06,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"75.00"}],[{"top":127.859985,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":127.859985,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"O\u0027Connell"},{"top":127.859985,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Jean"},{"top":127.859985,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"888 Ivy Ave W."},{"top":127.859985,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Saint 
Paul"},{"top":127.859985,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":127.859985,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55117"},{"top":127.859985,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":""},{"top":127.859985,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":127.859985,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/30/2012"},{"top":127.859985,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":138.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":138.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Reese"},{"top":138.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheri A"},{"top":138.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"981 Davern St."},{"top":138.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":138.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":138.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55116"},{"top":138.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Public Relations"},{"top":138.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Far North 
Spirits"},{"top":138.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/3/2012"},{"top":138.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"75.00"}],[{"top":149.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":149.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Serrano"},{"top":149.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Luz Maria"},{"top":149.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"5559 Park Place Drive"},{"top":149.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Shoreview"},{"top":149.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":149.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55126"},{"top":149.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"retired"},{"top":149.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":149.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":149.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":160.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":160.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Alsiddiqui"},{"top":160.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Jaber"},{"top":160.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"13056 Euclid Ave"},{"top":160.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Apple 
Valley"},{"top":160.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":160.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55124"},{"top":160.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"SPPS budget analyst"},{"top":160.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":160.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":160.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":171.06,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":171.06,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Andrastek"},{"top":171.06,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"John"},{"top":171.06,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"774 Ashland Ave"},{"top":171.06,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"St Paul"},{"top":171.06,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":171.06,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55104"},{"top":171.06,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"asst 
principal"},{"top":171.06,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":171.06,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"8/22/2012"},{"top":171.06,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":181.86,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":181.86,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Anfang"},{"top":181.86,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Heather L. \u0026 Matt"},{"top":181.86,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1635 Bayard Ave"},{"top":181.86,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":181.86,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":181.86,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55116"},{"top":181.86,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"Ex Director"},{"top":181.86,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"BOMA"},{"top":181.86,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":181.86,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":192.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":192.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Aronson"},{"top":192.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Roger J."},{"top":192.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4852 Emerson Ave. 
S."},{"top":192.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Minneapolis"},{"top":192.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":192.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55419"},{"top":192.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Attorney at Law"},{"top":192.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":""},{"top":192.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/20/2012"},{"top":192.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":203.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":203.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Aronson"},{"top":203.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Roger J."},{"top":203.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"4852 Emerson Ave. 
S."},{"top":203.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Minneapolis"},{"top":203.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":203.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55419"},{"top":203.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"Attorney at Law"},{"top":203.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":203.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":203.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":214.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":214.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Banks"},{"top":214.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Michael or Patricia"},{"top":214.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1664 Van Buren Ave."},{"top":214.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":214.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":214.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":214.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"retired social 
worker"},{"top":214.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":214.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":214.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":225.06,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":225.06,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Bennett"},{"top":225.06,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"David"},{"top":225.06,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"25 Birchwood Rd."},{"top":225.06,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Mahtomedi"},{"top":225.06,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":225.06,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":225.06,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/3/2012"},{"top":225.06,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":235.86002,"left":16.97,"width":1.6187286376953125,"height":10.799972534179688,"text":""},{"top":235.86002,"left":18.588728,"width":153.91128540039062,"height":10.799972534179688,"text":"Berry"},{"top":235.86002,"left":172.50002,"width":69.71998596191406,"height":10.799972534179688,"text":"Margaret"},{"top":235.86002,"left":242.22,"width":104.760009765625,"height":10.799972534179688,"text":"1267 Pike Lake Dr"},{"top":235.86002,"left":346.98,"width":70.80001831054688,"height":10.799972534179688,"text":"New 
Brighton"},{"top":235.86002,"left":417.78003,"width":20.87994384765625,"height":10.799972534179688,"text":"MN"},{"top":235.86002,"left":438.65997,"width":49.91998291015625,"height":10.799972534179688,"text":"55112"},{"top":235.86002,"left":488.57996,"width":78.47998046875,"height":10.799972534179688,"text":"Asst Principal"},{"top":235.86002,"left":567.05994,"width":106.4400634765625,"height":10.799972534179688,"text":"Saint Paul Public Schools"},{"top":235.86002,"left":673.5,"width":45.52001953125,"height":10.799972534179688,"text":"8/8/2012"},{"top":235.86002,"left":719.02,"width":43.28033447265625,"height":10.799972534179688,"text":"100.00"}],[{"top":246.65999,"left":16.97,"width":1.6187286376953125,"height":10.800033569335938,"text":""},{"top":246.65999,"left":18.588728,"width":153.91128540039062,"height":10.800033569335938,"text":"Boyle"},{"top":246.65999,"left":172.50002,"width":69.71998596191406,"height":10.800033569335938,"text":"Matthew C. \u0026 Eliza"},{"top":246.65999,"left":242.22,"width":104.760009765625,"height":10.800033569335938,"text":"2165 Princeton Ave"},{"top":246.65999,"left":346.98,"width":70.80001831054688,"height":10.800033569335938,"text":"Saint 
Paul"},{"top":246.65999,"left":417.78003,"width":20.87994384765625,"height":10.800033569335938,"text":"MN"},{"top":246.65999,"left":438.65997,"width":49.91998291015625,"height":10.800033569335938,"text":"55105"},{"top":246.65999,"left":488.57996,"width":78.47998046875,"height":10.800033569335938,"text":""},{"top":246.65999,"left":567.05994,"width":106.4400634765625,"height":10.800033569335938,"text":""},{"top":246.65999,"left":673.5,"width":45.52001953125,"height":10.800033569335938,"text":"10/6/2012"},{"top":246.65999,"left":719.02,"width":43.28033447265625,"height":10.800033569335938,"text":"100.00"}],[{"top":257.46002,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":257.46002,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":257.46002,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John"},{"top":257.46002,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles"},{"top":257.46002,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"St. 
Paul"},{"top":257.46002,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":257.46002,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":257.46002,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BOE"},{"top":257.46002,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"retired"},{"top":257.46002,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"8/26/2012"},{"top":257.46002,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":268.26,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":268.26,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":268.26,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John F."},{"top":268.26,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles Ave"},{"top":268.26,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":268.26,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":268.26,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":268.26,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BoE"},{"top":268.26,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":268.26,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/4/2012"},{"top":268.26,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":279.06,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":279.06,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Brown"},{"top":279.06,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Mike"},{"top":279.06,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1440 Goodrich Ave"},{"top":279.06,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":279.06,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":279.06,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":279.06,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":279.06,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":279.06,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"9/23/2012"},{"top":279.06,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":289.84,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":289.84,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Cacy"},{"top":289.84,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Jill"},{"top":289.84,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"1409 Smith Ave So"},{"top":289.84,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"West St. 
Paul"},{"top":289.84,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":289.84,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55118"},{"top":289.84,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Administrator"},{"top":289.84,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"SPPS"},{"top":289.84,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"9/12/2012"},{"top":289.84,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":300.65997,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":300.65997,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cardwell"},{"top":300.65997,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Eileen"},{"top":300.65997,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4172 Bridgewater Ct"},{"top":300.65997,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Vadnais 
Height"},{"top":300.65997,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":300.65997,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55127"},{"top":300.65997,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/3/2012"},{"top":300.65997,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":311.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":311.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carlstrom"},{"top":311.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheryl"},{"top":311.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4244 Oakmede Ln"},{"top":311.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"St Paul"},{"top":311.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":311.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":311.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Satff"},{"top":311.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public 
Schools"},{"top":311.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/8/2012"},{"top":311.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":322.26,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":322.26,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carter"},{"top":322.26,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Melvin W. \u0026 Willet"},{"top":322.26,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"405 Western Ave N"},{"top":322.26,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":322.26,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":322.26,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55103"},{"top":322.26,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Cty Commissioner"},{"top":322.26,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Ramsey Cty"},{"top":322.26,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/8/2012"},{"top":322.26,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":333.06003,"left":16.97,"width":1.6187286376953125,"height":10.8199462890625,"text":""},{"top":333.06003,"left":18.588728,"width":153.91128540039062,"height":10.8199462890625,"text":"Caruso"},{"top":333.06003,"left":172.50002,"width":69.71998596191406,"height":10.8199462890625,"text":"Sarah"},{"top":333.06003,"left":242.22,"width":104.760009765625,"height":10.8199462890625,"text":"2615 Newton Ave 
S"},{"top":333.06003,"left":346.98,"width":70.80001831054688,"height":10.8199462890625,"text":"Minneapolis"},{"top":333.06003,"left":417.78003,"width":20.87994384765625,"height":10.8199462890625,"text":"MN"},{"top":333.06003,"left":438.65997,"width":49.91998291015625,"height":10.8199462890625,"text":"55405"},{"top":333.06003,"left":488.57996,"width":78.47998046875,"height":10.8199462890625,"text":"CEO"},{"top":333.06003,"left":567.05994,"width":106.4400634765625,"height":10.8199462890625,"text":"United Way"},{"top":333.06003,"left":673.5,"width":45.52001953125,"height":10.8199462890625,"text":"9/12/2012"},{"top":333.06003,"left":719.02,"width":43.28033447265625,"height":10.8199462890625,"text":"100.00"}],[{"top":343.87997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":343.87997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Casey /Trewartha"},{"top":343.87997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Kerry F. 
/ Kelly A."},{"top":343.87997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"2266 Goodrich Ave"},{"top":343.87997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":343.87997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":343.87997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":343.87997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":343.87997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":343.87997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/4/2012"},{"top":343.87997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":354.65997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":354.65997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cassidy"},{"top":354.65997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Paul D."},{"top":354.65997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1990 Dayton Ave"},{"top":354.65997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":354.65997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":354.65997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55104"},{"top":354.65997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":354.65997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":354.65997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/6/2012"},{"top":354.65997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":365.43997,"left":16.97,"width":1.6187286376953125,"height":10.820037841796875,"text":""},{"top":365.43997,"left":18.588728,"width":153.91128540039062,"height":10.820037841796875,"text":"Causey"},{"top":365.43997,"left":172.50002,"width":69.71998596191406,"height":10.820037841796875,"text":"Christopher"},{"top":365.43997,"left":242.22,"width":104.760009765625,"height":10.820037841796875,"text":"2181 Doswell Avenue"},{"top":365.43997,"left":346.98,"width":70.80001831054688,"height":10.820037841796875,"text":"Saint 
Paul"},{"top":365.43997,"left":417.78003,"width":20.87994384765625,"height":10.820037841796875,"text":"MN"},{"top":365.43997,"left":438.65997,"width":49.91998291015625,"height":10.820037841796875,"text":"55108"},{"top":365.43997,"left":488.57996,"width":78.47998046875,"height":10.820037841796875,"text":"finance"},{"top":365.43997,"left":567.05994,"width":106.4400634765625,"height":10.820037841796875,"text":""},{"top":365.43997,"left":673.5,"width":45.52001953125,"height":10.820037841796875,"text":"9/3/2012"},{"top":365.43997,"left":719.02,"width":43.28033447265625,"height":10.820037841796875,"text":"100.00"}],[{"top":376.26,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":376.26,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Christiansen"},{"top":376.26,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Lena"},{"top":376.26,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"2453 Como Ave"},{"top":376.26,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"St Paul"},{"top":376.26,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":376.26,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55108"},{"top":376.26,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Principal"},{"top":376.26,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"Saint Paul Public 
Schools"},{"top":376.26,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"8/8/2012"},{"top":376.26,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":387.08,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":387.08,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Clapp"},{"top":387.08,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Agustus (Bill)"},{"top":387.08,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"757 Osceola Ave #1"},{"top":387.08,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":387.08,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":387.08,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":387.08,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"retired"},{"top":387.08,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":387.08,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/2/2012"},{"top":387.08,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":397.86,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":397.86,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cohen"},{"top":397.86,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Brad A."},{"top":397.86,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1460 Raymond Ave"},{"top":397.86,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":397.86,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":397.86,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55108"},{"top":397.86,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"academic technology"},{"top":397.86,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":"UMN"},{"top":397.86,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/17/2012"},{"top":397.86,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":408.63998,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":408.63998,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Commers"},{"top":408.63998,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Beth"},{"top":408.63998,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2294 Commonwealth Ave"},{"top":408.63998,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St Paul"},{"top":408.63998,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":408.63998,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":408.63998,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":"Self 
Employed"},{"top":408.63998,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":"Homemaker"},{"top":408.63998,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"8/1/2012"},{"top":408.63998,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":419.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":419.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Couture"},{"top":419.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Steven"},{"top":419.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"7950 Victoria Way"},{"top":419.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Louise Park"},{"top":419.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":419.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55426"},{"top":419.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Principal"},{"top":419.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public Schools"},{"top":419.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/10/2012"},{"top":419.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":430.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":430.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Crawford"},{"top":430.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Lydia P."},{"top":430.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"1140 Edgcumbe Rd"},{"top":430.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint 
Paul"},{"top":430.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":430.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":430.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":430.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":430.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"9/15/2012"},{"top":430.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":441.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":441.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Cudahy / Ricker"},{"top":441.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Robert \u0026 Mary C"},{"top":441.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"616 Cherokee Ave."},{"top":441.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint Paul"},{"top":441.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":441.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55107"},{"top":441.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"Teacher/Union Presid"},{"top":441.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"SPPS/SPFT"},{"top":441.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"9/18/2012"},{"top":441.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":451.85995,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":451.85995,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cudahy / 
Ricker"},{"top":451.85995,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Robert \u0026 Mary C"},{"top":451.85995,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"616 Cherokee Ave."},{"top":451.85995,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":451.85995,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":451.85995,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55107"},{"top":451.85995,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Teacher/Union Presid"},{"top":451.85995,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"SPPS/SPFT"},{"top":451.85995,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/6/2012"},{"top":451.85995,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":462.65997,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":462.65997,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Currie"},{"top":462.65997,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Elisabeth"},{"top":462.65997,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2274 Hillside Ave"},{"top":462.65997,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St. 
Paul"},{"top":462.65997,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":462.65997,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":462.65997,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":""},{"top":462.65997,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":""},{"top":462.65997,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"9/12/2012"},{"top":462.65997,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":473.47998,"left":16.97,"width":1.6187286376953125,"height":10.780029296875,"text":""},{"top":473.47998,"left":18.588728,"width":153.91128540039062,"height":10.780029296875,"text":"Doane"},{"top":473.47998,"left":172.50002,"width":69.71998596191406,"height":10.780029296875,"text":"Paul V \u0026 Helen R"},{"top":473.47998,"left":242.22,"width":104.760009765625,"height":10.780029296875,"text":"444 Portland Ave"},{"top":473.47998,"left":346.98,"width":70.80001831054688,"height":10.780029296875,"text":"Sant Paul"},{"top":473.47998,"left":417.78003,"width":20.87994384765625,"height":10.780029296875,"text":"MN"},{"top":473.47998,"left":438.65997,"width":49.91998291015625,"height":10.780029296875,"text":"55102"},{"top":473.47998,"left":488.57996,"width":78.47998046875,"height":10.780029296875,"text":"Ex director"},{"top":473.47998,"left":567.05994,"width":106.4400634765625,"height":10.780029296875,"text":"St Paul Teachers\u0027 Retirement 
A"},{"top":473.47998,"left":673.5,"width":45.52001953125,"height":10.780029296875,"text":"10/3/2012"},{"top":473.47998,"left":719.02,"width":43.28033447265625,"height":10.780029296875,"text":"100.00"}],[{"top":484.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":484.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Dougherty"},{"top":484.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Richards S \u0026 Patrici"},{"top":484.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"812 Goodrich Ave"},{"top":484.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint Paul"},{"top":484.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":484.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":484.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":484.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":484.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"10/4/2012"},{"top":484.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":495.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":495.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Driscoll"},{"top":495.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Joe \u0026 Jill"},{"top":495.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"2383 Bourne Ave"},{"top":495.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint 
Paul"},{"top":495.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":495.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55108"},{"top":495.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"HR Manager"},{"top":495.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"LexisNexis"},{"top":495.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"10/18/2012"},{"top":495.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":505.85995,"left":16.97,"width":1.6187286376953125,"height":10.780059814453125,"text":""},{"top":505.85995,"left":18.588728,"width":153.91128540039062,"height":10.780059814453125,"text":"Dubaille / Haugee"},{"top":505.85995,"left":172.50002,"width":69.71998596191406,"height":10.780059814453125,"text":"Florence M. /Eric"},{"top":505.85995,"left":242.22,"width":104.760009765625,"height":10.780059814453125,"text":"1009 Edmund Ave"},{"top":505.85995,"left":346.98,"width":70.80001831054688,"height":10.780059814453125,"text":"Saint 
Paul"},{"top":505.85995,"left":417.78003,"width":20.87994384765625,"height":10.780059814453125,"text":"MN"},{"top":505.85995,"left":438.65997,"width":49.91998291015625,"height":10.780059814453125,"text":"55104"},{"top":505.85995,"left":488.57996,"width":78.47998046875,"height":10.780059814453125,"text":"Florence‐teacher"},{"top":505.85995,"left":567.05994,"width":106.4400634765625,"height":10.780059814453125,"text":"SPPS"},{"top":505.85995,"left":673.5,"width":45.52001953125,"height":10.780059814453125,"text":"10/3/2012"},{"top":505.85995,"left":719.02,"width":43.28033447265625,"height":10.780059814453125,"text":"100.00"}],[{"top":516.64,"left":16.97,"width":1.6187286376953125,"height":10.83990478515625,"text":""},{"top":516.64,"left":18.588728,"width":153.91128540039062,"height":10.83990478515625,"text":"Eaton"},{"top":516.64,"left":172.50002,"width":69.71998596191406,"height":10.83990478515625,"text":"Jim"},{"top":516.64,"left":242.22,"width":104.760009765625,"height":10.83990478515625,"text":"2133 Berkeley Ave"},{"top":516.64,"left":346.98,"width":70.80001831054688,"height":10.83990478515625,"text":"St Paul"},{"top":516.64,"left":417.78003,"width":20.87994384765625,"height":10.83990478515625,"text":"MN"},{"top":516.64,"left":438.65997,"width":49.91998291015625,"height":10.83990478515625,"text":"55105"},{"top":516.64,"left":488.57996,"width":78.47998046875,"height":10.83990478515625,"text":"Principal"},{"top":516.64,"left":567.05994,"width":106.4400634765625,"height":10.83990478515625,"text":"Saint Paul Public Schools"},{"top":516.64,"left":673.5,"width":45.52001953125,"height":10.83990478515625,"text":"8/23/2012"},{"top":516.64,"left":719.02,"width":43.28033447265625,"height":10.83990478515625,"text":"100.00"}],[{"top":527.4799,"left":16.97,"width":1.6187286376953125,"height":10.78009033203125,"text":""},{"top":527.4799,"left":18.588728,"width":153.91128540039062,"height":10.78009033203125,"text":"Eaves 
/Alger"},{"top":527.4799,"left":172.50002,"width":69.71998596191406,"height":10.78009033203125,"text":"Patricia / Stuart"},{"top":527.4799,"left":242.22,"width":104.760009765625,"height":10.78009033203125,"text":"1143 Portladn Ave."},{"top":527.4799,"left":346.98,"width":70.80001831054688,"height":10.78009033203125,"text":"Saint Paul"},{"top":527.4799,"left":417.78003,"width":20.87994384765625,"height":10.78009033203125,"text":"MN"},{"top":527.4799,"left":438.65997,"width":49.91998291015625,"height":10.78009033203125,"text":"55104"},{"top":527.4799,"left":488.57996,"width":78.47998046875,"height":10.78009033203125,"text":""},{"top":527.4799,"left":567.05994,"width":106.4400634765625,"height":10.78009033203125,"text":""},{"top":527.4799,"left":673.5,"width":45.52001953125,"height":10.78009033203125,"text":"10/3/2012"},{"top":527.4799,"left":719.02,"width":43.28033447265625,"height":10.78009033203125,"text":"100.00"}]]} \ No newline at end of file +{"extraction_method":"lattice","page_number":1,"top":54.315777,"left":16.97,"width":745.3303833007812,"height":483.9442443847656,"right":762.30035,"bottom":538.26,"data":[[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":54.315777,"left":18.588728,"width":153.91128540039062,"height":8.744216918945312,"text":"Last Name"},{"top":54.315777,"left":172.50002,"width":69.71998596191406,"height":8.744216918945312,"text":"First 
Name"},{"top":54.315777,"left":242.22,"width":104.760009765625,"height":8.744216918945312,"text":"Address"},{"top":54.315777,"left":346.98,"width":70.80001831054688,"height":8.744216918945312,"text":"City"},{"top":54.315777,"left":417.78003,"width":20.87994384765625,"height":8.744216918945312,"text":"State"},{"top":54.315777,"left":438.65997,"width":49.91998291015625,"height":8.744216918945312,"text":"Zip"},{"top":54.315777,"left":488.57996,"width":78.47998046875,"height":8.744216918945312,"text":"Occupation"},{"top":54.315777,"left":567.05994,"width":106.4400634765625,"height":8.744216918945312,"text":"Employer"},{"top":54.315777,"left":673.5,"width":45.52001953125,"height":8.744216918945312,"text":"Date"},{"top":54.315777,"left":719.02,"width":43.28033447265625,"height":8.744216918945312,"text":"Amount"}],[{"top":63.059994,"left":16.97,"width":1.6187286376953125,"height":10.799991607666016,"text":""},{"top":63.059994,"left":18.588728,"width":153.91128540039062,"height":10.799991607666016,"text":"Lidstad"},{"top":63.059994,"left":172.50002,"width":69.71998596191406,"height":10.799991607666016,"text":"Dick \u0026 Peg"},{"top":63.059994,"left":242.22,"width":104.760009765625,"height":10.799991607666016,"text":"62 Mississippi River Blvd N"},{"top":63.059994,"left":346.98,"width":70.80001831054688,"height":10.799991607666016,"text":"Saint 
Paul"},{"top":63.059994,"left":417.78003,"width":20.87994384765625,"height":10.799991607666016,"text":"MN"},{"top":63.059994,"left":438.65997,"width":49.91998291015625,"height":10.799991607666016,"text":"55104"},{"top":63.059994,"left":488.57996,"width":78.47998046875,"height":10.799991607666016,"text":"retired"},{"top":63.059994,"left":567.05994,"width":106.4400634765625,"height":10.799991607666016,"text":""},{"top":63.059994,"left":673.5,"width":45.52001953125,"height":10.799991607666016,"text":"10/12/2012"},{"top":63.059994,"left":719.02,"width":43.28033447265625,"height":10.799991607666016,"text":"60.00"}],[{"top":73.859985,"left":16.97,"width":1.6187286376953125,"height":10.800025939941406,"text":""},{"top":73.859985,"left":18.588728,"width":153.91128540039062,"height":10.800025939941406,"text":"Strom"},{"top":73.859985,"left":172.50002,"width":69.71998596191406,"height":10.800025939941406,"text":"Pam"},{"top":73.859985,"left":242.22,"width":104.760009765625,"height":10.800025939941406,"text":"1229 Hague Ave"},{"top":73.859985,"left":346.98,"width":70.80001831054688,"height":10.800025939941406,"text":"St. 
Paul"},{"top":73.859985,"left":417.78003,"width":20.87994384765625,"height":10.800025939941406,"text":"MN"},{"top":73.859985,"left":438.65997,"width":49.91998291015625,"height":10.800025939941406,"text":"55104"},{"top":73.859985,"left":488.57996,"width":78.47998046875,"height":10.800025939941406,"text":""},{"top":73.859985,"left":567.05994,"width":106.4400634765625,"height":10.800025939941406,"text":""},{"top":73.859985,"left":673.5,"width":45.52001953125,"height":10.800025939941406,"text":"9/12/2012"},{"top":73.859985,"left":719.02,"width":43.28033447265625,"height":10.800025939941406,"text":"60.00"}],[{"top":84.66001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":84.66001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Seeba"},{"top":84.66001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Louise \u0026 Paul"},{"top":84.66001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"1399 Sheldon St"},{"top":84.66001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":84.66001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":84.66001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55108"},{"top":84.66001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"BOE"},{"top":84.66001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"City of Saint Paul"},{"top":84.66001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/12/2012"},{"top":84.66001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":95.46001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":95.46001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Schumacher / 
Bales"},{"top":95.46001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Douglas L. / Patricia"},{"top":95.46001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"948 County Rd. D W"},{"top":95.46001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":95.46001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":95.46001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55126"},{"top":95.46001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":""},{"top":95.46001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":""},{"top":95.46001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/13/2012"},{"top":95.46001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":106.26,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":106.26,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Abrams"},{"top":106.26,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Marjorie"},{"top":106.26,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"238 8th St east"},{"top":106.26,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"St 
Paul"},{"top":106.26,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":106.26,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55101"},{"top":106.26,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"8/8/2012"},{"top":106.26,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"75.00"}],[{"top":117.06,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":117.06,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Crouse / Schroeder"},{"top":117.06,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Abigail / Jonathan"},{"top":117.06,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1545 Branston St."},{"top":117.06,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":117.06,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":117.06,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55108"},{"top":117.06,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":""},{"top":117.06,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":117.06,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":117.06,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"75.00"}],[{"top":127.859985,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":127.859985,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"O\u0027Connell"},{"top":127.859985,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Jean"},{"top":127.859985,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"888 Ivy Ave W."},{"top":127.859985,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Saint 
Paul"},{"top":127.859985,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":127.859985,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55117"},{"top":127.859985,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":""},{"top":127.859985,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":127.859985,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/30/2012"},{"top":127.859985,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":138.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":138.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Reese"},{"top":138.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheri A"},{"top":138.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"981 Davern St."},{"top":138.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":138.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":138.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55116"},{"top":138.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Public Relations"},{"top":138.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Far North 
Spirits"},{"top":138.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/3/2012"},{"top":138.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"75.00"}],[{"top":149.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":149.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Serrano"},{"top":149.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Luz Maria"},{"top":149.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"5559 Park Place Drive"},{"top":149.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Shoreview"},{"top":149.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":149.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55126"},{"top":149.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"retired"},{"top":149.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":149.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":149.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":160.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":160.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Alsiddiqui"},{"top":160.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Jaber"},{"top":160.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"13056 Euclid Ave"},{"top":160.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Apple 
Valley"},{"top":160.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":160.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55124"},{"top":160.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"SPPS budget analyst"},{"top":160.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":160.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":160.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":171.06,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":171.06,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Andrastek"},{"top":171.06,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"John"},{"top":171.06,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"774 Ashland Ave"},{"top":171.06,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"St Paul"},{"top":171.06,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":171.06,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55104"},{"top":171.06,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"asst 
principal"},{"top":171.06,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":171.06,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"8/22/2012"},{"top":171.06,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":181.86,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":181.86,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Anfang"},{"top":181.86,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Heather L. \u0026 Matt"},{"top":181.86,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1635 Bayard Ave"},{"top":181.86,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":181.86,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":181.86,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55116"},{"top":181.86,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"Ex Director"},{"top":181.86,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"BOMA"},{"top":181.86,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":181.86,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":192.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":192.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Aronson"},{"top":192.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Roger J."},{"top":192.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4852 Emerson Ave. 
S."},{"top":192.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Minneapolis"},{"top":192.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":192.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55419"},{"top":192.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Attorney at Law"},{"top":192.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":""},{"top":192.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/20/2012"},{"top":192.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":203.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":203.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Aronson"},{"top":203.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Roger J."},{"top":203.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"4852 Emerson Ave. 
S."},{"top":203.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Minneapolis"},{"top":203.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":203.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55419"},{"top":203.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"Attorney at Law"},{"top":203.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":203.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":203.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":214.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":214.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Banks"},{"top":214.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Michael or Patricia"},{"top":214.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1664 Van Buren Ave."},{"top":214.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":214.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":214.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":214.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"retired social 
worker"},{"top":214.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":214.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":214.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":225.06,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":225.06,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Bennett"},{"top":225.06,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"David"},{"top":225.06,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"25 Birchwood Rd."},{"top":225.06,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Mahtomedi"},{"top":225.06,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":225.06,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":225.06,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/3/2012"},{"top":225.06,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":235.86002,"left":16.97,"width":1.6187286376953125,"height":10.799972534179688,"text":""},{"top":235.86002,"left":18.588728,"width":153.91128540039062,"height":10.799972534179688,"text":"Berry"},{"top":235.86002,"left":172.50002,"width":69.71998596191406,"height":10.799972534179688,"text":"Margaret"},{"top":235.86002,"left":242.22,"width":104.760009765625,"height":10.799972534179688,"text":"1267 Pike Lake Dr"},{"top":235.86002,"left":346.98,"width":70.80001831054688,"height":10.799972534179688,"text":"New 
Brighton"},{"top":235.86002,"left":417.78003,"width":20.87994384765625,"height":10.799972534179688,"text":"MN"},{"top":235.86002,"left":438.65997,"width":49.91998291015625,"height":10.799972534179688,"text":"55112"},{"top":235.86002,"left":488.57996,"width":78.47998046875,"height":10.799972534179688,"text":"Asst Principal"},{"top":235.86002,"left":567.05994,"width":106.4400634765625,"height":10.799972534179688,"text":"Saint Paul Public Schools"},{"top":235.86002,"left":673.5,"width":45.52001953125,"height":10.799972534179688,"text":"8/8/2012"},{"top":235.86002,"left":719.02,"width":43.28033447265625,"height":10.799972534179688,"text":"100.00"}],[{"top":246.65999,"left":16.97,"width":1.6187286376953125,"height":10.800033569335938,"text":""},{"top":246.65999,"left":18.588728,"width":153.91128540039062,"height":10.800033569335938,"text":"Boyle"},{"top":246.65999,"left":172.50002,"width":69.71998596191406,"height":10.800033569335938,"text":"Matthew C. \u0026 Eliza"},{"top":246.65999,"left":242.22,"width":104.760009765625,"height":10.800033569335938,"text":"2165 Princeton Ave"},{"top":246.65999,"left":346.98,"width":70.80001831054688,"height":10.800033569335938,"text":"Saint 
Paul"},{"top":246.65999,"left":417.78003,"width":20.87994384765625,"height":10.800033569335938,"text":"MN"},{"top":246.65999,"left":438.65997,"width":49.91998291015625,"height":10.800033569335938,"text":"55105"},{"top":246.65999,"left":488.57996,"width":78.47998046875,"height":10.800033569335938,"text":""},{"top":246.65999,"left":567.05994,"width":106.4400634765625,"height":10.800033569335938,"text":""},{"top":246.65999,"left":673.5,"width":45.52001953125,"height":10.800033569335938,"text":"10/6/2012"},{"top":246.65999,"left":719.02,"width":43.28033447265625,"height":10.800033569335938,"text":"100.00"}],[{"top":257.46002,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":257.46002,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":257.46002,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John"},{"top":257.46002,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles"},{"top":257.46002,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"St. 
Paul"},{"top":257.46002,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":257.46002,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":257.46002,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BOE"},{"top":257.46002,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"retired"},{"top":257.46002,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"8/26/2012"},{"top":257.46002,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":268.26,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":268.26,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":268.26,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John F."},{"top":268.26,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles Ave"},{"top":268.26,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":268.26,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":268.26,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":268.26,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BoE"},{"top":268.26,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":268.26,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/4/2012"},{"top":268.26,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":279.06,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":279.06,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Brown"},{"top":279.06,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Mike"},{"top":279.06,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1440 Goodrich Ave"},{"top":279.06,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":279.06,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":279.06,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":279.06,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":279.06,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":279.06,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"9/23/2012"},{"top":279.06,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":289.84,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":289.84,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Cacy"},{"top":289.84,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Jill"},{"top":289.84,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"1409 Smith Ave So"},{"top":289.84,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"West St. 
Paul"},{"top":289.84,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":289.84,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55118"},{"top":289.84,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Administrator"},{"top":289.84,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"SPPS"},{"top":289.84,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"9/12/2012"},{"top":289.84,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":300.65997,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":300.65997,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cardwell"},{"top":300.65997,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Eileen"},{"top":300.65997,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4172 Bridgewater Ct"},{"top":300.65997,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Vadnais 
Height"},{"top":300.65997,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":300.65997,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55127"},{"top":300.65997,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/3/2012"},{"top":300.65997,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":311.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":311.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carlstrom"},{"top":311.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheryl"},{"top":311.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4244 Oakmede Ln"},{"top":311.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"St Paul"},{"top":311.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":311.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":311.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Satff"},{"top":311.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public 
Schools"},{"top":311.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/8/2012"},{"top":311.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":322.26,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":322.26,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carter"},{"top":322.26,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Melvin W. \u0026 Willet"},{"top":322.26,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"405 Western Ave N"},{"top":322.26,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":322.26,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":322.26,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55103"},{"top":322.26,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Cty Commissioner"},{"top":322.26,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Ramsey Cty"},{"top":322.26,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/8/2012"},{"top":322.26,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":333.06003,"left":16.97,"width":1.6187286376953125,"height":10.8199462890625,"text":""},{"top":333.06003,"left":18.588728,"width":153.91128540039062,"height":10.8199462890625,"text":"Caruso"},{"top":333.06003,"left":172.50002,"width":69.71998596191406,"height":10.8199462890625,"text":"Sarah"},{"top":333.06003,"left":242.22,"width":104.760009765625,"height":10.8199462890625,"text":"2615 Newton Ave 
S"},{"top":333.06003,"left":346.98,"width":70.80001831054688,"height":10.8199462890625,"text":"Minneapolis"},{"top":333.06003,"left":417.78003,"width":20.87994384765625,"height":10.8199462890625,"text":"MN"},{"top":333.06003,"left":438.65997,"width":49.91998291015625,"height":10.8199462890625,"text":"55405"},{"top":333.06003,"left":488.57996,"width":78.47998046875,"height":10.8199462890625,"text":"CEO"},{"top":333.06003,"left":567.05994,"width":106.4400634765625,"height":10.8199462890625,"text":"United Way"},{"top":333.06003,"left":673.5,"width":45.52001953125,"height":10.8199462890625,"text":"9/12/2012"},{"top":333.06003,"left":719.02,"width":43.28033447265625,"height":10.8199462890625,"text":"100.00"}],[{"top":343.87997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":343.87997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Casey /Trewartha"},{"top":343.87997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Kerry F. 
/ Kelly A."},{"top":343.87997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"2266 Goodrich Ave"},{"top":343.87997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":343.87997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":343.87997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":343.87997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":343.87997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":343.87997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/4/2012"},{"top":343.87997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":354.65997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":354.65997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cassidy"},{"top":354.65997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Paul D."},{"top":354.65997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1990 Dayton Ave"},{"top":354.65997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":354.65997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":354.65997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55104"},{"top":354.65997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":354.65997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":354.65997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/6/2012"},{"top":354.65997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":365.43997,"left":16.97,"width":1.6187286376953125,"height":10.820037841796875,"text":""},{"top":365.43997,"left":18.588728,"width":153.91128540039062,"height":10.820037841796875,"text":"Causey"},{"top":365.43997,"left":172.50002,"width":69.71998596191406,"height":10.820037841796875,"text":"Christopher"},{"top":365.43997,"left":242.22,"width":104.760009765625,"height":10.820037841796875,"text":"2181 Doswell Avenue"},{"top":365.43997,"left":346.98,"width":70.80001831054688,"height":10.820037841796875,"text":"Saint 
Paul"},{"top":365.43997,"left":417.78003,"width":20.87994384765625,"height":10.820037841796875,"text":"MN"},{"top":365.43997,"left":438.65997,"width":49.91998291015625,"height":10.820037841796875,"text":"55108"},{"top":365.43997,"left":488.57996,"width":78.47998046875,"height":10.820037841796875,"text":"finance"},{"top":365.43997,"left":567.05994,"width":106.4400634765625,"height":10.820037841796875,"text":""},{"top":365.43997,"left":673.5,"width":45.52001953125,"height":10.820037841796875,"text":"9/3/2012"},{"top":365.43997,"left":719.02,"width":43.28033447265625,"height":10.820037841796875,"text":"100.00"}],[{"top":376.26,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":376.26,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Christiansen"},{"top":376.26,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Lena"},{"top":376.26,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"2453 Como Ave"},{"top":376.26,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"St Paul"},{"top":376.26,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":376.26,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55108"},{"top":376.26,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Principal"},{"top":376.26,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"Saint Paul Public 
Schools"},{"top":376.26,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"8/8/2012"},{"top":376.26,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":387.08,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":387.08,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Clapp"},{"top":387.08,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Agustus (Bill)"},{"top":387.08,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"757 Osceola Ave #1"},{"top":387.08,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":387.08,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":387.08,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":387.08,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"retired"},{"top":387.08,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":387.08,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/2/2012"},{"top":387.08,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":397.86,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":397.86,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cohen"},{"top":397.86,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Brad A."},{"top":397.86,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1460 Raymond Ave"},{"top":397.86,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":397.86,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":397.86,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55108"},{"top":397.86,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"academic technology"},{"top":397.86,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":"UMN"},{"top":397.86,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/17/2012"},{"top":397.86,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":408.63998,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":408.63998,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Commers"},{"top":408.63998,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Beth"},{"top":408.63998,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2294 Commonwealth Ave"},{"top":408.63998,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St Paul"},{"top":408.63998,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":408.63998,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":408.63998,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":"Self 
Employed"},{"top":408.63998,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":"Homemaker"},{"top":408.63998,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"8/1/2012"},{"top":408.63998,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":419.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":419.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Couture"},{"top":419.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Steven"},{"top":419.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"7950 Victoria Way"},{"top":419.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Louise Park"},{"top":419.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":419.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55426"},{"top":419.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Principal"},{"top":419.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public Schools"},{"top":419.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/10/2012"},{"top":419.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":430.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":430.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Crawford"},{"top":430.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Lydia P."},{"top":430.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"1140 Edgcumbe Rd"},{"top":430.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint 
Paul"},{"top":430.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":430.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":430.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":430.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":430.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"9/15/2012"},{"top":430.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":441.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":441.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Cudahy / Ricker"},{"top":441.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Robert \u0026 Mary C"},{"top":441.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"616 Cherokee Ave."},{"top":441.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint Paul"},{"top":441.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":441.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55107"},{"top":441.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"Teacher/Union Presid"},{"top":441.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"SPPS/SPFT"},{"top":441.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"9/18/2012"},{"top":441.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":451.85995,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":451.85995,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cudahy / 
Ricker"},{"top":451.85995,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Robert \u0026 Mary C"},{"top":451.85995,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"616 Cherokee Ave."},{"top":451.85995,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":451.85995,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":451.85995,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55107"},{"top":451.85995,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Teacher/Union Presid"},{"top":451.85995,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"SPPS/SPFT"},{"top":451.85995,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/6/2012"},{"top":451.85995,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":462.65997,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":462.65997,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Currie"},{"top":462.65997,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Elisabeth"},{"top":462.65997,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2274 Hillside Ave"},{"top":462.65997,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St. 
Paul"},{"top":462.65997,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":462.65997,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":462.65997,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":""},{"top":462.65997,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":""},{"top":462.65997,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"9/12/2012"},{"top":462.65997,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":473.47998,"left":16.97,"width":1.6187286376953125,"height":10.780029296875,"text":""},{"top":473.47998,"left":18.588728,"width":153.91128540039062,"height":10.780029296875,"text":"Doane"},{"top":473.47998,"left":172.50002,"width":69.71998596191406,"height":10.780029296875,"text":"Paul V \u0026 Helen R"},{"top":473.47998,"left":242.22,"width":104.760009765625,"height":10.780029296875,"text":"444 Portland Ave"},{"top":473.47998,"left":346.98,"width":70.80001831054688,"height":10.780029296875,"text":"Sant Paul"},{"top":473.47998,"left":417.78003,"width":20.87994384765625,"height":10.780029296875,"text":"MN"},{"top":473.47998,"left":438.65997,"width":49.91998291015625,"height":10.780029296875,"text":"55102"},{"top":473.47998,"left":488.57996,"width":78.47998046875,"height":10.780029296875,"text":"Ex director"},{"top":473.47998,"left":567.05994,"width":106.4400634765625,"height":10.780029296875,"text":"St Paul Teachers\u0027 Retirement 
A"},{"top":473.47998,"left":673.5,"width":45.52001953125,"height":10.780029296875,"text":"10/3/2012"},{"top":473.47998,"left":719.02,"width":43.28033447265625,"height":10.780029296875,"text":"100.00"}],[{"top":484.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":484.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Dougherty"},{"top":484.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Richards S \u0026 Patrici"},{"top":484.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"812 Goodrich Ave"},{"top":484.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint Paul"},{"top":484.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":484.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":484.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":484.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":484.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"10/4/2012"},{"top":484.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":495.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":495.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Driscoll"},{"top":495.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Joe \u0026 Jill"},{"top":495.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"2383 Bourne Ave"},{"top":495.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint 
Paul"},{"top":495.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":495.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55108"},{"top":495.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"HR Manager"},{"top":495.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"LexisNexis"},{"top":495.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"10/18/2012"},{"top":495.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":505.85995,"left":16.97,"width":1.6187286376953125,"height":10.780059814453125,"text":""},{"top":505.85995,"left":18.588728,"width":153.91128540039062,"height":10.780059814453125,"text":"Dubaille / Haugee"},{"top":505.85995,"left":172.50002,"width":69.71998596191406,"height":10.780059814453125,"text":"Florence M. /Eric"},{"top":505.85995,"left":242.22,"width":104.760009765625,"height":10.780059814453125,"text":"1009 Edmund Ave"},{"top":505.85995,"left":346.98,"width":70.80001831054688,"height":10.780059814453125,"text":"Saint 
Paul"},{"top":505.85995,"left":417.78003,"width":20.87994384765625,"height":10.780059814453125,"text":"MN"},{"top":505.85995,"left":438.65997,"width":49.91998291015625,"height":10.780059814453125,"text":"55104"},{"top":505.85995,"left":488.57996,"width":78.47998046875,"height":10.780059814453125,"text":"Florence‐teacher"},{"top":505.85995,"left":567.05994,"width":106.4400634765625,"height":10.780059814453125,"text":"SPPS"},{"top":505.85995,"left":673.5,"width":45.52001953125,"height":10.780059814453125,"text":"10/3/2012"},{"top":505.85995,"left":719.02,"width":43.28033447265625,"height":10.780059814453125,"text":"100.00"}],[{"top":516.64,"left":16.97,"width":1.6187286376953125,"height":10.83990478515625,"text":""},{"top":516.64,"left":18.588728,"width":153.91128540039062,"height":10.83990478515625,"text":"Eaton"},{"top":516.64,"left":172.50002,"width":69.71998596191406,"height":10.83990478515625,"text":"Jim"},{"top":516.64,"left":242.22,"width":104.760009765625,"height":10.83990478515625,"text":"2133 Berkeley Ave"},{"top":516.64,"left":346.98,"width":70.80001831054688,"height":10.83990478515625,"text":"St Paul"},{"top":516.64,"left":417.78003,"width":20.87994384765625,"height":10.83990478515625,"text":"MN"},{"top":516.64,"left":438.65997,"width":49.91998291015625,"height":10.83990478515625,"text":"55105"},{"top":516.64,"left":488.57996,"width":78.47998046875,"height":10.83990478515625,"text":"Principal"},{"top":516.64,"left":567.05994,"width":106.4400634765625,"height":10.83990478515625,"text":"Saint Paul Public Schools"},{"top":516.64,"left":673.5,"width":45.52001953125,"height":10.83990478515625,"text":"8/23/2012"},{"top":516.64,"left":719.02,"width":43.28033447265625,"height":10.83990478515625,"text":"100.00"}],[{"top":527.4799,"left":16.97,"width":1.6187286376953125,"height":10.78009033203125,"text":""},{"top":527.4799,"left":18.588728,"width":153.91128540039062,"height":10.78009033203125,"text":"Eaves 
/Alger"},{"top":527.4799,"left":172.50002,"width":69.71998596191406,"height":10.78009033203125,"text":"Patricia / Stuart"},{"top":527.4799,"left":242.22,"width":104.760009765625,"height":10.78009033203125,"text":"1143 Portladn Ave."},{"top":527.4799,"left":346.98,"width":70.80001831054688,"height":10.78009033203125,"text":"Saint Paul"},{"top":527.4799,"left":417.78003,"width":20.87994384765625,"height":10.78009033203125,"text":"MN"},{"top":527.4799,"left":438.65997,"width":49.91998291015625,"height":10.78009033203125,"text":"55104"},{"top":527.4799,"left":488.57996,"width":78.47998046875,"height":10.78009033203125,"text":""},{"top":527.4799,"left":567.05994,"width":106.4400634765625,"height":10.78009033203125,"text":""},{"top":527.4799,"left":673.5,"width":45.52001953125,"height":10.78009033203125,"text":"10/3/2012"},{"top":527.4799,"left":719.02,"width":43.28033447265625,"height":10.78009033203125,"text":"100.00"}]]} diff --git a/src/test/resources/technology/tabula/json/spanning_cells.json b/src/test/resources/technology/tabula/json/spanning_cells.json index 97f3d147..7f68ee99 100644 --- a/src/test/resources/technology/tabula/json/spanning_cells.json +++ b/src/test/resources/technology/tabula/json/spanning_cells.json @@ -1 +1 @@ -[{"extraction_method":"lattice","top":146.51932,"left":119.78943,"width":355.312255859375,"height":270.5516052246094,"data":[[{"top":146.51932,"left":119.78943,"width":355.312255859375,"height":12.938491821289062,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.78943,"width":121.92680358886719,"height":12.785446166992188,"text":"Volume servers 
in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663360595703125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37115,"width":46.73052978515625,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.78943,"width":121.92680358886719,"height":12.832427978515625,"text":"Server closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663360595703125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37115,"width":46.73052978515625,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.78943,"width":121.92680358886719,"height":12.891036987304688,"text":"Server rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663360595703125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37115,"width":46.73052978515625,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.78943,"width":121.92680358886719,"height":12.890411376953125,"text":"Localized data 
centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663360595703125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37115,"width":46.73052978515625,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.78943,"width":121.92680358886719,"height":12.890716552734375,"text":"Mid-tier data centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663360595703125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37115,"width":46.73052978515625,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.78943,"width":121.92680358886719,"height":12.785018920898438,"text":"Enterprise-class data centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785018920898438,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785018920898438,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785018920898438,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663360595703125,"height":12.785018920898438,"text":"1,677"},{"top":223.74785,"left":428.37115,"width":46.73052978515625,"height":12.785018920898438,"text":"1,693"}],[{"top":236.53287,"left":119.78943,"width":355.312255859375,"height":13.1025390625,"text":"Best practice 
scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.6354,"left":119.78943,"width":121.92680358886719,"height":12.72686767578125,"text":"Volume servers in:"},{"top":249.6354,"left":241.71623,"width":46.66426086425781,"height":12.72686767578125,"text":"2007"},{"top":249.6354,"left":288.3805,"width":46.663482666015625,"height":12.72686767578125,"text":"2008"},{"top":249.6354,"left":335.04398,"width":46.663818359375,"height":12.72686767578125,"text":"2009"},{"top":249.6354,"left":381.7078,"width":46.663360595703125,"height":12.72686767578125,"text":"2010"},{"top":249.6354,"left":428.37115,"width":46.73052978515625,"height":12.72686767578125,"text":"2011"}],[{"top":262.36227,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Server closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.78943,"width":121.92680358886719,"height":12.8914794921875,"text":"Server 
rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663360595703125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37115,"width":46.73052978515625,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.78943,"width":121.92680358886719,"height":12.890167236328125,"text":"Localized data centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663360595703125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37115,"width":46.73052978515625,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Mid-tier data centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.78943,"width":121.92680358886719,"height":12.7855224609375,"text":"Enterprise-class data 
centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.7855224609375,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.7855224609375,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.7855224609375,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663360595703125,"height":12.7855224609375,"text":"1,334"},{"top":313.92487,"left":428.37115,"width":46.73052978515625,"height":12.7855224609375,"text":"1,371"}],[{"top":326.7104,"left":119.78943,"width":355.312255859375,"height":13.0440673828125,"text":"State-of-the-art scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75446,"left":119.78943,"width":121.92680358886719,"height":12.784912109375,"text":"Volume servers in:"},{"top":339.75446,"left":241.71623,"width":46.66426086425781,"height":12.784912109375,"text":"2007"},{"top":339.75446,"left":288.3805,"width":46.663482666015625,"height":12.784912109375,"text":"2008"},{"top":339.75446,"left":335.04398,"width":46.663818359375,"height":12.784912109375,"text":"2009"},{"top":339.75446,"left":381.7078,"width":46.663360595703125,"height":12.784912109375,"text":"2010"},{"top":339.75446,"left":428.37115,"width":46.73052978515625,"height":12.784912109375,"text":"2011"}],[{"top":352.53937,"left":119.78943,"width":121.92680358886719,"height":12.89111328125,"text":"Server 
closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663360595703125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37115,"width":46.73052978515625,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.78943,"width":121.92680358886719,"height":12.8902587890625,"text":"Server rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663360595703125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37115,"width":46.73052978515625,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.78943,"width":121.92680358886719,"height":12.891143798828125,"text":"Localized data centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663360595703125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37115,"width":46.73052978515625,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.78943,"width":121.92680358886719,"height":12.89068603515625,"text":"Mid-tier data 
centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663360595703125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37115,"width":46.73052978515625,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.78943,"width":121.92680358886719,"height":12.968353271484375,"text":"Enterprise-class data centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968353271484375,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968353271484375,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968353271484375,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663360595703125,"height":12.968353271484375,"text":"1,424"},{"top":404.10257,"left":428.37115,"width":46.73052978515625,"height":12.968353271484375,"text":"1,485"}]]},{"extraction_method":"lattice","top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.52508544921875,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.880615234375,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.14026,"left":130.8136,"width":99.93739318847656,"height":12.78485107421875,"text":"Server 
class"},{"top":551.14026,"left":230.75099,"width":46.66316223144531,"height":12.78485107421875,"text":"2007"},{"top":551.14026,"left":277.41415,"width":46.663726806640625,"height":12.78485107421875,"text":"2008"},{"top":551.14026,"left":324.07788,"width":46.663970947265625,"height":12.78485107421875,"text":"2009"},{"top":551.14026,"left":370.74185,"width":46.663177490234375,"height":12.78485107421875,"text":"2010"},{"top":551.14026,"left":417.40503,"width":46.712371826171875,"height":12.78485107421875,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.9691162109375,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.9691162109375,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.9691162109375,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.9691162109375,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.9691162109375,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.9691162109375,"text":"98,864"}]]}] \ No newline at end of file 
+[{"extraction_method":"lattice","page_number":1,"top":146.51932,"left":119.78943,"width":355.312255859375,"height":270.5516052246094,"right":475.10168,"bottom":417.07092,"data":[[{"top":146.51932,"left":119.78943,"width":355.312255859375,"height":12.938491821289062,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.78943,"width":121.92680358886719,"height":12.785446166992188,"text":"Volume servers in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663360595703125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37115,"width":46.73052978515625,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.78943,"width":121.92680358886719,"height":12.832427978515625,"text":"Server closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663360595703125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37115,"width":46.73052978515625,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.78943,"width":121.92680358886719,"height":12.891036987304688,"text":"Server 
rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663360595703125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37115,"width":46.73052978515625,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.78943,"width":121.92680358886719,"height":12.890411376953125,"text":"Localized data centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663360595703125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37115,"width":46.73052978515625,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.78943,"width":121.92680358886719,"height":12.890716552734375,"text":"Mid-tier data centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663360595703125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37115,"width":46.73052978515625,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.78943,"width":121.92680358886719,"height":12.785018920898438,"text":"Enterprise-class data 
centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785018920898438,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785018920898438,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785018920898438,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663360595703125,"height":12.785018920898438,"text":"1,677"},{"top":223.74785,"left":428.37115,"width":46.73052978515625,"height":12.785018920898438,"text":"1,693"}],[{"top":236.53287,"left":119.78943,"width":355.312255859375,"height":13.1025390625,"text":"Best practice scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.6354,"left":119.78943,"width":121.92680358886719,"height":12.72686767578125,"text":"Volume servers in:"},{"top":249.6354,"left":241.71623,"width":46.66426086425781,"height":12.72686767578125,"text":"2007"},{"top":249.6354,"left":288.3805,"width":46.663482666015625,"height":12.72686767578125,"text":"2008"},{"top":249.6354,"left":335.04398,"width":46.663818359375,"height":12.72686767578125,"text":"2009"},{"top":249.6354,"left":381.7078,"width":46.663360595703125,"height":12.72686767578125,"text":"2010"},{"top":249.6354,"left":428.37115,"width":46.73052978515625,"height":12.72686767578125,"text":"2011"}],[{"top":262.36227,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Server 
closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.78943,"width":121.92680358886719,"height":12.8914794921875,"text":"Server rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663360595703125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37115,"width":46.73052978515625,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.78943,"width":121.92680358886719,"height":12.890167236328125,"text":"Localized data centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663360595703125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37115,"width":46.73052978515625,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Mid-tier data 
centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.78943,"width":121.92680358886719,"height":12.7855224609375,"text":"Enterprise-class data centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.7855224609375,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.7855224609375,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.7855224609375,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663360595703125,"height":12.7855224609375,"text":"1,334"},{"top":313.92487,"left":428.37115,"width":46.73052978515625,"height":12.7855224609375,"text":"1,371"}],[{"top":326.7104,"left":119.78943,"width":355.312255859375,"height":13.0440673828125,"text":"State-of-the-art scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75446,"left":119.78943,"width":121.92680358886719,"height":12.784912109375,"text":"Volume servers 
in:"},{"top":339.75446,"left":241.71623,"width":46.66426086425781,"height":12.784912109375,"text":"2007"},{"top":339.75446,"left":288.3805,"width":46.663482666015625,"height":12.784912109375,"text":"2008"},{"top":339.75446,"left":335.04398,"width":46.663818359375,"height":12.784912109375,"text":"2009"},{"top":339.75446,"left":381.7078,"width":46.663360595703125,"height":12.784912109375,"text":"2010"},{"top":339.75446,"left":428.37115,"width":46.73052978515625,"height":12.784912109375,"text":"2011"}],[{"top":352.53937,"left":119.78943,"width":121.92680358886719,"height":12.89111328125,"text":"Server closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663360595703125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37115,"width":46.73052978515625,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.78943,"width":121.92680358886719,"height":12.8902587890625,"text":"Server rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663360595703125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37115,"width":46.73052978515625,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.78943,"width":121.92680358886719,"height":12.891143798828125,"text":"Localized data 
centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663360595703125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37115,"width":46.73052978515625,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.78943,"width":121.92680358886719,"height":12.89068603515625,"text":"Mid-tier data centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663360595703125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37115,"width":46.73052978515625,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.78943,"width":121.92680358886719,"height":12.968353271484375,"text":"Enterprise-class data 
centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968353271484375,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968353271484375,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968353271484375,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663360595703125,"height":12.968353271484375,"text":"1,424"},{"top":404.10257,"left":428.37115,"width":46.73052978515625,"height":12.968353271484375,"text":"1,485"}]]},{"extraction_method":"lattice","page_number":1,"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.52508544921875,"right":464.1174,"bottom":589.7847,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.880615234375,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.14026,"left":130.8136,"width":99.93739318847656,"height":12.78485107421875,"text":"Server 
class"},{"top":551.14026,"left":230.75099,"width":46.66316223144531,"height":12.78485107421875,"text":"2007"},{"top":551.14026,"left":277.41415,"width":46.663726806640625,"height":12.78485107421875,"text":"2008"},{"top":551.14026,"left":324.07788,"width":46.663970947265625,"height":12.78485107421875,"text":"2009"},{"top":551.14026,"left":370.74185,"width":46.663177490234375,"height":12.78485107421875,"text":"2010"},{"top":551.14026,"left":417.40503,"width":46.712371826171875,"height":12.78485107421875,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.9691162109375,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.9691162109375,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.9691162109375,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.9691162109375,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.9691162109375,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.9691162109375,"text":"98,864"}]]}] diff --git a/src/test/resources/technology/tabula/json/spanning_cells_basic.json b/src/test/resources/technology/tabula/json/spanning_cells_basic.json index da8c71e0..6efaef5b 100644 --- 
a/src/test/resources/technology/tabula/json/spanning_cells_basic.json +++ b/src/test/resources/technology/tabula/json/spanning_cells_basic.json @@ -1 +1 @@ -[{"extraction_method":"lattice","top":150.56,"left":119.789444,"width":355.31939697265625,"height":266.5108642578125,"data":[[{"top":150.56,"left":119.789444,"width":355.31939697265625,"height":8.897811889648438,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.789444,"width":121.92678833007812,"height":12.785446166992188,"text":"Volume servers in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663330078125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37112,"width":46.73773193359375,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.789444,"width":121.92678833007812,"height":12.832427978515625,"text":"Server 
closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663330078125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37112,"width":46.73773193359375,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.789444,"width":121.92678833007812,"height":12.891036987304688,"text":"Server rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663330078125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37112,"width":46.73773193359375,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.789444,"width":121.92678833007812,"height":12.890411376953125,"text":"Localized data centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663330078125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37112,"width":46.73773193359375,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.789444,"width":121.92678833007812,"height":12.890716552734375,"text":"Mid-tier data 
centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663330078125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37112,"width":46.73773193359375,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.789444,"width":121.92678833007812,"height":12.785003662109375,"text":"Enterprise-class data centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785003662109375,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785003662109375,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785003662109375,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663330078125,"height":12.785003662109375,"text":"1,677"},{"top":223.74785,"left":428.37112,"width":46.73773193359375,"height":12.785003662109375,"text":"1,693"}],[{"top":236.53285,"left":119.789444,"width":355.31939697265625,"height":13.1025390625,"text":"Best practice scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.63539,"left":119.789444,"width":121.92678833007812,"height":12.726882934570312,"text":"Volume servers 
in:"},{"top":249.63539,"left":241.71623,"width":46.66426086425781,"height":12.726882934570312,"text":"2007"},{"top":249.63539,"left":288.3805,"width":46.663482666015625,"height":12.726882934570312,"text":"2008"},{"top":249.63539,"left":335.04398,"width":46.663818359375,"height":12.726882934570312,"text":"2009"},{"top":249.63539,"left":381.7078,"width":46.663330078125,"height":12.726882934570312,"text":"2010"},{"top":249.63539,"left":428.37112,"width":46.73773193359375,"height":12.726882934570312,"text":"2011"}],[{"top":262.36227,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Server closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.789444,"width":121.92678833007812,"height":12.8914794921875,"text":"Server rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663330078125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37112,"width":46.73773193359375,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.789444,"width":121.92678833007812,"height":12.890167236328125,"text":"Localized data 
centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663330078125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37112,"width":46.73773193359375,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Mid-tier data centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.789444,"width":121.92678833007812,"height":12.785552978515625,"text":"Enterprise-class data centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.785552978515625,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.785552978515625,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.785552978515625,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663330078125,"height":12.785552978515625,"text":"1,334"},{"top":313.92487,"left":428.37112,"width":46.73773193359375,"height":12.785552978515625,"text":"1,371"}],[{"top":326.71042,"left":119.789444,"width":355.31939697265625,"height":13.04400634765625,"text":"State-of-the-art 
scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75443,"left":119.789444,"width":121.92678833007812,"height":12.784942626953125,"text":"Volume servers in:"},{"top":339.75443,"left":241.71623,"width":46.66426086425781,"height":12.784942626953125,"text":"2007"},{"top":339.75443,"left":288.3805,"width":46.663482666015625,"height":12.784942626953125,"text":"2008"},{"top":339.75443,"left":335.04398,"width":46.663818359375,"height":12.784942626953125,"text":"2009"},{"top":339.75443,"left":381.7078,"width":46.663330078125,"height":12.784942626953125,"text":"2010"},{"top":339.75443,"left":428.37112,"width":46.73773193359375,"height":12.784942626953125,"text":"2011"}],[{"top":352.53937,"left":119.789444,"width":121.92678833007812,"height":12.89111328125,"text":"Server closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663330078125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37112,"width":46.73773193359375,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.789444,"width":121.92678833007812,"height":12.8902587890625,"text":"Server 
rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663330078125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37112,"width":46.73773193359375,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.789444,"width":121.92678833007812,"height":12.891143798828125,"text":"Localized data centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663330078125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37112,"width":46.73773193359375,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.789444,"width":121.92678833007812,"height":12.89068603515625,"text":"Mid-tier data centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663330078125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37112,"width":46.73773193359375,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.789444,"width":121.92678833007812,"height":12.968292236328125,"text":"Enterprise-class data 
centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968292236328125,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968292236328125,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968292236328125,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663330078125,"height":12.968292236328125,"text":"1,424"},{"top":404.10257,"left":428.37112,"width":46.73773193359375,"height":12.968292236328125,"text":"1,485"}]]},{"extraction_method":"lattice","top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.5423583984375,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.88055419921875,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.1402,"left":130.8136,"width":99.93739318847656,"height":12.784912109375,"text":"Server 
class"},{"top":551.1402,"left":230.75099,"width":46.66316223144531,"height":12.784912109375,"text":"2007"},{"top":551.1402,"left":277.41415,"width":46.663726806640625,"height":12.784912109375,"text":"2008"},{"top":551.1402,"left":324.07788,"width":46.663970947265625,"height":12.784912109375,"text":"2009"},{"top":551.1402,"left":370.74185,"width":46.663177490234375,"height":12.784912109375,"text":"2010"},{"top":551.1402,"left":417.40503,"width":46.712371826171875,"height":12.784912109375,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.98638916015625,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.98638916015625,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.98638916015625,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.98638916015625,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.98638916015625,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.98638916015625,"text":"98,864"}]]}] \ No newline at end of file 
+[{"extraction_method":"lattice","page_number":1,"top":150.56,"left":119.789444,"width":355.31939697265625,"height":266.5108642578125,"right":475.10883,"bottom":417.07086,"data":[[{"top":150.56,"left":119.789444,"width":355.31939697265625,"height":8.897811889648438,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.789444,"width":121.92678833007812,"height":12.785446166992188,"text":"Volume servers in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663330078125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37112,"width":46.73773193359375,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.789444,"width":121.92678833007812,"height":12.832427978515625,"text":"Server closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663330078125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37112,"width":46.73773193359375,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.789444,"width":121.92678833007812,"height":12.891036987304688,"text":"Server 
rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663330078125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37112,"width":46.73773193359375,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.789444,"width":121.92678833007812,"height":12.890411376953125,"text":"Localized data centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663330078125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37112,"width":46.73773193359375,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.789444,"width":121.92678833007812,"height":12.890716552734375,"text":"Mid-tier data centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663330078125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37112,"width":46.73773193359375,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.789444,"width":121.92678833007812,"height":12.785003662109375,"text":"Enterprise-class data 
centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785003662109375,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785003662109375,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785003662109375,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663330078125,"height":12.785003662109375,"text":"1,677"},{"top":223.74785,"left":428.37112,"width":46.73773193359375,"height":12.785003662109375,"text":"1,693"}],[{"top":236.53285,"left":119.789444,"width":355.31939697265625,"height":13.1025390625,"text":"Best practice scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.63539,"left":119.789444,"width":121.92678833007812,"height":12.726882934570312,"text":"Volume servers in:"},{"top":249.63539,"left":241.71623,"width":46.66426086425781,"height":12.726882934570312,"text":"2007"},{"top":249.63539,"left":288.3805,"width":46.663482666015625,"height":12.726882934570312,"text":"2008"},{"top":249.63539,"left":335.04398,"width":46.663818359375,"height":12.726882934570312,"text":"2009"},{"top":249.63539,"left":381.7078,"width":46.663330078125,"height":12.726882934570312,"text":"2010"},{"top":249.63539,"left":428.37112,"width":46.73773193359375,"height":12.726882934570312,"text":"2011"}],[{"top":262.36227,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Server 
closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.789444,"width":121.92678833007812,"height":12.8914794921875,"text":"Server rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663330078125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37112,"width":46.73773193359375,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.789444,"width":121.92678833007812,"height":12.890167236328125,"text":"Localized data centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663330078125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37112,"width":46.73773193359375,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Mid-tier data 
centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.789444,"width":121.92678833007812,"height":12.785552978515625,"text":"Enterprise-class data centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.785552978515625,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.785552978515625,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.785552978515625,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663330078125,"height":12.785552978515625,"text":"1,334"},{"top":313.92487,"left":428.37112,"width":46.73773193359375,"height":12.785552978515625,"text":"1,371"}],[{"top":326.71042,"left":119.789444,"width":355.31939697265625,"height":13.04400634765625,"text":"State-of-the-art scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75443,"left":119.789444,"width":121.92678833007812,"height":12.784942626953125,"text":"Volume servers 
in:"},{"top":339.75443,"left":241.71623,"width":46.66426086425781,"height":12.784942626953125,"text":"2007"},{"top":339.75443,"left":288.3805,"width":46.663482666015625,"height":12.784942626953125,"text":"2008"},{"top":339.75443,"left":335.04398,"width":46.663818359375,"height":12.784942626953125,"text":"2009"},{"top":339.75443,"left":381.7078,"width":46.663330078125,"height":12.784942626953125,"text":"2010"},{"top":339.75443,"left":428.37112,"width":46.73773193359375,"height":12.784942626953125,"text":"2011"}],[{"top":352.53937,"left":119.789444,"width":121.92678833007812,"height":12.89111328125,"text":"Server closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663330078125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37112,"width":46.73773193359375,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.789444,"width":121.92678833007812,"height":12.8902587890625,"text":"Server rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663330078125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37112,"width":46.73773193359375,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.789444,"width":121.92678833007812,"height":12.891143798828125,"text":"Localized data 
centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663330078125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37112,"width":46.73773193359375,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.789444,"width":121.92678833007812,"height":12.89068603515625,"text":"Mid-tier data centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663330078125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37112,"width":46.73773193359375,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.789444,"width":121.92678833007812,"height":12.968292236328125,"text":"Enterprise-class data 
centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968292236328125,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968292236328125,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968292236328125,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663330078125,"height":12.968292236328125,"text":"1,424"},{"top":404.10257,"left":428.37112,"width":46.73773193359375,"height":12.968292236328125,"text":"1,485"}]]},{"extraction_method":"lattice","page_number":1,"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.5423583984375,"right":464.1174,"bottom":589.802,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.88055419921875,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.1402,"left":130.8136,"width":99.93739318847656,"height":12.784912109375,"text":"Server 
class"},{"top":551.1402,"left":230.75099,"width":46.66316223144531,"height":12.784912109375,"text":"2007"},{"top":551.1402,"left":277.41415,"width":46.663726806640625,"height":12.784912109375,"text":"2008"},{"top":551.1402,"left":324.07788,"width":46.663970947265625,"height":12.784912109375,"text":"2009"},{"top":551.1402,"left":370.74185,"width":46.663177490234375,"height":12.784912109375,"text":"2010"},{"top":551.1402,"left":417.40503,"width":46.712371826171875,"height":12.784912109375,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.98638916015625,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.98638916015625,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.98638916015625,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.98638916015625,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.98638916015625,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.98638916015625,"text":"98,864"}]]}] diff --git a/src/test/resources/technology/tabula/json/twotables.json b/src/test/resources/technology/tabula/json/twotables.json index 5c70c52e..35a4c50f 100644 --- a/src/test/resources/technology/tabula/json/twotables.json +++ 
b/src/test/resources/technology/tabula/json/twotables.json @@ -1 +1 @@ -[{"extraction_method":"lattice","top":111.79087,"left":67.41156,"width":342.44476318359375,"height":174.0704345703125,"data":[[{"top":111.79087,"left":67.41156,"width":85.61141967773438,"height":28.536293029785156,"text":""},{"top":111.79087,"left":153.02298,"width":256.8333435058594,"height":14.267105102539062,"text":"株主資本"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":126.057976,"left":153.02298,"width":51.367401123046875,"height":14.269187927246094,"text":"資本金"},{"top":126.057976,"left":204.39038,"width":51.36669921875,"height":14.269187927246094,"text":"資本剰余金"},{"top":126.057976,"left":255.75708,"width":51.3656005859375,"height":14.269187927246094,"text":"利益剰余金"},{"top":126.057976,"left":307.12268,"width":51.366668701171875,"height":14.269187927246094,"text":"自己株式"},{"top":126.057976,"left":358.48935,"width":51.366973876953125,"height":14.269187927246094,"text":"株主資本合計"}],[{"top":140.32716,"left":67.41156,"width":85.61141967773438,"height":14.265777587890625,"text":"当期首残高"},{"top":140.32716,"left":153.02298,"width":51.367401123046875,"height":14.265777587890625,"text":"5,664"},{"top":140.32716,"left":204.39038,"width":51.36669921875,"height":14.265777587890625,"text":"749"},{"top":140.32716,"left":255.75708,"width":51.3656005859375,"height":14.265777587890625,"text":"12,017"},{"top":140.32716,"left":307.12268,"width":51.366668701171875,"height":14.265777587890625,"text":"△747"},{"top":140.32716,"left":358.48935,"width":51.366973876953125,"height":14.265777587890625,"text":"17,683"}],[{"top":154.59294,"left":67.41156,"width":85.61141967773438,"height":14.26910400390625,"text":"当期変動額"},{"top":154.59294,"left":153.02298,"width":51.36740112304687
5,"height":14.26910400390625,"text":""},{"top":154.59294,"left":204.39038,"width":51.36669921875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":255.75708,"width":51.3656005859375,"height":14.26910400390625,"text":""},{"top":154.59294,"left":307.12268,"width":51.366668701171875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":358.48935,"width":51.366973876953125,"height":14.26910400390625,"text":""}],[{"top":168.86205,"left":67.41156,"width":85.61141967773438,"height":14.268997192382812,"text":"剰余金の配当"},{"top":168.86205,"left":153.02298,"width":51.367401123046875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":204.39038,"width":51.36669921875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":255.75708,"width":51.3656005859375,"height":14.268997192382812,"text":"△525"},{"top":168.86205,"left":307.12268,"width":51.366668701171875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":358.48935,"width":51.366973876953125,"height":14.268997192382812,"text":"△525"}],[{"top":183.13104,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期純利益"},{"top":183.13104,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":"1,269"},{"top":183.13104,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":"1,269"}],[{"top":197.4002,"left":67.41156,"width":85.61141967773438,"height":14.268936157226562,"text":"自己株式の取得"},{"top":197.4002,"left":153.02298,"width":51.367401123046875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":204.39038,"width":51.36669921875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":255.75708,"width":51.3656005859375,"height
":14.268936157226562,"text":""},{"top":197.4002,"left":307.12268,"width":51.366668701171875,"height":14.268936157226562,"text":"△0"},{"top":197.4002,"left":358.48935,"width":51.366973876953125,"height":14.268936157226562,"text":"△0"}],[{"top":211.66914,"left":67.41156,"width":85.61141967773438,"height":22.82952880859375,"text":"持分法の適用範囲\rの変動"},{"top":211.66914,"left":153.02298,"width":51.367401123046875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":204.39038,"width":51.36669921875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":255.75708,"width":51.3656005859375,"height":22.82952880859375,"text":"85"},{"top":211.66914,"left":307.12268,"width":51.366668701171875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":358.48935,"width":51.366973876953125,"height":22.82952880859375,"text":"85"}],[{"top":234.49867,"left":67.41156,"width":85.61141967773438,"height":22.829910278320312,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":234.49867,"left":153.02298,"width":51.367401123046875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":204.39038,"width":51.36669921875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":255.75708,"width":51.3656005859375,"height":22.829910278320312,"text":""},{"top":234.49867,"left":307.12268,"width":51.366668701171875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":358.48935,"width":51.366973876953125,"height":22.829910278320312,"text":""}],[{"top":257.32858,"left":67.41156,"width":85.61141967773438,"height":14.2686767578125,"text":"当期変動額合計"},{"top":257.32858,"left":153.02298,"width":51.367401123046875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":204.39038,"width":51.36669921875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":255.75708,"width":51.3656005859375,"height":14.2686767578125,"text":"829"},{"top":257.32858,"left":307.12268,"width":51.366668701171875,"height":14.2686767578125,"text":"△0"},{"top":257.32858,"left":358.48935,"width":51.366
973876953125,"height":14.2686767578125,"text":"829"}],[{"top":271.59726,"left":67.41156,"width":85.61141967773438,"height":14.2640380859375,"text":"当期末残高"},{"top":271.59726,"left":153.02298,"width":51.367401123046875,"height":14.2640380859375,"text":"5,664"},{"top":271.59726,"left":204.39038,"width":51.36669921875,"height":14.2640380859375,"text":"749"},{"top":271.59726,"left":255.75708,"width":51.3656005859375,"height":14.2640380859375,"text":"12,846"},{"top":271.59726,"left":307.12268,"width":51.366668701171875,"height":14.2640380859375,"text":"△747"},{"top":271.59726,"left":358.48935,"width":51.366973876953125,"height":14.2640380859375,"text":"18,512"}]]},{"extraction_method":"lattice","top":312.25272,"left":67.41156,"width":445.17803955078125,"height":191.19696044921875,"data":[[{"top":312.25272,"left":67.41156,"width":85.61141967773438,"height":45.658233642578125,"text":""},{"top":312.25272,"left":153.02298,"width":256.8333435058594,"height":14.267730712890625,"text":"その他の包括利益累計額"},{"top":312.25272,"left":409.85632,"width":51.365631103515625,"height":45.658233642578125,"text":"少数株主持分"},{"top":312.25272,"left":461.22195,"width":51.367645263671875,"height":45.658233642578125,"text":"純資産合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":326.52045,"left":153.02298,"width":51.367401123046875,"height":31.3905029296875,"text":"その他有価証券\r評価差額金"},{"top":326.52045,"left":204.39038,"width":51.36669921875,"height":31.3905029296875,"text":"繰延ヘッジ\r損益"},{"top":326.52045,"left":255.75708,"width":51.3656005859375,"height":31.3905029296875,"text":"為替換算\r調整勘定"},{"top":326.52045,"left":307.12268,"width":51.366668701171875,"height":31.3905029296875,"text":"退職給付に係る\r調整累計額"},{"top":326.52045,"left":358.48935,"width":51.366973876953125,"
height":31.3905029296875,"text":"その他の\r包括利益\r累計額合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":357.91095,"left":67.41156,"width":85.61141967773438,"height":14.268768310546875,"text":"当期首残高"},{"top":357.91095,"left":153.02298,"width":51.367401123046875,"height":14.268768310546875,"text":"△669"},{"top":357.91095,"left":204.39038,"width":51.36669921875,"height":14.268768310546875,"text":"61"},{"top":357.91095,"left":255.75708,"width":51.3656005859375,"height":14.268768310546875,"text":"△109"},{"top":357.91095,"left":307.12268,"width":51.366668701171875,"height":14.268768310546875,"text":"―"},{"top":357.91095,"left":358.48935,"width":51.366973876953125,"height":14.268768310546875,"text":"△717"},{"top":357.91095,"left":409.85632,"width":51.365631103515625,"height":14.268768310546875,"text":"246"},{"top":357.91095,"left":461.22195,"width":51.367645263671875,"height":14.268768310546875,"text":"17,212"}],[{"top":372.17972,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期変動額"},{"top":372.17972,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":""},{"top":372.17972,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":""},{"top":372.17972,"left":409.85632,"width":51.365631103515625,"height":14.2691650390625,"text":""},{"top":372.17972,"left":461.22195,"width":51.367645263671875,"height":14.2691650390625,"text":""}],[{"top":386.44888,"left":67.41156,"width":85.61141967773438,"height":14.268646240234375,"text":"剰余金の配当"},{"top":386.44888,"left":153.02298,"width":51.367401123046875,"height":14.268646240234375,"text":""},{"top":386.44888,"l
eft":204.39038,"width":51.36669921875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":255.75708,"width":51.3656005859375,"height":14.268646240234375,"text":""},{"top":386.44888,"left":307.12268,"width":51.366668701171875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":358.48935,"width":51.366973876953125,"height":14.268646240234375,"text":""},{"top":386.44888,"left":409.85632,"width":51.365631103515625,"height":14.268646240234375,"text":""},{"top":386.44888,"left":461.22195,"width":51.367645263671875,"height":14.268646240234375,"text":"△525"}],[{"top":400.71753,"left":67.41156,"width":85.61141967773438,"height":14.26812744140625,"text":"当期純利益"},{"top":400.71753,"left":153.02298,"width":51.367401123046875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":204.39038,"width":51.36669921875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":255.75708,"width":51.3656005859375,"height":14.26812744140625,"text":""},{"top":400.71753,"left":307.12268,"width":51.366668701171875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":358.48935,"width":51.366973876953125,"height":14.26812744140625,"text":""},{"top":400.71753,"left":409.85632,"width":51.365631103515625,"height":14.26812744140625,"text":""},{"top":400.71753,"left":461.22195,"width":51.367645263671875,"height":14.26812744140625,"text":"1,269"}],[{"top":414.98566,"left":67.41156,"width":85.61141967773438,"height":14.26678466796875,"text":"自己株式の取得"},{"top":414.98566,"left":153.02298,"width":51.367401123046875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":204.39038,"width":51.36669921875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":255.75708,"width":51.3656005859375,"height":14.26678466796875,"text":""},{"top":414.98566,"left":307.12268,"width":51.366668701171875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":358.48935,"width":51.366973876953125,"height":14.26678466796875,"text":""},{"top":414.98566,"left":409.85
632,"width":51.365631103515625,"height":14.26678466796875,"text":""},{"top":414.98566,"left":461.22195,"width":51.367645263671875,"height":14.26678466796875,"text":"△0"}],[{"top":429.25244,"left":67.41156,"width":85.61141967773438,"height":22.8292236328125,"text":"持分法の適用範囲\rの変動"},{"top":429.25244,"left":153.02298,"width":51.367401123046875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":204.39038,"width":51.36669921875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":255.75708,"width":51.3656005859375,"height":22.8292236328125,"text":""},{"top":429.25244,"left":307.12268,"width":51.366668701171875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":358.48935,"width":51.366973876953125,"height":22.8292236328125,"text":""},{"top":429.25244,"left":409.85632,"width":51.365631103515625,"height":22.8292236328125,"text":""},{"top":429.25244,"left":461.22195,"width":51.367645263671875,"height":22.8292236328125,"text":"85"}],[{"top":452.08167,"left":67.41156,"width":85.61141967773438,"height":22.830596923828125,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":452.08167,"left":153.02298,"width":51.367401123046875,"height":22.830596923828125,"text":"556"},{"top":452.08167,"left":204.39038,"width":51.36669921875,"height":22.830596923828125,"text":"80"},{"top":452.08167,"left":255.75708,"width":51.3656005859375,"height":22.830596923828125,"text":"5"},{"top":452.08167,"left":307.12268,"width":51.366668701171875,"height":22.830596923828125,"text":"―"},{"top":452.08167,"left":358.48935,"width":51.366973876953125,"height":22.830596923828125,"text":"642"},{"top":452.08167,"left":409.85632,"width":51.365631103515625,"height":22.830596923828125,"text":"△0"},{"top":452.08167,"left":461.22195,"width":51.367645263671875,"height":22.830596923828125,"text":"642"}],[{"top":474.91226,"left":67.41156,"width":85.61141967773438,"height":14.26776123046875,"text":"当期変動額合計"},{"top":474.91226,"left":153.02298,"width":51.367401123046875,"height":14.26776123046875,"text":"556"}
,{"top":474.91226,"left":204.39038,"width":51.36669921875,"height":14.26776123046875,"text":"80"},{"top":474.91226,"left":255.75708,"width":51.3656005859375,"height":14.26776123046875,"text":"5"},{"top":474.91226,"left":307.12268,"width":51.366668701171875,"height":14.26776123046875,"text":"―"},{"top":474.91226,"left":358.48935,"width":51.366973876953125,"height":14.26776123046875,"text":"642"},{"top":474.91226,"left":409.85632,"width":51.365631103515625,"height":14.26776123046875,"text":"△0"},{"top":474.91226,"left":461.22195,"width":51.367645263671875,"height":14.26776123046875,"text":"1,471"}],[{"top":489.18002,"left":67.41156,"width":85.61141967773438,"height":14.2696533203125,"text":"当期末残高"},{"top":489.18002,"left":153.02298,"width":51.367401123046875,"height":14.2696533203125,"text":"△113"},{"top":489.18002,"left":204.39038,"width":51.36669921875,"height":14.2696533203125,"text":"142"},{"top":489.18002,"left":255.75708,"width":51.3656005859375,"height":14.2696533203125,"text":"△104"},{"top":489.18002,"left":307.12268,"width":51.366668701171875,"height":14.2696533203125,"text":"―"},{"top":489.18002,"left":358.48935,"width":51.366973876953125,"height":14.2696533203125,"text":"△75"},{"top":489.18002,"left":409.85632,"width":51.365631103515625,"height":14.2696533203125,"text":"245"},{"top":489.18002,"left":461.22195,"width":51.367645263671875,"height":14.2696533203125,"text":"18,683"}]]}] \ No newline at end of file 
+[{"extraction_method":"lattice","page_number":1,"top":111.79087,"left":67.41156,"width":342.44476318359375,"height":174.0704345703125,"right":409.85632,"bottom":285.8613,"data":[[{"top":111.79087,"left":67.41156,"width":85.61141967773438,"height":28.536293029785156,"text":""},{"top":111.79087,"left":153.02298,"width":256.8333435058594,"height":14.267105102539062,"text":"株主資本"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":126.057976,"left":153.02298,"width":51.367401123046875,"height":14.269187927246094,"text":"資本金"},{"top":126.057976,"left":204.39038,"width":51.36669921875,"height":14.269187927246094,"text":"資本剰余金"},{"top":126.057976,"left":255.75708,"width":51.3656005859375,"height":14.269187927246094,"text":"利益剰余金"},{"top":126.057976,"left":307.12268,"width":51.366668701171875,"height":14.269187927246094,"text":"自己株式"},{"top":126.057976,"left":358.48935,"width":51.366973876953125,"height":14.269187927246094,"text":"株主資本合計"}],[{"top":140.32716,"left":67.41156,"width":85.61141967773438,"height":14.265777587890625,"text":"当期首残高"},{"top":140.32716,"left":153.02298,"width":51.367401123046875,"height":14.265777587890625,"text":"5,664"},{"top":140.32716,"left":204.39038,"width":51.36669921875,"height":14.265777587890625,"text":"749"},{"top":140.32716,"left":255.75708,"width":51.3656005859375,"height":14.265777587890625,"text":"12,017"},{"top":140.32716,"left":307.12268,"width":51.366668701171875,"height":14.265777587890625,"text":"△747"},{"top":140.32716,"left":358.48935,"width":51.366973876953125,"height":14.265777587890625,"text":"17,683"}],[{"top":154.59294,"left":67.41156,"width":85.61141967773438,"height":14.26910400390625,"text":"当期変動額"},{"top":154.59294,"left":153.02298,"width":51.367401123046875,"height":14.26910
400390625,"text":""},{"top":154.59294,"left":204.39038,"width":51.36669921875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":255.75708,"width":51.3656005859375,"height":14.26910400390625,"text":""},{"top":154.59294,"left":307.12268,"width":51.366668701171875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":358.48935,"width":51.366973876953125,"height":14.26910400390625,"text":""}],[{"top":168.86205,"left":67.41156,"width":85.61141967773438,"height":14.268997192382812,"text":"剰余金の配当"},{"top":168.86205,"left":153.02298,"width":51.367401123046875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":204.39038,"width":51.36669921875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":255.75708,"width":51.3656005859375,"height":14.268997192382812,"text":"△525"},{"top":168.86205,"left":307.12268,"width":51.366668701171875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":358.48935,"width":51.366973876953125,"height":14.268997192382812,"text":"△525"}],[{"top":183.13104,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期純利益"},{"top":183.13104,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":"1,269"},{"top":183.13104,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":"1,269"}],[{"top":197.4002,"left":67.41156,"width":85.61141967773438,"height":14.268936157226562,"text":"自己株式の取得"},{"top":197.4002,"left":153.02298,"width":51.367401123046875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":204.39038,"width":51.36669921875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":255.75708,"width":51.3656005859375,"height":14.26893615722656
2,"text":""},{"top":197.4002,"left":307.12268,"width":51.366668701171875,"height":14.268936157226562,"text":"△0"},{"top":197.4002,"left":358.48935,"width":51.366973876953125,"height":14.268936157226562,"text":"△0"}],[{"top":211.66914,"left":67.41156,"width":85.61141967773438,"height":22.82952880859375,"text":"持分法の適用範囲\rの変動"},{"top":211.66914,"left":153.02298,"width":51.367401123046875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":204.39038,"width":51.36669921875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":255.75708,"width":51.3656005859375,"height":22.82952880859375,"text":"85"},{"top":211.66914,"left":307.12268,"width":51.366668701171875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":358.48935,"width":51.366973876953125,"height":22.82952880859375,"text":"85"}],[{"top":234.49867,"left":67.41156,"width":85.61141967773438,"height":22.829910278320312,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":234.49867,"left":153.02298,"width":51.367401123046875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":204.39038,"width":51.36669921875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":255.75708,"width":51.3656005859375,"height":22.829910278320312,"text":""},{"top":234.49867,"left":307.12268,"width":51.366668701171875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":358.48935,"width":51.366973876953125,"height":22.829910278320312,"text":""}],[{"top":257.32858,"left":67.41156,"width":85.61141967773438,"height":14.2686767578125,"text":"当期変動額合計"},{"top":257.32858,"left":153.02298,"width":51.367401123046875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":204.39038,"width":51.36669921875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":255.75708,"width":51.3656005859375,"height":14.2686767578125,"text":"829"},{"top":257.32858,"left":307.12268,"width":51.366668701171875,"height":14.2686767578125,"text":"△0"},{"top":257.32858,"left":358.48935,"width":51.366973876953125,"heigh
t":14.2686767578125,"text":"829"}],[{"top":271.59726,"left":67.41156,"width":85.61141967773438,"height":14.2640380859375,"text":"当期末残高"},{"top":271.59726,"left":153.02298,"width":51.367401123046875,"height":14.2640380859375,"text":"5,664"},{"top":271.59726,"left":204.39038,"width":51.36669921875,"height":14.2640380859375,"text":"749"},{"top":271.59726,"left":255.75708,"width":51.3656005859375,"height":14.2640380859375,"text":"12,846"},{"top":271.59726,"left":307.12268,"width":51.366668701171875,"height":14.2640380859375,"text":"△747"},{"top":271.59726,"left":358.48935,"width":51.366973876953125,"height":14.2640380859375,"text":"18,512"}]]},{"extraction_method":"lattice","page_number":1,"top":312.25272,"left":67.41156,"width":445.17803955078125,"height":191.19696044921875,"right":512.5896,"bottom":503.44968,"data":[[{"top":312.25272,"left":67.41156,"width":85.61141967773438,"height":45.658233642578125,"text":""},{"top":312.25272,"left":153.02298,"width":256.8333435058594,"height":14.267730712890625,"text":"その他の包括利益累計額"},{"top":312.25272,"left":409.85632,"width":51.365631103515625,"height":45.658233642578125,"text":"少数株主持分"},{"top":312.25272,"left":461.22195,"width":51.367645263671875,"height":45.658233642578125,"text":"純資産合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":326.52045,"left":153.02298,"width":51.367401123046875,"height":31.3905029296875,"text":"その他有価証券\r評価差額金"},{"top":326.52045,"left":204.39038,"width":51.36669921875,"height":31.3905029296875,"text":"繰延ヘッジ\r損益"},{"top":326.52045,"left":255.75708,"width":51.3656005859375,"height":31.3905029296875,"text":"為替換算\r調整勘定"},{"top":326.52045,"left":307.12268,"width":51.366668701171875,"height":31.3905029296875,"text":"退職給付に係る\r調整累計額"},{"top":326.52045,"left":358.4
8935,"width":51.366973876953125,"height":31.3905029296875,"text":"その他の\r包括利益\r累計額合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":357.91095,"left":67.41156,"width":85.61141967773438,"height":14.268768310546875,"text":"当期首残高"},{"top":357.91095,"left":153.02298,"width":51.367401123046875,"height":14.268768310546875,"text":"△669"},{"top":357.91095,"left":204.39038,"width":51.36669921875,"height":14.268768310546875,"text":"61"},{"top":357.91095,"left":255.75708,"width":51.3656005859375,"height":14.268768310546875,"text":"△109"},{"top":357.91095,"left":307.12268,"width":51.366668701171875,"height":14.268768310546875,"text":"―"},{"top":357.91095,"left":358.48935,"width":51.366973876953125,"height":14.268768310546875,"text":"△717"},{"top":357.91095,"left":409.85632,"width":51.365631103515625,"height":14.268768310546875,"text":"246"},{"top":357.91095,"left":461.22195,"width":51.367645263671875,"height":14.268768310546875,"text":"17,212"}],[{"top":372.17972,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期変動額"},{"top":372.17972,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":""},{"top":372.17972,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":""},{"top":372.17972,"left":409.85632,"width":51.365631103515625,"height":14.2691650390625,"text":""},{"top":372.17972,"left":461.22195,"width":51.367645263671875,"height":14.2691650390625,"text":""}],[{"top":386.44888,"left":67.41156,"width":85.61141967773438,"height":14.268646240234375,"text":"剰余金の配当"},{"top":386.44888,"left":153.02298,"width":51.367401123046875,"height":14.2686462402343
75,"text":""},{"top":386.44888,"left":204.39038,"width":51.36669921875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":255.75708,"width":51.3656005859375,"height":14.268646240234375,"text":""},{"top":386.44888,"left":307.12268,"width":51.366668701171875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":358.48935,"width":51.366973876953125,"height":14.268646240234375,"text":""},{"top":386.44888,"left":409.85632,"width":51.365631103515625,"height":14.268646240234375,"text":""},{"top":386.44888,"left":461.22195,"width":51.367645263671875,"height":14.268646240234375,"text":"△525"}],[{"top":400.71753,"left":67.41156,"width":85.61141967773438,"height":14.26812744140625,"text":"当期純利益"},{"top":400.71753,"left":153.02298,"width":51.367401123046875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":204.39038,"width":51.36669921875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":255.75708,"width":51.3656005859375,"height":14.26812744140625,"text":""},{"top":400.71753,"left":307.12268,"width":51.366668701171875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":358.48935,"width":51.366973876953125,"height":14.26812744140625,"text":""},{"top":400.71753,"left":409.85632,"width":51.365631103515625,"height":14.26812744140625,"text":""},{"top":400.71753,"left":461.22195,"width":51.367645263671875,"height":14.26812744140625,"text":"1,269"}],[{"top":414.98566,"left":67.41156,"width":85.61141967773438,"height":14.26678466796875,"text":"自己株式の取得"},{"top":414.98566,"left":153.02298,"width":51.367401123046875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":204.39038,"width":51.36669921875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":255.75708,"width":51.3656005859375,"height":14.26678466796875,"text":""},{"top":414.98566,"left":307.12268,"width":51.366668701171875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":358.48935,"width":51.366973876953125,"height":14.26678466796875,"text":"
"},{"top":414.98566,"left":409.85632,"width":51.365631103515625,"height":14.26678466796875,"text":""},{"top":414.98566,"left":461.22195,"width":51.367645263671875,"height":14.26678466796875,"text":"△0"}],[{"top":429.25244,"left":67.41156,"width":85.61141967773438,"height":22.8292236328125,"text":"持分法の適用範囲\rの変動"},{"top":429.25244,"left":153.02298,"width":51.367401123046875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":204.39038,"width":51.36669921875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":255.75708,"width":51.3656005859375,"height":22.8292236328125,"text":""},{"top":429.25244,"left":307.12268,"width":51.366668701171875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":358.48935,"width":51.366973876953125,"height":22.8292236328125,"text":""},{"top":429.25244,"left":409.85632,"width":51.365631103515625,"height":22.8292236328125,"text":""},{"top":429.25244,"left":461.22195,"width":51.367645263671875,"height":22.8292236328125,"text":"85"}],[{"top":452.08167,"left":67.41156,"width":85.61141967773438,"height":22.830596923828125,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":452.08167,"left":153.02298,"width":51.367401123046875,"height":22.830596923828125,"text":"556"},{"top":452.08167,"left":204.39038,"width":51.36669921875,"height":22.830596923828125,"text":"80"},{"top":452.08167,"left":255.75708,"width":51.3656005859375,"height":22.830596923828125,"text":"5"},{"top":452.08167,"left":307.12268,"width":51.366668701171875,"height":22.830596923828125,"text":"―"},{"top":452.08167,"left":358.48935,"width":51.366973876953125,"height":22.830596923828125,"text":"642"},{"top":452.08167,"left":409.85632,"width":51.365631103515625,"height":22.830596923828125,"text":"△0"},{"top":452.08167,"left":461.22195,"width":51.367645263671875,"height":22.830596923828125,"text":"642"}],[{"top":474.91226,"left":67.41156,"width":85.61141967773438,"height":14.26776123046875,"text":"当期変動額合計"},{"top":474.91226,"left":153.02298,"width":51.367401123046875,"height
":14.26776123046875,"text":"556"},{"top":474.91226,"left":204.39038,"width":51.36669921875,"height":14.26776123046875,"text":"80"},{"top":474.91226,"left":255.75708,"width":51.3656005859375,"height":14.26776123046875,"text":"5"},{"top":474.91226,"left":307.12268,"width":51.366668701171875,"height":14.26776123046875,"text":"―"},{"top":474.91226,"left":358.48935,"width":51.366973876953125,"height":14.26776123046875,"text":"642"},{"top":474.91226,"left":409.85632,"width":51.365631103515625,"height":14.26776123046875,"text":"△0"},{"top":474.91226,"left":461.22195,"width":51.367645263671875,"height":14.26776123046875,"text":"1,471"}],[{"top":489.18002,"left":67.41156,"width":85.61141967773438,"height":14.2696533203125,"text":"当期末残高"},{"top":489.18002,"left":153.02298,"width":51.367401123046875,"height":14.2696533203125,"text":"△113"},{"top":489.18002,"left":204.39038,"width":51.36669921875,"height":14.2696533203125,"text":"142"},{"top":489.18002,"left":255.75708,"width":51.3656005859375,"height":14.2696533203125,"text":"△104"},{"top":489.18002,"left":307.12268,"width":51.366668701171875,"height":14.2696533203125,"text":"―"},{"top":489.18002,"left":358.48935,"width":51.366973876953125,"height":14.2696533203125,"text":"△75"},{"top":489.18002,"left":409.85632,"width":51.365631103515625,"height":14.2696533203125,"text":"245"},{"top":489.18002,"left":461.22195,"width":51.367645263671875,"height":14.2696533203125,"text":"18,683"}]]}] diff --git a/src/test/resources/technology/tabula/npe_issue_206.pdf b/src/test/resources/technology/tabula/npe_issue_206.pdf new file mode 100644 index 00000000..352e77ba Binary files /dev/null and b/src/test/resources/technology/tabula/npe_issue_206.pdf differ