diff --git a/.travis.yml b/.travis.yml index 2a5ffc72..7397abbf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,9 @@ language: java -script: mvn test +install: mvn install -DskipTests=true -Dmaven.javadoc.skip=true -Dgpg.skip=true -B -V +script: mvn test -Dgpg.skip=true jdk: - - oraclejdk7 - - openjdk7 - - oraclejdk8 + - openjdk8 + - openjdk9 + - openjdk10 + - openjdk11 sudo: false - - - diff --git a/LICENSE b/LICENSE index 06bdd025..4beb04ee 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2014 Manuel Aristarán +Copyright (c) 2014-2016 Manuel Aristarán Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. diff --git a/README.md b/README.md index 55cb02e1..2a08d3ac 100644 --- a/README.md +++ b/README.md @@ -1,66 +1,116 @@ -tabula-java [![Build Status](https://travis-ci.org/tabulapdf/tabula-java.svg?branch=master)](https://travis-ci.org/tabulapdf/tabula-java) [![Join the chat at https://gitter.im/tabulapdf/tabula-java](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/tabulapdf/tabula-java?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +tabula-java [![Build Status](https://travis-ci.org/tabulapdf/tabula-java.svg?branch=master)](https://travis-ci.org/tabulapdf/tabula-java) [![Build status](https://ci.appveyor.com/api/projects/status/l5gym1mjhrd2v8yn?svg=true)](https://ci.appveyor.com/project/jazzido/tabula-java) =========== -`tabula-java` is a library for extracting tables from PDF files. 
It is a Java rewrite of [`tabula-extractor`](http://github.com/tabulapdf/tabula-extractor), that is a thin wrapper around this library. +`tabula-java` is a library for extracting tables from PDF files — it is the table extraction engine that powers [Tabula](http://tabula.technology/) ([repo](http://github.com/tabulapdf/tabula)). You can use `tabula-java` as a command-line tool to programmatically extract tables from PDFs. + +© 2014-2020 Manuel Aristarán. Available under MIT License. See [`LICENSE`](LICENSE). ## Download Download a version of the tabula-java's jar, with all dependencies included, that works on Mac, Windows and Linux from our [releases page](../../releases). -## Build instructions - -Clone this repo and run: - -``` -mvn clean compile assembly:single -``` - -## Examples +## Usage Examples `tabula-java` provides a command line application: ``` -$ java -jar ./target/tabula-0.8.0-jar-with-dependencies.jar --help - -usage: tabula [-a ] [-c ] [-d] [-f ] [-g] [-h] [-i] - [-n] [-o ] [-p ] [-r] [-s ] [-u] [-v] +$ java -jar target/tabula-1.0.2-jar-with-dependencies.jar --help +usage: tabula [-a ] [-b ] [-c ] [-f ] + [-g] [-h] [-i] [-l] [-n] [-o ] [-p ] [-r] [-s + ] [-t] [-u] [-v] Tabula helps you extract tables from PDFs - -a,--area Portion of the page to analyze - (top,left,bottom,right). Example: --area - 269.875,12.75,790.5,561. Default is entire - page + + -a,--area -a/--area = Portion of the page to analyze. + Example: --area 269.875,12.75,790.5,561. + Accepts top,left,bottom,right i.e. y1,x1,y2,x2 + where all values are in points relative to the + top left corner. If all values are between + 0-100 (inclusive) and preceded by '%', input + will be taken as % of actual height or width + of the page. Example: --area %0,0,100,50. To + specify multiple areas, -a option should be + repeated. Default is entire page + -b,--batch Convert all .pdfs in the provided directory. -c,--columns X coordinates of column boundaries. 
Example - --columns 10.1,20.2,30.3 - -d,--debug Print detected table areas instead of - processing. + --columns 10.1,20.2,30.3. If all values are + between 0-100 (inclusive) and preceded by '%', + input will be taken as % of actual width of + the page. Example: --columns %25,50,80.6 -f,--format Output format: (CSV,TSV,JSON). Default: CSV -g,--guess Guess the portion of the page to analyze per page. -h,--help Print this help text. -i,--silent Suppress all stderr output. - -n,--no-spreadsheet Force PDF not to be extracted using - spreadsheet-style extraction (if there are - ruling lines separating each cell, as in a PDF - of an Excel spreadsheet) + -l,--lattice Force PDF to be extracted using lattice-mode + extraction (if there are ruling lines + separating each cell, as in a PDF of an Excel + spreadsheet) + -n,--no-spreadsheet [Deprecated in favor of -t/--stream] Force PDF + not to be extracted using spreadsheet-style + extraction (if there are no ruling lines + separating each cell) -o,--outfile Write output to instead of STDOUT. Default: - -p,--pages Comma separated list of ranges, or all. Examples: --pages 1-3,5-7, --pages 3 or --pages all. Default is --pages 1 - -r,--spreadsheet Force PDF to be extracted using - spreadsheet-style extraction (if there are - ruling lines separating each cell, as in a PDF - of an Excel spreadsheet) + -r,--spreadsheet [Deprecated in favor of -l/--lattice] Force + PDF to be extracted using spreadsheet-style + extraction (if there are ruling lines + separating each cell, as in a PDF of an Excel + spreadsheet) -s,--password Password to decrypt document. Default is empty + -t,--stream Force PDF to be extracted using stream-mode + extraction (if there are no ruling lines + separating each cell) -u,--use-line-returns Use embedded line returns in cells. (Only in spreadsheet mode.) -v,--version Print version and exit. 
- ``` -It also includes a debugging tool, run `java -cp ./target/tabula-0.8.0-jar-with-dependencies.jar technology.tabula.debug.Debug -h` for the available options. +It also includes a debugging tool, run `java -cp ./target/tabula-1.0.2-jar-with-dependencies.jar technology.tabula.debug.Debug -h` for the available options. You can also integrate `tabula-java` with any JVM language. For Java examples, see the [`tests`](src/test/java/technology/tabula/) folder. -© 2014 Manuel Aristarán. Available under MIT License. See [`LICENSE`](LICENSE). +JVM start-up time is a lot of the cost of the `tabula` command, so if you're trying to extract many tables from PDFs, you have a few options for speeding it up: + + - the [drip](https://github.com/ninjudd/drip) utility + - the [Ruby](http://github.com/tabulapdf/tabula-extractor), [Python](https://github.com/chezou/tabula-py), [R](https://github.com/leeper/tabulizer), and [Node.js](https://github.com/ezodude/tabula-js) bindings + - writing your own program in any JVM language (Java, JRuby, Scala) that imports tabula-java. + - waiting for us to implement an API/server-style system (it's on the [roadmap](https://github.com/tabulapdf/tabula-api)) + +## Building from Source + +Clone this repo and run: + +``` +mvn clean compile assembly:single +``` + +## Contributing + +Interested in helping out? We'd love to have your help! + +You can help by: + +- [Reporting a bug](https://github.com/tabulapdf/tabula-java/issues). +- Adding or editing documentation. +- Contributing code via a Pull Request. +- Spreading the word about `tabula-java` to people who might be able to benefit from using it. + +### Backers + +You can also support our continued work on `tabula-java` with a one-time or monthly donation [on OpenCollective](https://opencollective.com/tabulapdf#support). 
Organizations who use `tabula-java` can also [sponsor the project](https://opencollective.com/tabulapdf#support) for acknowledgement on [our official site](http://tabula.technology/) and this README. + +Special thanks to the following users and organizations for generously supporting Tabula with donations and grants: + + + + + + + + +The John S. and James L. Knight Foundation +The Shuttleworth Foundation diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 00000000..b2c4a0ae --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,21 @@ +version: '{build}' +install: + - ps: | + Add-Type -AssemblyName System.IO.Compression.FileSystem + if (!(Test-Path -Path "C:\maven\apache-maven-3.5.4" )) { + (new-object System.Net.WebClient).DownloadFile( + 'http://www-us.apache.org/dist/maven/maven-3/3.5.4/binaries/apache-maven-3.5.4-bin.zip', + 'C:\maven-bin.zip' + ) + [System.IO.Compression.ZipFile]::ExtractToDirectory("C:\maven-bin.zip", "C:\maven") + } + - cmd: SET PATH=C:\maven\apache-maven-3.5.4\bin;%JAVA_HOME%\bin;%PATH% + - cmd: SET MAVEN_OPTS=-Xmx2g + - cmd: SET JAVA_OPTS=-Xmx2g +build_script: + - mvn clean package -B -DskipTests -Dmaven.javadoc.skip=true +test_script: + - mvn install -B -Dmaven.javadoc.skip=true -Dgpg.skip +cache: + - C:\maven -> appveyor.yml + - C:\Users\appveyor\.m2 -> appveyor.yml diff --git a/pom.xml b/pom.xml index e7ac6111..0f53c052 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 technology.tabula tabula - 0.9.0 + 1.0.5-SNAPSHOT Tabula Extract tables from PDF files http://github.com/tabulapdf/tabula-java @@ -32,21 +32,26 @@ + + + snapshots + https://repository.apache.org/content/repositories/snapshots/ + + false + + + true + + + + scm:git:git@github.com:tabulapdf/tabula-java.git scm:git:git@github.com:tabulapdf/tabula-java.git git@github.com:tabulapdf/tabula-java.git - tabula-0.9.0 + v1.0.2 - - - sonatype - Sonatype repository - https://oss.sonatype.org/content/repositories/snapshots/ - - - UTF-8 UTF-8 @@ -58,17 +63,16 @@ 
https://oss.sonatype.org/content/repositories/snapshots - ossrh - https://oss.sonatype.org/service/local/staging/deploy/maven2/ - + ossrh + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + - org.apache.maven.plugins maven-javadoc-plugin - 2.10.3 + 3.2.0 true @@ -81,7 +85,7 @@ org.sonatype.plugins nexus-staging-maven-plugin - 1.6.3 + 1.6.8 true ossrh @@ -93,7 +97,7 @@ org.apache.maven.plugins maven-source-plugin - 2.2.1 + 3.2.1 attach-sources @@ -103,23 +107,26 @@ - - org.apache.maven.plugins - maven-javadoc-plugin - 2.9.1 - - - attach-javadocs - - jar - - - + maven-compiler-plugin - 3.1 + 3.8.1 - 1.6 - 1.6 + 1.8 + 1.8 @@ -149,6 +162,25 @@ jar-with-dependencies + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.22.2 + + + -Xms1024m -Xmx2048m + + + + + org.apache.maven.plugins + maven-eclipse-plugin + 2.10 + + true + true @@ -158,11 +190,14 @@ release - + org.apache.maven.plugins maven-javadoc-plugin - 2.9.1 + 3.2.0 + + 8 + attach-javadocs @@ -175,7 +210,7 @@ org.apache.maven.plugins maven-source-plugin - 2.2.1 + 3.2.1 attach-sources @@ -188,7 +223,7 @@ org.apache.maven.plugins maven-gpg-plugin - 1.5 + 1.6 sign-artifacts @@ -206,64 +241,70 @@ - net.sf.jsi - jsi - 1.1.0-SNAPSHOT + org.locationtech.jts + jts-core + 1.17.0 org.slf4j slf4j-api - 1.7.20 + 1.7.30 org.slf4j slf4j-simple - 1.7.20 + 1.7.30 org.apache.pdfbox pdfbox - 1.8.10 - - - - org.bouncycastle - bcprov-jdk15 - 1.46 - - - - org.bouncycastle - bcmail-jdk15 - 1.46 + 2.0.21 junit junit - 4.11 + 4.13 test commons-cli commons-cli - 1.3.1 + 1.4 org.apache.commons commons-csv - 1.2 + 1.8 com.google.code.gson gson - 2.6.2 + 2.8.6 + + + + com.github.jai-imageio + jai-imageio-core + 1.4.0 + + + + com.github.jai-imageio + jai-imageio-jpeg2000 + 1.3.0 + + + + org.apache.pdfbox + jbig2-imageio + 3.0.3 diff --git a/src/main/java/technology/tabula/Cell.java b/src/main/java/technology/tabula/Cell.java index b7e568db..5757e729 100644 --- a/src/main/java/technology/tabula/Cell.java +++ 
b/src/main/java/technology/tabula/Cell.java @@ -7,69 +7,70 @@ @SuppressWarnings("serial") public class Cell extends RectangularTextContainer { - private boolean spanning; - private boolean placeholder; - private List textElements; - - public Cell(float top, float left, float width, float height) { - super(top, left, width, height); - this.setPlaceholder(false); - this.setSpanning(false); - this.setTextElements(new ArrayList()); - } - - public Cell(Point2D topLeft, Point2D bottomRight) { - super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY())); - this.setPlaceholder(false); - this.setSpanning(false); - this.setTextElements(new ArrayList()); - } - - @Override - public String getText(boolean useLineReturns) { - if (this.textElements.size() == 0) { - return ""; - } - StringBuilder sb = new StringBuilder(); - Collections.sort(this.textElements); - double curTop = this.textElements.get(0).getTop(); - for (TextChunk tc: this.textElements) { - if (useLineReturns && tc.getTop() > curTop) { - sb.append('\r'); - } - sb.append(tc.getText()); - curTop = tc.getTop(); - } - return sb.toString().trim(); - } + public static char CELL_DIVIDER = '\r'; - public String getText() { - return getText(true); - } + public Cell(float top, float left, float width, float height) { + super(top, left, width, height); + this.setPlaceholder(false); + this.setSpanning(false); + this.setTextElements(new ArrayList()); + } - public boolean isSpanning() { - return spanning; - } + public Cell(Point2D topLeft, Point2D bottomRight) { + super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY())); + this.setPlaceholder(false); + this.setSpanning(false); + this.setTextElements(new ArrayList()); + } - public void setSpanning(boolean spanning) { - this.spanning = spanning; - } + private boolean spanning; + private boolean 
placeholder; + private List textElements; - public boolean isPlaceholder() { - return placeholder; - } + @Override + public String getText(boolean useLineReturns) { + if (this.textElements.size() == 0) { + return ""; + } + StringBuilder sb = new StringBuilder(); + Collections.sort(this.textElements, Rectangle.ILL_DEFINED_ORDER); + double curTop = this.textElements.get(0).getTop(); + for (TextChunk tc : this.textElements) { + if (useLineReturns && tc.getTop() > curTop) { + sb.append(CELL_DIVIDER); + } + sb.append(tc.getText()); + curTop = tc.getTop(); + } + return sb.toString().trim(); + } - public void setPlaceholder(boolean placeholder) { - this.placeholder = placeholder; - } + public String getText() { + return getText(true); + } + public boolean isSpanning() { + return spanning; + } - public List getTextElements() { - return textElements; - } + public void setSpanning(boolean spanning) { + this.spanning = spanning; + } - public void setTextElements(List textElements) { - this.textElements = textElements; - } + public boolean isPlaceholder() { + return placeholder; + } + + public void setPlaceholder(boolean placeholder) { + this.placeholder = placeholder; + } + + public List getTextElements() { + return textElements; + } + + public void setTextElements(List textElements) { + this.textElements = textElements; + } } diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java index 7ec2d9d0..e940955d 100644 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ b/src/main/java/technology/tabula/CommandLineApp.java @@ -1,26 +1,25 @@ package technology.tabula; -import java.awt.geom.Point2D; import java.io.BufferedWriter; import java.io.File; +import java.io.FilenameFilter; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; -import java.util.Iterator; +import java.util.Collections; import java.util.List; import org.apache.commons.cli.CommandLine; import 
org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; -import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.DefaultParser; import org.apache.pdfbox.pdmodel.PDDocument; import technology.tabula.detectors.DetectionAlgorithm; import technology.tabula.detectors.NurminenDetectionAlgorithm; -import technology.tabula.detectors.SpreadsheetDetectionAlgorithm; import technology.tabula.extractors.BasicExtractionAlgorithm; import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; import technology.tabula.writers.CSVWriter; @@ -31,17 +30,45 @@ public class CommandLineApp { - private static String VERSION = "0.9.0"; - private static String VERSION_STRING = String.format("tabula %s (c) 2012-2016 Manuel Aristarán", VERSION); + private static String VERSION = "1.0.5"; + private static String VERSION_STRING = String.format("tabula %s (c) 2012-2020 Manuel Aristarán", VERSION); private static String BANNER = "\nTabula helps you extract tables from PDFs\n\n"; + private static final int RELATIVE_AREA_CALCULATION_MODE = 0; + private static final int ABSOLUTE_AREA_CALCULATION_MODE = 1; + + private Appendable defaultOutput; + private List> pageAreas; + private List pages; + private OutputFormat outputFormat; + private String password; + private TableExtractor tableExtractor; + private List verticalRulingPositions; + + public CommandLineApp(Appendable defaultOutput, CommandLine line) throws ParseException { + this.defaultOutput = defaultOutput; + this.pageAreas = CommandLineApp.whichAreas(line); + this.pages = CommandLineApp.whichPages(line); + this.outputFormat = CommandLineApp.whichOutputFormat(line); + this.tableExtractor = CommandLineApp.createExtractor(line); + + if (line.hasOption('s')) { + this.password = line.getOptionValue('s'); + } + if 
(line.hasOption('c')) { + this.verticalRulingPositions = parseFloatList(line.getOptionValue('c')); + } + + } + public static void main(String[] args) { - CommandLineParser parser = new GnuParser(); + Cell.CELL_DIVIDER = ' '; // terminal does not like \r + CommandLineParser parser = new DefaultParser(); try { // parse the command line arguments - CommandLine line = parser.parse(buildOptions(), args ); + CommandLine line = parser.parse(buildOptions(), args); if (line.hasOption('h')) { printHelp(); @@ -53,168 +80,217 @@ public static void main(String[] args) { System.exit(0); } - if (line.getArgs().length != 1) { - throw new ParseException("Need one filename\nTry --help for help"); - } - - new CommandLineApp(System.out).extractTables(line); - - } - catch( ParseException exp ) { + new CommandLineApp(System.out, line).extractTables(line); + } catch (ParseException exp) { System.err.println("Error: " + exp.getMessage()); System.exit(1); } System.exit(0); } - public CommandLineApp(Appendable defaultOutput) { - this.defaultOutput = defaultOutput; - } - public void extractTables(CommandLine line) throws ParseException { - File pdfFile = new File(line.getArgs()[0]); - if (!pdfFile.exists()) { - throw new ParseException("File does not exist"); - } - - OutputFormat of = OutputFormat.CSV; - if (line.hasOption('f')) { - try { - of = OutputFormat.valueOf(line.getOptionValue('f')); - } - catch (IllegalArgumentException e) { - throw new ParseException(String.format( - "format %s is illegal. 
Available formats: %s", - line.getOptionValue('f'), - Utils.join(",", OutputFormat.formatNames()))); + if (line.hasOption('m')) { + if (line.getArgs().length != 0) { + throw new ParseException("Filename specified with batch\nTry --help for help"); } + File pdfDirectory = new File(line.getOptionValue('m')); + if (!pdfDirectory.isDirectory()) { + throw new ParseException("Directory does not exist or is not a directory"); + } + extractDirectoryTables(line, pdfDirectory); + return; } - Appendable outFile = this.defaultOutput; - if (line.hasOption('o')) { - File file = new File(line.getOptionValue('o')); + if (line.getArgs().length != 1) { + throw new ParseException("Need exactly one filename\nTry --help for help"); + } - try { - file.createNewFile(); - outFile = new BufferedWriter(new FileWriter( - file.getAbsoluteFile())); - } catch (IOException e) { - throw new ParseException("Cannot create file " - + line.getOptionValue('o')); - } + File pdfFile = new File(line.getArgs()[0]); + if (!pdfFile.exists()) { + throw new ParseException("File does not exist"); } + extractFileTables(line, pdfFile); + } - Rectangle area = null; - if (line.hasOption('a')) { - List f = parseFloatList(line.getOptionValue('a')); - if (f.size() != 4) { - throw new ParseException("area parameters must be top,left,bottom,right"); + public void extractDirectoryTables(CommandLine line, File pdfDirectory) throws ParseException { + File[] pdfs = pdfDirectory.listFiles(new FilenameFilter() { + public boolean accept(File dir, String name) { + return name.endsWith(".pdf"); } - area = new Rectangle(f.get(0), f.get(1), f.get(3) - f.get(1), f.get(2) - f.get(0)); + }); + + for (File pdfFile : pdfs) { + File outputFile = new File(getOutputFilename(pdfFile)); + extractFileInto(pdfFile, outputFile); } + } - List verticalRulingPositions = null; - if (line.hasOption('c')) { - verticalRulingPositions = parseFloatList(line.getOptionValue('c')); + public void extractFileTables(CommandLine line, File pdfFile) throws 
ParseException { + if (!line.hasOption('o')) { + extractFile(pdfFile, this.defaultOutput); + return; } - String pagesOption = line.hasOption('p') ? line.getOptionValue('p') : "1"; - List pages = Utils.parsePagesOption(pagesOption); - ExtractionMethod method = whichExtractionMethod(line); - boolean useLineReturns = line.hasOption('u'); + File outputFile = new File(line.getOptionValue('o')); + extractFileInto(pdfFile, outputFile); + } + public void extractFileInto(File pdfFile, File outputFile) throws ParseException { + BufferedWriter bufferedWriter = null; try { + FileWriter fileWriter = new FileWriter(outputFile.getAbsoluteFile()); + bufferedWriter = new BufferedWriter(fileWriter); - PDDocument pdfDocument = PDDocument.load(pdfFile); - - ObjectExtractor oe = line.hasOption('s') ? - new ObjectExtractor(pdfDocument, line.getOptionValue('s')) : - new ObjectExtractor(pdfDocument); - BasicExtractionAlgorithm basicExtractor = new BasicExtractionAlgorithm(); - SpreadsheetExtractionAlgorithm spreadsheetExtractor = new SpreadsheetExtractionAlgorithm(); + outputFile.createNewFile(); + extractFile(pdfFile, bufferedWriter); + } catch (IOException e) { + throw new ParseException("Cannot create file " + outputFile); + } finally { + if (bufferedWriter != null) { + try { + bufferedWriter.close(); + } catch (IOException e) { + System.out.println("Error in closing the BufferedWriter" + e); + } + } + } + } - PageIterator pageIterator = pages == null ? oe.extract() : oe.extract(pages); - Page page; - List tables = new ArrayList
(); + private void extractFile(File pdfFile, Appendable outFile) throws ParseException { + PDDocument pdfDocument = null; + try { + pdfDocument = this.password == null ? PDDocument.load(pdfFile) : PDDocument.load(pdfFile, this.password); + PageIterator pageIterator = getPageIterator(pdfDocument); + List
tables = new ArrayList<>(); while (pageIterator.hasNext()) { - page = pageIterator.next(); - - if (area != null) { - page = page.getArea(area); + Page page = pageIterator.next(); + if (verticalRulingPositions != null) { + for (Float verticalRulingPosition: verticalRulingPositions) { + page.addRuling(new Ruling(0, verticalRulingPosition, 0.0f, (float) page.getHeight())); + } } - if (method == ExtractionMethod.DECIDE) { - method = spreadsheetExtractor.isTabular(page) ? ExtractionMethod.SPREADSHEET : ExtractionMethod.BASIC; - } - - switch(method) { - case BASIC: - if (line.hasOption('g')) { - // guess the page areas to extract using a detection algorithm - // currently we only have a detector that uses spreadsheets to find table areas - DetectionAlgorithm detector = new NurminenDetectionAlgorithm(); - List guesses = detector.detect(page); - - for (Rectangle guessRect : guesses) { - Page guess = page.getArea(guessRect); - tables.addAll(basicExtractor.extract(guess)); + if (pageAreas != null) { + for (Pair areaPair : pageAreas) { + Rectangle area = areaPair.getRight(); + if (areaPair.getLeft() == RELATIVE_AREA_CALCULATION_MODE) { + area = new Rectangle((float) (area.getTop() / 100 * page.getHeight()), + (float) (area.getLeft() / 100 * page.getWidth()), (float) (area.getWidth() / 100 * page.getWidth()), + (float) (area.getHeight() / 100 * page.getHeight())); } - } else { - tables.addAll(verticalRulingPositions == null ? 
basicExtractor.extract(page) : basicExtractor.extract(page, verticalRulingPositions)); + tables.addAll(tableExtractor.extractTables(page.getArea(area))); } - - break; - case SPREADSHEET: - // TODO add useLineReturns - tables.addAll(spreadsheetExtractor.extract(page)); - default: - break; + } else { + tables.addAll(tableExtractor.extractTables(page)); } } - writeTables(of, tables, outFile); - - + writeTables(tables, outFile); } catch (IOException e) { throw new ParseException(e.getMessage()); + } finally { + try { + if (pdfDocument != null) { + pdfDocument.close(); + } + } catch (IOException e) { + System.out.println("Error in closing pdf document" + e); + } } + } + private PageIterator getPageIterator(PDDocument pdfDocument) throws IOException { + ObjectExtractor extractor = new ObjectExtractor(pdfDocument); + return (pages == null) ? + extractor.extract() : + extractor.extract(pages); } - private void writeTables(OutputFormat format, List
tables, Appendable out) throws IOException { - Writer writer = null; - switch (format) { - case CSV: - writer = new CSVWriter(); - break; - case JSON: - writer = new JSONWriter(); - break; - case TSV: - writer = new TSVWriter(); - break; + // CommandLine parsing methods + + private static OutputFormat whichOutputFormat(CommandLine line) throws ParseException { + if (!line.hasOption('f')) { + return OutputFormat.CSV; + } + + try { + return OutputFormat.valueOf(line.getOptionValue('f')); + } catch (IllegalArgumentException e) { + throw new ParseException(String.format( + "format %s is illegal. Available formats: %s", + line.getOptionValue('f'), + Utils.join(",", OutputFormat.formatNames()))); } - writer.write(out, tables); } - private ExtractionMethod whichExtractionMethod(CommandLine line) { - ExtractionMethod rv = ExtractionMethod.DECIDE; - if (line.hasOption('r')) { - rv = ExtractionMethod.SPREADSHEET; + private static List> whichAreas(CommandLine line) throws ParseException { + if (!line.hasOption('a')) { + return null; } - else if (line.hasOption('n') || line.hasOption('c') || line.hasOption('g')) { - rv = ExtractionMethod.BASIC; + + String[] optionValues = line.getOptionValues('a'); + + List> areaList = new ArrayList>(); + for (String optionValue: optionValues) { + int areaCalculationMode = ABSOLUTE_AREA_CALCULATION_MODE; + int startIndex = 0; + if (optionValue.startsWith("%")) { + startIndex = 1; + areaCalculationMode = RELATIVE_AREA_CALCULATION_MODE; + } + List f = parseFloatList(optionValue.substring(startIndex)); + if (f.size() != 4) { + throw new ParseException("area parameters must be top,left,bottom,right optionally preceded by %"); + } + areaList.add(new Pair(areaCalculationMode, new Rectangle(f.get(0), f.get(1), f.get(3) - f.get(1), f.get(2) - f.get(0)))); } - return rv; + return areaList; } + private static List whichPages(CommandLine line) throws ParseException { + String pagesOption = line.hasOption('p') ? 
line.getOptionValue('p') : "1"; + return Utils.parsePagesOption(pagesOption); + } + private static ExtractionMethod whichExtractionMethod(CommandLine line) { + // -r/--spreadsheet [deprecated; use -l] or -l/--lattice + if (line.hasOption('r') || line.hasOption('l')) { + return ExtractionMethod.SPREADSHEET; + } + + // -n/--no-spreadsheet [deprecated; use -t] or -c/--columns or -t/--stream (-g/--guess alone is not checked here and does not force stream mode) + if (line.hasOption('n') || line.hasOption('c') || line.hasOption('t')) { + return ExtractionMethod.BASIC; + } + return ExtractionMethod.DECIDE; + } + + private static TableExtractor createExtractor(CommandLine line) throws ParseException { + TableExtractor extractor = new TableExtractor(); + extractor.setGuess(line.hasOption('g')); + extractor.setBluntGuess(line.hasOption('b')); + extractor.setMethod(CommandLineApp.whichExtractionMethod(line)); + extractor.setUseLineReturns(line.hasOption('u')); + + if (line.hasOption('c')) { + String optionString = line.getOptionValue('c'); + if (optionString.startsWith("%")) { + extractor.setVerticalRulingPositionsRelative(true); + optionString = optionString.substring(1); + } + extractor.setVerticalRulingPositions(parseFloatList(optionString)); + } + + return extractor; + } + + // utilities, etc. 
public static List parseFloatList(String option) throws ParseException { String[] f = option.split(","); - List rv = new ArrayList(); + List rv = new ArrayList<>(); try { for (int i = 0; i < f.length; i++) { rv.add(Float.parseFloat(f[i])); @@ -230,53 +306,219 @@ private static void printHelp() { formatter.printHelp("tabula", BANNER, buildOptions(), "", true); } - @SuppressWarnings("static-access") public static Options buildOptions() { Options o = new Options(); o.addOption("v", "version", false, "Print version and exit."); o.addOption("h", "help", false, "Print this help text."); o.addOption("g", "guess", false, "Guess the portion of the page to analyze per page."); - o.addOption("d", "debug", false, "Print detected table areas instead of processing"); - o.addOption("r", "spreadsheet", false, "Force PDF to be extracted using spreadsheet-style extraction (if there are ruling lines separating each cell, as in a PDF of an Excel spreadsheet)"); - o.addOption("n", "no-spreadsheet", false, "Force PDF not to be extracted using spreadsheet-style extraction (if there are ruling lines separating each cell, as in a PDF of an Excel spreadsheet)"); + o.addOption("b", "blunt", false, "Guess the portion of the page to analyze per page using blunt detection"); + o.addOption("r", "spreadsheet", false, "[Deprecated in favor of -l/--lattice] Force PDF to be extracted using spreadsheet-style extraction (if there are ruling lines separating each cell, as in a PDF of an Excel spreadsheet)"); + o.addOption("n", "no-spreadsheet", false, "[Deprecated in favor of -t/--stream] Force PDF not to be extracted using spreadsheet-style extraction (if there are no ruling lines separating each cell)"); + o.addOption("l", "lattice", false, "Force PDF to be extracted using lattice-mode extraction (if there are ruling lines separating each cell, as in a PDF of an Excel spreadsheet)"); + o.addOption("t", "stream", false, "Force PDF to be extracted using stream-mode extraction (if there are no ruling 
lines separating each cell)"); o.addOption("i", "silent", false, "Suppress all stderr output."); o.addOption("u", "use-line-returns", false, "Use embedded line returns in cells. (Only in spreadsheet mode.)"); - o.addOption("d", "debug", false, "Print detected table areas instead of processing."); - o.addOption(OptionBuilder.withLongOpt("outfile") - .withDescription("Write output to instead of STDOUT. Default: -") - .hasArg() - .withArgName("OUTFILE") - .create("o")); - o.addOption(OptionBuilder.withLongOpt("format") - .withDescription("Output format: (" + Utils.join(",", OutputFormat.formatNames()) + "). Default: CSV") - .hasArg() - .withArgName("FORMAT") - .create("f")); - o.addOption(OptionBuilder.withLongOpt("password") - .withDescription("Password to decrypt document. Default is empty") - .hasArg() - .withArgName("PASSWORD") - .create("s")); - o.addOption(OptionBuilder.withLongOpt("columns") - .withDescription("X coordinates of column boundaries. Example --columns 10.1,20.2,30.3") - .hasArg() - .withArgName("COLUMNS") - .create("c")); - o.addOption(OptionBuilder.withLongOpt("area") - .withDescription("Portion of the page to analyze (top,left,bottom,right). Example: --area 269.875,12.75,790.5,561. Default is entire page") - .hasArg() - .withArgName("AREA") - .create("a")); - o.addOption(OptionBuilder.withLongOpt("pages") - .withDescription("Comma separated list of ranges, or all. Examples: --pages 1-3,5-7, --pages 3 or --pages all. Default is --pages 1") - .hasArg() - .withArgName("PAGES") - .create("p")); + // o.addOption("d", "debug", false, "Print detected table areas instead of processing."); + o.addOption(Option.builder("m") + .longOpt("multiple") + .desc("Convert all .pdfs in the provided directory.") + .hasArg() + .argName("DIRECTORY") + .build()); + o.addOption(Option.builder("o") + .longOpt("outfile") + .desc("Write output to instead of STDOUT. 
Default: -") + .hasArg() + .argName("OUTFILE") + .build()); + o.addOption(Option.builder("f") + .longOpt("format") + .desc("Output format: (" + Utils.join(",", OutputFormat.formatNames()) + "). Default: CSV") + .hasArg() + .argName("FORMAT") + .build()); + o.addOption(Option.builder("s") + .longOpt("password") + .desc("Password to decrypt document. Default is empty") + .hasArg() + .argName("PASSWORD") + .build()); + o.addOption(Option.builder("c") + .longOpt("columns") + .desc("X coordinates of column boundaries. Example --columns 10.1,20.2,30.3. " + + "If all values are between 0-100 (inclusive) and preceded by '%', input will be taken as % of actual width of the page. " + + "Example: --columns %25,50,80.6") + .hasArg() + .argName("COLUMNS") + .build()); + o.addOption(Option.builder("a") + .longOpt("area") + .desc("-a/--area = Portion of the page to analyze. Example: --area 269.875,12.75,790.5,561. " + + "Accepts top,left,bottom,right i.e. y1,x1,y2,x2 where all values are in points relative to the top left corner. " + + "If all values are between 0-100 (inclusive) and preceded by '%', input will be taken as % of actual height or width of the page. " + + "Example: --area %0,0,100,50. To specify multiple areas, -a option should be repeated. Default is entire page") + .hasArg() + .argName("AREA") + .build()); + o.addOption(Option.builder("p") + .longOpt("pages") + .desc("Comma separated list of ranges, or all. Examples: --pages 1-3,5-7, --pages 3 or --pages all. 
Default is --pages 1") + .hasArg() + .argName("PAGES") + .build()); return o; } + private static class TableExtractor { + private boolean bluntGuess = false; + private boolean guess = false; + private boolean useLineReturns = false; + private BasicExtractionAlgorithm basicExtractor = new BasicExtractionAlgorithm(); + private SpreadsheetExtractionAlgorithm spreadsheetExtractor = new SpreadsheetExtractionAlgorithm(); + + private boolean verticalRulingPositionsRelative = false; + private List verticalRulingPositions = null; + + private ExtractionMethod method = ExtractionMethod.BASIC; + + public TableExtractor() { + } + + public void setVerticalRulingPositions(List positions) { + this.verticalRulingPositions = positions; + } + public void setVerticalRulingPositionsRelative(boolean relative) { + this.verticalRulingPositionsRelative = relative; + } + + public void setGuess(boolean guess) { + this.guess = guess; + } + + public void setBluntGuess(boolean bluntGuess) { + this.bluntGuess = bluntGuess; + } + + public void setUseLineReturns(boolean useLineReturns) { + this.useLineReturns = useLineReturns; + } + + public void setMethod(ExtractionMethod method) { + this.method = method; + } + + public List
extractTables(Page page) { + ExtractionMethod effectiveMethod = this.method; + if (effectiveMethod == ExtractionMethod.DECIDE) { + effectiveMethod = spreadsheetExtractor.isTabular(page) ? + ExtractionMethod.SPREADSHEET : + ExtractionMethod.BASIC; + } + switch (effectiveMethod) { + case BASIC: + return extractTablesBasic(page); + case SPREADSHEET: + return extractTablesSpreadsheet(page); + default: + return new ArrayList<>(); + } + } + + public List
extractTablesBasic(Page page) { + basicExtractor.setMixedTableExtractionEnabled(guess || bluntGuess); + if (guess || bluntGuess) { + // guess the page areas to extract using a detection algorithm + // currently we only have a detector that uses spreadsheets to find table areas + NurminenDetectionAlgorithm detector = new NurminenDetectionAlgorithm(); + List guesses = detector.detect(page); + if (bluntGuess) { + Rectangle guess = detector.bluntDetect(); + guesses = (guess == null) ? new ArrayList<>() : Collections.singletonList(guess); + } + List
tables = new ArrayList<>(); + + for (Rectangle guessRect : guesses) { + Page guess = page.getArea(guessRect); + tables.addAll(basicExtractor.extract(guess)); + } + return tables; + } + + if (verticalRulingPositions != null) { + List absoluteRulingPositions; + + if (this.verticalRulingPositionsRelative) { + // convert relative to absolute + absoluteRulingPositions = new ArrayList<>(verticalRulingPositions.size()); + for (float relative: this.verticalRulingPositions) { + float absolute = (float)(relative / 100.0 * page.getWidth()); + absoluteRulingPositions.add(absolute); + } + } else { + absoluteRulingPositions = this.verticalRulingPositions; + } + return basicExtractor.extract(page, absoluteRulingPositions); + } + + return basicExtractor.extract(page); + } + + public List
extractTablesSpreadsheet(Page page) { + // TODO add useLineReturns + List
tables = new ArrayList<>(); + + if (guess || bluntGuess) { + NurminenDetectionAlgorithm detector = new NurminenDetectionAlgorithm(); + List guesses = detector.detect(page); + if (bluntGuess) { + Rectangle guess = detector.bluntDetect(); + guesses = (guess == null) ? new ArrayList<>() : Collections.singletonList(guess); + } + for (Rectangle guessRect : guesses) { + Page guess = page.getArea(guessRect); + tables.addAll(spreadsheetExtractor.extract(guess)); + } + return tables; + } else { + return spreadsheetExtractor.extract(page); + } + } + } + + private void writeTables(List
tables, Appendable out) throws IOException { + Writer writer = null; + switch (outputFormat) { + case CSV: + writer = new CSVWriter(); + break; + case JSON: + writer = new JSONWriter(); + break; + case TSV: + writer = new TSVWriter(); + break; + } + writer.write(out, tables); + } + + private String getOutputFilename(File pdfFile) { + String extension = ".csv"; + switch (outputFormat) { + case CSV: + extension = ".csv"; + break; + case JSON: + extension = ".json"; + break; + case TSV: + extension = ".tsv"; + break; + } + return pdfFile.getPath().replaceFirst("(\\.pdf|)$", extension); + } + private enum OutputFormat { CSV, TSV, @@ -290,7 +532,6 @@ static String[] formatNames() { } return rv; } - } private enum ExtractionMethod { diff --git a/src/main/java/technology/tabula/DummyGraphics2D.java b/src/main/java/technology/tabula/DummyGraphics2D.java deleted file mode 100644 index 88026fec..00000000 --- a/src/main/java/technology/tabula/DummyGraphics2D.java +++ /dev/null @@ -1,461 +0,0 @@ -package technology.tabula; - -import java.awt.Color; -import java.awt.Composite; -import java.awt.Font; -import java.awt.FontMetrics; -import java.awt.Graphics; -import java.awt.Graphics2D; -import java.awt.GraphicsConfiguration; -import java.awt.Image; -import java.awt.Paint; -import java.awt.Rectangle; -import java.awt.RenderingHints; -import java.awt.RenderingHints.Key; -import java.awt.Shape; -import java.awt.Stroke; -import java.awt.font.FontRenderContext; -import java.awt.font.GlyphVector; -import java.awt.geom.AffineTransform; -import java.awt.image.BufferedImage; -import java.awt.image.BufferedImageOp; -import java.awt.image.ImageObserver; -import java.awt.image.RenderedImage; -import java.awt.image.renderable.RenderableImage; -import java.text.AttributedCharacterIterator; -import java.util.Map; - -public class DummyGraphics2D extends Graphics2D { - - @Override - public void addRenderingHints(Map hints) { - // TODO Auto-generated method stub - - } - - @Override - public void 
clip(Shape s) { - // TODO Auto-generated method stub - - } - - @Override - public void draw(Shape s) { - // TODO Auto-generated method stub - - } - - @Override - public void drawGlyphVector(GlyphVector g, float x, float y) { - // TODO Auto-generated method stub - - } - - @Override - public boolean drawImage(Image img, AffineTransform xform, ImageObserver obs) { - // TODO Auto-generated method stub - return false; - } - - @Override - public void drawImage(BufferedImage img, BufferedImageOp op, int x, int y) { - // TODO Auto-generated method stub - - } - - @Override - public void drawRenderableImage(RenderableImage img, AffineTransform xform) { - // TODO Auto-generated method stub - - } - - @Override - public void drawRenderedImage(RenderedImage img, AffineTransform xform) { - // TODO Auto-generated method stub - - } - - @Override - public void drawString(String str, int x, int y) { - // TODO Auto-generated method stub - - } - - @Override - public void drawString(String str, float x, float y) { - // TODO Auto-generated method stub - - } - - @Override - public void drawString(AttributedCharacterIterator iterator, int x, int y) { - // TODO Auto-generated method stub - - } - - @Override - public void drawString(AttributedCharacterIterator iterator, float x, - float y) { - // TODO Auto-generated method stub - - } - - @Override - public void fill(Shape s) { - // TODO Auto-generated method stub - - } - - @Override - public Color getBackground() { - // TODO Auto-generated method stub - return null; - } - - @Override - public Composite getComposite() { - // TODO Auto-generated method stub - return null; - } - - @Override - public GraphicsConfiguration getDeviceConfiguration() { - // TODO Auto-generated method stub - return null; - } - - @Override - public FontRenderContext getFontRenderContext() { - // TODO Auto-generated method stub - return null; - } - - @Override - public Paint getPaint() { - // TODO Auto-generated method stub - return null; - } - - @Override - public 
Object getRenderingHint(Key hintKey) { - // TODO Auto-generated method stub - return null; - } - - @Override - public RenderingHints getRenderingHints() { - // TODO Auto-generated method stub - return null; - } - - @Override - public Stroke getStroke() { - // TODO Auto-generated method stub - return null; - } - - @Override - public AffineTransform getTransform() { - // TODO Auto-generated method stub - return null; - } - - @Override - public boolean hit(Rectangle rect, Shape s, boolean onStroke) { - // TODO Auto-generated method stub - return false; - } - - @Override - public void rotate(double theta) { - // TODO Auto-generated method stub - - } - - @Override - public void rotate(double theta, double x, double y) { - // TODO Auto-generated method stub - - } - - @Override - public void scale(double sx, double sy) { - // TODO Auto-generated method stub - - } - - @Override - public void setBackground(Color color) { - // TODO Auto-generated method stub - - } - - @Override - public void setComposite(Composite comp) { - // TODO Auto-generated method stub - - } - - @Override - public void setPaint(Paint paint) { - // TODO Auto-generated method stub - - } - - @Override - public void setRenderingHint(Key hintKey, Object hintValue) { - // TODO Auto-generated method stub - - } - - @Override - public void setRenderingHints(Map hints) { - // TODO Auto-generated method stub - - } - - @Override - public void setStroke(Stroke s) { - // TODO Auto-generated method stub - - } - - @Override - public void setTransform(AffineTransform Tx) { - // TODO Auto-generated method stub - - } - - @Override - public void shear(double shx, double shy) { - // TODO Auto-generated method stub - - } - - @Override - public void transform(AffineTransform Tx) { - // TODO Auto-generated method stub - - } - - @Override - public void translate(int x, int y) { - // TODO Auto-generated method stub - - } - - @Override - public void translate(double tx, double ty) { - // TODO Auto-generated method stub - - } - - 
@Override - public void clearRect(int x, int y, int width, int height) { - // TODO Auto-generated method stub - - } - - @Override - public void clipRect(int x, int y, int width, int height) { - // TODO Auto-generated method stub - - } - - @Override - public void copyArea(int x, int y, int width, int height, int dx, int dy) { - // TODO Auto-generated method stub - - } - - @Override - public Graphics create() { - // TODO Auto-generated method stub - return null; - } - - @Override - public void dispose() { - // TODO Auto-generated method stub - - } - - @Override - public void drawArc(int x, int y, int width, int height, int startAngle, - int arcAngle) { - // TODO Auto-generated method stub - - } - - @Override - public boolean drawImage(Image img, int x, int y, ImageObserver observer) { - // TODO Auto-generated method stub - return false; - } - - @Override - public boolean drawImage(Image img, int x, int y, Color bgcolor, - ImageObserver observer) { - // TODO Auto-generated method stub - return false; - } - - @Override - public boolean drawImage(Image img, int x, int y, int width, int height, - ImageObserver observer) { - // TODO Auto-generated method stub - return false; - } - - @Override - public boolean drawImage(Image img, int x, int y, int width, int height, - Color bgcolor, ImageObserver observer) { - // TODO Auto-generated method stub - return false; - } - - @Override - public boolean drawImage(Image img, int dx1, int dy1, int dx2, int dy2, - int sx1, int sy1, int sx2, int sy2, ImageObserver observer) { - // TODO Auto-generated method stub - return false; - } - - @Override - public boolean drawImage(Image img, int dx1, int dy1, int dx2, int dy2, - int sx1, int sy1, int sx2, int sy2, Color bgcolor, - ImageObserver observer) { - // TODO Auto-generated method stub - return false; - } - - @Override - public void drawLine(int x1, int y1, int x2, int y2) { - // TODO Auto-generated method stub - - } - - @Override - public void drawOval(int x, int y, int width, int 
height) { - // TODO Auto-generated method stub - - } - - @Override - public void drawPolygon(int[] xPoints, int[] yPoints, int nPoints) { - // TODO Auto-generated method stub - - } - - @Override - public void drawPolyline(int[] xPoints, int[] yPoints, int nPoints) { - // TODO Auto-generated method stub - - } - - @Override - public void drawRoundRect(int x, int y, int width, int height, - int arcWidth, int arcHeight) { - // TODO Auto-generated method stub - - } - - @Override - public void fillArc(int x, int y, int width, int height, int startAngle, - int arcAngle) { - // TODO Auto-generated method stub - - } - - @Override - public void fillOval(int x, int y, int width, int height) { - // TODO Auto-generated method stub - - } - - @Override - public void fillPolygon(int[] xPoints, int[] yPoints, int nPoints) { - // TODO Auto-generated method stub - - } - - @Override - public void fillRect(int x, int y, int width, int height) { - // TODO Auto-generated method stub - - } - - @Override - public void fillRoundRect(int x, int y, int width, int height, - int arcWidth, int arcHeight) { - // TODO Auto-generated method stub - - } - - @Override - public Shape getClip() { - // TODO Auto-generated method stub - return null; - } - - @Override - public Rectangle getClipBounds() { - // TODO Auto-generated method stub - return null; - } - - @Override - public Color getColor() { - // TODO Auto-generated method stub - return null; - } - - @Override - public Font getFont() { - // TODO Auto-generated method stub - return null; - } - - @Override - public FontMetrics getFontMetrics(Font f) { - // TODO Auto-generated method stub - return null; - } - - @Override - public void setClip(Shape clip) { - // TODO Auto-generated method stub - - } - - @Override - public void setClip(int x, int y, int width, int height) { - // TODO Auto-generated method stub - - } - - @Override - public void setColor(Color c) { - // TODO Auto-generated method stub - - } - - @Override - public void setFont(Font font) 
{ - // TODO Auto-generated method stub - - } - - @Override - public void setPaintMode() { - // TODO Auto-generated method stub - - } - - @Override - public void setXORMode(Color c1) { - // TODO Auto-generated method stub - - } - -} diff --git a/src/main/java/technology/tabula/HasText.java b/src/main/java/technology/tabula/HasText.java index 6f375dbc..99455afb 100644 --- a/src/main/java/technology/tabula/HasText.java +++ b/src/main/java/technology/tabula/HasText.java @@ -1,7 +1,7 @@ package technology.tabula; public interface HasText { - - String getText(); + + String getText(); } diff --git a/src/main/java/technology/tabula/Line.java b/src/main/java/technology/tabula/Line.java index ed2f6895..31d10529 100644 --- a/src/main/java/technology/tabula/Line.java +++ b/src/main/java/technology/tabula/Line.java @@ -8,7 +8,7 @@ @SuppressWarnings("serial") public class Line extends Rectangle { - List textChunks = new ArrayList(); + List textChunks = new ArrayList<>(); public static final Character[] WHITE_SPACE_CHARS = { ' ', '\t', '\r', '\n', '\f' }; @@ -52,7 +52,7 @@ public void addTextChunk(TextChunk textChunk) { public String toString() { StringBuilder sb = new StringBuilder(); String s = super.toString(); - sb.append(s.substring(0, s.length() - 1)); + sb.append(s, 0, s.length() - 1); sb.append(",chunks="); for (TextChunk te: this.textChunks) { sb.append("'" + te.getText() + "', "); diff --git a/src/main/java/technology/tabula/ObjectExtractor.java b/src/main/java/technology/tabula/ObjectExtractor.java index b04927fb..3998ba6f 100644 --- a/src/main/java/technology/tabula/ObjectExtractor.java +++ b/src/main/java/technology/tabula/ObjectExtractor.java @@ -1,132 +1,49 @@ package technology.tabula; -import java.awt.Image; -import java.awt.Shape; -import java.awt.event.KeyEvent; -import java.awt.geom.AffineTransform; -import java.awt.geom.GeneralPath; -import java.awt.geom.Line2D; -import java.awt.geom.PathIterator; -import java.awt.geom.Point2D; -import 
java.awt.geom.Rectangle2D; import java.io.IOException; -import java.lang.reflect.Field; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import org.apache.pdfbox.exceptions.CryptographyException; -import org.apache.pdfbox.pdfviewer.PageDrawer; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.common.PDRectangle; -import org.apache.pdfbox.pdmodel.common.PDStream; -import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException; -import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial; -import org.apache.pdfbox.pdmodel.font.PDFont; -import org.apache.pdfbox.pdmodel.font.PDType3Font; -import org.apache.pdfbox.pdmodel.graphics.PDGraphicsState; -import org.apache.pdfbox.pdmodel.text.PDTextState; -import org.apache.pdfbox.util.TextPosition; -public class ObjectExtractor extends org.apache.pdfbox.pdfviewer.PageDrawer { +public class ObjectExtractor { - private static final char[] spaceLikeChars = { ' ', '-', '1', 'i' }; - private static final String NBSP = "\u00A0"; + private final PDDocument pdfDocument; - private float minCharWidth; - private float minCharHeight; - private List characters; - private List rulings; - private RectangleSpatialIndex spatialIndex; - private AffineTransform pageTransform; - public List clippingPaths; - private boolean debugClippingPaths; - private boolean extractRulingLines; - private final PDDocument pdf_document; - protected List pdf_document_pages; - - - public ObjectExtractor(PDDocument pdf_document) throws IOException { - this(pdf_document, null, true, false); + public ObjectExtractor(PDDocument pdfDocument) { + this.pdfDocument = pdfDocument; } - public ObjectExtractor(PDDocument pdf_document, boolean debugClippingPaths) throws IOException { - this(pdf_document, null, true, debugClippingPaths); - } - - public ObjectExtractor(PDDocument pdf_document, String password) throws IOException { - this(pdf_document, 
password, true, false); - } + protected Page extractPage(Integer pageNumber) throws IOException { - public ObjectExtractor(PDDocument pdf_document, String password, boolean extractRulingLines, boolean debugClippingPaths) - throws IOException { - super(); - - this.clippingPaths = new ArrayList(); - this.debugClippingPaths = debugClippingPaths; - this.extractRulingLines = extractRulingLines; - - this.initialize(); - - // patch PageDrawer: dummy Graphics2D context so some drawing operators don't complain - try { - Field field = PageDrawer.class.getDeclaredField("graphics"); - field.setAccessible(true); - field.set(this, new DummyGraphics2D()); - } - catch (Exception e1) { - } - - if (pdf_document.isEncrypted()) { - try { - pdf_document - .openProtection(new StandardDecryptionMaterial(password)); - } catch (BadSecurityHandlerException e) { - // TODO Auto-generated catch block - throw new IOException("BadSecurityHandler"); - } catch (CryptographyException e) { - throw new IOException("Document is encrypted"); - } + if (pageNumber > this.pdfDocument.getNumberOfPages() || pageNumber < 1) { + throw new java.lang.IndexOutOfBoundsException( + "Page number does not exist"); } - this.pdf_document = pdf_document; - this.pdf_document_pages = this.pdf_document.getDocumentCatalog() - .getAllPages(); - } + PDPage p = this.pdfDocument.getPage(pageNumber - 1); + ObjectExtractorStreamEngine se = new ObjectExtractorStreamEngine(p); + se.processPage(p); - protected Page extractPage(Integer page_number) throws IOException { - if (page_number > this.pdf_document_pages.size() || page_number < 1) { - throw new java.lang.IndexOutOfBoundsException( - "Page number does not exist"); - } - this.initialize(); + TextStripper pdfTextStripper = new TextStripper(this.pdfDocument, pageNumber); + + pdfTextStripper.process(); - PDPage pdPage = (PDPage) this.pdf_document_pages.get(page_number - 1); - pdPage = this.drawPage(pdPage); - - if(pdPage != null) { - - Utils.sort(this.characters); - - float w, h; 
- int pageRotation = pdPage.findRotation(); - if (Math.abs(pageRotation) == 90 || Math.abs(pageRotation) == 270) { - w = pdPage.findCropBox().getHeight(); - h = pdPage.findCropBox().getWidth(); - } - else { - w = pdPage.findCropBox().getWidth(); - h = pdPage.findCropBox().getHeight(); - } - - return new Page(0, 0, w, h, pageRotation, page_number, pdPage, this.characters, - this.rulings, this.minCharWidth, this.minCharHeight, - this.spatialIndex); + Utils.sort(pdfTextStripper.textElements, Rectangle.ILL_DEFINED_ORDER); + + float w, h; + int pageRotation = p.getRotation(); + if (Math.abs(pageRotation) == 90 || Math.abs(pageRotation) == 270) { + w = p.getCropBox().getHeight(); + h = p.getCropBox().getWidth(); + } else { + w = p.getCropBox().getWidth(); + h = p.getCropBox().getHeight(); } - return null;//TODO: content is empty, return null? or empty Page? or exception? + + return new Page(0, 0, w, h, pageRotation, pageNumber, p, this.pdfDocument, pdfTextStripper.textElements, + se.rulings, pdfTextStripper.minCharWidth, pdfTextStripper.minCharHeight, pdfTextStripper.spatialIndex); } public PageIterator extract(Iterable pages) { @@ -134,7 +51,7 @@ public PageIterator extract(Iterable pages) { } public PageIterator extract() { - return extract(Utils.range(1, this.pdf_document_pages.size() + 1)); + return extract(Utils.range(1, this.pdfDocument.getNumberOfPages() + 1)); } public Page extract(int pageNumber) { @@ -142,286 +59,9 @@ public Page extract(int pageNumber) { } public void close() throws IOException { - this.pdf_document.close(); - } - - private PDPage drawPage(PDPage p) throws IOException { - this.page = p; - PDStream contents = p.getContents(); - if (contents != null) { - ensurePageSize(); - this.processStream(p, p.findResources(), contents.getStream()); - return p; - } - return null; - } - - private void ensurePageSize() { - if (this.pageSize == null && this.page != null) { - PDRectangle cropBox = this.page.findCropBox(); - this.pageSize = cropBox == null ? 
null : cropBox - .createDimension(); - } - } - - private void initialize() { - this.characters = new ArrayList(); - this.rulings = new ArrayList(); - this.pageTransform = null; - this.spatialIndex = new RectangleSpatialIndex(); - this.minCharWidth = Float.MAX_VALUE; - this.minCharHeight = Float.MAX_VALUE; - } - - @Override - public void drawImage(Image awtImage, AffineTransform at) { - // we just ignore images (for now) - } - - public void strokeOrFillPath(boolean isFill) { - GeneralPath path = this.getLinePath(); - - if (!this.extractRulingLines) { - this.getLinePath().reset(); - return; - } - - PathIterator pi = path.getPathIterator(this.getPageTransform()); - float[] c = new float[6]; - int currentSegment; - - // skip paths whose first operation is not a MOVETO - // or contains operations other than LINETO, MOVETO or CLOSE - if ((pi.currentSegment(c) != PathIterator.SEG_MOVETO)) { - path.reset(); - return; - } - pi.next(); - while (!pi.isDone()) { - currentSegment = pi.currentSegment(c); - if (currentSegment != PathIterator.SEG_LINETO - && currentSegment != PathIterator.SEG_CLOSE - && currentSegment != PathIterator.SEG_MOVETO) { - path.reset(); - return; - } - pi.next(); - } - - // TODO: how to implement color filter? - - // skip the first path operation and save it as the starting position - float[] first = new float[6]; - pi = path.getPathIterator(this.getPageTransform()); - pi.currentSegment(first); - // last move - Point2D.Float start_pos = new Point2D.Float(Utils.round(first[0], 2), Utils.round(first[1], 2)); - Point2D.Float last_move = start_pos; - Point2D.Float end_pos = null; - Line2D.Float line; - PointComparator pc = new PointComparator(); - - while (!pi.isDone()) { - pi.next(); - currentSegment = pi.currentSegment(c); - switch (currentSegment) { - case PathIterator.SEG_LINETO: - end_pos = new Point2D.Float(c[0], c[1]); - - line = pc.compare(start_pos, end_pos) == -1 ? 
new Line2D.Float( - start_pos, end_pos) : new Line2D.Float(end_pos, - start_pos); - - if (line.intersects(this.currentClippingPath())) { - Ruling r = new Ruling(line.getP1(), line.getP2()) - .intersect(this.currentClippingPath()); - - if (r.length() > 0.01) { - this.rulings.add(r); - } - } - break; - case PathIterator.SEG_MOVETO: - last_move = new Point2D.Float(c[0], c[1]); - end_pos = last_move; - break; - case PathIterator.SEG_CLOSE: - // according to PathIterator docs: - // "the preceding subpath should be closed by appending a line - // segment - // back to the point corresponding to the most recent - // SEG_MOVETO." - line = pc.compare(end_pos, last_move) == -1 ? new Line2D.Float( - end_pos, last_move) : new Line2D.Float(last_move, - end_pos); - - if (line.intersects(this.currentClippingPath())) { - Ruling r = new Ruling(line.getP1(), line.getP2()) - .intersect(this.currentClippingPath()); - - if (r.length() > 0.01) { - this.rulings.add(r); - } - } - break; - } - start_pos = end_pos; - } - path.reset(); - } - - @Override - public void strokePath() throws IOException { - this.strokeOrFillPath(false); - } - - @Override - public void fillPath(int windingRule) throws IOException { - // - // float[] color_comps = - // this.getGraphicsState().getNonStrokingColor().getJavaColor().getRGBColorComponents(null); - float[] color = this.getGraphicsState().getNonStrokingColor().getJavaColor().getComponents(null); - // TODO use color_comps as filter_by_color - this.strokeOrFillPath(true); + this.pdfDocument.close(); } - private float currentSpaceWidth() { - PDGraphicsState gs = this.getGraphicsState(); - PDTextState ts = gs.getTextState(); - PDFont font = ts.getFont(); - float fontSizeText = ts.getFontSize(); - float horizontalScalingText = ts.getHorizontalScalingPercent() / 100.0f; - float spaceWidthText = 1000; - if (font instanceof PDType3Font) { - // TODO WHAT? 
- } - - for (int i = 0; i < spaceLikeChars.length; i++) { - spaceWidthText = font.getFontWidth(spaceLikeChars[i]); - if (spaceWidthText > 0) - break; - } - - float ctm00 = gs.getCurrentTransformationMatrix().getValue(0, 0); - - return (float) ((spaceWidthText / 1000.0) * fontSizeText - * horizontalScalingText * (ctm00 == 0 ? 1 : ctm00)); - } - - @Override - protected void processTextPosition(TextPosition textPosition) { - String c = textPosition.getCharacter(); - - // if c not printable, return - if (!isPrintable(c)) { - return; - } - - Float h = textPosition.getHeightDir(); - - if (c.equals(NBSP)) { // replace non-breaking space for space - c = " "; - } - - float wos = textPosition.getWidthOfSpace(); - - TextElement te = new TextElement( - Utils.round(textPosition.getYDirAdj() - h, 2), - Utils.round(textPosition.getXDirAdj(), 2), - Utils.round(textPosition.getWidthDirAdj(), 2), - Utils.round(textPosition.getHeightDir(), 2), - textPosition.getFont(), - textPosition.getFontSize(), - c, - // workaround a possible bug in PDFBox: - // https://issues.apache.org/jira/browse/PDFBOX-1755 - (Float.isNaN(wos) || wos == 0) ? 
this.currentSpaceWidth() : wos, - textPosition.getDir()); - - if (this.currentClippingPath().intersects(te)) { - - this.minCharWidth = (float) Math.min(this.minCharWidth, te.getWidth()); - this.minCharHeight = (float) Math.min(this.minCharHeight, te.getHeight()); - - this.spatialIndex.add(te); - this.characters.add(te); - } - - if (this.isDebugClippingPaths() && !this.clippingPaths.contains(this.currentClippingPath())) { - this.clippingPaths.add(this.currentClippingPath()); - } - - } - - public AffineTransform getPageTransform() { - - if (this.pageTransform != null) { - return this.pageTransform; - } - - PDRectangle cb = page.findCropBox(); - int rotation = Math.abs(page.findRotation()); - - this.pageTransform = new AffineTransform(); - - if (rotation == 90 || rotation == 270) { - this.pageTransform = AffineTransform.getRotateInstance(rotation * (Math.PI / 180.0), 0, 0); - this.pageTransform.concatenate(AffineTransform.getScaleInstance(1, -1)); - this.pageTransform.concatenate(AffineTransform.getTranslateInstance(0, cb.getHeight())); - this.pageTransform.concatenate(AffineTransform.getScaleInstance(1, -1)); - } - return this.pageTransform; - } - - public Rectangle2D currentClippingPath() { - - Shape clippingPath = this.getGraphicsState().getCurrentClippingPath(); - Shape transformedClippingPath = this.getPageTransform() - .createTransformedShape(clippingPath); - Rectangle2D transformedClippingPathBounds = transformedClippingPath - .getBounds2D(); - - return transformedClippingPathBounds; - } - - public boolean isExtractRulingLines() { - return extractRulingLines; - } - - private static boolean isPrintable(String s) { - Character c = s.charAt(0); - Character.UnicodeBlock block = Character.UnicodeBlock.of(c); - return (!Character.isISOControl(c)) && c != KeyEvent.CHAR_UNDEFINED - && block != null && block != Character.UnicodeBlock.SPECIALS; - } - - public boolean isDebugClippingPaths() { - return debugClippingPaths; - } - - public int getPageCount() { - return 
this.pdf_document_pages.size(); - } - - class PointComparator implements Comparator { - @Override - public int compare(Point2D o1, Point2D o2) { - float o1X = Utils.round(o1.getX(), 2); - float o1Y = Utils.round(o1.getY(), 2); - float o2X = Utils.round(o2.getX(), 2); - float o2Y = Utils.round(o2.getY(), 2); - - if (o1Y > o2Y) - return 1; - if (o1Y < o2Y) - return -1; - if (o1X > o2X) - return 1; - if (o1X < o2X) - return -1; - return 0; - } - } } diff --git a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java new file mode 100644 index 00000000..797cb18c --- /dev/null +++ b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java @@ -0,0 +1,272 @@ +package technology.tabula; + +import java.awt.Shape; +import java.awt.geom.AffineTransform; +import java.awt.geom.GeneralPath; +import java.awt.geom.Line2D; +import java.awt.geom.PathIterator; +import java.awt.geom.Point2D; +import java.awt.geom.Rectangle2D; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + +import org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.image.PDImage; +import org.apache.pdfbox.util.Matrix; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Detects all existing lines in pdf and sets them to rulings + */ +class ObjectExtractorStreamEngine extends PDFGraphicsStreamEngine { + + protected List rulings; + private AffineTransform pageTransform; + private boolean debugClippingPaths; + private boolean extractRulingLines = true; + private Logger log; + private int clipWindingRule = -1; + private GeneralPath currentPath = new GeneralPath(); + + protected ObjectExtractorStreamEngine(PDPage page) { + super(page); + + this.log = 
LoggerFactory.getLogger(ObjectExtractorStreamEngine.class); + + this.rulings = new ArrayList<>(); + this.pageTransform = null; + + // calculate page transform + PDRectangle cb = this.getPage().getCropBox(); + int rotation = this.getPage().getRotation(); + + this.pageTransform = new AffineTransform(); + + if (Math.abs(rotation) == 90 || Math.abs(rotation) == 270) { + this.pageTransform = AffineTransform.getRotateInstance(rotation * (Math.PI / 180.0), 0, 0); + this.pageTransform.concatenate(AffineTransform.getScaleInstance(1, -1)); + } else { + this.pageTransform.concatenate(AffineTransform.getTranslateInstance(0, cb.getHeight())); + this.pageTransform.concatenate(AffineTransform.getScaleInstance(1, -1)); + } + + this.pageTransform.translate(-cb.getLowerLeftX(), -cb.getLowerLeftY()); + } + + @Override + public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) { + currentPath.moveTo((float) p0.getX(), (float) p0.getY()); + currentPath.lineTo((float) p1.getX(), (float) p1.getY()); + currentPath.lineTo((float) p2.getX(), (float) p2.getY()); + currentPath.lineTo((float) p3.getX(), (float) p3.getY()); + + currentPath.closePath(); + } + + @Override + public void clip(int windingRule) { + // the clipping path will not be updated until the succeeding painting + // operator is called + clipWindingRule = windingRule; + } + + @Override + public void closePath() { + currentPath.closePath(); + } + + @Override + public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) { + currentPath.curveTo(x1, y1, x2, y2, x3, y3); + } + + @Override + public void drawImage(PDImage arg0) { + // TODO Auto-generated method stub + + } + + @Override + public void endPath() { + if (clipWindingRule != -1) { + currentPath.setWindingRule(clipWindingRule); + getGraphicsState().intersectClippingPath(currentPath); + clipWindingRule = -1; + } + currentPath.reset(); + } + + @Override + public void fillAndStrokePath(int arg0) { + strokeOrFillPath(true); + } + + 
@Override + public void fillPath(int arg0) { + strokeOrFillPath(true); + } + + @Override + public Point2D getCurrentPoint() { + return currentPath.getCurrentPoint(); + } + + @Override + public void lineTo(float x, float y) { + currentPath.lineTo(x, y); + } + + @Override + public void moveTo(float x, float y) { + currentPath.moveTo(x, y); + } + + @Override + public void shadingFill(COSName arg0) { + // TODO Auto-generated method stub + + } + + @Override + public void strokePath() { + strokeOrFillPath(false); + } + + private void strokeOrFillPath(boolean isFill) { + GeneralPath path = this.currentPath; + + if (!this.extractRulingLines) { + this.currentPath.reset(); + return; + } + + PathIterator pi = path.getPathIterator(this.getPageTransform()); + float[] c = new float[6]; + int currentSegment; + + // skip paths whose first operation is not a MOVETO + // or contains operations other than LINETO, MOVETO or CLOSE + if ((pi.currentSegment(c) != PathIterator.SEG_MOVETO)) { + path.reset(); + return; + } + pi.next(); + while (!pi.isDone()) { + currentSegment = pi.currentSegment(c); + if (currentSegment != PathIterator.SEG_LINETO && currentSegment != PathIterator.SEG_CLOSE + && currentSegment != PathIterator.SEG_MOVETO) { + path.reset(); + return; + } + pi.next(); + } + + // TODO: how to implement color filter? + + // skip the first path operation and save it as the starting position + float[] first = new float[6]; + pi = path.getPathIterator(this.getPageTransform()); + pi.currentSegment(first); + // last move + Point2D.Float start_pos = new Point2D.Float(Utils.round(first[0], 2), Utils.round(first[1], 2)); + Point2D.Float last_move = start_pos; + Point2D.Float end_pos = null; + Line2D.Float line; + PointComparator pc = new PointComparator(); + while (!pi.isDone()) { + pi.next(); + // This can be the last segment, when pi.isDone, but we need to + // process it + // otherwise us-017.pdf fails the last value. 
+ try { + currentSegment = pi.currentSegment(c); + } catch (IndexOutOfBoundsException ex) { + continue; + } + switch (currentSegment) { + case PathIterator.SEG_LINETO: + end_pos = new Point2D.Float(c[0], c[1]); + + if (start_pos == null || end_pos == null) { + break; + } + + line = pc.compare(start_pos, end_pos) == -1 ? new Line2D.Float(start_pos, end_pos) + : new Line2D.Float(end_pos, start_pos); + + if (line.intersects(this.currentClippingPath())) { + Ruling r = new Ruling(line.getP1(), line.getP2()).intersect(this.currentClippingPath()); + + if (r.length() > 0.01) { + this.rulings.add(r); + } + } + break; + case PathIterator.SEG_MOVETO: + last_move = new Point2D.Float(c[0], c[1]); + end_pos = last_move; + break; + case PathIterator.SEG_CLOSE: + // according to PathIterator docs: + // "the preceding subpath should be closed by appending a line + // segment + // back to the point corresponding to the most recent + // SEG_MOVETO." + if (start_pos == null || end_pos == null) { + break; + } + line = pc.compare(end_pos, last_move) == -1 ? 
new Line2D.Float(end_pos, last_move) + : new Line2D.Float(last_move, end_pos); + + if (line.intersects(this.currentClippingPath())) { + // intersect clips the line + Ruling r = new Ruling(line.getP1(), line.getP2()).intersect(this.currentClippingPath()); + + if (r.length() > 0.01) { + this.rulings.add(r); + } + } + break; + } + start_pos = end_pos; + } + path.reset(); + } + + public AffineTransform getPageTransform() { + return this.pageTransform; + } + + public Rectangle2D currentClippingPath() { + Shape clippingPath = this.getGraphicsState().getCurrentClippingPath(); + Shape transformedClippingPath = this.getPageTransform().createTransformedShape(clippingPath); + + return transformedClippingPath.getBounds2D(); + } + + class PointComparator implements Comparator { + @Override + public int compare(Point2D o1, Point2D o2) { + float o1X = Utils.round(o1.getX(), 2); + float o1Y = Utils.round(o1.getY(), 2); + float o2X = Utils.round(o2.getX(), 2); + float o2Y = Utils.round(o2.getY(), 2); + + if (o1Y > o2Y) + return 1; + if (o1Y < o2Y) + return -1; + if (o1X > o2X) + return 1; + if (o1X < o2X) + return -1; + return 0; + } + } +} diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java index ab57d938..ac670558 100644 --- a/src/main/java/technology/tabula/Page.java +++ b/src/main/java/technology/tabula/Page.java @@ -2,235 +2,250 @@ import java.awt.geom.Point2D; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.Comparator; -import java.util.HashMap; import java.util.List; -import java.util.Map; +import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; @SuppressWarnings("serial") // TODO: this class should probably be called "PageArea" or something like that public class Page extends Rectangle { - private Integer rotation; - private int pageNumber; - private List texts; - private List rulings, cleanRulings = null, verticalRulingLines = null, 
horizontalRulingLines = null; - private float minCharWidth; - private float minCharHeight; - private RectangleSpatialIndex spatial_index; - private PDPage pdPage; - - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage) { - super(top, left, width, height); - this.rotation = rotation; - this.pageNumber = page_number; - this.pdPage = pdPage; - } - - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, - List characters, List rulings) { - - this(top, left, width, height, rotation, page_number, pdPage); - this.texts = characters; - this.rulings = rulings; - } - - - public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, - List characters, List rulings, - float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { - - this(top, left, width, height, rotation, page_number, pdPage, characters, rulings); - this.minCharHeight = minCharHeight; - this.minCharWidth = minCharWidth; - this.spatial_index = index; - } + private Integer rotation; + private int pageNumber; + private List texts; + private List rulings, cleanRulings = null, verticalRulingLines = null, horizontalRulingLines = null; + private float minCharWidth; + private float minCharHeight; + private RectangleSpatialIndex spatial_index; + private PDPage pdPage; + private PDDocument pdDoc; + + public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, PDDocument doc) { + super(top, left, width, height); + this.rotation = rotation; + this.pageNumber = page_number; + this.pdPage = pdPage; + this.pdDoc = doc; + } + + public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, PDDocument doc, + List characters, List rulings) { + + this(top, left, width, height, rotation, page_number, pdPage, doc); + this.texts = characters; + this.rulings = rulings; + } + + 
+ public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage, PDDocument doc, + List characters, List rulings, + float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { + + this(top, left, width, height, rotation, page_number, pdPage, doc, characters, rulings); + this.minCharHeight = minCharHeight; + this.minCharWidth = minCharWidth; + this.spatial_index = index; + } + + public Page getArea(Rectangle area) { + List t = getText(area); + float min_char_width = 7; + float min_char_height = 7; + + if(t.size() > 0){ + min_char_width = Collections.min(t, new Comparator() { + @Override + public int compare(TextElement te1, TextElement te2) { + return java.lang.Float.compare(te1.width, te2.width); + }}).width; + min_char_height = Collections.min(t, new Comparator() { + @Override + public int compare(TextElement te1, TextElement te2) { + return java.lang.Float.compare(te1.height, te2.height); + }}).height; + } + Page rv = new Page( + area.getTop(), + area.getLeft(), + (float) area.getWidth(), + (float) area.getHeight(), + rotation, + pageNumber, + pdPage, + pdDoc, + t, + Ruling.cropRulingsToArea(getRulings(), area), + min_char_width, + min_char_height, + spatial_index); + + rv.addRuling(new Ruling( + new Point2D.Double(rv.getLeft(), + rv.getTop()), + new Point2D.Double(rv.getRight(), + rv.getTop()))); + rv.addRuling(new Ruling( + new Point2D.Double(rv.getRight(), + rv.getTop()), + new Point2D.Double(rv.getRight(), + rv.getBottom()))); + rv.addRuling(new Ruling( + new Point2D.Double(rv.getRight(), + rv.getBottom()), + new Point2D.Double(rv.getLeft(), + rv.getBottom()))); + rv.addRuling(new Ruling( + new Point2D.Double(rv.getLeft(), + rv.getBottom()), + new Point2D.Double(rv.getLeft(), + rv.getTop()))); + + return rv; + } + + public Page getArea(float top, float left, float bottom, float right) { + Rectangle area = new Rectangle(top, left, right - left, bottom - top); + return this.getArea(area); + } + + public 
List getText() { + return texts; + } + + public List getText(Rectangle area) { + return this.spatial_index.contains(area); + } + + /** @deprecated use {@linkplain #getText(Rectangle)} instead */ + @Deprecated public List getText(float top, float left, float bottom, float right) { + return this.getText(new Rectangle(top, left, right - left, bottom - top)); + } + + public Integer getRotation() { + return rotation; + } + + public int getPageNumber() { + return pageNumber; + } + + /** @deprecated use {@linkplain #getText()} instead */ + @Deprecated public List getTexts() { + return texts; + } + + /** + * Returns the minimum bounding box that contains all the TextElements on this Page + */ + public Rectangle getTextBounds() { + List texts = this.getText(); + if (!texts.isEmpty()) { + return Utils.bounds(texts); + } + else { + return new Rectangle(); + } + + } + + /** + * @return Only vertical or horizontal rulings, duplicates are removed + */ + public List getRulings() { + if (this.cleanRulings != null) { + return this.cleanRulings; + } + + if (this.rulings == null || this.rulings.isEmpty()) { + this.verticalRulingLines = new ArrayList<>(); + this.horizontalRulingLines = new ArrayList<>(); + return new ArrayList<>(); + } + + Utils.snapPoints(this.rulings, this.minCharWidth, this.minCharHeight); + + List vrs = new ArrayList<>(); + for (Ruling vr: this.rulings) { + if (vr.vertical()) { + vrs.add(vr); + } + } + this.verticalRulingLines = Ruling.collapseOrientedRulings(vrs); + + List hrs = new ArrayList<>(); + for (Ruling hr: this.rulings) { + if (hr.horizontal()) { + hrs.add(hr); + } + } + this.horizontalRulingLines = Ruling.collapseOrientedRulings(hrs); + + this.cleanRulings = new ArrayList<>(this.verticalRulingLines); + this.cleanRulings.addAll(this.horizontalRulingLines); + + return this.cleanRulings; + + } + + public List getVerticalRulings() { + if (this.verticalRulingLines != null) { + return this.verticalRulingLines; + } + this.getRulings(); + return 
this.verticalRulingLines; + } - - public Page getArea(Rectangle area) { - List t = getText(area); - Page rv = new Page( - (float) area.getTop(), - (float) area.getLeft(), - (float) area.getWidth(), - (float) area.getHeight(), - rotation, - pageNumber, - pdPage, - t, - Ruling.cropRulingsToArea(getRulings(), area), - - Collections.min(t, new Comparator() { - @Override - public int compare(TextElement te1, TextElement te2) { - return java.lang.Float.compare(te1.width, te2.width); - }}).width, - - Collections.min(t, new Comparator() { - @Override - public int compare(TextElement te1, TextElement te2) { - return java.lang.Float.compare(te1.height, te2.height); - }}).height, - - spatial_index); - - rv.addRuling(new Ruling( - new Point2D.Double(rv.getLeft(), - rv.getTop()), - new Point2D.Double(rv.getRight(), - rv.getTop()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getRight(), - rv.getTop()), - new Point2D.Double(rv.getRight(), - rv.getBottom()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getRight(), - rv.getBottom()), - new Point2D.Double(rv.getLeft(), - rv.getBottom()))); - rv.addRuling(new Ruling( - new Point2D.Double(rv.getLeft(), - rv.getBottom()), - new Point2D.Double(rv.getLeft(), - rv.getTop()))); - - return rv; - } - - public Page getArea(float top, float left, float bottom, float right) { - Rectangle area = new Rectangle(top, left, right - left, bottom - top); - return this.getArea(area); - } - - public List getText() { - return texts; - } - - public List getText(Rectangle area) { - return this.spatial_index.contains(area); - } - - public List getText(float top, float left, float bottom, float right) { - return this.getText(new Rectangle(top, left, right - left, bottom - top)); + public List getHorizontalRulings() { + if (this.horizontalRulingLines != null) { + return this.horizontalRulingLines; } + this.getRulings(); + return this.horizontalRulingLines; + } - public Integer getRotation() { - return rotation; + public void addRuling(Ruling 
r) { + if (r.oblique()) { + throw new UnsupportedOperationException("Can't add an oblique ruling"); } + this.rulings.add(r); + // clear caches + this.verticalRulingLines = null; + this.horizontalRulingLines = null; + this.cleanRulings = null; + } - public int getPageNumber() { - return pageNumber; - } + public List getUnprocessedRulings() { + return this.rulings; + } - public List getTexts() { - return texts; - } - - /** - * Returns the minimum bounding box that contains all the TextElements on this Page - */ - public Rectangle getTextBounds() { - List texts = this.getText(); - if (!texts.isEmpty()) { - return Utils.bounds(texts); - } - else { - return new Rectangle(); - } - - } + /** @deprecated with no replacement */ + @Deprecated public float getMinCharWidth() { + return minCharWidth; + } - public List getRulings() { - if (this.cleanRulings != null) { - return this.cleanRulings; - } - - if (this.rulings == null || this.rulings.isEmpty()) { - this.verticalRulingLines = new ArrayList(); - this.horizontalRulingLines = new ArrayList(); - return new ArrayList(); - } - - Utils.snapPoints(this.rulings, this.minCharWidth, this.minCharHeight); - - List vrs = new ArrayList(); - for (Ruling vr: this.rulings) { - if (vr.vertical()) { - vrs.add(vr); - } - } - this.verticalRulingLines = Ruling.collapseOrientedRulings(vrs); - - List hrs = new ArrayList(); - for (Ruling hr: this.rulings) { - if (hr.horizontal()) { - hrs.add(hr); - } - } - this.horizontalRulingLines = Ruling.collapseOrientedRulings(hrs); - - this.cleanRulings = new ArrayList(this.verticalRulingLines); - this.cleanRulings.addAll(this.horizontalRulingLines); - - return this.cleanRulings; - - } - - public List getVerticalRulings() { - if (this.verticalRulingLines != null) { - return this.verticalRulingLines; - } - this.getRulings(); - return this.verticalRulingLines; - } - - public List getHorizontalRulings() { - if (this.horizontalRulingLines != null) { - return this.horizontalRulingLines; - } - this.getRulings(); 
- return this.horizontalRulingLines; - } - - public void addRuling(Ruling r) { - if (r.oblique()) { - throw new UnsupportedOperationException("Can't add an oblique ruling"); - } - this.rulings.add(r); - // clear caches - this.verticalRulingLines = null; - this.horizontalRulingLines = null; - this.cleanRulings = null; - } - - public List getUnprocessedRulings() { - return this.rulings; - } + /** @deprecated with no replacement */ + @Deprecated public float getMinCharHeight() { + return minCharHeight; + } - public float getMinCharWidth() { - return minCharWidth; - } + public PDPage getPDPage() { + return pdPage; + } - public float getMinCharHeight() { - return minCharHeight; - } + public PDDocument getPDDoc() { + return pdDoc; + } - public PDPage getPDPage() { - return pdPage; - } + /** @deprecated with no replacement */ + @Deprecated public RectangleSpatialIndex getSpatialIndex() { + return this.spatial_index; + } - public RectangleSpatialIndex getSpatialIndex() { - return this.spatial_index; - } - - public boolean hasText() { - return this.texts.size() > 0; - } - - + /** @deprecated with no replacement */ + @Deprecated public boolean hasText() { + return this.texts.size() > 0; + } } diff --git a/src/main/java/technology/tabula/Pair.java b/src/main/java/technology/tabula/Pair.java new file mode 100644 index 00000000..d54cbbe5 --- /dev/null +++ b/src/main/java/technology/tabula/Pair.java @@ -0,0 +1,19 @@ +package technology.tabula; + +public class Pair { + private final L left; + private final R right; + + public Pair(L left, R right) { + this.left = left; + this.right = right; + } + + public L getLeft() { + return this.left; + } + + public R getRight() { + return this.right; + } +} diff --git a/src/main/java/technology/tabula/ProjectionProfile.java b/src/main/java/technology/tabula/ProjectionProfile.java index 6479964d..39ab9e41 100644 --- a/src/main/java/technology/tabula/ProjectionProfile.java +++ b/src/main/java/technology/tabula/ProjectionProfile.java @@ -5,6 
+5,8 @@ import java.util.List; +// NOTE: this class is currently not used by the extraction algorithms +// keeping it for potential use. public class ProjectionProfile { public static final int DECIMAL_PLACES = 1; // fixed <-> float conversion precision @@ -71,7 +73,7 @@ public float[] getHorizontalProjection() { public float[] findVerticalSeparators(float minColumnWidth) { boolean foundNarrower = false; - List verticalSeparators = new ArrayList(); + List verticalSeparators = new ArrayList<>(); for (Ruling r: area.getVerticalRulings()) { if (r.length() / this.textBounds.getHeight() >= 0.95) { verticalSeparators.add(toFixed(r.getPosition() - this.areaLeft)); @@ -103,7 +105,7 @@ public float[] findVerticalSeparators(float minColumnWidth) { public float[] findHorizontalSeparators(float minRowHeight) { boolean foundShorter = false; - List horizontalSeparators = new ArrayList(); + List horizontalSeparators = new ArrayList<>(); for (Ruling r: area.getHorizontalRulings()) { System.out.println(r.length() / this.textBounds.getWidth()); if (r.length() / this.textBounds.getWidth() >= 0.95) { @@ -134,7 +136,7 @@ public float[] findHorizontalSeparators(float minRowHeight) { } private static List findSeparatorsFromProjection(float[] derivative) { - List separators = new ArrayList(); + List separators = new ArrayList<>(); Integer lastNeg = null; float s; boolean positiveSlope = false; @@ -165,7 +167,7 @@ public static float[] smooth(float[] data, int kernelSize) { + kernelSize / 2, data.length); j++) { s += data[j]; } - rv[i] = (float) Math.floor(s / (float) kernelSize); + rv[i] = (float) Math.floor(s / kernelSize); } } return rv; @@ -211,7 +213,7 @@ private static int toFixed(double value) { } private static double toDouble(int value) { - return (double) value / Math.pow(10, DECIMAL_PLACES); + return value / Math.pow(10, DECIMAL_PLACES); } } diff --git a/src/main/java/technology/tabula/QuickSort.java b/src/main/java/technology/tabula/QuickSort.java index 21d26dd5..03388a15 
100644 --- a/src/main/java/technology/tabula/QuickSort.java +++ b/src/main/java/technology/tabula/QuickSort.java @@ -16,94 +16,97 @@ */ package technology.tabula; +import java.util.ArrayList; import java.util.Comparator; import java.util.List; +import java.util.RandomAccess; import java.util.Stack; /** - * see http://de.wikipedia.org/wiki/Quicksort. + * An implementation of Quicksort. + * + * @see wikipedia * * @author UWe Pachler */ -public class QuickSort -{ - - private QuickSort() - { - } - - private static final Comparator objComp = new Comparator() - { - public int compare(Comparable object1, Comparable object2) - { - return object1.compareTo(object2); - } - }; +public final class QuickSort { + + private QuickSort() { + // utility + } + + /** + * Sorts the given list according to natural order. + */ + public static > void sort(List list) { + sort(list, QuickSort.naturalOrder()); // JAVA_8 replace with Comparator.naturalOrder() (and cleanup) + } + + /** + * Sorts the given list using the given comparator. + */ + public static void sort(List list, Comparator comparator) { + if (list instanceof RandomAccess) { + quicksort(list, comparator); + } else { + List copy = new ArrayList<>(list); + quicksort(copy, comparator); + list.clear(); + list.addAll(copy); + } + } - /** - * Sorts the given list using the given comparator. - */ - public static void sort(List list, Comparator cmp) - { - quicksort(list, cmp); - } + private static void quicksort(List list, Comparator cmp) { + Stack stack = new Stack<>(); + stack.push(0); + stack.push(list.size()); + while (!stack.isEmpty()) { + int right = stack.pop(); + int left = stack.pop(); + + if (right - left < 2) continue; + int p = left + ((right - left) / 2); + p = partition(list, cmp, p, left, right); - /** - * Sorts the given list using compareTo as comparator. 
- */ - public static void sort(List list) - { - sort(list, (Comparator) objComp); - } + stack.push(p + 1); + stack.push(right); - private static void quicksort(List list, Comparator cmp) - { - Stack stack = new Stack(); - stack.push(0); - stack.push(list.size()); - while (!stack.isEmpty()) { - int right = stack.pop(); - int left = stack.pop(); - if (right - left < 2) continue; - int p = left + ((right-left)/2); - p = partition(list, cmp, p, left, right); - - stack.push(p+1); - stack.push(right); + stack.push(left); + stack.push(p); + } + } - stack.push(left); - stack.push(p); + private static int partition(List list, Comparator cmp, int p, int start, int end) { + int l = start; + int h = end - 2; + T piv = list.get(p); + swap(list, p, end - 1); - } - } - - private static int partition(List list, Comparator cmp, int p, int start, int end) { - int l = start; - int h = end - 2; - T piv = list.get(p); - swap(list,p,end-1); + while (l < h) { + if (cmp.compare(list.get(l), piv) <= 0) l++; + else if (cmp.compare(piv, list.get(h)) <= 0) h--; + else swap(list, l, h); + } + int idx = h; + if (cmp.compare(list.get(h), piv) < 0) idx++; + swap(list, end - 1, idx); + return idx; + } - while (l < h) { - if (cmp.compare(list.get(l), piv) <= 0) { - l++; - } else if (cmp.compare(piv, list.get(h)) <= 0) { - h--; - } else { - swap(list,l,h); - } - } - int idx = h; - if (cmp.compare(list.get(h), piv) < 0) idx++; - swap(list,end-1,idx); - return idx; - } - + private static void swap(List list, int i, int j) { + T tmp = list.get(i); + list.set(i, list.get(j)); + list.set(j, tmp); + } - private static void swap(List list, int i, int j) - { - T tmp = list.get(i); - list.set(i, list.get(j)); - list.set(j, tmp); - } + @SuppressWarnings({ "rawtypes", "unchecked" }) + private static final Comparator NATURAL_ORDER = new Comparator() { + @Override public int compare(Object l, Object r) { return ((Comparable) l).compareTo(r); } + }; + + @SuppressWarnings("unchecked") + private static > Comparator 
naturalOrder() { + return NATURAL_ORDER; + } } diff --git a/src/main/java/technology/tabula/Rectangle.java b/src/main/java/technology/tabula/Rectangle.java index 4dc75298..95aebb8d 100644 --- a/src/main/java/technology/tabula/Rectangle.java +++ b/src/main/java/technology/tabula/Rectangle.java @@ -2,167 +2,191 @@ import java.awt.geom.Point2D; import java.awt.geom.Rectangle2D; +import java.util.Comparator; import java.util.List; +import java.util.Locale; @SuppressWarnings("serial") -public class Rectangle extends Rectangle2D.Float implements Comparable { - - private static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f; - - public Rectangle() { - super(); - } - - public Rectangle(float top, float left, float width, float height) { - super(); - this.setRect(left, top, width, height); - } - - @Override - public int compareTo(Rectangle other) { - double thisBottom = this.getBottom(); - double otherBottom = other.getBottom(); - int rv; - - if (this.equals(other)) return 0; - - if (this.verticalOverlap(other) > VERTICAL_COMPARISON_THRESHOLD) { - rv = java.lang.Double.compare(this.getX(), other.getX()); - } - else { - rv = java.lang.Double.compare(thisBottom, otherBottom); - } - return rv; - } - - - - public float getArea() { - return this.width * this.height; - } - - public float verticalOverlap(Rectangle other) { - return (float) Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); - } - - public boolean verticallyOverlaps(Rectangle other) { - return verticalOverlap(other) > 0; - } - - public float horizontalOverlap(Rectangle other) { - return (float) Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); - } - - public boolean horizontallyOverlaps(Rectangle other) { - return horizontalOverlap(other) > 0; - } - - public float verticalOverlapRatio(Rectangle other) { - float rv = 0, - delta = (float) Math.min(this.getBottom() - this.getTop(), other.getBottom() - 
other.getTop()); - - if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom() && other.getBottom() <= this.getBottom()) { - rv = (float) ((other.getBottom() - this.getTop()) / delta); - } - else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom() && this.getBottom() <= other.getBottom()) { - rv = (float) ((this.getBottom() - other.getTop()) / delta); - } - else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom() && other.getBottom() <= this.getBottom()) { - rv = (float) ((other.getBottom() - other.getTop()) / delta); - } - else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom() && this.getBottom() <= other.getBottom()) { - rv = (float) ((this.getBottom() - this.getTop()) / delta); - } - - return rv; - - } - - public float overlapRatio(Rectangle other) { - double intersectionWidth = Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); - double intersectionHeight = Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); - double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight); - double unionArea = this.getArea() + other.getArea() - intersectionArea; - - return (float) (intersectionArea / unionArea); - } - - public Rectangle merge(Rectangle other) { - this.setRect(this.createUnion(other)); - return this; - } - - public float getTop() { - return (float) this.getMinY(); - } - - public void setTop(float top) { - float deltaHeight = top - this.y; - this.setRect(this.x, top, this.width, this.height - deltaHeight); - } - - public float getRight() { - return (float) this.getMaxX(); - } - - public void setRight(float right) { - this.setRect(this.x, this.y, right - this.x, this.height); - } - - public float getLeft() { - return (float) this.getMinX(); - } - - public void setLeft(float left) { - float deltaWidth = left - this.x; - this.setRect(left, this.y, this.width - 
deltaWidth, this.height); - } - - public float getBottom() { - return (float) this.getMaxY(); - } - - public void setBottom(float bottom) { - this.setRect(this.x, this.y, this.width, bottom - this.y); - } - - public Point2D[] getPoints() { - return new Point2D[] { - new Point2D.Float((float) this.getLeft(), (float) this.getTop()), - new Point2D.Float((float) this.getRight(), (float) this.getTop()), - new Point2D.Float((float) this.getRight(), (float) this.getBottom()), - new Point2D.Float((float) this.getLeft(), (float) this.getBottom()) - }; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - String s = super.toString(); - sb.append(s.substring(0, s.length() - 1)); - sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight())); - return sb.toString(); - } - - - /** - * @param rectangles - * @return minimum bounding box that contains all the rectangles - */ - public static Rectangle boundingBoxOf(List rectangles) { - float minx = java.lang.Float.MAX_VALUE; - float miny = java.lang.Float.MAX_VALUE; - float maxx = java.lang.Float.MIN_VALUE; - float maxy = java.lang.Float.MIN_VALUE; - - for (Rectangle r: rectangles) { - minx = (float) Math.min(r.getMinX(), minx); - miny = (float) Math.min(r.getMinY(), miny); - maxx = (float) Math.max(r.getMaxX(), maxx); - maxy = (float) Math.max(r.getMaxY(), maxy); - } - return new Rectangle(miny, minx, maxx - minx, maxy - miny); - } - +public class Rectangle extends Rectangle2D.Float { + + /** + * Ill-defined comparator, from when Rectangle was Comparable. + * + * @see PR 116 + * @deprecated with no replacement + */ + @Deprecated + public static final Comparator ILL_DEFINED_ORDER = new Comparator() { + @Override public int compare(Rectangle o1, Rectangle o2) { + if (o1.equals(o2)) return 0; + if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) { + return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1 + ? 
- java.lang.Double.compare(o1.getX(), o2.getX()) + : java.lang.Double.compare(o1.getX(), o2.getX()); + } else { + return java.lang.Float.compare(o1.getBottom(), o2.getBottom()); + } + } + }; + + protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f; + + public Rectangle() { + super(); + } + + public Rectangle(float top, float left, float width, float height) { + super(); + this.setRect(left, top, width, height); + } + + public int compareTo(Rectangle other) { + return ILL_DEFINED_ORDER.compare(this, other); + } + + // I'm bad at Java and need this for fancy sorting in + // technology.tabula.TextChunk. + public int isLtrDominant() { + return 0; + } + + public float getArea() { + return this.width * this.height; + } + + public float verticalOverlap(Rectangle other) { + return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); + } + + public boolean verticallyOverlaps(Rectangle other) { + return verticalOverlap(other) > 0; + } + + public float verticalOverlapPercent(Rectangle other) { + float overlap = verticalOverlap(other); + return (overlap < 0) ? 
0 : (float) (overlap / Math.max(getHeight(), other.getHeight())); + } + + public float horizontalOverlap(Rectangle other) { + return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); + } + + public boolean horizontallyOverlaps(Rectangle other) { + return horizontalOverlap(other) > 0; + } + + public float verticalOverlapRatio(Rectangle other) { + float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop()); + + if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom() + && other.getBottom() <= this.getBottom()) { + rv = (other.getBottom() - this.getTop()) / delta; + } else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom() + && this.getBottom() <= other.getBottom()) { + rv = (this.getBottom() - other.getTop()) / delta; + } else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom() + && other.getBottom() <= this.getBottom()) { + rv = (other.getBottom() - other.getTop()) / delta; + } else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom() + && this.getBottom() <= other.getBottom()) { + rv = (this.getBottom() - this.getTop()) / delta; + } + + return rv; + + } + + public float overlapRatio(Rectangle other) { + double intersectionWidth = Math.max(0, + Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); + double intersectionHeight = Math.max(0, + Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); + double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight); + double unionArea = this.getArea() + other.getArea() - intersectionArea; + + return (float) (intersectionArea / unionArea); + } + + public Rectangle merge(Rectangle other) { + this.setRect(this.createUnion(other)); + return this; + } + + public float getTop() { + return (float) this.getMinY(); + } + + public void setTop(float top) { + float deltaHeight 
= top - this.y; + this.setRect(this.x, top, this.width, this.height - deltaHeight); + } + + public float getRight() { + return (float) this.getMaxX(); + } + + public void setRight(float right) { + this.setRect(this.x, this.y, right - this.x, this.height); + } + + public float getLeft() { + return (float) this.getMinX(); + } + + public void setLeft(float left) { + float deltaWidth = left - this.x; + this.setRect(left, this.y, this.width - deltaWidth, this.height); + } + + public float getBottom() { + return (float) this.getMaxY(); + } + + public void setBottom(float bottom) { + this.setRect(this.x, this.y, this.width, bottom - this.y); + } + + public Point2D[] getPoints() { + return new Point2D[] { new Point2D.Float(this.getLeft(), this.getTop()), + new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()), + new Point2D.Float(this.getLeft(), this.getBottom()) }; + } + + public boolean almostContains(Rectangle other) { + Rectangle otherSmaller = new Rectangle(); + float margin = (other.width > 10 && other.height > 10) ? 
2f : 0f; + otherSmaller.setRect(other.x + margin, other.y + margin, + other.width - 2 * margin, other.height - 2 * margin); + + return contains(otherSmaller); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + String s = super.toString(); + sb.append(s.substring(0, s.length() - 1)); + sb.append(String.format(Locale.US, ",bottom=%f,right=%f]", this.getBottom(), this.getRight())); + return sb.toString(); + } + + /** + * @param rectangles + * @return minimum bounding box that contains all the rectangles + */ + public static Rectangle boundingBoxOf(List rectangles) { + float minx = java.lang.Float.MAX_VALUE; + float miny = java.lang.Float.MAX_VALUE; + float maxx = java.lang.Float.MIN_VALUE; + float maxy = java.lang.Float.MIN_VALUE; + + for (Rectangle r : rectangles) { + minx = (float) Math.min(r.getMinX(), minx); + miny = (float) Math.min(r.getMinY(), miny); + maxx = (float) Math.max(r.getMaxX(), maxx); + maxy = (float) Math.max(r.getMaxY(), maxy); + } + return new Rectangle(miny, minx, maxx - minx, maxy - miny); + } } diff --git a/src/main/java/technology/tabula/RectangleSpatialIndex.java b/src/main/java/technology/tabula/RectangleSpatialIndex.java index e3aa633e..4fba6162 100644 --- a/src/main/java/technology/tabula/RectangleSpatialIndex.java +++ b/src/main/java/technology/tabula/RectangleSpatialIndex.java @@ -1,88 +1,54 @@ package technology.tabula; -import gnu.trove.procedure.TIntProcedure; - import java.util.ArrayList; -import java.util.Collections; import java.util.List; -import net.sf.jsi.SpatialIndex; -import net.sf.jsi.rtree.RTree; +import org.locationtech.jts.geom.Envelope; +import org.locationtech.jts.index.strtree.STRtree; -class RectangleSpatialIndex { +/*** + * List that sorts rectangles is spacial order + */ +public class RectangleSpatialIndex { - class SaveToListProcedure implements TIntProcedure { - private List ids = new ArrayList(); - public boolean execute(int id) { - ids.add(id); - return true; - }; - - 
private List getIds() { - return ids; - } - }; - - private final SpatialIndex si; - private final List rectangles; - private Rectangle bounds = null; - - public RectangleSpatialIndex() { - si = new RTree(); - si.init(null); - rectangles = new ArrayList(); - } - + private final STRtree si = new STRtree(); + private final List rectangles = new ArrayList<>(); + public void add(T te) { rectangles.add(te); - if (bounds == null) { - bounds = new Rectangle(); - bounds.setRect(te); - } - else { - bounds.merge(te); - } - si.add(rectangleToSpatialIndexRectangle(te), rectangles.size() - 1); + si.insert(new Envelope(te.getLeft(), te.getRight(), te.getBottom(), te.getTop()), te); } - + + /** + * ? Get all rectangles in collection that are inside r. + */ public List contains(Rectangle r) { - SaveToListProcedure proc = new SaveToListProcedure(); - si.contains(rectangleToSpatialIndexRectangle(r), proc); - ArrayList rv = new ArrayList(); - for (int i : proc.getIds()) { - rv.add(rectangles.get(i)); + List intersection = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); + List rv = new ArrayList(); + + for (T ir: intersection) { + if (r.contains(ir)) { + rv.add(ir); + } } - Utils.sort(rv); + + Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER); return rv; } public List intersects(Rectangle r) { - SaveToListProcedure proc = new SaveToListProcedure(); - si.intersects(rectangleToSpatialIndexRectangle(r), proc); - ArrayList rv = new ArrayList(); - for (int i : proc.getIds()) { - rv.add(rectangles.get(i)); - } - Utils.sort(rv); + List rv = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); return rv; } - private net.sf.jsi.Rectangle rectangleToSpatialIndexRectangle(Rectangle r) { - return new net.sf.jsi.Rectangle((float) r.getX(), - (float) r.getY(), - (float) (r.getX() + r.getWidth()), - (float) (r.getY() + r.getHeight())); - } - - /** * Minimum bounding box of all the Rectangles contained on this RectangleSpatialIndex * * @return a 
Rectangle */ public Rectangle getBounds() { - return bounds; + return Rectangle.boundingBoxOf(rectangles); } } diff --git a/src/main/java/technology/tabula/RectangularTextContainer.java b/src/main/java/technology/tabula/RectangularTextContainer.java index f9e0036f..5f4d3716 100644 --- a/src/main/java/technology/tabula/RectangularTextContainer.java +++ b/src/main/java/technology/tabula/RectangularTextContainer.java @@ -5,31 +5,32 @@ @SuppressWarnings("serial") public abstract class RectangularTextContainer extends Rectangle { - public RectangularTextContainer(float top, float left, float width, float height) { - super(top, left, width, height); - } - - public String toString() { - StringBuilder sb = new StringBuilder(); - String s = super.toString(); - sb.append(s.substring(0, s.length() - 1)); - sb.append(String.format(",text=%s]", this.getText() == null ? "null" : "\"" + this.getText() + "\"")); - return sb.toString(); - } - - public RectangularTextContainer merge(RectangularTextContainer other) { - if (this.compareTo(other) < 0) { - this.getTextElements().addAll(other.getTextElements()); - - } - else { - this.getTextElements().addAll(0, other.getTextElements()); - } - super.merge(other); - return this; - } - - public abstract String getText(); - public abstract String getText(boolean useLineReturns); - public abstract List getTextElements(); + public RectangularTextContainer(float top, float left, float width, float height) { + super(top, left, width, height); + } + + public RectangularTextContainer merge(RectangularTextContainer other) { + if (compareTo(other) < 0) { + this.getTextElements().addAll(other.getTextElements()); + } else { + this.getTextElements().addAll(0, other.getTextElements()); + } + super.merge(other); + return this; + } + + public abstract String getText(); + + public abstract String getText(boolean useLineReturns); + + public abstract List getTextElements(); + + @Override public String toString() { + StringBuilder sb = new StringBuilder(); + 
String s = super.toString(); + sb.append(s.substring(0, s.length() - 1)); + sb.append(String.format(",text=%s]", this.getText() == null ? "null" : "\"" + this.getText() + "\"")); + return sb.toString(); + } + } diff --git a/src/main/java/technology/tabula/Ruling.java b/src/main/java/technology/tabula/Ruling.java index caf5914a..549baddd 100644 --- a/src/main/java/technology/tabula/Ruling.java +++ b/src/main/java/technology/tabula/Ruling.java @@ -8,6 +8,7 @@ import java.util.Comparator; import java.util.Formatter; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.TreeMap; @@ -16,7 +17,7 @@ public class Ruling extends Line2D.Float { private static int PERPENDICULAR_PIXEL_EXPAND_AMOUNT = 2; private static int COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT = 1; - private enum SOType { VERTICAL, HRIGHT, HLEFT }; + private enum SOType { VERTICAL, HRIGHT, HLEFT } public Ruling(float top, float left, float width, float height) { this(new Point2D.Float(left, top), new Point2D.Float(left+width, top+height)); @@ -117,6 +118,20 @@ public void setEnd(float v) { this.setRight(v); } } + + private void setStartEnd(float start, float end) { + if (this.oblique()) { + throw new UnsupportedOperationException(); + } + if (this.vertical()) { + this.setTop(start); + this.setBottom(end); + } + else { + this.setLeft(start); + this.setRight(end); + } + } // ----- @@ -277,13 +292,13 @@ public double getAngle() { public String toString() { StringBuilder sb = new StringBuilder(); Formatter formatter = new Formatter(sb); - String rv = formatter.format("%s[x1=%f y1=%f x2=%f y2=%f]", this.getClass().toString(), this.x1, this.y1, this.x2, this.y2).toString(); + String rv = formatter.format(Locale.US, "%s[x1=%f y1=%f x2=%f y2=%f]", this.getClass().toString(), this.x1, this.y1, this.x2, this.y2).toString(); formatter.close(); return rv; } public static List cropRulingsToArea(List rulings, Rectangle2D area) { - ArrayList rv = new ArrayList(); + ArrayList rv = new 
ArrayList<>(); for (Ruling r : rulings) { if (r.intersects(area)) { rv.add(r.intersect(area)); @@ -308,15 +323,15 @@ public SortObject(SOType type, float position, Ruling ruling) { } } - List sos = new ArrayList(); + List sos = new ArrayList<>(); - TreeMap tree = new TreeMap(new Comparator() { + TreeMap tree = new TreeMap<>(new Comparator() { @Override public int compare(Ruling o1, Ruling o2) { return java.lang.Double.compare(o1.getTop(), o2.getTop()); }}); - TreeMap rv = new TreeMap(new Comparator() { + TreeMap rv = new TreeMap<>(new Comparator() { @Override public int compare(Point2D o1, Point2D o2) { if (o1.getY() > o2.getY()) return 1; @@ -395,24 +410,40 @@ public static List collapseOrientedRulings(List lines) { } public static List collapseOrientedRulings(List lines, int expandAmount) { - ArrayList rv = new ArrayList(); - if (lines.size() == 0) { - return rv; - } + ArrayList rv = new ArrayList<>(); Collections.sort(lines, new Comparator() { @Override public int compare(Ruling a, Ruling b) { - return (int) (!Utils.feq(a.getPosition(), b.getPosition()) ? a.getPosition() - b.getPosition() : a.getStart() - b.getStart()); + final float diff = a.getPosition() - b.getPosition(); + return java.lang.Float.compare(diff == 0 ? a.getStart() - b.getStart() : diff, 0f); } }); - - rv.add(lines.remove(0)); + for (Ruling next_line : lines) { - Ruling last = rv.get(rv.size() - 1); + Ruling last = rv.isEmpty() ? null : rv.get(rv.size() - 1); + + float origNextLinePosition = next_line.getPosition(); + if (last != null && Utils.feq(next_line.getPosition(), last.getPosition(), 2.0)) { + next_line.setPosition(last.getPosition()); + } + // if current line colinear with next, and are "close enough": expand current line - if (Utils.feq(next_line.getPosition(), last.getPosition()) && last.nearlyIntersects(next_line, expandAmount)) { - last.setStart(next_line.getStart() < last.getStart() ? next_line.getStart() : last.getStart()); - last.setEnd(next_line.getEnd() < last.getEnd() ? 
last.getEnd() : next_line.getEnd()); + if (last != null && Utils.feq(next_line.getPosition(), last.getPosition()) && last.nearlyIntersects(next_line, expandAmount)) { + next_line.setPosition(origNextLinePosition); + final float lastStart = last.getStart(); + final float lastEnd = last.getEnd(); + + final boolean lastFlipped = lastStart > lastEnd; + final boolean nextFlipped = next_line.getStart() > next_line.getEnd(); + + boolean differentDirections = nextFlipped != lastFlipped; + float nextS = differentDirections ? next_line.getEnd() : next_line.getStart(); + float nextE = differentDirections ? next_line.getStart() : next_line.getEnd(); + + final float newStart = lastFlipped ? Math.max(nextS, lastStart) : Math.min(nextS, lastStart); + final float newEnd = lastFlipped ? Math.min(nextE, lastEnd) : Math.max(nextE, lastEnd); + last.setStartEnd(newStart, newEnd); + assert !last.oblique(); } else if (next_line.length() == 0) { continue; diff --git a/src/main/java/technology/tabula/Table.java b/src/main/java/technology/tabula/Table.java index eda11251..3f971d0e 100644 --- a/src/main/java/technology/tabula/Table.java +++ b/src/main/java/technology/tabula/Table.java @@ -8,139 +8,98 @@ @SuppressWarnings("serial") public class Table extends Rectangle { - - class CellPosition implements Comparable { - int row, col; - CellPosition(int row, int col) { - this.row = row; this.col = col; - } - - @Override - public boolean equals(Object other) { - if (this == other) - return true; - if (!(other instanceof CellPosition)) - return false; - return other != null && this.row == ((CellPosition) other).row && this.col == ((CellPosition) other).col; - } - - @Override - public int hashCode() { - return this.row * 100000 + this.col; - } - - @Override - public int compareTo(CellPosition other) { - int rv = 0; - if(this.row < other.row) { - rv = -1; - } - else if (this.row > other.row) { - rv = 1; - } - else if (this.col > other.col) { - rv = 1; - } - else if (this.col < other.col) { - rv = 
-1; - } - return rv; - } - } - - class CellContainer extends TreeMap { - - public int maxRow = 0, maxCol = 0; - - public RectangularTextContainer get(int row, int col) { - return this.get(new CellPosition(row, col)); - } - - public List getRow(int row) { - return new ArrayList(this.subMap(new CellPosition(row, 0), new CellPosition(row, maxRow+1)).values()); - } - - @Override - public RectangularTextContainer put(CellPosition cp, RectangularTextContainer value) { - this.maxRow = Math.max(maxRow, cp.row); - this.maxCol = Math.max(maxCol, cp.col); - if (this.containsKey(cp)) { // adding on an existing CellPosition, concatenate content and resize - value.merge(this.get(cp)); - } - super.put(cp, value); - return value; - } - - @Override - public RectangularTextContainer get(Object key) { - return this.containsKey(key) ? super.get(key) : TextChunk.EMPTY; - } - - public boolean containsKey(int row, int col) { - return this.containsKey(new CellPosition(row, col)); - } - - } - - public static final Table EMPTY = new Table(); - - CellContainer cellContainer = new CellContainer(); - Page page; - ExtractionAlgorithm extractionAlgorithm; - List> rows = null; - - public Table() { - super(); - } - - public Table(Page page, ExtractionAlgorithm extractionAlgorithm) { - this(); - this.page = page; - this.extractionAlgorithm = extractionAlgorithm; - } - - public void add(RectangularTextContainer tc, int i, int j) { - this.merge(tc); - this.cellContainer.put(new CellPosition(i, j), tc); - this.rows = null; // clear the memoized rows - } - - public List> getRows() { - if (this.rows != null) { - return this.rows; - } - - this.rows = new ArrayList>(); - for (int i = 0; i <= this.cellContainer.maxRow; i++) { - List lastRow = new ArrayList(); - this.rows.add(lastRow); - for (int j = 0; j <= this.cellContainer.maxCol; j++) { - lastRow.add(this.cellContainer.containsKey(i, j) ? 
this.cellContainer.get(i, j) : TextChunk.EMPTY); - } - } - return this.rows; - } - - public RectangularTextContainer getCell(int i, int j) { - return this.cellContainer.get(i, j); - } - - public List> getCols() { - return Utils.transpose(this.getRows()); - } - - public void setExtractionAlgorithm(ExtractionAlgorithm extractionAlgorithm) { - this.extractionAlgorithm = extractionAlgorithm; - } - - public ExtractionAlgorithm getExtractionAlgorithm() { - return extractionAlgorithm; - } - - public List getCells() { - return (List) new ArrayList(this.cellContainer.values()); - } - - + + public static final Table empty() { return new Table(""); } + + private Table(String extractionMethod) { + this.extractionMethod = extractionMethod; + } + + public Table(ExtractionAlgorithm extractionAlgorithm) { + this(extractionAlgorithm.toString()); + } + + private final String extractionMethod; + + private int rowCount = 0; + private int colCount = 0; + + /* visible for testing */ final TreeMap cells = new TreeMap<>(); + + public int getRowCount() { return rowCount; } + public int getColCount() { return colCount; } + + public String getExtractionMethod() { return extractionMethod; } + + /** + * Add a cell into position and if other cell present in that position merge both cells (merge text and rect) + */ + public void add(RectangularTextContainer chunk, int row, int col) { + this.merge(chunk); // expand rectangle (parent) of this table + + rowCount = Math.max(rowCount, row + 1); + colCount = Math.max(colCount, col + 1); + + CellPosition cp = new CellPosition(row, col); + + RectangularTextContainer old = cells.get(cp); + if (old != null) chunk.merge(old); + cells.put(cp, chunk); + + this.memoizedRows = null; + } + + private List> memoizedRows = null; + + public List> getRows() { + if (this.memoizedRows == null) this.memoizedRows = computeRows(); + return this.memoizedRows; + } + + private List> computeRows() { + List> rows = new ArrayList<>(); + for (int i = 0; i < rowCount; i++) { + 
List lastRow = new ArrayList<>(); + rows.add(lastRow); + for (int j = 0; j < colCount; j++) { + RectangularTextContainer cell = cells.get(new CellPosition(i,j)); // JAVA_8 use getOrDefault() + lastRow.add(cell != null ? cell : TextChunk.EMPTY); + } + } + return rows; + } + + public RectangularTextContainer getCell(int i, int j) { + RectangularTextContainer cell = cells.get(new CellPosition(i,j)); // JAVA_8 use getOrDefault() + return cell != null ? cell : TextChunk.EMPTY; + } + +} + +class CellPosition implements Comparable { + + CellPosition(int row, int col) { + this.row = row; + this.col = col; + } + + final int row, col; + + @Override public int hashCode() { + return row + 101 * col; + } + + @Override public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; + CellPosition other = (CellPosition) obj; + return row == other.row && col == other.col; + } + + @Override public int compareTo(CellPosition other) { + int rowdiff = row - other.row; + return rowdiff != 0 ? rowdiff : col - other.col; + } } diff --git a/src/main/java/technology/tabula/TableColumnsFinder.java b/src/main/java/technology/tabula/TableColumnsFinder.java new file mode 100644 index 00000000..096fea18 --- /dev/null +++ b/src/main/java/technology/tabula/TableColumnsFinder.java @@ -0,0 +1,113 @@ +package technology.tabula; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class TableColumnsFinder { + private final List lines; + List regions = new ArrayList<>(); + + /** + * @param lines must be an array of lines sorted by their +top+ attribute + */ + public TableColumnsFinder(List lines) { + this.lines = lines; + } + + /** + * Merges rectangles from text lines which overlap horizontally into big rectangles. + * Than makes right side of every big rectangle. These are our columns. 
+ * + * @return a list of column boundaries (x axis) + */ + public List generateColumns() { + // ignore first rows (might be a title header something at the top or wrongly detected thing at top of table), + // not merge with them. See eu-001.pdf, Crawford_technologies.pdf for example. + int startIndex = (lines.size() > 4) ? 1 : 0; + startIndex = (lines.size() > 5) ? 2 : startIndex; + int skipEndElements = (lines.size() > 5) ? 1 : 0; + skipEndElements = (lines.size() > 7) ? 2 : skipEndElements; + + for (TextChunk tc: lines.get(startIndex).getTextElements()) { + if (tc.isSameChar(Line.WHITE_SPACE_CHARS)) { + continue; + } + Rectangle r = new Rectangle(); + r.setRect(tc); + regions.add(r); + } + + for (Line l: lines.subList(startIndex + 1, lines.size() - skipEndElements)) { + addLine(l, true); + } + + for (Line l: lines.subList(0, startIndex + 1)) { + addLine(l, false); + } + + for (Line l: lines.subList(lines.size() - skipEndElements - 1, lines.size())) { + addLine(l, false); + } + + return columns(); + } + + public void addLine(Line line, boolean merge) { + List lineTextElements = new ArrayList<>(); + for (TextChunk tc: line.getTextElements()) { + if (!tc.isSameChar(Line.WHITE_SPACE_CHARS)) { + lineTextElements.add(tc); + } + } + + for (Rectangle cr: regions) { + + List overlaps = new ArrayList<>(); + for (TextChunk te: lineTextElements) { + if (cr.horizontallyOverlaps(te)) { + overlaps.add(te); + } + } + + if (merge) { + for (TextChunk te : overlaps) { + cr.merge(te); + } + } + + lineTextElements.removeAll(overlaps); + } + + for (TextChunk te: lineTextElements) { + Rectangle r = new Rectangle(); + r.setRect(te); + regions.add(r); + } + } + + public List columns() { + List rv = new ArrayList<>(); + for (Rectangle r: getRegions()) { + rv.add(r.getRight()); + } + + Collections.sort(rv); + return rv; + } + + public Set getRegions() { + for (Rectangle ri : regions) { + for (Rectangle rj : regions) { + if (ri.horizontallyOverlaps(rj)) { + ri.merge(rj); + rj.merge(ri); + 
} + } + } + + return new HashSet<>(regions); + } +} diff --git a/src/main/java/technology/tabula/TableWithRulingLines.java b/src/main/java/technology/tabula/TableWithRulingLines.java index 54de67e2..10c72db2 100644 --- a/src/main/java/technology/tabula/TableWithRulingLines.java +++ b/src/main/java/technology/tabula/TableWithRulingLines.java @@ -6,22 +6,17 @@ import java.util.Iterator; import java.util.List; +import technology.tabula.extractors.ExtractionAlgorithm; + @SuppressWarnings("serial") public class TableWithRulingLines extends Table { List verticalRulings, horizontalRulings; - RectangleSpatialIndex si = new RectangleSpatialIndex(); + RectangleSpatialIndex si = new RectangleSpatialIndex<>(); - public TableWithRulingLines() { - super(); - } - - public TableWithRulingLines(Rectangle area, Page page, List cells, - List horizontalRulings, - List verticalRulings) { - this(); + public TableWithRulingLines(Rectangle area, List cells, List horizontalRulings, List verticalRulings, ExtractionAlgorithm extractionAlgorithm) { + super(extractionAlgorithm); this.setRect(area); - this.page = page; this.verticalRulings = verticalRulings; this.horizontalRulings = horizontalRulings; this.addCells(cells); @@ -42,9 +37,15 @@ private void addCells(List cells) { List row = rowsOfCells.get(i); Iterator rowCells = row.iterator(); Cell cell = rowCells.next(); + /** + * Get cells in below rows, which are before this cell. + * + * In case there are cells before in other rows, don't make this cell first, but calculate it's index + * accordingly. 
+ */ List> others = rowsOfCells( si.contains( - new Rectangle(cell.getBottom(), si.getBounds().getLeft(), cell.getLeft() - si.getBounds().getLeft(), + new Rectangle(cell.getBottom(), si.getBounds().getLeft(), cell.getLeft() - si.getBounds().getLeft() + 1, si.getBounds().getBottom() - cell.getBottom()) )); int startColumn = 0; @@ -57,11 +58,16 @@ private void addCells(List cells) { } } } - + + /*** + * Put cells into separate rows and sort rows starting from top to bottom. + * + * @return list of rows + */ private static List> rowsOfCells(List cells) { Cell c; float lastTop; - List> rv = new ArrayList>(); + List> rv = new ArrayList<>(); List lastRow; if (cells.isEmpty()) { @@ -78,21 +84,24 @@ public int compare(Cell arg0, Cell arg1) { Iterator iter = cells.iterator(); c = iter.next(); - lastTop = (float) c.getTop(); - lastRow = new ArrayList(); + lastTop = c.getTop(); + lastRow = new ArrayList<>(); lastRow.add(c); rv.add(lastRow); while (iter.hasNext()) { c = iter.next(); if (!Utils.feq(c.getTop(), lastTop)) { - lastRow = new ArrayList(); + lastRow = new ArrayList<>(); rv.add(lastRow); } lastRow.add(c); - lastTop = (float) c.getTop(); + lastTop = c.getTop(); } return rv; } + public RectangleSpatialIndex getSi() { + return si; + } } diff --git a/src/main/java/technology/tabula/TextChunk.java b/src/main/java/technology/tabula/TextChunk.java index fbbe0e2d..a59ba8b0 100644 --- a/src/main/java/technology/tabula/TextChunk.java +++ b/src/main/java/technology/tabula/TextChunk.java @@ -3,29 +3,152 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Collections; +import java.util.HashMap; +import java.text.Normalizer; @SuppressWarnings("serial") -public class TextChunk extends RectangularTextContainer implements HasText { - public static final TextChunk EMPTY = new TextChunk(0,0,0,0); - List textElements = new ArrayList(); - +public class TextChunk extends RectangularTextContainer implements HasText { + public static final 
TextChunk EMPTY = new TextChunk(0, 0, 0, 0); + List textElements = new ArrayList<>(); + public TextChunk(float top, float left, float width, float height) { super(top, left, width, height); } - + public TextChunk(TextElement textElement) { super(textElement.y, textElement.x, textElement.width, textElement.height); this.add(textElement); } - + public TextChunk(List textElements) { this(textElements.get(0)); for (int i = 1; i < textElements.size(); i++) { this.add(textElements.get(i)); } } - - + + private enum DirectionalityOptions { + LTR, NONE, RTL + } + + // I hate Java so bad. + // we're making this HashMap static! which requires really funky initialization per http://stackoverflow.com/questions/6802483/how-to-directly-initialize-a-hashmap-in-a-literal-way/6802502#6802502 + private static HashMap directionalities; + + static { + directionalities = new HashMap<>(); + // BCT = bidirectional character type + directionalities.put(java.lang.Character.DIRECTIONALITY_ARABIC_NUMBER, DirectionalityOptions.LTR); // Weak BCT "AN" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_BOUNDARY_NEUTRAL, DirectionalityOptions.NONE); // Weak BCT "BN" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, DirectionalityOptions.LTR); // Weak BCT "CS" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_EUROPEAN_NUMBER, DirectionalityOptions.LTR); // Weak BCT "EN" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, DirectionalityOptions.LTR); // Weak BCT "ES" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, DirectionalityOptions.LTR); // Weak BCT "ET" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT, DirectionalityOptions.LTR); // Strong BCT "L" in the Unicode specification. 
+ directionalities.put(java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, DirectionalityOptions.LTR); // Strong BCT "LRE" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, DirectionalityOptions.LTR); // Strong BCT "LRO" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_NONSPACING_MARK, DirectionalityOptions.NONE); // Weak BCT "NSM" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_OTHER_NEUTRALS, DirectionalityOptions.NONE); // Neutral BCT "ON" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR, DirectionalityOptions.NONE); // Neutral BCT "B" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, DirectionalityOptions.NONE); // Weak BCT "PDF" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_RIGHT_TO_LEFT, DirectionalityOptions.RTL); // Strong BCT "R" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, DirectionalityOptions.RTL); // Strong BCT "AL" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, DirectionalityOptions.RTL); // Strong BCT "RLE" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, DirectionalityOptions.RTL); // Strong BCT "RLO" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_SEGMENT_SEPARATOR, DirectionalityOptions.RTL); // Neutral BCT "S" in the Unicode specification. + directionalities.put(java.lang.Character.DIRECTIONALITY_UNDEFINED, DirectionalityOptions.NONE); // Undefined BCT. + directionalities.put(java.lang.Character.DIRECTIONALITY_WHITESPACE, DirectionalityOptions.NONE); // Neutral BCT "WS" in the Unicode specification. 
+ } + + /** + * Splits a TextChunk into N TextChunks, where each chunk is of a single directionality, and + * then reverse the RTL ones. + * what we're doing here is *reversing* the Unicode bidi algorithm + * in the language of that algorithm, each chunk is a (maximal) directional run. + * We attach whitespace to the beginning of non-RTL + **/ + public TextChunk groupByDirectionality(Boolean isLtrDominant) { + if (this.getTextElements().size() <= 0) { + throw new IllegalArgumentException(); + } + + ArrayList> chunks = new ArrayList<>(); + ArrayList buff = new ArrayList<>(); + DirectionalityOptions buffDirectionality = DirectionalityOptions.NONE; // the directionality of the characters in buff; + + for (TextElement te : this.getTextElements()) { + //TODO: we need to loop over the textelement characters + // because it is possible for a textelement to contain multiple characters? + + + // System.out.println(te.getText() + " is " + Character.getDirectionality(te.getText().charAt(0) ) + " " + directionalities.get(Character.getDirectionality(te.getText().charAt(0) ))); + if (buff.size() == 0) { + buff.add(te); + buffDirectionality = directionalities.get(Character.getDirectionality(te.getText().charAt(0))); + } else { + if (buffDirectionality == DirectionalityOptions.NONE) { + buffDirectionality = directionalities.get(Character.getDirectionality(te.getText().charAt(0))); + } + DirectionalityOptions teDirectionality = directionalities.get(Character.getDirectionality(te.getText().charAt(0))); + + if (teDirectionality == buffDirectionality || teDirectionality == DirectionalityOptions.NONE) { + if (Character.getDirectionality(te.getText().charAt(0)) == java.lang.Character.DIRECTIONALITY_WHITESPACE && (buffDirectionality == (isLtrDominant ? 
DirectionalityOptions.RTL : DirectionalityOptions.LTR))) { + buff.add(0, te); + } else { + buff.add(te); + } + } else { + // finish this chunk + if (buffDirectionality == DirectionalityOptions.RTL) { + Collections.reverse(buff); + } + chunks.add(buff); + + // and start a new one + buffDirectionality = directionalities.get(Character.getDirectionality(te.getText().charAt(0))); + buff = new ArrayList<>(); + buff.add(te); + } + } + } + if (buffDirectionality == DirectionalityOptions.RTL) { + Collections.reverse(buff); + } + chunks.add(buff); + ArrayList everything = new ArrayList<>(); + if (!isLtrDominant) { + Collections.reverse(chunks); + } + for (ArrayList group : chunks) { + everything.addAll(group); + } + return new TextChunk(everything); + } + + @Override public int isLtrDominant() { + int ltrCnt = 0; + int rtlCnt = 0; + for (int i = 0; i < this.getTextElements().size(); i++) { + String elementText = this.getTextElements().get(i).getText(); + for (int j = 0; j < elementText.length(); j++) { + byte dir = Character.getDirectionality(elementText.charAt(j)); + if ((dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT) || + (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING) || + (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE)) { + ltrCnt++; + } else if ((dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT) || + (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC) || + (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING) || + (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE)) { + rtlCnt++; + } + } + } + return java.lang.Integer.compare(ltrCnt, rtlCnt); // 1 is LTR, 0 is neutral, -1 is RTL + } + + public TextChunk merge(TextChunk other) { super.merge(other); return this; @@ -35,66 +158,69 @@ public void add(TextElement textElement) { this.textElements.add(textElement); this.merge(textElement); } - - public void add(List textElements) { - for (TextElement te: textElements) { + + public void add(List elements) { + for (TextElement te : elements) { 
this.add(te); } } - public List getTextElements() { + @Override public List getTextElements() { return textElements; } - - public String getText() { + + @Override public String getText() { if (this.textElements.size() == 0) { return ""; } StringBuilder sb = new StringBuilder(); - for (TextElement te: this.textElements) { + for (TextElement te : this.textElements) { sb.append(te.getText()); } - return sb.toString(); + return Normalizer.normalize(sb.toString(), Normalizer.Form.NFKC).trim(); } - + @Override public String getText(boolean useLineReturns) { // TODO Auto-generated method stub return null; } - + /** * Returns true if text contained in this TextChunk is the same repeated character */ public boolean isSameChar(Character c) { - return isSameChar(new Character[] { c }); + return isSameChar(new Character[]{c}); } - + public boolean isSameChar(Character[] c) { String s = this.getText(); List chars = Arrays.asList(c); for (int i = 0; i < s.length(); i++) { - if (!chars.contains(s.charAt(i))) { return false; } + if (!chars.contains(s.charAt(i))) { + return false; + } } return true; } - - /** Splits a TextChunk in two, at the position of the i-th TextElement + + /** + * Splits a TextChunk in two, at the position of the i-th TextElement */ public TextChunk[] splitAt(int i) { if (i < 1 || i >= this.getTextElements().size()) { throw new IllegalArgumentException(); } - - TextChunk[] rv = new TextChunk[] { + + TextChunk[] rv = new TextChunk[]{ new TextChunk(this.getTextElements().subList(0, i)), new TextChunk(this.getTextElements().subList(i, this.getTextElements().size())) }; return rv; } - + /** * Removes runs of identical TextElements in this TextChunk * For example, if the TextChunk contains this string of characters: "1234xxxxx56xx" @@ -105,21 +231,32 @@ public List squeeze(Character c, int minRunLength) { Character currentChar, lastChar = null; int subSequenceLength = 0, subSequenceStart = 0; TextChunk[] t; - List rv = new ArrayList(); - + List rv = new 
ArrayList<>(); + for (int i = 0; i < this.getTextElements().size(); i++) { TextElement textElement = this.getTextElements().get(i); - currentChar = textElement.getText().charAt(0); + String text = textElement.getText(); + if (text.length() > 1) { + currentChar = text.trim().charAt(0); + } else { + currentChar = text.charAt(0); + } + + if (lastChar != null && currentChar.equals(c) && lastChar.equals(currentChar)) { subSequenceLength++; - } - else { + } else { if (((lastChar != null && !lastChar.equals(currentChar)) || i + 1 == this.getTextElements().size()) && subSequenceLength >= minRunLength) { if (subSequenceStart == 0 && subSequenceLength <= this.getTextElements().size() - 1) { t = this.splitAt(subSequenceLength); - } - else { + } else { + // leave one symbol + if (isLtrDominant() == 1 + && subSequenceStart < this.getTextElements().size() - 1) { + subSequenceStart++; + } + t = this.splitAt(subSequenceStart); rv.add(t[0]); } @@ -132,74 +269,94 @@ public List squeeze(Character c, int minRunLength) { } lastChar = currentChar; } - - + + if (rv.isEmpty()) { // no splits occurred, hence this.squeeze() == [this] if (subSequenceLength >= minRunLength && subSequenceLength < this.textElements.size()) { - TextChunk[] chunks = this.splitAt(subSequenceStart); + TextChunk[] chunks = this.splitAt(subSequenceStart); rv.add(chunks[0]); - } - else { + } else { rv.add(this); } } - + return rv; } - - - + + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + + ((textElements == null) ? 0 : textElements.hashCode()); + return result; + } + @Override - public int hashCode() { - final int prime = 31; - int result = super.hashCode(); - result = prime * result - + ((textElements == null) ? 
0 : textElements.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (!super.equals(obj)) - return false; - if (getClass() != obj.getClass()) - return false; - TextChunk other = (TextChunk) obj; - if (textElements == null) { - if (other.textElements != null) - return false; - } else if (!textElements.equals(other.textElements)) - return false; - return true; - } - - public static boolean allSameChar(List textChunks) { - char first = textChunks.get(0).getText().charAt(0); - for (TextChunk tc: textChunks) { - if (!tc.isSameChar(first)) return false; - } + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + TextChunk other = (TextChunk) obj; + if (textElements == null) { + if (other.textElements != null) + return false; + } else if (!textElements.equals(other.textElements)) + return false; return true; } - + + public static boolean allSameChar(List textChunks) { + /* the previous, far more elegant version of this method failed when there was an empty TextChunk in textChunks. + * so I rewrote it in an ugly way. but it works! + * it would be good for this to get rewritten eventually + * the purpose is basically just to return true iff there are 2+ TextChunks and they're identical. + * -Jeremy 5/13/2016 + */ + + if (textChunks.size() == 1) return false; + boolean hasHadAtLeastOneNonEmptyTextChunk = false; + char first = '\u0000'; + for (TextChunk tc : textChunks) { + if (tc.getText().length() == 0) { + continue; + } + if (first == '\u0000') { + first = tc.getText().charAt(0); + } else { + hasHadAtLeastOneNonEmptyTextChunk = true; + if (!tc.isSameChar(first)) return false; + } + } + return hasHadAtLeastOneNonEmptyTextChunk; + } + + /** + * 1. Add to the same line if vertical overlap between chunk rects is >= 0.1 + * 2. ? 
Remove lines consisting of same char and that are almost full line wide + * 3. Remove repeated spaces + */ public static List groupByLines(List textChunks) { - List lines = new ArrayList(); + List lines = new ArrayList<>(); if (textChunks.size() == 0) { return lines; } float bbwidth = Rectangle.boundingBoxOf(textChunks).width; - + Line l = new Line(); l.addTextChunk(textChunks.get(0)); textChunks.remove(0); lines.add(l); Line last = lines.get(lines.size() - 1); - for (TextChunk te: textChunks) { + for (TextChunk te : textChunks) { if (last.verticalOverlapRatio(te) < 0.1) { if (last.width / bbwidth > 0.9 && TextChunk.allSameChar(last.getTextElements())) { lines.remove(lines.size() - 1); @@ -209,17 +366,17 @@ public static List groupByLines(List textChunks) { } last.addTextChunk(te); } - + if (last.width / bbwidth > 0.9 && TextChunk.allSameChar(last.getTextElements())) { lines.remove(lines.size() - 1); } - - List rv = new ArrayList(lines.size()); - - for (Line line: lines) { + + List rv = new ArrayList<>(lines.size()); + + for (Line line : lines) { rv.add(Line.removeRepeatedCharacters(line, ' ', 3)); } - + return rv; } diff --git a/src/main/java/technology/tabula/TextElement.java b/src/main/java/technology/tabula/TextElement.java index b9b8143d..f54c4e2f 100644 --- a/src/main/java/technology/tabula/TextElement.java +++ b/src/main/java/technology/tabula/TextElement.java @@ -15,12 +15,12 @@ public class TextElement extends Rectangle implements HasText { private static final float AVERAGE_CHAR_TOLERANCE = 0.3f; public TextElement(float y, float x, float width, float height, - PDFont font, float fontSize, String c, float widthOfSpace) { - this(y, x, width, height, font, fontSize, c, widthOfSpace, 0f); - } + PDFont font, float fontSize, String c, float widthOfSpace) { + this(y, x, width, height, font, fontSize, c, widthOfSpace, 0f); + } public TextElement(float y, float x, float width, float height, - PDFont font, float fontSize, String c, float widthOfSpace, float dir) { 
+ PDFont font, float fontSize, String c, float widthOfSpace, float dir) { super(); this.setRect(x, y, width, height); this.text = c; @@ -30,7 +30,7 @@ public TextElement(float y, float x, float width, float height, this.dir = dir; } - public String getText() { + @Override public String getText() { return text; } @@ -49,104 +49,107 @@ public PDFont getFont() { public float getFontSize() { return fontSize; } - - public String toString() { + + @Override public String toString() { StringBuilder sb = new StringBuilder(); String s = super.toString(); sb.append(s.substring(0, s.length() - 1)); sb.append(String.format(",text=\"%s\"]", this.getText())); return sb.toString(); } - + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + java.lang.Float.floatToIntBits(dir); + result = prime * result + ((font == null) ? 0 : font.hashCode()); + result = prime * result + java.lang.Float.floatToIntBits(fontSize); + result = prime * result + ((text == null) ? 0 : text.hashCode()); + result = prime * result + java.lang.Float.floatToIntBits(widthOfSpace); + return result; + } + @Override - public int hashCode() { - final int prime = 31; - int result = super.hashCode(); - result = prime * result + java.lang.Float.floatToIntBits(dir); - result = prime * result + ((font == null) ? 0 : font.hashCode()); - result = prime * result + java.lang.Float.floatToIntBits(fontSize); - result = prime * result + ((text == null) ? 
0 : text.hashCode()); - result = prime * result + java.lang.Float.floatToIntBits(widthOfSpace); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (!super.equals(obj)) - return false; - if (getClass() != obj.getClass()) - return false; - TextElement other = (TextElement) obj; - if (java.lang.Float.floatToIntBits(dir) != java.lang.Float - .floatToIntBits(other.dir)) - return false; - if (font == null) { - if (other.font != null) - return false; - } else if (!font.equals(other.font)) - return false; - if (java.lang.Float.floatToIntBits(fontSize) != java.lang.Float - .floatToIntBits(other.fontSize)) - return false; - if (text == null) { - if (other.text != null) - return false; - } else if (!text.equals(other.text)) - return false; - if (java.lang.Float.floatToIntBits(widthOfSpace) != java.lang.Float - .floatToIntBits(other.widthOfSpace)) - return false; - return true; - } - - public static List mergeWords(List textElements) { + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + TextElement other = (TextElement) obj; + if (java.lang.Float.floatToIntBits(dir) != java.lang.Float + .floatToIntBits(other.dir)) + return false; + if (font == null) { + if (other.font != null) + return false; + } else if (!font.equals(other.font)) + return false; + if (java.lang.Float.floatToIntBits(fontSize) != java.lang.Float + .floatToIntBits(other.fontSize)) + return false; + if (text == null) { + if (other.text != null) + return false; + } else if (!text.equals(other.text)) + return false; + return java.lang.Float.floatToIntBits(widthOfSpace) == java.lang.Float + .floatToIntBits(other.widthOfSpace); + } + + public static List mergeWords(List textElements) { return mergeWords(textElements, new ArrayList()); } - + /** * heuristically merge a list of TextElement into a list of TextChunk * ported from from PDFBox's 
PDFTextStripper.writePage, with modifications. * Here be dragons */ public static List mergeWords(List textElements, List verticalRulings) { - - List textChunks = new ArrayList(); - + + List textChunks = new ArrayList<>(); + if (textElements.isEmpty()) { return textChunks; } - - textChunks.add(new TextChunk(textElements.remove(0))); - TextChunk firstTC = textChunks.get(0); - + + // it's a problem that this `remove` is side-effecty + // other things depend on `textElements` and it can sometimes lead to the first textElement in textElement + // not appearing in the final output because it's been removed here. + // https://github.com/tabulapdf/tabula-java/issues/78 + List copyOfTextElements = new ArrayList<>(textElements); + textChunks.add(new TextChunk(copyOfTextElements.remove(0))); + TextChunk firstTC = textChunks.get(0); + float previousAveCharWidth = (float) firstTC.getWidth(); - float endOfLastTextX = (float) firstTC.getRight(); - float maxYForLine = (float) firstTC.getBottom(); + float endOfLastTextX = firstTC.getRight(); + float maxYForLine = firstTC.getBottom(); float maxHeightForLine = (float) firstTC.getHeight(); - float minYTopForLine = (float) firstTC.getTop(); + float minYTopForLine = firstTC.getTop(); float lastWordSpacing = -1; float wordSpacing, deltaSpace, averageCharWidth, deltaCharWidth; float expectedStartOfNextWordX, dist; TextElement sp, prevChar; TextChunk currentChunk; boolean sameLine, acrossVerticalRuling; - - for (TextElement chr : textElements) { + + for (TextElement chr : copyOfTextElements) { currentChunk = textChunks.get(textChunks.size() - 1); prevChar = currentChunk.textElements.get(currentChunk.textElements.size() - 1); - + // if same char AND overlapped, skip if ((chr.getText().equals(prevChar.getText())) && (prevChar.overlapRatio(chr) > 0.5)) { continue; } - + // if chr is a space that overlaps with prevChar, skip if (chr.getText().equals(" ") && Utils.feq(prevChar.getLeft(), chr.getLeft()) && Utils.feq(prevChar.getTop(), 
chr.getTop())) { continue; } - + // Resets the average character width when we see a change in font // or a change in the font size if ((chr.getFont() != prevChar.getFont()) || !Utils.feq(chr.getFontSize(), prevChar.getFontSize())) { @@ -155,30 +158,28 @@ public static List mergeWords(List textElements, List r.getPosition()) || (prevChar.x > r.getPosition() && chr.x < r.getPosition()) - ) { + (prevChar.x < r.getPosition() && chr.x > r.getPosition()) || (prevChar.x > r.getPosition() && chr.x < r.getPosition()) + ) { acrossVerticalRuling = true; break; } - } - + } + // Estimate the expected width of the space based on the // space character with some margin. wordSpacing = chr.getWidthOfSpace(); deltaSpace = 0; if (java.lang.Float.isNaN(wordSpacing) || wordSpacing == 0) { deltaSpace = java.lang.Float.MAX_VALUE; - } - else if (lastWordSpacing < 0) { + } else if (lastWordSpacing < 0) { deltaSpace = wordSpacing * 0.5f; // 0.5 == spacing tolerance - } - else { + } else { deltaSpace = ((wordSpacing + lastWordSpacing) / 2.0f) * 0.5f; } - + // Estimate the expected width of the space based on the // average character width with some margin. This calculation does not // make a true average (average of averages) but we found that it gave the @@ -186,23 +187,22 @@ else if (lastWordSpacing < 0) { // .3 worked well. if (previousAveCharWidth < 0) { averageCharWidth = (float) (chr.getWidth() / chr.getText().length()); - } - else { + } else { averageCharWidth = (float) ((previousAveCharWidth + (chr.getWidth() / chr.getText().length())) / 2.0f); } deltaCharWidth = averageCharWidth * AVERAGE_CHAR_TOLERANCE; - + // Compares the values obtained by the average method and the wordSpacing method and picks // the smaller number. expectedStartOfNextWordX = -java.lang.Float.MAX_VALUE; - + if (endOfLastTextX != -1) { expectedStartOfNextWordX = endOfLastTextX + Math.min(deltaCharWidth, deltaSpace); } - + // new line? 
sameLine = true; - if (!Utils.overlap((float) chr.getBottom(), chr.height, maxYForLine, maxHeightForLine)) { + if (!Utils.overlap(chr.getBottom(), chr.height, maxYForLine, maxHeightForLine)) { endOfLastTextX = -1; expectedStartOfNextWordX = -java.lang.Float.MAX_VALUE; maxYForLine = -java.lang.Float.MAX_VALUE; @@ -210,54 +210,62 @@ else if (lastWordSpacing < 0) { minYTopForLine = java.lang.Float.MAX_VALUE; sameLine = false; } - - endOfLastTextX = (float) chr.getRight(); - + + endOfLastTextX = chr.getRight(); + // should we add a space? if (!acrossVerticalRuling && - sameLine && - expectedStartOfNextWordX < chr.getLeft() && - !prevChar.getText().endsWith(" ")) { - - sp = new TextElement((float) prevChar.getTop(), - (float) prevChar.getLeft(), - (float) (expectedStartOfNextWordX - prevChar.getLeft()), + sameLine && + expectedStartOfNextWordX < chr.getLeft() && + !prevChar.getText().endsWith(" ")) { + + sp = new TextElement(prevChar.getTop(), + prevChar.getLeft(), + expectedStartOfNextWordX - prevChar.getLeft(), (float) prevChar.getHeight(), prevChar.getFont(), prevChar.getFontSize(), " ", prevChar.getWidthOfSpace()); - + currentChunk.add(sp); - } - else { + } else { sp = null; } - - maxYForLine = (float) Math.max(chr.getBottom(), maxYForLine); + + maxYForLine = Math.max(chr.getBottom(), maxYForLine); maxHeightForLine = (float) Math.max(maxHeightForLine, chr.getHeight()); - minYTopForLine = (float) Math.min(minYTopForLine, chr.getTop()); + minYTopForLine = Math.min(minYTopForLine, chr.getTop()); - dist = (float) (chr.getLeft() - (sp != null ? sp.getRight() : prevChar.getRight())); + dist = chr.getLeft() - (sp != null ? sp.getRight() : prevChar.getRight()); if (!acrossVerticalRuling && - sameLine && - (dist < 0 ? currentChunk.verticallyOverlaps(chr) : dist < wordSpacing)) { + sameLine && + (dist < 0 ? 
currentChunk.verticallyOverlaps(chr) : dist < wordSpacing)) { currentChunk.add(chr); + } else { // create a new chunk + textChunks.add(new TextChunk(chr)); } - else { // create a new chunk - textChunks.add(new TextChunk(chr)); - } - + lastWordSpacing = wordSpacing; previousAveCharWidth = (float) (sp != null ? (averageCharWidth + sp.getWidth()) / 2.0f : averageCharWidth); } - return textChunks; + + + List textChunksSeparatedByDirectionality = new ArrayList<>(); + // count up characters by directionality + for (TextChunk chunk : textChunks) { + // choose the dominant direction + boolean isLtrDominant = chunk.isLtrDominant() != -1; // treat neutral as LTR + TextChunk dirChunk = chunk.groupByDirectionality(isLtrDominant); + textChunksSeparatedByDirectionality.add(dirChunk); + } + + return textChunksSeparatedByDirectionality; } - + private static boolean verticallyOverlapsRuling(TextElement te, Ruling r) { - // Utils.overlap(prevChar.getTop(), prevChar.getHeight(), r.getY1(), r.getY2() - r.getY1()) return Math.max(0, Math.min(te.getBottom(), r.getY2()) - Math.max(te.getTop(), r.getY1())) > 0; } - + } diff --git a/src/main/java/technology/tabula/TextStripper.java b/src/main/java/technology/tabula/TextStripper.java new file mode 100644 index 00000000..329d45a2 --- /dev/null +++ b/src/main/java/technology/tabula/TextStripper.java @@ -0,0 +1,159 @@ +package technology.tabula; + +import org.apache.fontbox.util.BoundingBox; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDFontDescriptor; +import org.apache.pdfbox.pdmodel.font.PDType3Font; +import org.apache.pdfbox.text.PDFTextStripper; +import org.apache.pdfbox.text.TextPosition; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class TextStripper extends PDFTextStripper { + + private static final String NBSP = "\u00A0"; + private static final float AVG_HEIGHT_MULT_THRESHOLD = 6.0f; + private static 
final float MAX_BLANK_FONT_SIZE = 40.0f; + private static final float MIN_BLANK_FONT_SIZE = 2.0f; + private PDDocument document; + public ArrayList textElements; + public RectangleSpatialIndex spatialIndex; + public float minCharWidth = Float.MAX_VALUE; + public float minCharHeight = Float.MAX_VALUE; + public float totalHeight = 0.0f; + public int countHeight = 0; + + public TextStripper(PDDocument document, int pageNumber) throws IOException { + super(); + this.document = document; + this.setStartPage(pageNumber); + this.setEndPage(pageNumber); + this.textElements = new ArrayList<>(); + this.spatialIndex = new RectangleSpatialIndex<>(); + } + + public void process() throws IOException { + this.getText(this.document); + } + + @Override + protected void writeString(String string, List textPositions) throws IOException + { + for (TextPosition textPosition: textPositions) + { + if (textPosition == null) { + continue; + } + + String c = textPosition.getUnicode(); + + // if c not printable, return + if (!isPrintable(c)) { + continue; + } + + Float h = textPosition.getHeightDir(); + + if (c.equals(NBSP)) { // replace non-breaking space for space + c = " "; + } + + float wos = textPosition.getWidthOfSpace(); + + TextElement te = new TextElement(Utils.round(textPosition.getYDirAdj() - h, 2), + Utils.round(textPosition.getXDirAdj(), 2), Utils.round(textPosition.getWidthDirAdj(), 2), + Utils.round(textPosition.getHeightDir(), 2), textPosition.getFont(), textPosition.getFontSizeInPt(), c, + // workaround a possible bug in PDFBox: + // https://issues.apache.org/jira/browse/PDFBOX-1755 + wos, textPosition.getDir()); + + this.minCharWidth = (float) Math.min(this.minCharWidth, te.getWidth()); + this.minCharHeight = (float) Math.min(this.minCharHeight, te.getHeight()); + + countHeight++; + totalHeight += te.getHeight(); + float avgHeight = totalHeight / countHeight; + + //We have an issue where tall blank cells throw off the row height calculation + //Introspect a blank cell a bit 
here to see if it should be thrown away + if ((te.getText() == null || te.getText().trim().equals(""))) { + //if the cell height is more than AVG_HEIGHT_MULT_THRESHOLDxaverage, throw it away + if (avgHeight > 0 + && te.getHeight() >= (avgHeight * AVG_HEIGHT_MULT_THRESHOLD)) { + continue; + } + + //if the font size is outside of reasonable ranges, throw it away + if (textPosition.getFontSizeInPt() > MAX_BLANK_FONT_SIZE || textPosition.getFontSizeInPt() < MIN_BLANK_FONT_SIZE) { + continue; + } + } + + this.spatialIndex.add(te); + this.textElements.add(te); + } + } + + @Override + protected float computeFontHeight(PDFont font) throws IOException + { + BoundingBox bbox = font.getBoundingBox(); + if (bbox.getLowerLeftY() < Short.MIN_VALUE) + { + // PDFBOX-2158 and PDFBOX-3130 + // files by Salmat eSolutions / ClibPDF Library + bbox.setLowerLeftY(- (bbox.getLowerLeftY() + 65536)); + } + // 1/2 the bbox is used as the height todo: why? + float glyphHeight = bbox.getHeight() / 2; + + // sometimes the bbox has very high values, but CapHeight is OK + PDFontDescriptor fontDescriptor = font.getFontDescriptor(); + if (fontDescriptor != null) + { + float capHeight = fontDescriptor.getCapHeight(); + if (Float.compare(capHeight, 0) != 0 && + (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = capHeight; + } + // PDFBOX-3464, PDFBOX-448: + // sometimes even CapHeight has very high value, but Ascent and Descent are ok + float ascent = fontDescriptor.getAscent(); + float descent = fontDescriptor.getDescent(); + if (ascent > 0 && descent < 0 && + ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = (ascent - descent) / 2; + } + } + + // transformPoint from glyph space -> text space + float height; + if (font instanceof PDType3Font) + { + height = font.getFontMatrix().transformPoint(0, glyphHeight).y; + } + else + { + height = glyphHeight / 1000; + } + + return height; + } + + private boolean 
isPrintable(String s) { + Character c; + Character.UnicodeBlock block; + boolean printable = false; + for (int i = 0; i < s.length(); i++) { + c = s.charAt(i); + block = Character.UnicodeBlock.of(c); + printable |= !Character.isISOControl(c) && block != null && block != Character.UnicodeBlock.SPECIALS; + } + return printable; + } +} diff --git a/src/main/java/technology/tabula/Utils.java b/src/main/java/technology/tabula/Utils.java index d3035a69..da123d20 100644 --- a/src/main/java/technology/tabula/Utils.java +++ b/src/main/java/technology/tabula/Utils.java @@ -4,58 +4,73 @@ import java.awt.geom.Line2D; import java.awt.geom.Point2D; import java.awt.geom.Rectangle2D; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; import java.math.BigDecimal; import java.util.*; import org.apache.commons.cli.ParseException; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.rendering.ImageType; +import org.apache.pdfbox.rendering.PDFRenderer; + +import javax.imageio.ImageIO; /** - * * @author manuel */ public class Utils { public static boolean within(double first, double second, double variance) { return second < first + variance && second > first - variance; } - + public static boolean overlap(double y1, double height1, double y2, double height2, double variance) { - return within( y1, y2, variance) || (y2 <= y1 && y2 >= y1 - height1) || (y1 <= y2 && y1 >= y2-height2); + return within(y1, y2, variance) || (y2 <= y1 && y2 >= y1 - height1) || (y1 <= y2 && y1 >= y2 - height2); } - + public static boolean overlap(double y1, double height1, double y2, double height2) { return overlap(y1, height1, y2, height2, 0.1f); } - + private final static float EPSILON = 0.01f; - protected static boolean useQuickSort = useCustomQuickSort(); - + protected static boolean useQuickSort = useCustomQuickSort(); + public static boolean feq(double f1, double f2) { - return (Math.abs(f1 - f2) < 
EPSILON); + return feq(f1, f2, EPSILON); } - + + public static boolean feq(double f1, double f2, double epsilon) { + return (Math.abs(f1 - f2) < epsilon); + } + public static float round(double d, int decimalPlace) { BigDecimal bd = new BigDecimal(Double.toString(d)); bd = bd.setScale(decimalPlace, BigDecimal.ROUND_HALF_UP); return bd.floatValue(); } - + + /** + * Find rectangle which contains all the shapes + */ public static Rectangle bounds(Collection shapes) { if (shapes.isEmpty()) { throw new IllegalArgumentException("shapes can't be empty"); } - + Iterator iter = shapes.iterator(); Rectangle rv = new Rectangle(); rv.setRect(iter.next().getBounds2D()); - do { + while (iter.hasNext()) { Rectangle2D.union(iter.next().getBounds2D(), rv, rv); - } while (iter.hasNext()); - + } + return rv; - + } - + // range iterator public static List range(final int begin, final int end) { return new AbstractList() { @@ -70,7 +85,7 @@ public int size() { } }; } - + /* from apache.commons-lang */ public static boolean isNumeric(final CharSequence cs) { @@ -85,27 +100,25 @@ public static boolean isNumeric(final CharSequence cs) { } return true; } - - public static String join(String glue, String...s) { + + public static String join(String glue, String... 
s) { int k = s.length; - if ( k == 0 ) - { - return null; + if (k == 0) { + return null; } StringBuilder out = new StringBuilder(); - out.append( s[0] ); - for ( int x=1; x < k; ++x ) - { - out.append(glue).append(s[x]); + out.append(s[0]); + for (int x = 1; x < k; ++x) { + out.append(glue).append(s[x]); } return out.toString(); } - + public static List> transpose(List> table) { - List> ret = new ArrayList>(); + List> ret = new ArrayList<>(); final int N = table.get(0).size(); for (int i = 0; i < N; i++) { - List col = new ArrayList(); + List col = new ArrayList<>(); for (List row : table) { col.add(row.get(i)); } @@ -114,63 +127,72 @@ public static List> transpose(List> table) { return ret; } - /** - * Wrap Collections.sort so we can fallback to a non-stable quicksort - * if we're running on JDK7+ - */ - public static > void sort(List list) { - if (useQuickSort) { - QuickSort.sort(list); - } - else { - Collections.sort(list); - } - } - + /** + * Wrap Collections.sort so we can fallback to a non-stable quicksort if we're + * running on JDK7+ + */ + public static > void sort(List list) { + if (useQuickSort) QuickSort.sort(list); + else Collections.sort(list); + } + + public static void sort(List list, Comparator comparator) { + if (useQuickSort) QuickSort.sort(list, comparator); + else Collections.sort(list, comparator); + } + private static boolean useCustomQuickSort() { // taken from PDFBOX: - + // check if we need to use the custom quicksort algorithm as a // workaround to the transitivity issue of TextPositionComparator: // https://issues.apache.org/jira/browse/PDFBOX-1512 - String[] versionComponents = System.getProperty("java.version").split( + + String numberybits = System.getProperty("java.version").split( + "-")[0]; // some Java version strings are 9-internal, which is dumb. 
+ String[] versionComponents = numberybits.split( "\\."); - int javaMajorVersion = Integer.parseInt(versionComponents[0]); - int javaMinorVersion = Integer.parseInt(versionComponents[1]); + int javaMajorVersion; + int javaMinorVersion; + if (versionComponents.length >= 2) { + javaMajorVersion = Integer.parseInt(versionComponents[0]); + javaMinorVersion = Integer.parseInt(versionComponents[1]); + } else { + javaMajorVersion = 1; + javaMinorVersion = Integer.parseInt(versionComponents[0]); + } boolean is16orLess = javaMajorVersion == 1 && javaMinorVersion <= 6; String useLegacySort = System.getProperty("java.util.Arrays.useLegacyMergeSort"); return !is16orLess || (useLegacySort != null && useLegacySort.equals("true")); } - - - + + public static List parsePagesOption(String pagesSpec) throws ParseException { if (pagesSpec.equals("all")) { return null; } - - List rv = new ArrayList(); - + + List rv = new ArrayList<>(); + String[] ranges = pagesSpec.split(","); for (int i = 0; i < ranges.length; i++) { String[] r = ranges[i].split("-"); if (r.length == 0 || !Utils.isNumeric(r[0]) || r.length > 1 && !Utils.isNumeric(r[1])) { throw new ParseException("Syntax error in page range specification"); } - + if (r.length < 2) { rv.add(Integer.parseInt(r[0])); - } - else { + } else { int t = Integer.parseInt(r[0]); - int f = Integer.parseInt(r[1]); + int f = Integer.parseInt(r[1]); if (t > f) { throw new ParseException("Syntax error in page range specification"); } - rv.addAll(Utils.range(t, f+1)); + rv.addAll(Utils.range(t, f + 1)); } } - + Collections.sort(rv); return rv; } @@ -178,12 +200,12 @@ public static List parsePagesOption(String pagesSpec) throws ParseExcep public static void snapPoints(List rulings, float xThreshold, float yThreshold) { // collect points and keep a Line -> p1,p2 map - Map linesToPoints = new HashMap(); - List points = new ArrayList(); - for (Line2D.Float r: rulings) { + Map linesToPoints = new HashMap<>(); + List points = new ArrayList<>(); + for 
(Line2D.Float r : rulings) { Point2D p1 = r.getP1(); Point2D p2 = r.getP2(); - linesToPoints.put(r, new Point2D[] { p1, p2 }); + linesToPoints.put(r, new Point2D[]{p1, p2}); points.add(p1); points.add(p2); } @@ -196,26 +218,25 @@ public int compare(Point2D arg0, Point2D arg1) { } }); - List> groupedPoints = new ArrayList>(); - groupedPoints.add(new ArrayList(Arrays.asList(new Point2D[] { points.get(0) }))); + List> groupedPoints = new ArrayList<>(); + groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{points.get(0)}))); - for (Point2D p: points.subList(1, points.size() - 1)) { + for (Point2D p : points.subList(1, points.size() - 1)) { List last = groupedPoints.get(groupedPoints.size() - 1); if (Math.abs(p.getX() - last.get(0).getX()) < xThreshold) { groupedPoints.get(groupedPoints.size() - 1).add(p); - } - else { - groupedPoints.add(new ArrayList(Arrays.asList(new Point2D[] { p }))); + } else { + groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{p}))); } } - for(List group: groupedPoints) { + for (List group : groupedPoints) { float avgLoc = 0; - for(Point2D p: group) { + for (Point2D p : group) { avgLoc += p.getX(); } avgLoc /= group.size(); - for(Point2D p: group) { + for (Point2D p : group) { p.setLocation(avgLoc, p.getY()); } } @@ -229,35 +250,58 @@ public int compare(Point2D arg0, Point2D arg1) { } }); - groupedPoints = new ArrayList>(); - groupedPoints.add(new ArrayList(Arrays.asList(new Point2D[] { points.get(0) }))); + groupedPoints = new ArrayList<>(); + groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{points.get(0)}))); - for (Point2D p: points.subList(1, points.size() - 1)) { + for (Point2D p : points.subList(1, points.size() - 1)) { List last = groupedPoints.get(groupedPoints.size() - 1); if (Math.abs(p.getY() - last.get(0).getY()) < yThreshold) { groupedPoints.get(groupedPoints.size() - 1).add(p); - } - else { - groupedPoints.add(new ArrayList(Arrays.asList(new Point2D[] { p }))); + } else { + groupedPoints.add(new 
ArrayList<>(Arrays.asList(new Point2D[]{p}))); } } - for(List group: groupedPoints) { + for (List group : groupedPoints) { float avgLoc = 0; - for(Point2D p: group) { + for (Point2D p : group) { avgLoc += p.getY(); } avgLoc /= group.size(); - for(Point2D p: group) { + for (Point2D p : group) { p.setLocation(p.getX(), avgLoc); } } // --- // finally, modify lines - for(Map.Entry ltp: linesToPoints.entrySet()) { + for (Map.Entry ltp : linesToPoints.entrySet()) { Point2D[] p = ltp.getValue(); ltp.getKey().setLine(p[0], p[1]); } } + + public static BufferedImage pageConvertToImage(PDPage page, int dpi, ImageType imageType) throws IOException { + try (PDDocument document = new PDDocument()) { + document.addPage(page); + PDFRenderer renderer = new PDFRenderer(document); + document.close(); + return renderer.renderImageWithDPI(0, dpi, imageType); + } + } + + public static BufferedImage pageConvertToImage(PDDocument doc, PDPage page, int dpi, ImageType imageType) throws IOException { + PDFRenderer renderer = new PDFRenderer(doc); + return renderer.renderImageWithDPI(doc.getPages().indexOf(page), dpi, imageType); + } + + public static void save(BufferedImage image, String path) { + File outputfile = new File(path + ".png"); + try { + ImageIO.write(image, "png", outputfile); + } catch (IOException e) { + e.printStackTrace(); + } + } + } diff --git a/src/main/java/technology/tabula/debug/Debug.java b/src/main/java/technology/tabula/debug/Debug.java index f1725bc3..5659f06c 100644 --- a/src/main/java/technology/tabula/debug/Debug.java +++ b/src/main/java/technology/tabula/debug/Debug.java @@ -13,8 +13,10 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; +import org.apache.commons.cli.*; import technology.tabula.Cell; import technology.tabula.CommandLineApp; import technology.tabula.Line; @@ -24,77 +26,111 @@ import technology.tabula.Rectangle; import technology.tabula.Ruling; import 
technology.tabula.Table; +import technology.tabula.TableColumnsFinder; import technology.tabula.TextChunk; import technology.tabula.TextElement; import technology.tabula.Utils; import technology.tabula.detectors.NurminenDetectionAlgorithm; import technology.tabula.extractors.BasicExtractionAlgorithm; import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.util.ImageIOUtil; +import org.apache.pdfbox.rendering.ImageType; + +import javax.imageio.ImageIO; public class Debug { - + private static final float CIRCLE_RADIUS = 5f; - - private static final Color[] COLORS = { new Color(27, 158, 119), - new Color(217, 95, 2), new Color(117, 112, 179), - new Color(231, 41, 138), new Color(102, 166, 30) }; + + private static final Color[] COLORS = {new Color(27, 158, 119), new Color(217, 95, 2), new Color(117, 112, 179), + new Color(231, 41, 138), new Color(102, 166, 30)}; public static void debugIntersections(Graphics2D g, Page page) { int i = 0; - for (Point2D p: Ruling.findIntersections(page.getHorizontalRulings(), page.getVerticalRulings()).keySet()) { + for (Point2D p : Ruling.findIntersections(page.getHorizontalRulings(), page.getVerticalRulings()).keySet()) { g.setColor(COLORS[(i++) % 5]); - g.fill(new Ellipse2D.Float((float) p.getX() - CIRCLE_RADIUS/2f, (float) p.getY() - CIRCLE_RADIUS/2f, 5f, 5f)); + g.fill(new Ellipse2D.Float((float) p.getX() - CIRCLE_RADIUS / 2f, (float) p.getY() - CIRCLE_RADIUS / 2f, 5f, + 5f)); } } - + private static void debugNonCleanRulings(Graphics2D g, Page page) { drawShapes(g, page.getUnprocessedRulings()); } - + 
+ private static void debugMixedAlgorithmRulings(Graphics2D g, Page page) { + // draw detected lines + NurminenDetectionAlgorithm detector = new NurminenDetectionAlgorithm(); + List guesses = detector.detect(page); + + for (Rectangle guessRect : guesses) { + Page newPage = page.getArea(guessRect); + BasicExtractionAlgorithm extractionAlgorithm = new BasicExtractionAlgorithm(); + extractionAlgorithm.setMixedTableExtractionEnabled(true); + List
extract = extractionAlgorithm.extract(newPage); + drawShapes(g, extractionAlgorithm.mixedExtractionRulings); + } + } + private static void debugRulings(Graphics2D g, Page page) { // draw detected lines - List rulings = new ArrayList(page.getHorizontalRulings()); + List rulings = new ArrayList<>(page.getHorizontalRulings()); rulings.addAll(page.getVerticalRulings()); drawShapes(g, rulings); } - + + private static void debugNurminenEdges(Graphics2D g, Page page) { + // draw detected lines + NurminenDetectionAlgorithm detectionAlgorithm = new NurminenDetectionAlgorithm(); + detectionAlgorithm.detect(page); + List allTextEdges = new ArrayList<>(detectionAlgorithm.allLeftTextEdges); + allTextEdges.addAll(detectionAlgorithm.allMidTextEdges); + allTextEdges.addAll(detectionAlgorithm.allRightTextEdges); + + for (NurminenDetectionAlgorithm.TextEdge textEdge : allTextEdges) { + textEdge.x1 = textEdge.x2 = (textEdge.x1 + textEdge.x2) / 2; + } + + drawShapes(g, detectionAlgorithm.allLeftTextEdges, Color.BLUE); + drawShapes(g, detectionAlgorithm.allMidTextEdges, Color.GREEN); + drawShapes(g, detectionAlgorithm.allRightTextEdges, Color.MAGENTA); + } + private static void debugColumns(Graphics2D g, Page page) { - List textChunks = TextElement.mergeWords(page.getText()); - List lines = TextChunk.groupByLines(textChunks); - List columns = BasicExtractionAlgorithm.columnPositions(lines); - int i = 0; - for(float p: columns) { - Ruling r = new Ruling(new Point2D.Float(p, (float) page.getTop()), new Point2D.Float(p, (float) page.getBottom())); - g.setColor(COLORS[(i++) % 5]); - drawShape(g, r); + NurminenDetectionAlgorithm detector = new NurminenDetectionAlgorithm(); + List guesses = detector.detect(page); + + for (Rectangle guessRect : guesses) { + Page newPage = page.getArea(guessRect); + List textChunks = TextElement.mergeWords(newPage.getText()); + List lines = TextChunk.groupByLines(textChunks); + List columns = new TableColumnsFinder(lines).generateColumns(); + int i = 0; + for 
(float p : columns) { + Ruling r = new Ruling(new Point2D.Float(p, newPage.getTop()), + new Point2D.Float(p, newPage.getBottom())); + g.setColor(COLORS[(i++) % 5]); + drawShape(g, r); + } } } - + private static void debugCharacters(Graphics2D g, Page page) { drawShapes(g, page.getText()); } - + private static void debugTextChunks(Graphics2D g, Page page) { List chunks = TextElement.mergeWords(page.getText(), page.getVerticalRulings()); drawShapes(g, chunks); } - + private static void debugSpreadsheets(Graphics2D g, Page page) { SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); List tables = sea.extract(page); drawShapes(g, tables); } - + private static void debugCells(Graphics2D g, Rectangle area, Page page) { List h = page.getHorizontalRulings(); List v = page.getVerticalRulings(); @@ -102,7 +138,6 @@ private static void debugCells(Graphics2D g, Rectangle area, Page page) { h = Ruling.cropRulingsToArea(h, area); v = Ruling.cropRulingsToArea(v, area); } - SpreadsheetExtractionAlgorithm ea = new SpreadsheetExtractionAlgorithm(); List cells = SpreadsheetExtractionAlgorithm.findCells(h, v); drawShapes(g, cells); } @@ -112,54 +147,68 @@ private static void debugDetectedTables(Graphics2D g, Page page) { List tables = detectionAlgorithm.detect(page); drawShapes(g, tables); } - - private static void drawShapes(Graphics2D g, Collection shapes, Stroke stroke) { + + private static void debugBluntlyDetectedTables(Graphics2D g, Page page) { + NurminenDetectionAlgorithm detectionAlgorithm = new NurminenDetectionAlgorithm(); + detectionAlgorithm.detect(page); + Rectangle table = detectionAlgorithm.bluntDetect(); + if (table != null) { + drawShapes(g, Collections.singleton(table)); + } + } + + private static void drawShapes(Graphics2D g, Collection shapes, Stroke stroke, Color color) { int i = 0; g.setStroke(stroke); - for (Shape s: shapes) { - g.setColor(COLORS[(i++) % 5]); + for (Shape s : shapes) { + g.setColor(color == null ? 
COLORS[(i++) % 5] : color); drawShape(g, s); } } - + private static void drawShapes(Graphics2D g, Collection shapes) { - drawShapes(g, shapes, new BasicStroke(2f)); + drawShapes(g, shapes, null); + } + private static void drawShapes(Graphics2D g, Collection shapes, Color color) { + drawShapes(g, shapes, new BasicStroke(2f), color); } - + private static void debugProjectionProfile(Graphics2D g, Page page) { float horizSmoothKernel = 0, vertSmoothKernel = 0; - //for (Rectangle r: page.getText()) { - for (Rectangle r: page.getText()) { + // for (Rectangle r: page.getText()) { + for (Rectangle r : page.getText()) { horizSmoothKernel += r.getWidth(); vertSmoothKernel += r.getHeight(); } horizSmoothKernel /= page.getText().size(); vertSmoothKernel /= page.getText().size(); System.out.println("hsk: " + horizSmoothKernel + " vsk: " + vertSmoothKernel); - //ProjectionProfile profile = new ProjectionProfile(page, page.getText(), horizSmoothKernel, vertSmoothKernel); - ProjectionProfile profile = new ProjectionProfile(page, TextElement.mergeWords(page.getText(), page.getVerticalRulings()), horizSmoothKernel * 1.5f, vertSmoothKernel); + // ProjectionProfile profile = new ProjectionProfile(page, + // page.getText(), horizSmoothKernel, vertSmoothKernel); + ProjectionProfile profile = new ProjectionProfile(page, + TextElement.mergeWords(page.getText(), page.getVerticalRulings()), horizSmoothKernel * 1.5f, + vertSmoothKernel); float prec = (float) Math.pow(10, ProjectionProfile.DECIMAL_PLACES); - float[] hproj = profile.getHorizontalProjection(); float[] vproj = profile.getVerticalProjection(); - + g.setStroke(new BasicStroke(1f)); g.setColor(Color.RED); // hproj - //Point2D last = new Point2D.Double(page.getLeft(), page.getBottom() - hproj[0] / prec), cur; + // Point2D last = new Point2D.Double(page.getLeft(), page.getBottom() - + // hproj[0] / prec), cur; Point2D last = new Point2D.Double(page.getLeft(), page.getBottom()), cur; for (int i = 0; i < hproj.length; i++) { cur = new 
Point2D.Double(page.getLeft() + i / prec, page.getBottom() - hproj[i]); g.draw(new Line2D.Double(last, cur)); last = cur; } - + // hproj first derivative g.setColor(Color.BLUE); - float[] deriv = ProjectionProfile.filter(ProjectionProfile - .getFirstDeriv(profile.getHorizontalProjection()), + float[] deriv = ProjectionProfile.filter(ProjectionProfile.getFirstDeriv(profile.getHorizontalProjection()), 0.01f); last = new Point2D.Double(page.getLeft(), page.getBottom()); for (int i = 0; i < deriv.length; i++) { @@ -167,16 +216,16 @@ private static void debugProjectionProfile(Graphics2D g, Page page) { g.draw(new Line2D.Double(last, cur)); last = cur; } - + // columns g.setColor(Color.MAGENTA); g.setStroke(new BasicStroke(1f)); float[] seps = profile.findVerticalSeparators(horizSmoothKernel * 2.5f); for (int i = 0; i < seps.length; i++) { - float x = (float) (page.getLeft() + seps[i]); + float x = page.getLeft() + seps[i]; g.draw(new Line2D.Double(x, page.getTop(), x, page.getBottom())); } - + // vproj g.setStroke(new BasicStroke(1f)); g.setColor(Color.GREEN); @@ -186,7 +235,7 @@ private static void debugProjectionProfile(Graphics2D g, Page page) { g.draw(new Line2D.Double(last, cur)); last = cur; } - + // vproj first derivative g.setColor(new Color(0, 0, 1, 0.5f)); deriv = ProjectionProfile.filter(ProjectionProfile.getFirstDeriv(vproj), 0.1f); @@ -196,46 +245,44 @@ private static void debugProjectionProfile(Graphics2D g, Page page) { g.draw(new Line2D.Double(last, cur)); last = cur; } - + // rows g.setStroke(new BasicStroke(1.5f)); seps = profile.findHorizontalSeparators(vertSmoothKernel); for (int i = 0; i < seps.length; i++) { - float y = (float) (page.getTop() + seps[i]); + float y = page.getTop() + seps[i]; g.draw(new Line2D.Double(page.getLeft(), y, page.getRight(), y)); } - + } - + private static void drawShape(Graphics2D g, Shape shape) { //g.setStroke(new BasicStroke(1)); g.draw(shape); } public static void renderPage(String pdfPath, String outPath, int 
pageNumber, Rectangle area, - boolean drawTextChunks, boolean drawSpreadsheets, boolean drawRulings, boolean drawIntersections, - boolean drawColumns, boolean drawCharacters, boolean drawArea, boolean drawCells, - boolean drawUnprocessedRulings, boolean drawProjectionProfile, boolean drawClippingPaths, - boolean drawDetectedTables) throws IOException { - PDDocument document = PDDocument.load(pdfPath); - - ObjectExtractor oe = new ObjectExtractor(document, true); - + boolean drawTextChunks, boolean drawSpreadsheets, boolean drawRulings, boolean drawIntersections, + boolean drawColumns, boolean drawCharacters, boolean drawArea, boolean drawCells, + boolean drawUnprocessedRulings, boolean drawProjectionProfile, boolean drawClippingPaths, + boolean drawDetectedTables, boolean drawBluntlyDetectedTables, + boolean drawNurminenEdges, boolean drawMixedRulings) throws IOException { + PDDocument document = PDDocument.load(new File(pdfPath)); + + ObjectExtractor oe = new ObjectExtractor(document); + Page page = oe.extract(pageNumber + 1); - + if (area != null) { page = page.getArea(area); } - - PDPage p = (PDPage) document.getDocumentCatalog().getAllPages().get(pageNumber); - -// PDFRenderer renderer = new PDFRenderer(document); -// BufferedImage image = renderer.renderImage(pageNumber); - - BufferedImage image = p.convertToImage(BufferedImage.TYPE_INT_RGB, 72); - + + PDPage p = document.getPage(pageNumber); + + BufferedImage image = Utils.pageConvertToImage(document, p, 72, ImageType.RGB); + Graphics2D g = (Graphics2D) image.getGraphics(); - + if (drawTextChunks) { debugTextChunks(g, page); } @@ -268,21 +315,31 @@ public static void renderPage(String pdfPath, String outPath, int pageNumber, Re debugProjectionProfile(g, page); } if (drawClippingPaths) { - drawShapes(g, oe.clippingPaths, new BasicStroke(2f, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER, 10f, new float[] { 3f }, 0f)); + // TODO: Enable when oe.clippingPaths is done + //drawShapes(g, oe.clippingPaths, + // new 
BasicStroke(2f, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER, 10f, new float[] { 3f }, 0f)); } if (drawDetectedTables) { debugDetectedTables(g, page); } + if (drawBluntlyDetectedTables) { + debugBluntlyDetectedTables(g, page); + } + if (drawNurminenEdges) { + debugNurminenEdges(g, page); + } + if (drawMixedRulings) { + debugMixedAlgorithmRulings(g, page); + } document.close(); - - ImageIOUtil.writeImage(image, outPath, 72); + + ImageIO.write(image, "jpg", new File(outPath)); } - - @SuppressWarnings("static-access") + private static Options buildOptions() { Options o = new Options(); - + o.addOption("h", "help", false, "Print this help text."); o.addOption("r", "rulings", false, "Show detected rulings."); o.addOption("i", "intersections", false, "Show intersections between rulings."); @@ -294,54 +351,57 @@ private static Options buildOptions() { o.addOption("l", "cells", false, "Show detected cells"); o.addOption("u", "unprocessed-rulings", false, "Show non-cleaned rulings"); o.addOption("f", "profile", false, "Show projection profile"); - o.addOption("n", "clipping-paths", false, "Show clipping paths"); + o.addOption("z", "clipping-paths", false, "Show clipping paths"); o.addOption("d", "detected-tables", false, "Show detected tables"); + o.addOption("b", "bluntly-detected-tables", false, "Show detected tables"); + o.addOption("n", "nurminen-edges", false, "Show ALL edges detected by Nurminen table detection code."); + o.addOption("m", "mixed-rulings", false, "Show ruling created by mixed extraction algorithm"); - o.addOption(OptionBuilder.withLongOpt("area") - .withDescription("Portion of the page to analyze (top,left,bottom,right). Example: --area 269.875,12.75,790.5,561. Default is entire page") + o.addOption(Option.builder("a").longOpt("area") + .desc("Portion of the page to analyze (top,left,bottom,right). Example: --area 269.875,12.75,790.5,561. 
Default is entire page") .hasArg() - .withArgName("AREA") - .create("a")); - o.addOption(OptionBuilder.withLongOpt("pages") - .withDescription("Comma separated list of ranges, or all. Examples: --pages 1-3,5-7, --pages 3 or --pages all. Default is --pages 1") + .argName("AREA") + .build()); + + o.addOption(Option.builder("p").longOpt("pages") + .desc("Comma separated list of ranges, or all. Examples: --pages 1-3,5-7, --pages 3 or --pages all. Default is --pages 1") .hasArg() - .withArgName("PAGES") - .create("p")); + .argName("PAGES") + .build()); + return o; } - - + public static void main(String[] args) throws IOException { - CommandLineParser parser = new GnuParser(); + CommandLineParser parser = new DefaultParser(); try { // parse the command line arguments - CommandLine line = parser.parse(buildOptions(), args ); - List pages = new ArrayList(); + CommandLine line = parser.parse(buildOptions(), args); + List pages = new ArrayList<>(); if (line.hasOption('p')) { pages = Utils.parsePagesOption(line.getOptionValue('p')); - } - else { + } else { pages.add(1); } - + if (line.hasOption('h')) { printHelp(); System.exit(0); } - + if (line.getArgs().length != 1) { throw new ParseException("Need one filename\nTry --help for help"); } - + File pdfFile = new File(line.getArgs()[0]); if (!pdfFile.exists()) { throw new ParseException("File does not exist"); } - + if (line.hasOption('g') && !line.hasOption('a')) { throw new ParseException("-g argument needs an area (-a)"); } - + Rectangle area = null; if (line.hasOption('a')) { List f = CommandLineApp.parseFloatList(line.getOptionValue('a')); @@ -350,39 +410,41 @@ public static void main(String[] args) throws IOException { } area = new Rectangle(f.get(0), f.get(1), f.get(3) - f.get(1), f.get(2) - f.get(0)); } - - for (int i: pages) { + + if (pages == null) { + // user specified all pages + PDDocument document = PDDocument.load(pdfFile); + + int numPages = document.getNumberOfPages(); + pages = new ArrayList<>(numPages); + + 
for (int i = 1; i <= numPages; i++) { + pages.add(i); + } + + document.close(); + } + + for (int i : pages) { renderPage(pdfFile.getAbsolutePath(), - new File(pdfFile.getParent(), removeExtension(pdfFile.getName()) + "-" + (i) + ".jpg").getAbsolutePath(), - i-1, - area, - line.hasOption('t'), - line.hasOption('s'), - line.hasOption('r'), - line.hasOption('i'), - line.hasOption('c'), - line.hasOption('e'), - line.hasOption('g'), - line.hasOption('l'), - line.hasOption('u'), - line.hasOption('f'), - line.hasOption('n'), - line.hasOption('d')); + new File(pdfFile.getParent(), removeExtension(pdfFile.getName()) + "-" + (i) + ".jpg") + .getAbsolutePath(), + i - 1, area, line.hasOption('t'), line.hasOption('s'), line.hasOption('r'), line.hasOption('i'), + line.hasOption('c'), line.hasOption('e'), line.hasOption('g'), line.hasOption('l'), + line.hasOption('u'), line.hasOption('f'), line.hasOption('z'), line.hasOption('d'), + line.hasOption('b'), line.hasOption('n'), line.hasOption('m')); } - } - catch (ParseException e) { + } catch (ParseException e) { System.err.println("Error: " + e.getMessage()); System.exit(1); - } + } } - - private static void printHelp() { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("tabula-debug", "Generate debugging images", buildOptions(), "", true); } - + private static String removeExtension(String s) { String separator = System.getProperty("file.separator"); @@ -403,8 +465,4 @@ private static String removeExtension(String s) { return filename.substring(0, extensionIndex); } - - - - } diff --git a/src/main/java/technology/tabula/detectors/DetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/DetectionAlgorithm.java index 6f717c5b..c292c282 100644 --- a/src/main/java/technology/tabula/detectors/DetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/DetectionAlgorithm.java @@ -1,10 +1,8 @@ package technology.tabula.detectors; -import org.apache.pdfbox.pdmodel.PDDocument; import 
technology.tabula.Page; import technology.tabula.Rectangle; -import java.io.File; import java.util.List; /** diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java index a19669bf..7a8d3ba3 100644 --- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java @@ -1,29 +1,47 @@ package technology.tabula.detectors; +import java.awt.geom.Line2D; +import java.awt.geom.Point2D; +import java.awt.image.BufferedImage; +import java.awt.image.Raster; +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdfwriter.ContentStreamWriter; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDStream; -import org.apache.pdfbox.util.PDFOperator; -import technology.tabula.*; +import org.apache.pdfbox.rendering.ImageType; + +import technology.tabula.Line; +import technology.tabula.Page; import technology.tabula.Rectangle; +import technology.tabula.Ruling; +import technology.tabula.TableColumnsFinder; +import technology.tabula.TextChunk; +import technology.tabula.TextElement; +import technology.tabula.Utils; import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; -import java.awt.geom.Line2D; -import java.awt.geom.Point2D; -import java.awt.image.BufferedImage; -import java.awt.image.Raster; -import java.io.IOException; -import java.util.*; -import 
java.util.List; - /** * Created by matt on 2015-12-17. - * + *

* Attempt at an implementation of the table finding algorithm described by * Anssi Nurminen's master's thesis: - * http://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3 + * https://trepo.tuni.fi/handle/123456789/21520 */ public class NurminenDetectionAlgorithm implements DetectionAlgorithm { @@ -36,23 +54,101 @@ public class NurminenDetectionAlgorithm implements DetectionAlgorithm { private static final int REQUIRED_TEXT_LINES_FOR_EDGE = 4; private static final int REQUIRED_CELLS_FOR_TABLE = 4; private static final float IDENTICAL_TABLE_OVERLAP_RATIO = 0.9f; + private static final float ROW_HEIGHT_THRESHOLD_MULT_BOTTOM = 1.5f; + private static final float ROW_HEIGHT_THRESHOLD_MULT_TOP = 2f; + public List allLeftTextEdges = new ArrayList<>(); + public List allMidTextEdges = new ArrayList<>(); + public List allRightTextEdges = new ArrayList<>(); + private Rectangle textBoundingBox; + private List allLines; + private ArrayList allShortLines; + private List horizontalRulings; + private Page page; + + // create a set of our current tables that will eliminate duplicate tables + private static final Comparator TABLE_COMPARATOR = new Comparator() { + @Override + public int compare(Rectangle o1, Rectangle o2) { + if (o1.equals(o2)) { + return 0; + } + + // o1 is "equal" to o2 if o2 contains all of o1 + if (o2.almostContains(o1)) { + return 0; + } + + if (o1.almostContains(o2)) { + o2.setRect(o1); // the bigger rect should remain + return 0; + } + + // otherwise see if these tables are "mostly" the same + float overlap = o1.overlapRatio(o2); + if (overlap >= IDENTICAL_TABLE_OVERLAP_RATIO) { + return 0; + } else { + return 1; + } + } + }; /** * Helper class that encapsulates a text edge */ - private static final class TextEdge extends Line2D.Float { + public static final class TextEdge extends Line2D.Float { // types of text edges public static final int LEFT = 0; public static final int MID = 1; public static final int RIGHT = 2; - public 
static final int NUM_TYPES = 3; + public static final int SIDE_EDGE = 3; + public static final int NUM_TYPES = 4; + /** + * Number of text element directly in touch with this edge + */ public int intersectingTextRowCount; public TextEdge(float x1, float y1, float x2, float y2) { super(x1, y1, x2, y2); this.intersectingTextRowCount = 0; } + + public float getYOverlapPercent(TextEdge textEdge) { + float a = Math.max(y1, textEdge.y1); + float b = Math.min(y2, textEdge.y2); + + if (a <= b) { + return (b - a)/Math.max(textEdge.y2 - textEdge.y1, y2 - y1); + } else { + return 0.0f; + } + } + + public float getWidth() { + return x2 - x1; + } + + public float getHeight() { + return y2 - y1; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TextEdge textEdge = (TextEdge) o; + return java.lang.Float.compare(textEdge.x1, x1) == 0 && + java.lang.Float.compare(textEdge.y1, y1) == 0 && + java.lang.Float.compare(textEdge.x2, x2) == 0 && + java.lang.Float.compare(textEdge.y2, y2) == 0 && + intersectingTextRowCount == textEdge.intersectingTextRowCount; + } + + @Override + public int hashCode() { + return Objects.hash(x1, y1, x2, y2, intersectingTextRowCount); + } } /** @@ -81,7 +177,16 @@ public RelevantEdges(int edgeType, int edgeCount) { } @Override + /** + * 1. Convert to image + * 3. Find rulings in image + * 4. Using rulings in image, try to find full spreadsheet tables with cells. + * 5. Search the rest of space for tables using Nurminen edges + * 6. Using relevant edge count/type find rows that are relevant + * 7. Expand the area into top and bottom using horizontal rulings. 
+ */ public List detect(Page page) { + this.page = page; // get horizontal & vertical lines // we get these from an image of the PDF and not the PDF itself because sometimes there are invisible PDF @@ -90,27 +195,40 @@ public List detect(Page page) { BufferedImage image; PDPage pdfPage = page.getPDPage(); try { - image = pdfPage.convertToImage(BufferedImage.TYPE_BYTE_GRAY, 144); + image = Utils.pageConvertToImage(page.getPDDoc(), pdfPage, 144, ImageType.GRAY); } catch (IOException e) { - return new ArrayList(); + return new ArrayList<>(); } - List horizontalRulings = this.getHorizontalRulings(image); + //Utils.save(image, "/tmp/lool"); + + horizontalRulings = this.getHorizontalRulings(image); // now check the page for vertical lines, but remove the text first to make things less confusing + PDDocument removeTextDocument = null; try { - this.removeText(pdfPage); - image = pdfPage.convertToImage(BufferedImage.TYPE_BYTE_GRAY, 144); + removeTextDocument = this.removeText(pdfPage); + pdfPage = removeTextDocument.getPage(0); + image = Utils.pageConvertToImage(removeTextDocument, pdfPage, 144, ImageType.GRAY); } catch (Exception e) { - return new ArrayList(); + return new ArrayList<>(); + } finally { + if (removeTextDocument != null) { + try { + removeTextDocument.close(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } } List verticalRulings = this.getVerticalRulings(image); - List allEdges = new ArrayList(horizontalRulings); + List allEdges = new ArrayList<>(horizontalRulings); allEdges.addAll(verticalRulings); - List tableAreas = new ArrayList(); + List tableAreas = new ArrayList<>(); // if we found some edges, try to find some tables based on them if (allEdges.size() > 0) { @@ -119,7 +237,7 @@ public List detect(Page page) { // normalize the rulings to make sure snapping didn't create any wacky non-horizontal/vertical rulings for (List rulings : Arrays.asList(horizontalRulings, verticalRulings)) { - for (Iterator 
iterator = rulings.iterator(); iterator.hasNext();) { + for (Iterator iterator = rulings.iterator(); iterator.hasNext(); ) { Ruling ruling = iterator.next(); ruling.normalize(); @@ -150,8 +268,8 @@ public List detect(Page page) { if (verticalRuling.intersects(tableArea) && !(tableArea.contains(verticalRuling.getP1()) && tableArea.contains(verticalRuling.getP2()))) { - tableArea.setTop((float)Math.floor(Math.min(tableArea.getTop(), verticalRuling.getY1()))); - tableArea.setBottom((float)Math.ceil(Math.max(tableArea.getBottom(), verticalRuling.getY2()))); + tableArea.setTop((float) Math.floor(Math.min(tableArea.getTop(), verticalRuling.getY1()))); + tableArea.setBottom((float) Math.ceil(Math.max(tableArea.getBottom(), verticalRuling.getY2()))); break; } } @@ -160,36 +278,39 @@ public List detect(Page page) { // the tabula Page coordinate space is half the size of the PDFBox image coordinate space // so halve the table area size before proceeding and add a bit of padding to make sure we capture everything for (Rectangle area : tableAreas) { - area.x = (float)Math.floor(area.x/2) - TABLE_PADDING_AMOUNT; - area.y = (float)Math.floor(area.y/2) - TABLE_PADDING_AMOUNT; - area.width = (float)Math.ceil(area.width/2) + TABLE_PADDING_AMOUNT; - area.height = (float)Math.ceil(area.height/2) + TABLE_PADDING_AMOUNT; + area.x = area.x / 2 - TABLE_PADDING_AMOUNT; + area.y = area.y / 2 - TABLE_PADDING_AMOUNT; + area.width = area.width / 2 + TABLE_PADDING_AMOUNT; + area.height = area.height / 2 + TABLE_PADDING_AMOUNT + 1; } // we're going to want halved horizontal lines later too for (Line2D.Float ruling : horizontalRulings) { - ruling.x1 = ruling.x1/2; - ruling.y1 = ruling.y1/2; - ruling.x2 = ruling.x2/2; - ruling.y2 = ruling.y2/2; + ruling.x1 = ruling.x1 / 2; + ruling.y1 = ruling.y1 / 2; + ruling.x2 = ruling.x2 / 2; + ruling.y2 = ruling.y2 / 2; } + textBoundingBox = page.getTextBounds(); + // now look at text rows to help us find more tables and flesh out existing ones List 
textChunks = TextElement.mergeWords(page.getText()); List lines = TextChunk.groupByLines(textChunks); + allLines = new ArrayList<>(lines); // first look for text rows that intersect an existing table - those lines should probably be part of the table for (Line textRow : lines) { for (Rectangle tableArea : tableAreas) { if (!tableArea.contains(textRow) && textRow.intersects(tableArea)) { - tableArea.setLeft((float)Math.floor(Math.min(textRow.getLeft(), tableArea.getLeft()))); - tableArea.setRight((float)Math.ceil(Math.max(textRow.getRight(), tableArea.getRight()))); + tableArea.setLeft((float) Math.floor(Math.min(textRow.getLeft(), tableArea.getLeft()))); + tableArea.setRight((float) Math.ceil(Math.max(textRow.getRight(), tableArea.getRight()))); } } } // get rid of tables that DO NOT intersect any text areas - these are likely graphs or some sort of graphic - for (Iterator iterator = tableAreas.iterator(); iterator.hasNext();) { + for (Iterator iterator = tableAreas.iterator(); iterator.hasNext(); ) { Rectangle table = iterator.next(); boolean intersectsText = false; @@ -205,6 +326,13 @@ public List detect(Page page) { } } + // delete long lines of text. They are likely not a part of table. + // This avoids detecting justified text as tables. + textChunks.removeIf(textChunk -> textChunk.width >= 0.4 * textBoundingBox.getWidth()); + List shortLines = TextChunk.groupByLines(textChunks); + allShortLines = new ArrayList<>(shortLines); + + // lastly, there may be some tables that don't have any vertical rulings at all // we'll use text edges we've found to try and guess which text rows are part of a table @@ -213,13 +341,23 @@ public List detect(Page page) { // the most text rows, and then use that magic number of "relevant" edges to decide what text rows should be // part of a table. 
- boolean foundTable; + boolean foundTable; + boolean savedEdges = false; do { foundTable = false; // get rid of any text lines contained within existing tables, this allows us to find more tables - for (Iterator iterator = lines.iterator(); iterator.hasNext();) { + for (Iterator iterator = lines.iterator(); iterator.hasNext(); ) { + Line textRow = iterator.next(); + for (Rectangle table : tableAreas) { + if (table.contains(textRow)) { + iterator.remove(); + break; + } + } + } + for (Iterator iterator = shortLines.iterator(); iterator.hasNext(); ) { Line textRow = iterator.next(); for (Rectangle table : tableAreas) { if (table.contains(textRow)) { @@ -235,61 +373,194 @@ public List detect(Page page) { List midTextEdges = textEdges.get(TextEdge.MID); List rightTextEdges = textEdges.get(TextEdge.RIGHT); + if (!savedEdges) { + allLeftTextEdges.addAll(leftTextEdges); + allMidTextEdges.addAll(midTextEdges); + allRightTextEdges.addAll(rightTextEdges); + savedEdges = true; + } + + List sideTextEdges = new ArrayList<>(rightTextEdges); + sideTextEdges.addAll(leftTextEdges); + textEdges.add(sideTextEdges); + // find the relevant text edges (the ones we think define where a table is) - RelevantEdges relevantEdgeInfo = this.getRelevantEdges(textEdges, lines); + RelevantEdges relevantEdgeInfo = this.getRelevantEdges(textEdges, shortLines, lines.size()); // we found something relevant so let's look for rows that fit our criteria if (relevantEdgeInfo.edgeType != -1) { List relevantEdges = null; - switch(relevantEdgeInfo.edgeType) { - case TextEdge.LEFT: - relevantEdges = leftTextEdges; + switch (relevantEdgeInfo.edgeType) { + case TextEdge.SIDE_EDGE: + relevantEdges = sideTextEdges; break; case TextEdge.MID: relevantEdges = midTextEdges; break; - case TextEdge.RIGHT: - relevantEdges = rightTextEdges; - break; } Rectangle table = this.getTableFromText(lines, relevantEdges, relevantEdgeInfo.edgeCount, horizontalRulings); if (table != null) { foundTable = true; - 
tableAreas.add(table); + + Rectangle expandedTable = expand(page, table); + tableAreas.add(expandedTable); } } } while (foundTable); - // create a set of our current tables that will eliminate duplicate tables - Set tableSet = new TreeSet(new Comparator() { - @Override - public int compare(Rectangle o1, Rectangle o2) { - if (o1.equals(o2)) { - return 0; - } + Set tableSet = new TreeSet<>(TABLE_COMPARATOR); + tableSet.addAll(tableAreas); + + return new ArrayList<>(tableSet); + } + + /*** + * Finds biggest table on the page. + * + * Is prone to false detects on pages where there are no tables. + * Also if there are more than one table, it will merge them. + * However can find tables {@link NurminenDetectionAlgorithm#detect} fails to find. + */ + public Rectangle bluntDetect() { + if (allLines == null || textBoundingBox == null || horizontalRulings == null) { + throw new RuntimeException("Please run detect first!"); + } + + // get text edges from remaining lines in the document + TextEdges textEdges = this.getTextEdges(allLines); + List leftTextEdges = textEdges.get(TextEdge.LEFT); + List rightTextEdges = textEdges.get(TextEdge.RIGHT); - // o1 is "equal" to o2 if o2 contains all of o1 - if (o2.contains(o1)) { - return 0; + List sideTextEdges = new ArrayList<>(rightTextEdges); + sideTextEdges.addAll(leftTextEdges); + + for (float tagetOverlap = 0.7f; tagetOverlap >= 0.1f; tagetOverlap -= 0.1f) { + for (int edgeCount = 8; edgeCount >= 3; edgeCount--) { + Rectangle table = this.getTableFromText(allShortLines, sideTextEdges, edgeCount, horizontalRulings); + + if (table != null && table.verticalOverlapPercent(textBoundingBox) > tagetOverlap) { + return expand(page, table); } + } + } + return null; + } - // otherwise see if these tables are "mostly" the same - float overlap = o1.overlapRatio(o2); - if (overlap >= IDENTICAL_TABLE_OVERLAP_RATIO) { - return 0; - } else { - return 1; + /** + * Expands the table to top and bottom, until new content intersects + * table column 
lines (see {@link TableColumnsFinder#generateColumns()}) + */ + private Rectangle expand(Page page, Rectangle table) { + Page tablePage = page.getArea(table); + List textChunks = TextElement.mergeWords(tablePage.getText()); + List relevantLines = TextChunk.groupByLines(textChunks); + + Page aboveTable = page.getArea(page.getTop(), + table.getLeft(), + table.getTop(), + table.getRight()); + Page belowTable = page.getArea(table.getBottom(), + table.getLeft(), + page.getBottom(), + table.getRight()); + + Rectangle withBelow = expandIntoArea(table, belowTable, false, relevantLines); + Rectangle rectangle = expandIntoArea(withBelow, aboveTable, true, relevantLines); + + return rectangle; + } + + private Rectangle expandIntoArea(Rectangle initialTablePage, + Page pageAreaToScan, boolean topPartOfTable, + List relevantLines) { + TableColumnsFinder columnsFinder = new TableColumnsFinder(relevantLines); + List columns = columnsFinder.generateColumns(); + + List expandedTextChunks = TextElement.mergeWords(pageAreaToScan.getText()); + List expandedLines = TextChunk.groupByLines(expandedTextChunks); + + if (topPartOfTable) { + Collections.reverse(expandedLines); + } + + Rectangle area = new Rectangle(initialTablePage.getTop(), initialTablePage.getLeft(), initialTablePage.width, initialTablePage.height); + + outerLoop: + for (Line line : expandedLines) { + // if any of chunks crosses existing column break + for (float column : columns) { + for (TextChunk textChunk : line.getTextElements()) { + if (textChunk.getLeft() <= column && + textChunk.getRight() - 5 >= column) { // give 5 margin for error + break outerLoop; + } } } - }); - tableSet.addAll(tableAreas); + // it's also a problem if new chunk creates a new area. 
+ int regionsCount = columnsFinder.getRegions().size(); + columnsFinder.addLine(line, true); + if (columnsFinder.getRegions().size() != regionsCount) { + break; + } + + area.merge(line); + } + + if (topPartOfTable) { + area.setTop(area.getTop() - 1); // otherwise text can get cut-off + } else { + area.setBottom(area.getBottom() + 1); // otherwise text can get cut-off + } - return new ArrayList(tableSet); + return area; } + + /** + * Let's say we have a list which has bullet points (circles for example). + * Every circles will have 3 edges: left, right and center. + * Remove these excessive edges and leave only one. + * + * Otherwise we risk these areas being falsely detected as tables. + */ + private void reduceBulletPointEdges(List leftTextEdges, List midTextEdges, List rightTextEdges) { + List allTextEdges = new ArrayList<>(); + allTextEdges.addAll(leftTextEdges); + allTextEdges.addAll(midTextEdges); + allTextEdges.addAll(rightTextEdges); + Collections.sort(allTextEdges, (t1, t2) -> Float.compare(t1.y2 - t1.y1, t2.y2 - t2.y1)); + + + Set edgesToRemove = new HashSet<>(); + Float prevX = null; + TextEdge prevEdge = null; + for (Iterator iterator = allTextEdges.iterator(); iterator.hasNext(); ) { + TextEdge textEdge = iterator.next(); + + Float x = textEdge.x1; + if (prevX != null && Math.abs(x - prevX) < 5 && textEdge.getYOverlapPercent(prevEdge) > 0.9) { + edgesToRemove.add(textEdge); + } + prevX = x; + prevEdge = textEdge; + } + + leftTextEdges.removeIf(edgesToRemove::contains); + midTextEdges.removeIf(edgesToRemove::contains); + rightTextEdges.removeIf(edgesToRemove::contains); + } + + /** + * 1. Find rows which intersect with at least "relevantEdgeCount" of edges of relevant type (relevantEdges) + * to be considered part of table. + * 2. Regions with relevant number of edges are united in case all rows between them are + * closer than (totalRowSpacing / tableSpaceCount) * 2.5f + * 3. 
Bounds of all the rows are bounds of the table (meaning everything in between them is also part of table) + * 4. Expand the table to the top and to the bottom using horizontal rulings. + */ private Rectangle getTableFromText(List lines, List relevantEdges, int relevantEdgeCount, @@ -304,15 +575,47 @@ private Rectangle getTableFromText(List lines, int tableSpaceCount = 0; float totalRowSpacing = 0; + List edgeRectangles = new ArrayList<>(); + for (TextEdge edge : relevantEdges) { + edgeRectangles.add(new Rectangle((float) edge.getP1().getY(), + (float) edge.getP1().getX(), + edge.getWidth(), + edge.getHeight())); + } + + lines = new ArrayList<>(lines); + Collections.reverse(lines); // go through the lines and find the ones that have the correct count of the relevant edges for (Line textRow : lines) { int numRelevantEdges = 0; + int numRelevantEdgesToFullRow = 0; + + Rectangle fullTextRowRect = new Rectangle(textRow.getTop(), textRow.getLeft(), (float) textRow.getWidth(), (float) textRow.getHeight()); + fullTextRowRect.setRight(textBoundingBox.getRight()); + fullTextRowRect.setLeft(textBoundingBox.getLeft()); + + for (Rectangle edgeRectangle : edgeRectangles) { + if (textRow.intersects(edgeRectangle)) { + numRelevantEdges++; + } + if (fullTextRowRect.intersects(edgeRectangle)) { + numRelevantEdgesToFullRow++; + } + } if (firstTableRow != null && tableSpaceCount > 0) { // check to make sure this text row is within a line or so of the other lines already added // if it's not, we should stop the table here - float tableLineThreshold = (totalRowSpacing / tableSpaceCount) * 2.5f; - float lineDistance = textRow.getTop() - prevRow.getTop(); + float tableLineThreshold = (totalRowSpacing / tableSpaceCount); + float lineDistance = prevRow.getTop() - textRow.getTop(); + + if(numRelevantEdgesToFullRow == 0) { + tableLineThreshold = 0 ; + } else if(numRelevantEdgesToFullRow <= 3) { + tableLineThreshold *= 1.1f + numRelevantEdges / 10f; + } else { + tableLineThreshold *= 
numRelevantEdgesToFullRow / 2.4f; + } if (lineDistance > tableLineThreshold) { lastTableRow = prevRow; @@ -327,18 +630,14 @@ private Rectangle getTableFromText(List lines, relativeEdgeDifferenceThreshold = 0; } - for (TextEdge edge : relevantEdges) { - if (textRow.intersectsLine(edge)) { - numRelevantEdges++; - } - } + // see if we have a candidate text row if (numRelevantEdges >= (relevantEdgeCount - relativeEdgeDifferenceThreshold)) { // keep track of table row spacing if (prevRow != null && firstTableRow != null) { tableSpaceCount++; - totalRowSpacing += (textRow.getTop() - prevRow.getTop()); + totalRowSpacing += (prevRow.getTop() - textRow.getTop()); } // row is part of a table @@ -347,7 +646,7 @@ private Rectangle getTableFromText(List lines, table.setRect(textRow); } else { table.setLeft(Math.min(table.getLeft(), textRow.getLeft())); - table.setBottom(Math.max(table.getBottom(), textRow.getBottom())); + table.setTop(Math.min(table.getTop(), textRow.getTop())); table.setRight(Math.max(table.getRight(), textRow.getRight())); } } else { @@ -379,7 +678,7 @@ private Rectangle getTableFromText(List lines, avgRowHeight = lastTableRow.height; } - float rowHeightThreshold = avgRowHeight * 1.5f; + float rowHeightThreshold = avgRowHeight * ROW_HEIGHT_THRESHOLD_MULT_BOTTOM; // check lines after the bottom of the table for (Line2D.Float ruling : horizontalRulings) { @@ -388,12 +687,12 @@ private Rectangle getTableFromText(List lines, continue; } - float distanceFromTable = (float)ruling.getY1() - table.getBottom(); + float distanceFromTable = (float) ruling.getY1() - table.getBottom(); if (distanceFromTable <= rowHeightThreshold) { // use this ruling to help define the table - table.setBottom((float)Math.max(table.getBottom(), ruling.getY1())); - table.setLeft((float)Math.min(table.getLeft(), ruling.getX1())); - table.setRight((float)Math.max(table.getRight(), ruling.getX2())); + table.setBottom((float) Math.max(table.getBottom(), ruling.getY1())); + table.setLeft((float) 
Math.min(table.getLeft(), ruling.getX1())); + table.setRight((float) Math.max(table.getRight(), ruling.getX2())); } else { // no use checking any further break; @@ -402,229 +701,358 @@ private Rectangle getTableFromText(List lines, // do the same for lines at the top, but make the threshold greater since table headings tend to be // larger to fit up to three-ish rows of text (at least but we don't want to grab too much) - rowHeightThreshold = avgRowHeight * 3.5f; + rowHeightThreshold = avgRowHeight * ROW_HEIGHT_THRESHOLD_MULT_TOP; - for (int i=horizontalRulings.size() - 1; i>=0; i--) { + for (int i = horizontalRulings.size() - 1; i >= 0; i--) { Line2D.Float ruling = horizontalRulings.get(i); if (ruling.getY1() > table.getTop()) { continue; } - float distanceFromTable = table.getTop() - (float)ruling.getY1(); + float distanceFromTable = table.getTop() - (float) ruling.getY1(); if (distanceFromTable <= rowHeightThreshold) { - table.setTop((float)Math.min(table.getTop(), ruling.getY1())); - table.setLeft((float)Math.min(table.getLeft(), ruling.getX1())); - table.setRight((float)Math.max(table.getRight(), ruling.getX2())); + table.setTop((float) Math.min(table.getTop(), ruling.getY1())); + table.setLeft((float) Math.min(table.getLeft(), ruling.getX1())); + table.setRight((float) Math.max(table.getRight(), ruling.getX2())); } else { break; } } // add a bit of padding since the halved horizontal lines are a little fuzzy anyways - table.setTop((float)Math.floor(table.getTop()) - TABLE_PADDING_AMOUNT); - table.setBottom((float)Math.ceil(table.getBottom()) + TABLE_PADDING_AMOUNT); - table.setLeft((float)Math.floor(table.getLeft()) - TABLE_PADDING_AMOUNT); - table.setRight((float)Math.ceil(table.getRight()) + TABLE_PADDING_AMOUNT); + table.setTop((float) Math.floor(table.getTop()) - TABLE_PADDING_AMOUNT); + table.setBottom((float) Math.ceil(table.getBottom()) + TABLE_PADDING_AMOUNT); + table.setLeft((float) Math.floor(table.getLeft()) - TABLE_PADDING_AMOUNT); + 
table.setRight((float) Math.ceil(table.getRight()) + TABLE_PADDING_AMOUNT); return table; } - private RelevantEdges getRelevantEdges(TextEdges textEdges, List lines) { - List leftTextEdges = textEdges.get(TextEdge.LEFT); + private RelevantEdges getRelevantEdges(TextEdges textEdges, List lines, int linesCount) { List midTextEdges = textEdges.get(TextEdge.MID); - List rightTextEdges = textEdges.get(TextEdge.RIGHT); + List sideTextEdges = textEdges.get(TextEdge.SIDE_EDGE); // first we'll find the number of lines each type of edge crosses - int[][] edgeCountsPerLine = new int[lines.size()][TextEdge.NUM_TYPES]; + List[][] edgeCountsPerLine = new List[linesCount][TextEdge.NUM_TYPES]; - for (TextEdge edge : leftTextEdges) { - edgeCountsPerLine[edge.intersectingTextRowCount - 1][TextEdge.LEFT]++; - } - for (TextEdge edge : midTextEdges) { - edgeCountsPerLine[edge.intersectingTextRowCount - 1][TextEdge.MID]++; + for (TextEdge edge : sideTextEdges) { + if (edgeCountsPerLine[edge.intersectingTextRowCount - 1][TextEdge.SIDE_EDGE] == null) { + edgeCountsPerLine[edge.intersectingTextRowCount - 1][TextEdge.SIDE_EDGE] = new ArrayList(); + } + edgeCountsPerLine[edge.intersectingTextRowCount - 1][TextEdge.SIDE_EDGE].add(edge); } - for (TextEdge edge : rightTextEdges) { - edgeCountsPerLine[edge.intersectingTextRowCount - 1][TextEdge.RIGHT]++; + for (TextEdge edge : midTextEdges) { + if (edgeCountsPerLine[edge.intersectingTextRowCount - 1][TextEdge.MID] == null) { + edgeCountsPerLine[edge.intersectingTextRowCount - 1][TextEdge.MID] = new ArrayList<>(); + } + edgeCountsPerLine[edge.intersectingTextRowCount - 1][TextEdge.MID].add(edge); } // now let's find the relevant edge type and the number of those edges we should look for // we'll only take a minimum of two edges to look for tables int relevantEdgeType = -1; int relevantEdgeCount = 0; - for (int i=edgeCountsPerLine.length - 1; i>2; i--) { - if (edgeCountsPerLine[i][TextEdge.LEFT] > 2 && - edgeCountsPerLine[i][TextEdge.LEFT] >= 
edgeCountsPerLine[i][TextEdge.RIGHT] && - edgeCountsPerLine[i][TextEdge.LEFT] >= edgeCountsPerLine[i][TextEdge.MID]) { - relevantEdgeCount = edgeCountsPerLine[i][TextEdge.LEFT]; - relevantEdgeType = TextEdge.LEFT; - break; + for (int i = edgeCountsPerLine.length - 1; i > 2; i--) { + // if more than two left edges cross exactly i rows + // relevantEdgeCount = number of left edges + List sideEdges = edgeCountsPerLine[i][TextEdge.SIDE_EDGE]; + List midEdges = edgeCountsPerLine[i][TextEdge.MID]; + sideEdges = (sideEdges == null) ? new ArrayList<>() : new ArrayList<>(sideEdges); + midEdges = (midEdges == null) ? new ArrayList<>() : new ArrayList<>(midEdges); + + // add edges which have +-1 number of lines + // Not mid edges since they are more false-detect prone anyways + if (i > 3) { + listAddAll(sideEdges, edgeCountsPerLine[i - 1][TextEdge.SIDE_EDGE]); + + if (i < edgeCountsPerLine.length - 1) { + listAddAll(sideEdges, edgeCountsPerLine[i + 1][TextEdge.SIDE_EDGE]); + } } - if (edgeCountsPerLine[i][TextEdge.RIGHT] > 1 && - edgeCountsPerLine[i][TextEdge.RIGHT] >= edgeCountsPerLine[i][TextEdge.LEFT] && - edgeCountsPerLine[i][TextEdge.RIGHT] >= edgeCountsPerLine[i][TextEdge.MID]) { - relevantEdgeCount = edgeCountsPerLine[i][TextEdge.RIGHT]; - relevantEdgeType = TextEdge.RIGHT; - break; - } + // merge adjacent edges together and get edges count of only the biggest group + List sideGroups = getAdjacentGroups(sideEdges); + List midGroups = getAdjacentGroups(midEdges); + + int sideEdgesCount = sideEdges.size(); + int midEdgesCount = midEdges.size(); + + if (sideGroups.size() > 1) sideEdgesCount = Collections.max(sideGroups).count; + if (midEdges.size() > 1) midEdgesCount = Collections.max(midGroups).count; - if (edgeCountsPerLine[i][TextEdge.MID] > 1 && - edgeCountsPerLine[i][TextEdge.MID] >= edgeCountsPerLine[i][TextEdge.RIGHT] && - edgeCountsPerLine[i][TextEdge.MID] >= edgeCountsPerLine[i][TextEdge.LEFT]) { - relevantEdgeCount = edgeCountsPerLine[i][TextEdge.MID]; + + if 
(midEdgesCount > 1) { + relevantEdgeCount = midEdgesCount; relevantEdgeType = TextEdge.MID; break; } + if (sideEdgesCount > 2) { + relevantEdgeCount = sideEdgesCount; + relevantEdgeType = TextEdge.SIDE_EDGE; + break; + } } return new RelevantEdges(relevantEdgeType, relevantEdgeCount); } - private TextEdges getTextEdges(List lines) { + class LinesGroup extends Line2D.Float implements Comparable { + int count; - // get all text edges (lines that align with the left, middle and right of chunks of text) that extend - // uninterrupted over at least REQUIRED_TEXT_LINES_FOR_EDGE lines of text - List leftTextEdges = new ArrayList(); - List midTextEdges = new ArrayList(); - List rightTextEdges = new ArrayList(); + LinesGroup(Line2D.Float groupLine) { + super(groupLine.getP1(), groupLine.getP2()); + x1 = 0; // X coords doesn't matter + x2 = 0; + this.count = 1; + } - Map> currLeftEdges = new HashMap>(); - Map> currMidEdges = new HashMap>(); - Map> currRightEdges = new HashMap>(); + private boolean mergeByY(LinesGroup group) { + if (this.intersectsLine(group)) { + count += group.count; + y1 = Math.min(group.y1, y1); + y2 = Math.max(group.y2, y2); + return true; + } + return false; + } - for (Line textRow : lines) { - for (TextChunk text : textRow.getTextElements()) { - Integer left = new Integer((int)Math.floor(text.getLeft())); - Integer right = new Integer((int)Math.floor(text.getRight())); - Integer mid = new Integer(left + ((right - left)/2)); - - // first put this chunk into any edge buckets it belongs to - List leftEdge = currLeftEdges.get(left); - if (leftEdge == null) { - leftEdge = new ArrayList(); - currLeftEdges.put(left, leftEdge); - } - leftEdge.add(text); + @Override + public int compareTo(LinesGroup group) { + return Integer.compare(count, group.count); + } + } - List midEdge = currMidEdges.get(mid); - if (midEdge == null) { - midEdge = new ArrayList(); - currMidEdges.put(mid, midEdge); - } - midEdge.add(text); + private List getAdjacentGroups(List textEdges) { 
+ List groups = new ArrayList<>(); + if (textEdges != null) { + for (TextEdge textEdge : textEdges) { + groups.add(new LinesGroup(textEdge)); + } - List rightEdge = currRightEdges.get(right); - if (rightEdge == null) { - rightEdge = new ArrayList(); - currRightEdges.put(right, rightEdge); + Iterator it = groups.iterator(); + while (it.hasNext()) { + LinesGroup groupI = it.next(); + for (LinesGroup groupJ : groups) { + if (groupI == groupJ) { + continue; + } + if (groupJ.mergeByY(groupI)) { + it.remove(); + break; + } } - rightEdge.add(text); + } + } + return groups; + } - // now see if this text chunk blows up any other edges - for (Iterator>> iterator = currLeftEdges.entrySet().iterator(); iterator.hasNext();) { - Map.Entry> entry = iterator.next(); - Integer key = entry.getKey(); - if (key > left && key < right) { - iterator.remove(); - List edgeChunks = entry.getValue(); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); + private static void listAddAll(List a, List b) { + if (b != null) { + a.addAll(b); + } + } - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + private static class Range { + private static final float HALF_RANGE_SIZE = 2.0f; + private static final float MID_HALF_RANGE_SIZE = 1.5f; - leftTextEdges.add(edge); - } - } - } - - for (Iterator>> iterator = currMidEdges.entrySet().iterator(); iterator.hasNext();) { - Map.Entry> entry = iterator.next(); - Integer key = entry.getKey(); - if (key > left && key < right && Math.abs(key - mid) > 2) { - iterator.remove(); - List edgeChunks = entry.getValue(); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); + private final Type type; + float numbersSum; + float avg; + List numbers = new 
ArrayList<>(); + List edgeChunks = new ArrayList<>(); - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + enum Type { + LEFT, + MID, + RIGHT; + } - midTextEdges.add(edge); - } - } - } + Range(float firstNumber, TextChunk edgeChunk, Type type) { + numbersSum = firstNumber; + numbers.add(firstNumber); + edgeChunks.add(edgeChunk); + avg = firstNumber; + this.type = type; + } - for (Iterator>> iterator = currRightEdges.entrySet().iterator(); iterator.hasNext();) { - Map.Entry> entry = iterator.next(); - Integer key = entry.getKey(); - if (key > left && key < right) { - iterator.remove(); - List edgeChunks = entry.getValue(); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); + float getHalfRangeSize(float number, TextChunk text) { + float maxRangeSize = getHalfRangeSizeConst(); + // give less room for error for far rows + if (edgeChunks.size() > 0 && type != Type.MID) { + double distance = Math.abs(text.getMinY() - edgeChunks.get(edgeChunks.size() - 1).getMaxY()); + // multiply by log to make f=maxRangeSize(distance) grow faster than linear + double k = 60.0f / (distance * Math.log(Math.max(distance, 10))); + maxRangeSize = (float) k * maxRangeSize; + } - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + return maxRangeSize; + } - rightTextEdges.add(edge); - } - } - } + boolean add(float number, TextChunk text) { + if (Math.abs(number - avg) < getHalfRangeSize(number, text)) { + numbersSum += number; + numbers.add(number); + edgeChunks.add(text); + avg = numbersSum / numbers.size(); + return true; } + return false; } - // add the leftovers - for (Integer key : currLeftEdges.keySet()) { - List edgeChunks = currLeftEdges.get(key); - if (edgeChunks.size() >= 
REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); + boolean addToBeginning(float number, TextChunk text) { + if (add(number, text)) { + numbers.add(0, number); + edgeChunks.add(0, text); + numbers.remove(numbers.size() - 1); + edgeChunks.remove(edgeChunks.size() - 1); - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + return true; + } + return false; + } - leftTextEdges.add(edge); + boolean isBlownOut(TextChunk text, float left, float mid, float right) { + float edge; float halfRangeSize; + if (type == Type.MID) { + edge = mid; + halfRangeSize = getHalfRangeSizeConst(); + } else { + edge = (type == Type.LEFT) ? left : right; + halfRangeSize = getHalfRangeSize(edge, text) / 2; } + + return avg > left && avg < right && + Math.abs(edge - avg) >= halfRangeSize; } - for (Integer key : currMidEdges.keySet()) { - List edgeChunks = currMidEdges.get(key); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); + TextEdge getTextEdge(int linesSize) { + TextChunk first = edgeChunks.get(0); + TextChunk last = edgeChunks.get(edgeChunks.size() - 1); + + TextEdge edge = new TextEdge(avg - getHalfRangeSizeConst(), first.getTop(), avg + getHalfRangeSizeConst(), last.getBottom()); + edge.intersectingTextRowCount = Math.min(edgeChunks.size(), linesSize); + + return edge; + } + + private float getHalfRangeSizeConst() { + return (type == Type.MID) ? 
MID_HALF_RANGE_SIZE : HALF_RANGE_SIZE; + } + } + + private TextEdges getTextEdges(List lines) { + List rangesLeft = new ArrayList<>(); List rangesMid = new ArrayList<>(); List rangesRight = new ArrayList<>(); + List rangesActiveLeft = new ArrayList<>(); List rangesActiveMid = new ArrayList<>(); List rangesActiveRight = new ArrayList<>(); + List[] rangesArr = new List[]{rangesLeft, rangesMid, rangesRight}; + List[] rangesActiveArr = new List[]{rangesActiveLeft, rangesActiveMid, rangesActiveRight}; + + for (Line textRow : lines) { + + for (TextChunk text : textRow.getTextElements()) { + // big continuous text chunk probably not a part of table. + // don't form rulings using them. + boolean isLongTextLine = text.width >= 0.4 * textBoundingBox.width; + + float left = text.getLeft(); + float right = text.getRight(); + float mid = left + ((right - left) / 2); + + for (int i = Range.Type.LEFT.ordinal() ; i <= Range.Type.RIGHT.ordinal(); i++) { + List ranges = rangesArr[i]; + List rangesActive = rangesActiveArr[i]; + Range.Type rangeType = Range.Type.values()[i]; + + float number = (rangeType == Range.Type.LEFT) ? left : + (rangeType == Range.Type.MID) ? mid : right; + + boolean added = false; + Float closestNumber = null; + Range closestRange = null; + if (!isLongTextLine) { + for (Range range : rangesActive) { + added = range.add(number, text); + + Float lastNumber = range.numbers.get(range.numbers.size() - 1); + if (closestNumber == null || (number > lastNumber && + Math.abs(number - lastNumber) < Math.abs(number - closestNumber))) { + closestNumber = lastNumber; + closestRange = range; + } - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + if (added) { + break; + } + } + if (!added) { + Range newRange = new Range(number, text, rangeType); + + // backtrack and add close edges from previous lines. 
+ if (closestRange != null) { + for (int j = closestRange.edgeChunks.size() - 1; j >= 0; j--) { + closestNumber = closestRange.numbers.get(j); + + if (Math.abs(number - closestNumber) > closestRange.getHalfRangeSizeConst()) { + break; + } + + if (!newRange.addToBeginning(closestNumber, closestRange.edgeChunks.get(j))) { + break; + } + } + } + rangesActive.add(newRange); + } + } + + rangesActive.removeIf(range -> { + if (range.isBlownOut(text, left, mid, right)) { + if (range.numbers.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { + ranges.add(range); + } + return true; + } + return false; + }); + } + } + } - midTextEdges.add(edge); + for (int i = Range.Type.LEFT.ordinal() ; i <= Range.Type.RIGHT.ordinal(); i++) { + for (Range range : rangesActiveArr[i]) { + if (range.numbers.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { + rangesArr[i].add(range); + } } } - for (Integer key : currRightEdges.keySet()) { - List edgeChunks = currRightEdges.get(key); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); + List[] textEdges = new List[]{new ArrayList<>(), new ArrayList<>(), new ArrayList<>()}; + for (int i = Range.Type.LEFT.ordinal() ; i <= Range.Type.RIGHT.ordinal(); i++) { + for (Range range : rangesArr[i]) { + textEdges[i].add(range.getTextEdge(lines.size())); + } + } - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), lines.size()); + // remove left edges which are too close to beginning of the page + // they don't really indicate a table. 
+ for (Iterator iterator = textEdges[0].iterator(); iterator.hasNext();) { + TextEdge textEdge = iterator.next(); - rightTextEdges.add(edge); + if (textEdge.x1 < textBoundingBox.getLeft() + 8) { + iterator.remove(); } } - return new TextEdges(leftTextEdges, midTextEdges, rightTextEdges); + reduceBulletPointEdges(textEdges[0], textEdges[1], textEdges[2]); + + return new TextEdges(textEdges[0], textEdges[1], textEdges[2]); } private List getTableAreasFromCells(List cells) { - List> cellGroups = new ArrayList>(); + List> cellGroups = new ArrayList<>(); for (Rectangle cell : cells) { boolean addedToGroup = false; @@ -634,8 +1062,8 @@ private List getTableAreasFromCells(List cells) Point2D[] groupCellCorners = groupCell.getPoints(); Point2D[] candidateCorners = cell.getPoints(); - for (int i=0; i getTableAreasFromCells(List cells) } if (!addedToGroup) { - ArrayList cellGroup = new ArrayList(); + ArrayList cellGroup = new ArrayList<>(); cellGroup.add(cell); cellGroups.add(cellGroup); } } // create table areas based on cell group - List tableAreas = new ArrayList(); + List tableAreas = new ArrayList<>(); for (List cellGroup : cellGroups) { // less than four cells should not make a table if (cellGroup.size() < REQUIRED_CELLS_FOR_TABLE) { @@ -683,19 +1111,19 @@ private List getHorizontalRulings(BufferedImage image) { // get all horizontal edges, which we'll define as a change in grayscale colour // along a straight line of a certain length - ArrayList horizontalRulings = new ArrayList(); + ArrayList horizontalRulings = new ArrayList<>(); Raster r = image.getRaster(); int width = r.getWidth(); int height = r.getHeight(); - for (int x=0; x GRAYSCALE_INTENSITY_THRESHOLD) { @@ -746,19 +1174,19 @@ private List getVerticalRulings(BufferedImage image) { // get all vertical edges, which we'll define as a change in grayscale colour // along a straight line of a certain length - ArrayList verticalRulings = new ArrayList(); + ArrayList verticalRulings = new ArrayList<>(); Raster r = 
image.getRaster(); int width = r.getWidth(); int height = r.getHeight(); - for (int y=0; y GRAYSCALE_INTENSITY_THRESHOLD) { @@ -805,19 +1233,19 @@ private List getVerticalRulings(BufferedImage image) { return verticalRulings; } - // taken from http://www.docjar.com/html/api/org/apache/pdfbox/examples/util/RemoveAllText.java.html - private void removeText(PDPage page) throws IOException { - PDFStreamParser parser = new PDFStreamParser(page.getContents()); - parser.parse(); - List tokens = parser.getTokens(); - List newTokens = new ArrayList(); + // taken from http://www.docjar.com/html/api/org/apache/pdfbox/examples/util/RemoveAllText.java.html + private PDDocument removeText(PDPage page) throws IOException { - for (int i=0; i tokens = parser.getTokens(); + List newTokens = new ArrayList<>(); + for (Object token : tokens) { + if (token instanceof Operator) { + Operator op = (Operator) token; + if (op.getName().equals("TJ") || op.getName().equals("Tj")) { + //remove the one argument to this operator newTokens.remove(newTokens.size() - 1); continue; } @@ -826,16 +1254,15 @@ private void removeText(PDPage page) throws IOException { } PDDocument document = new PDDocument(); - document.addPage(page); + PDPage newPage = document.importPage(page); + newPage.setResources(page.getResources()); PDStream newContents = new PDStream(document); - ContentStreamWriter writer = new ContentStreamWriter(newContents.createOutputStream()); + OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE); + ContentStreamWriter writer = new ContentStreamWriter(out); writer.writeTokens(newTokens); - newContents.addCompression(); - page.setContents(newContents); - - try { - document.close(); - } catch (Exception e) {} + out.close(); + newPage.setContents(newContents); + return document; } } diff --git a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java index 87a2193d..243cc3bf 
100644 --- a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java +++ b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java @@ -1,13 +1,10 @@ package technology.tabula.detectors; -import org.apache.pdfbox.pdmodel.PDDocument; import technology.tabula.Cell; import technology.tabula.Page; import technology.tabula.Rectangle; -import technology.tabula.Ruling; import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; -import java.io.File; import java.util.Collections; import java.util.List; @@ -25,10 +22,10 @@ public List detect(Page page) { SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List tables = sea.findSpreadsheetsFromCells(cells); + List tables = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells); // we want tables to be returned from top to bottom on the page - Collections.sort(tables); + Collections.sort(tables, Rectangle.ILL_DEFINED_ORDER); return tables; } diff --git a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java index 91e52a97..a06ad150 100644 --- a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java @@ -3,21 +3,29 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.Iterator; import java.util.List; import java.util.Arrays; import technology.tabula.Line; import technology.tabula.Page; -import technology.tabula.Rectangle; import technology.tabula.Ruling; import technology.tabula.Table; +import technology.tabula.TableColumnsFinder; import technology.tabula.TextChunk; import technology.tabula.TextElement; +/** + * Extract a data Table, adding column lines in positions where there are no text on page. 
+ * and matching strings into rows using their position + * + * See {@link BasicExtractionAlgorithm#extract(Page, List)} for more info + */ public class BasicExtractionAlgorithm implements ExtractionAlgorithm { - + private boolean mixedTableExtractionEnabled; // takes columns from basic extractor and row rulings (if present) from pdf. private List verticalRulings = null; - + public List mixedExtractionRulings = new ArrayList<>(); + public BasicExtractionAlgorithm() { } @@ -26,21 +34,38 @@ public BasicExtractionAlgorithm(List verticalRulings) { } public List

extract(Page page, List verticalRulingPositions) { - List verticalRulings = new ArrayList(verticalRulingPositions.size()); + List verticalRulings = new ArrayList<>(verticalRulingPositions.size()); for (Float p: verticalRulingPositions) { - verticalRulings.add(new Ruling((float) page.getTop(), (float) p, 0.0f, (float) page.getHeight())); + verticalRulings.add(new Ruling(page.getTop(), p, 0.0f, (float) page.getHeight())); } this.verticalRulings = verticalRulings; return this.extract(page); } + private List getRelevantRulings(Page page, List horizontalR) { + Iterator it = horizontalR.iterator(); + while (it.hasNext()) { + Ruling hr = it.next(); + + // if page not contains at least part of line + if (!page.intersectsLine(hr)) { + it.remove(); + } + } + return horizontalR; + } + + /** + * 1. Group strings by their Y coordinate into lines. + * 2. Find column coordinates (vertical lines which do to intersect text). + * 3. Extra columns using string lines and columns. + */ @Override public List
extract(Page page) { - List textElements = page.getText(); if (textElements.size() == 0) { - return Arrays.asList(new Table[] { Table.EMPTY }); + return Arrays.asList(new Table[] { Table.empty() }); } List textChunks = this.verticalRulings == null ? TextElement.mergeWords(page.getText()) : TextElement.mergeWords(page.getText(), this.verticalRulings); @@ -54,17 +79,19 @@ public int compare(Ruling arg0, Ruling arg1) { return Double.compare(arg0.getLeft(), arg1.getLeft()); } }); - columns = new ArrayList(this.verticalRulings.size()); + columns = new ArrayList<>(this.verticalRulings.size()); for (Ruling vr: this.verticalRulings) { columns.add(vr.getLeft()); } } else { - columns = columnPositions(lines); + columns = new TableColumnsFinder(lines).generateColumns(); } - Table table = new Table(page, this); - + Table table = new Table(this); + table.setRect(page.getLeft(), page.getTop(), page.getWidth(), page.getHeight()); + + //ArrayList columnsNew = new ArrayList<>(columns); for (int i = 0; i < lines.size(); i++) { Line line = lines.get(i); List elements = line.getTextElements(); @@ -78,7 +105,6 @@ public int compare(TextChunk o1, TextChunk o2) { }); for (TextChunk tc: elements) { - if (tc.isSameChar(Line.WHITE_SPACE_CHARS)) { continue; } @@ -87,79 +113,77 @@ public int compare(TextChunk o1, TextChunk o2) { boolean found = false; for(; j < columns.size(); j++) { if (tc.getLeft() <= columns.get(j)) { - found = true; + found = true; + //columnsNew.set(j, Math.max(columnsNew.get(j), tc.getRight())); break; } } table.add(tc, i, found ? 
j : columns.size()); } } - - return Arrays.asList(new Table[] { table } ); - } - - @Override - public String toString() { - return "basic"; - } - - - /** - * @param lines must be an array of lines sorted by their +top+ attribute - * @return a list of column boundaries (x axis) - */ - public static List columnPositions(List lines) { - List regions = new ArrayList(); - for (TextChunk tc: lines.get(0).getTextElements()) { - if (tc.isSameChar(Line.WHITE_SPACE_CHARS)) { - continue; - } - Rectangle r = new Rectangle(); - r.setRect(tc); - regions.add(r); + + // Mixed Extraction (Horizontal rulings are present, but vertical are not) + List horizontalR = page.getHorizontalRulings(); + horizontalR = Ruling.collapseOrientedRulings(horizontalR); + horizontalR = getRelevantRulings(page, horizontalR); + + float meaningfulRulingsCount = horizontalR.size(); + + float contentTop = lines.get(0).getTop(); + float contentBottom = lines.get(lines.size() - 1).getBottom(); + + float minHRuling = Float.MAX_VALUE; + float maxHRuling = Float.MIN_VALUE; + for (Ruling hr : horizontalR) { + minHRuling = Math.min(minHRuling, hr.y1); + maxHRuling = Math.max(maxHRuling, hr.y1); + + hr.setLeft(page.getLeft()); + hr.setRight(page.getRight()); } - - for (Line l: lines.subList(1, lines.size())) { - List lineTextElements = new ArrayList(); - for (TextChunk tc: l.getTextElements()) { - if (!tc.isSameChar(Line.WHITE_SPACE_CHARS)) { - lineTextElements.add(tc); - } - } - - for (Rectangle cr: regions) { - List overlaps = new ArrayList(); - for (TextChunk te: lineTextElements) { - if (cr.horizontallyOverlaps(te)) { - overlaps.add(te); - } - } - - for (TextChunk te: overlaps) { - cr.merge(te); - } - - lineTextElements.removeAll(overlaps); + // if the ruling above all text in table + if (contentTop >= minHRuling) { + meaningfulRulingsCount--; + } + // or bellow all text, don't count it. 
+ if (contentBottom <= maxHRuling) { + meaningfulRulingsCount--; + } + + if (mixedTableExtractionEnabled && + lines.size() != 0 && meaningfulRulingsCount / lines.size() > 0.33) { + // incase there are text above top ruling we need a line on top of the page + if (contentTop < minHRuling) { + horizontalR.add(new Ruling(page.getPoints()[0], page.getPoints()[1])); // top line } - - for (TextChunk te: lineTextElements) { - Rectangle r = new Rectangle(); - r.setRect(te); - regions.add(r); + if (contentBottom > maxHRuling) { + horizontalR.add(new Ruling(page.getPoints()[3], page.getPoints()[2])); // bottom line } + + List verticalR = new ArrayList<>(); + columns.add(page.x - 1); + for (Float column : columns) { + // We add + 1 to column since if don't do it SpreadSheetExtractor can cut last letter. + verticalR.add(new Ruling(page.getTop(), column + 1, 0.1f, page.height)); + } + // If horizontal mixedExtractionRulings start after table start. First column will not be seen. Hence make them a bit larger + + verticalR.addAll(horizontalR); + mixedExtractionRulings = new ArrayList<>(verticalR); + return new SpreadsheetExtractionAlgorithm().extract(page, verticalR); } - List rv = new ArrayList(); - for (Rectangle r: regions) { - rv.add((float) r.getRight()); - } - - Collections.sort(rv); - - return rv; - + return Arrays.asList(new Table[] { table } ); + } + + @Override + public String toString() { + return "stream"; } + public void setMixedTableExtractionEnabled(boolean mixedTableExtractionEnabled) { + this.mixedTableExtractionEnabled = mixedTableExtractionEnabled; + } } diff --git a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java index 321ee88a..abb588ce 100644 --- a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java @@ -1,7 +1,6 @@ package 
technology.tabula.extractors; import java.awt.geom.Point2D; -import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -19,10 +18,11 @@ import technology.tabula.TableWithRulingLines; import technology.tabula.TextElement; import technology.tabula.Utils; -import technology.tabula.writers.CSVWriter; /** - * @author manuel + * Extract a list of Table from page using rulings as separators. + * + * See {@link SpreadsheetExtractionAlgorithm#extract(Page, List)} for more info * */ public class SpreadsheetExtractionAlgorithm implements ExtractionAlgorithm { @@ -82,17 +82,23 @@ else if (arg0Y < arg1Y) { @Override - public List extract(Page page) { + public List
extract(Page page) { return extract(page, page.getRulings()); } /** - * Extract a list of Table from page using rulings as separators + * Extract cells from table divided by lines. + * + * 1. Clean rulings (remove similar, use only strictly vertical/horizontal) + * 2. Find cells using rulings interceptions + * 3. Find rectangle which contains all cells + * 4. Remove cells,rulings outside of this rectangle + * 5. Merge words in same cells. */ - public List extract(Page page, List rulings) { + public List
extract(Page page, List rulings) { // split rulings into horizontal and vertical - List horizontalR = new ArrayList(), - verticalR = new ArrayList(); + List horizontalR = new ArrayList<>(), + verticalR = new ArrayList<>(); for (Ruling r: rulings) { if (r.horizontal()) { @@ -108,10 +114,10 @@ else if (r.vertical()) { List cells = findCells(horizontalR, verticalR); List spreadsheetAreas = findSpreadsheetsFromCells(cells); - List spreadsheets = new ArrayList(); + List
spreadsheets = new ArrayList<>(); for (Rectangle area: spreadsheetAreas) { - List overlappingCells = new ArrayList(); + List overlappingCells = new ArrayList<>(); for (Cell c: cells) { if (c.intersects(area)) { @@ -120,32 +126,34 @@ else if (r.vertical()) { } } - List horizontalOverlappingRulings = new ArrayList(); + List horizontalOverlappingRulings = new ArrayList<>(); for (Ruling hr: horizontalR) { if (area.intersectsLine(hr)) { horizontalOverlappingRulings.add(hr); } } - List verticalOverlappingRulings = new ArrayList(); + List verticalOverlappingRulings = new ArrayList<>(); for (Ruling vr: verticalR) { if (area.intersectsLine(vr)) { verticalOverlappingRulings.add(vr); } } - TableWithRulingLines t = new TableWithRulingLines(area, page, overlappingCells, - horizontalOverlappingRulings, verticalOverlappingRulings); - - t.setExtractionAlgorithm(this); - + TableWithRulingLines t = new TableWithRulingLines(area, overlappingCells, horizontalOverlappingRulings, verticalOverlappingRulings, this); spreadsheets.add(t); } - Utils.sort(spreadsheets); + Utils.sort(spreadsheets, Rectangle.ILL_DEFINED_ORDER); return spreadsheets; } public boolean isTabular(Page page) { + // if there's no text at all on the page, it's not a table + // (we won't be able to do anything with it though) + if(page.getText().isEmpty()){ + return false; + } + // get minimal region of page that contains every character (in effect, // removes white "margins") Page minimalRegion = page.getArea(Utils.bounds(page.getText())); @@ -155,26 +163,25 @@ public boolean isTabular(Page page) { return false; } Table table = tables.get(0); - int rowsDefinedByLines = table.getRows().size(); - int colsDefinedByLines = table.getCols().size(); + int rowsDefinedByLines = table.getRowCount(); + int colsDefinedByLines = table.getColCount(); tables = new BasicExtractionAlgorithm().extract(minimalRegion); if (tables.size() == 0) { // TODO WHAT DO WE DO HERE? 
} table = tables.get(0); - int rowsDefinedWithoutLines = table.getRows().size(); - int colsDefinedWithoutLines = table.getCols().size(); + int rowsDefinedWithoutLines = table.getRowCount(); + int colsDefinedWithoutLines = table.getColCount(); float ratio = (((float) colsDefinedByLines / colsDefinedWithoutLines) + ((float) rowsDefinedByLines / rowsDefinedWithoutLines)) / 2.0f; - return ratio > MAGIC_HEURISTIC_NUMBER && ratio < (1/MAGIC_HEURISTIC_NUMBER); } public static List findCells(List horizontalRulingLines, List verticalRulingLines) { - List cellsFound = new ArrayList(); + List cellsFound = new ArrayList<>(); Map intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines); - List intersectionPointsList = new ArrayList(intersectionPoints.keySet()); + List intersectionPointsList = new ArrayList<>(intersectionPoints.keySet()); Collections.sort(intersectionPointsList, POINT_COMPARATOR); boolean doBreak = false; @@ -184,9 +191,9 @@ public static List findCells(List horizontalRulingLines, List xPoints = new ArrayList(); + List xPoints = new ArrayList<>(); // CrossingPointsDirectlyToTheRight( topLeft ); - List yPoints = new ArrayList(); + List yPoints = new ArrayList<>(); for (Point2D p: intersectionPointsList.subList(i, intersectionPointsList.size())) { if (p.getX() == topLeft.getX() && p.getY() > topLeft.getY()) { @@ -227,19 +234,22 @@ public static List findCells(List horizontalRulingLines, List findSpreadsheetsFromCells(List cells) { // via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon - List rectangles = new ArrayList(); - Set pointSet = new HashSet(); - Map edgesH = new HashMap(); - Map edgesV = new HashMap(); + List rectangles = new ArrayList<>(); + Set pointSet = new HashSet<>(); + Map edgesH = new HashMap<>(); + Map edgesV = new HashMap<>(); int i = 0; - cells = new ArrayList(new HashSet(cells)); - - Collections.sort(cells); - + cells = new ArrayList<>(new HashSet<>(cells)); + 
+ Utils.sort(cells, Rectangle.ILL_DEFINED_ORDER); + for (Rectangle cell: cells) { for(Point2D pt: cell.getPoints()) { if (pointSet.contains(pt)) { // shared vertex, remove it @@ -252,10 +262,10 @@ public static List findSpreadsheetsFromCells(List pointsSortX = new ArrayList(pointSet); + List pointsSortX = new ArrayList<>(pointSet); Collections.sort(pointsSortX, X_FIRST_POINT_COMPARATOR); // Y first sort - List pointsSortY = new ArrayList(pointSet); + List pointsSortY = new ArrayList<>(pointSet); Collections.sort(pointsSortY, POINT_COMPARATOR); while (i < pointSet.size()) { @@ -278,10 +288,10 @@ public static List findSpreadsheetsFromCells(List> polygons = new ArrayList>(); + List> polygons = new ArrayList<>(); Point2D nextVertex; while (!edgesH.isEmpty()) { - ArrayList polygon = new ArrayList(); + ArrayList polygon = new ArrayList<>(); Point2D first = edgesH.keySet().iterator().next(); polygon.add(new PolygonVertex(first, Direction.HORIZONTAL)); edgesH.remove(first); @@ -315,7 +325,9 @@ public static List findSpreadsheetsFromCells(List poly: polygons) { float top = java.lang.Float.MAX_VALUE; @@ -336,7 +348,7 @@ public static List findSpreadsheetsFromCells(List> { + + public static final RectangularTextContainerSerializer INSTANCE = new RectangularTextContainerSerializer(); + + private RectangularTextContainerSerializer() { + // singleton + } + + @Override + public JsonElement serialize(RectangularTextContainer src, Type typeOfSrc, JsonSerializationContext context) { + JsonObject result = new JsonObject(); + result.addProperty("top", src.getTop()); + result.addProperty("left", src.getLeft()); + result.addProperty("width", src.getWidth()); + result.addProperty("height", src.getHeight()); + result.addProperty("text", src.getText()); + return result; + } + +} \ No newline at end of file diff --git a/src/main/java/technology/tabula/json/RulingSerializer.java b/src/main/java/technology/tabula/json/RulingSerializer.java index 8a3fe297..19fe8bcd 100644 --- 
a/src/main/java/technology/tabula/json/RulingSerializer.java +++ b/src/main/java/technology/tabula/json/RulingSerializer.java @@ -2,21 +2,18 @@ import java.lang.reflect.Type; -import technology.tabula.Ruling; - import com.google.gson.JsonElement; -import com.google.gson.JsonObject; import com.google.gson.JsonSerializationContext; import com.google.gson.JsonSerializer; +import technology.tabula.Ruling; + +@Deprecated +/** @deprecated This class is unused (Aug 2017) and will be removed at some later point */ public class RulingSerializer implements JsonSerializer { @Override - public JsonElement serialize(Ruling arg0, Type arg1, - JsonSerializationContext arg2) { - - JsonObject object = new JsonObject(); - + public JsonElement serialize(Ruling src, Type typeOfSrc, JsonSerializationContext context) { return null; } diff --git a/src/main/java/technology/tabula/json/TableSerializer.java b/src/main/java/technology/tabula/json/TableSerializer.java index 9e0538f2..ba04a7cf 100644 --- a/src/main/java/technology/tabula/json/TableSerializer.java +++ b/src/main/java/technology/tabula/json/TableSerializer.java @@ -12,29 +12,37 @@ import com.google.gson.JsonSerializationContext; import com.google.gson.JsonSerializer; -public class TableSerializer implements JsonSerializer
{ - - @Override - public JsonElement serialize(Table table, Type type, - JsonSerializationContext context) { - - JsonObject object = new JsonObject(); - object.addProperty("extraction_method", table.getExtractionAlgorithm().toString()); - object.addProperty("top", table.getTop()); - object.addProperty("left", table.getLeft()); - object.addProperty("width", table.getWidth()); - object.addProperty("height", table.getHeight()); - - JsonArray jsonDataArray = new JsonArray(); - for (List row: table.getRows()) { - JsonArray jsonRowArray = new JsonArray(); - for (RectangularTextContainer textChunk: row) { - jsonRowArray.add(context.serialize(textChunk)); - } - jsonDataArray.add(jsonRowArray); - } - object.add("data", jsonDataArray); - - return object; - } -} \ No newline at end of file +public final class TableSerializer implements JsonSerializer
{ + + public static final TableSerializer INSTANCE = new TableSerializer(); + + private TableSerializer() { + // singleton + } + + @Override + public JsonElement serialize(Table src, Type typeOfSrc, JsonSerializationContext context) { + + JsonObject result = new JsonObject(); + + result.addProperty("extraction_method", src.getExtractionMethod()); + result.addProperty("top", src.getTop()); + result.addProperty("left", src.getLeft()); + result.addProperty("width", src.getWidth()); + result.addProperty("height", src.getHeight()); + result.addProperty("right", src.getRight()); + result.addProperty("bottom", src.getBottom()); + + JsonArray data; + result.add("data", data = new JsonArray()); + + for (List srcRow : src.getRows()) { + JsonArray row = new JsonArray(); + for (RectangularTextContainer textChunk : srcRow) row.add(context.serialize(textChunk)); + data.add(row); + } + + return result; + } + +} diff --git a/src/main/java/technology/tabula/json/TextChunkSerializer.java b/src/main/java/technology/tabula/json/TextChunkSerializer.java deleted file mode 100644 index 5f4252c1..00000000 --- a/src/main/java/technology/tabula/json/TextChunkSerializer.java +++ /dev/null @@ -1,27 +0,0 @@ -package technology.tabula.json; - -import java.lang.reflect.Type; - -import technology.tabula.RectangularTextContainer; - -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import com.google.gson.JsonSerializationContext; -import com.google.gson.JsonSerializer; - -public class TextChunkSerializer implements JsonSerializer { - - @Override - public JsonElement serialize(RectangularTextContainer textChunk, Type arg1, - JsonSerializationContext context) { - JsonObject object = new JsonObject(); - - object.addProperty("top", textChunk.getTop()); - object.addProperty("left", textChunk.getLeft()); - object.addProperty("width", textChunk.getWidth()); - object.addProperty("height", textChunk.getHeight()); - object.addProperty("text", textChunk.getText()); - - return object; - 
} -} \ No newline at end of file diff --git a/src/main/java/technology/tabula/writers/CSVWriter.java b/src/main/java/technology/tabula/writers/CSVWriter.java index 16382585..2466cc6e 100644 --- a/src/main/java/technology/tabula/writers/CSVWriter.java +++ b/src/main/java/technology/tabula/writers/CSVWriter.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import org.apache.commons.csv.CSVPrinter; @@ -11,47 +12,34 @@ import technology.tabula.Table; public class CSVWriter implements Writer { - - CSVPrinter printer; - private boolean useLineReturns = true; - -// public CSVWriter() { -// super(); -// } -// -// public CSVWriter(boolean useLineReturns) { -// super(); -// this.useLineReturns = useLineReturns; -// } - - void createWriter(Appendable out) { - try { - this.printer = new CSVPrinter(out, CSVFormat.EXCEL); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - - @Override - public void write(Appendable out, Table table) throws IOException { - this.createWriter(out); - for (List row: table.getRows()) { - List cells = new ArrayList(row.size()); - for (RectangularTextContainer tc: row) { - cells.add(tc.getText()); - } - this.printer.printRecord(cells); - } - printer.flush(); - } + + public CSVWriter() { + this(CSVFormat.EXCEL); + } + + protected CSVWriter(CSVFormat format) { + this.format = format; + } + + private final CSVFormat format; + + @Override + public void write(Appendable out, Table table) throws IOException { + write(out, Collections.singletonList(table)); + } @Override public void write(Appendable out, List
tables) throws IOException { - for (Table table : tables) { - write(out, table); + try (CSVPrinter printer = new CSVPrinter(out, format)) { + for (Table table : tables) { + for (List row : table.getRows()) { + List cells = new ArrayList<>(row.size()); + for (RectangularTextContainer tc : row) cells.add(tc.getText()); + printer.printRecord(cells); + } + } + printer.flush(); } - } } diff --git a/src/main/java/technology/tabula/writers/JSONWriter.java b/src/main/java/technology/tabula/writers/JSONWriter.java index 88b5c1f1..59e9b274 100644 --- a/src/main/java/technology/tabula/writers/JSONWriter.java +++ b/src/main/java/technology/tabula/writers/JSONWriter.java @@ -4,54 +4,46 @@ import java.lang.reflect.Modifier; import java.util.List; +import com.google.gson.ExclusionStrategy; +import com.google.gson.FieldAttributes; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonArray; + import technology.tabula.Cell; import technology.tabula.RectangularTextContainer; import technology.tabula.Table; import technology.tabula.TextChunk; +import technology.tabula.json.RectangularTextContainerSerializer; import technology.tabula.json.TableSerializer; -import technology.tabula.json.TextChunkSerializer; - -import com.google.gson.ExclusionStrategy; -import com.google.gson.FieldAttributes; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; public class JSONWriter implements Writer { - - class TableSerializerExclusionStrategy implements ExclusionStrategy { - - @Override - public boolean shouldSkipClass(Class arg0) { - return false; - } - - @Override - public boolean shouldSkipField(FieldAttributes fa) { - return !fa.hasModifier(Modifier.PUBLIC); - } - } - - - final Gson gson; - - public JSONWriter() { - gson = new GsonBuilder() - .addSerializationExclusionStrategy(new TableSerializerExclusionStrategy()) - .registerTypeAdapter(Table.class, new TableSerializer()) - .registerTypeAdapter(RectangularTextContainer.class, new 
TextChunkSerializer()) - .registerTypeAdapter(Cell.class, new TextChunkSerializer()) - .registerTypeAdapter(TextChunk.class, new TextChunkSerializer()) - .create(); - } - - @Override - public void write(Appendable out, Table table) throws IOException { - - out.append(gson.toJson(table, Table.class)); - } - - public void write(Appendable out, List
tables) throws IOException { - - out.append(gson.toJson(tables.toArray(), Table[].class)); - } + + private static final ExclusionStrategy ALLCLASSES_SKIPNONPUBLIC = new ExclusionStrategy() { + @Override public boolean shouldSkipClass(Class c) { return false; } + @Override public boolean shouldSkipField(FieldAttributes fa) { return !fa.hasModifier(Modifier.PUBLIC); } + }; + + @Override + public void write(Appendable out, Table table) throws IOException { + out.append(gson().toJson(table, Table.class)); + } + + @Override public void write(Appendable out, List
tables) throws IOException { + Gson gson = gson(); + JsonArray array = new JsonArray(); + for (Table table : tables) array.add(gson.toJsonTree(table, Table.class)); + out.append(gson.toJson(array)); + } + + private static Gson gson() { + return new GsonBuilder() + .addSerializationExclusionStrategy(ALLCLASSES_SKIPNONPUBLIC) + .registerTypeAdapter(Table.class, TableSerializer.INSTANCE) + .registerTypeAdapter(RectangularTextContainer.class, RectangularTextContainerSerializer.INSTANCE) + .registerTypeAdapter(Cell.class, RectangularTextContainerSerializer.INSTANCE) + .registerTypeAdapter(TextChunk.class, RectangularTextContainerSerializer.INSTANCE) + .create(); + } + } diff --git a/src/main/java/technology/tabula/writers/TSVWriter.java b/src/main/java/technology/tabula/writers/TSVWriter.java index 225ba980..c3ddb737 100644 --- a/src/main/java/technology/tabula/writers/TSVWriter.java +++ b/src/main/java/technology/tabula/writers/TSVWriter.java @@ -1,20 +1,11 @@ package technology.tabula.writers; -import java.io.IOException; - import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVPrinter; public class TSVWriter extends CSVWriter { - - @Override - void createWriter(Appendable out) { - try { - this.printer = new CSVPrinter(out, CSVFormat.TDF); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - + + public TSVWriter() { + super(CSVFormat.TDF); + } + } diff --git a/src/test/java/technology/tabula/TableTest.java b/src/test/java/technology/tabula/TableTest.java new file mode 100644 index 00000000..c574a553 --- /dev/null +++ b/src/test/java/technology/tabula/TableTest.java @@ -0,0 +1,45 @@ +package technology.tabula; + +import static org.junit.Assert.*; + +import org.junit.Test; + +public class TableTest { + + @Test public void testEmpty() { + Table empty = Table.empty(); + + assertEquals(TextChunk.EMPTY, empty.getCell(0, 0)); + assertEquals(TextChunk.EMPTY, empty.getCell(1, 1)); + + assertEquals(0, 
empty.getRowCount()); + assertEquals(0, empty.getColCount()); + + assertEquals("", empty.getExtractionMethod()); + + assertEquals(0, empty.getTop(), 0); + assertEquals(0, empty.getRight(), 0); + assertEquals(0, empty.getBottom(), 0); + assertEquals(0, empty.getLeft(), 0); + + assertEquals(0, empty.getArea(), 0); + } + + @Test public void testRowColCounts() { + Table table = Table.empty(); + + assertEquals(0, table.getRowCount()); + assertEquals(0, table.getColCount()); + + table.add(TextChunk.EMPTY, 0, 0); + + assertEquals(1, table.getRowCount()); + assertEquals(1, table.getColCount()); + + table.add(TextChunk.EMPTY, 9, 9); + + assertEquals(10, table.getRowCount()); + assertEquals(10, table.getColCount()); + } + +} diff --git a/src/test/java/technology/tabula/TestBasicExtractor.java b/src/test/java/technology/tabula/TestBasicExtractor.java index 66ae980b..5d5d985c 100644 --- a/src/test/java/technology/tabula/TestBasicExtractor.java +++ b/src/test/java/technology/tabula/TestBasicExtractor.java @@ -2,1790 +2,133 @@ import static org.junit.Assert.*; +import java.io.File; import java.io.IOException; +import java.nio.charset.Charset; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; -import junit.framework.Assert; - +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; import org.junit.Test; -import technology.tabula.Page; -import technology.tabula.Ruling; -import technology.tabula.Table; import technology.tabula.extractors.BasicExtractionAlgorithm; import technology.tabula.writers.CSVWriter; -import technology.tabula.UtilsForTesting; public class TestBasicExtractor { - private static final String[][] EXPECTED_CORRECT_COLUMNS = { - { "", "", "Involvement of pupils in ", "" }, - { "", "Preperation and ", "Production of ", "Presentation an" }, - { "", "planing ", "materials ", "evaluation " }, - { "Knowledge and awareness of different cultures ", "0,2885 ", - "0,3974 ", 
"0,3904 " }, - { "Foreign language competence ", "0,3057 ", "0,4184 ", "0,3899 " }, - { "Social skills and abilities ", "0,3416 ", "0,3369 ", "0,4303 " }, - { "Acquaintance of special knowledge ", "0,2569 ", "0,2909 ", - "0,3557 " }, - { "Self competence ", "0,3791 ", "0,3320 ", "0,4617 " } }; + private static final String EU_002_PDF = "src/test/resources/technology/tabula/eu-002.pdf"; + private static final String[][] EU_002_EXPECTED = { + {"", "", "Involvement of pupils in", ""}, + {"", "Preperation and", "Production of", "Presentation an"}, + {"", "planing", "materials", "evaluation"}, + {"Knowledge and awareness of different cultures", "0,2885", "0,3974", "0,3904"}, + {"Foreign language competence", "0,3057", "0,4184", "0,3899"}, + {"Social skills and abilities", "0,3416", "0,3369", "0,4303"}, + {"Acquaintance of special knowledge", "0,2569", "0,2909", "0,3557"}, + {"Self competence", "0,3791", "0,3320", "0,4617"} + }; + + private static final String ARGENTINA_DIPUTADOS_VOTING_RECORD_PDF = "src/test/resources/technology/tabula/argentina_diputados_voting_record.pdf"; + private static final String[][] ARGENTINA_DIPUTADOS_VOTING_RECORD_EXPECTED = { + {"ABDALA de MATARAZZO, Norma Amanda", "Frente Cívico por Santiago", "Santiago del Estero", "AFIRMATIVO"}, + {"ALBRIEU, Oscar Edmundo Nicolas", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, + {"ALONSO, María Luz", "Frente para la Victoria - PJ", "La Pampa", "AFIRMATIVO"}, + {"ARENA, Celia Isabel", "Frente para la Victoria - PJ", "Santa Fe", "AFIRMATIVO"}, + {"ARREGUI, Andrés Roberto", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"AVOSCAN, Herman Horacio", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, + {"BALCEDO, María Ester", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"BARRANDEGUY, Raúl Enrique", "Frente para la Victoria - PJ", "Entre Ríos", "AFIRMATIVO"}, + {"BASTERRA, Luis Eugenio", "Frente para la Victoria - PJ", "Formosa", "AFIRMATIVO"}, + 
{"BEDANO, Nora Esther", "Frente para la Victoria - PJ", "Córdoba", "AFIRMATIVO"}, + {"BERNAL, María Eugenia", "Frente para la Victoria - PJ", "Jujuy", "AFIRMATIVO"}, + {"BERTONE, Rosana Andrea", "Frente para la Victoria - PJ", "Tierra del Fuego", "AFIRMATIVO"}, + {"BIANCHI, María del Carmen", "Frente para la Victoria - PJ", "Cdad. Aut. Bs. As.", "AFIRMATIVO"}, + {"BIDEGAIN, Gloria Mercedes", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"BRAWER, Mara", "Frente para la Victoria - PJ", "Cdad. Aut. Bs. As.", "AFIRMATIVO"}, + {"BRILLO, José Ricardo", "Movimiento Popular Neuquino", "Neuquén", "AFIRMATIVO"}, + {"BROMBERG, Isaac Benjamín", "Frente para la Victoria - PJ", "Tucumán", "AFIRMATIVO"}, + {"BRUE, Daniel Agustín", "Frente Cívico por Santiago", "Santiago del Estero", "AFIRMATIVO"}, + {"CALCAGNO, Eric", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"CARLOTTO, Remo Gerardo", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"CARMONA, Guillermo Ramón", "Frente para la Victoria - PJ", "Mendoza", "AFIRMATIVO"}, + {"CATALAN MAGNI, Julio César", "Frente para la Victoria - PJ", "Tierra del Fuego", "AFIRMATIVO"}, + {"CEJAS, Jorge Alberto", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, + {"CHIENO, María Elena", "Frente para la Victoria - PJ", "Corrientes", "AFIRMATIVO"}, + {"CIAMPINI, José Alberto", "Frente para la Victoria - PJ", "Neuquén", "AFIRMATIVO"}, + {"CIGOGNA, Luis Francisco Jorge", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"CLERI, Marcos", "Frente para la Victoria - PJ", "Santa Fe", "AFIRMATIVO"}, + {"COMELLI, Alicia Marcela", "Movimiento Popular Neuquino", "Neuquén", "AFIRMATIVO"}, + {"CONTI, Diana Beatriz", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, + {"CORDOBA, Stella Maris", "Frente para la Victoria - PJ", "Tucumán", "AFIRMATIVO"}, + {"CURRILEN, Oscar Rubén", "Frente para la Victoria - PJ", "Chubut", "AFIRMATIVO"} + }; + + private static final 
String EU_017_PDF = "src/test/resources/technology/tabula/eu-017.pdf"; + private static final String[][] EU_017_EXPECTED = { + {"", "Austria", "77", "1", "78"}, + {"", "Belgium", "159", "2", "161"}, + {"", "Bulgaria", "52", "0", "52"}, + {"", "Croatia", "144", "0", "144"}, + {"", "Cyprus", "43", "2", "45"}, + {"", "Czech Republic", "78", "0", "78"}, + {"", "Denmark", "151", "2", "153"}, + {"", "Estonia", "46", "0", "46"}, + {"", "Finland", "201", "1", "202"}, + {"", "France", "428", "7", "435"}, + {"", "Germany", "646", "21", "667"}, + {"", "Greece", "113", "2", "115"}, + {"", "Hungary", "187", "0", "187"}, + {"", "Iceland", "18", "0", "18"}, + {"", "Ireland", "213", "4", "217"}, + {"", "Israel", "25", "0", "25"}, + {"", "Italy", "627", "12", "639"}, + {"", "Latvia", "7", "0", "7"}, + {"", "Lithuania", "94", "1", "95"}, + {"", "Luxembourg", "22", "0", "22"}, + {"", "Malta", "18", "0", "18"}, + {"", "Netherlands", "104", "1", "105"}, + {"", "Norway", "195", "0", "195"}, + {"", "Poland", "120", "1", "121"}, + {"", "Portugal", "532", "3", "535"}, + {"", "Romania", "110", "0", "110"}, + {"", "Slovakia", "176", "0", "176"}, + {"", "Slovenia", "56", "0", "56"}, + {"", "Spain", "614", "3", "617"}, + {"", "Sweden", "122", "3", "125"}, + {"", "Switzerland", "64", "0", "64"}, + {"", "Turkey", "96", "0", "96"}, + {"", "United Kingdom", "572", "14", "586"} + }; - private static final String[][] EXPECTED_COLUMN_RECOGNITION = { - { "ABDALA de MATARAZZO, Norma Amanda ", - "Frente Cívico por Santiago ", "Santiago del Estero ", - "AFIRMATIVO" }, - { "ALBRIEU, Oscar Edmundo Nicolas ", - "Frente para la Victoria - PJ ", "Rio Negro ", "AFIRMATIVO" }, - { "ALONSO, María Luz ", "Frente para la Victoria - PJ ", - "La Pampa ", "AFIRMATIVO" }, - { "ARENA, Celia Isabel ", "Frente para la Victoria - PJ ", - "Santa Fe ", "AFIRMATIVO" }, - { "ARREGUI, Andrés Roberto ", "Frente para la Victoria - PJ ", - "Buenos Aires ", "AFIRMATIVO" }, - { "AVOSCAN, Herman Horacio ", "Frente para la Victoria - 
PJ ", - "Rio Negro ", "AFIRMATIVO" }, - { "BALCEDO, María Ester ", "Frente para la Victoria - PJ ", - "Buenos Aires ", "AFIRMATIVO" }, - { "BARRANDEGUY, Raúl Enrique ", "Frente para la Victoria - PJ ", - "Entre Ríos ", "AFIRMATIVO" }, - { "BASTERRA, Luis Eugenio ", "Frente para la Victoria - PJ ", - "Formosa ", "AFIRMATIVO" }, - { "BEDANO, Nora Esther ", "Frente para la Victoria - PJ ", - "Córdoba ", "AFIRMATIVO" }, - { "BERNAL, María Eugenia ", "Frente para la Victoria - PJ ", - "Jujuy ", "AFIRMATIVO" }, - { "BERTONE, Rosana Andrea ", "Frente para la Victoria - PJ ", - "Tierra del Fuego ", "AFIRMATIVO" }, - { "BIANCHI, María del Carmen ", "Frente para la Victoria - PJ ", - "Cdad. Aut. Bs. As. ", "AFIRMATIVO" }, - { "BIDEGAIN, Gloria Mercedes ", "Frente para la Victoria - PJ ", - "Buenos Aires ", "AFIRMATIVO" }, - { "BRAWER, Mara ", "Frente para la Victoria - PJ ", - "Cdad. Aut. Bs. As. ", "AFIRMATIVO" }, - { "BRILLO, José Ricardo ", "Movimiento Popular Neuquino ", - "Neuquén ", "AFIRMATIVO" }, - { "BROMBERG, Isaac Benjamín ", "Frente para la Victoria - PJ ", - "Tucumán ", "AFIRMATIVO" }, - { "BRUE, Daniel Agustín ", "Frente Cívico por Santiago ", - "Santiago del Estero ", "AFIRMATIVO" }, - { "CALCAGNO, Eric ", "Frente para la Victoria - PJ ", - "Buenos Aires ", "AFIRMATIVO" }, - { "CARLOTTO, Remo Gerardo ", "Frente para la Victoria - PJ ", - "Buenos Aires ", "AFIRMATIVO" }, - { "CARMONA, Guillermo Ramón ", "Frente para la Victoria - PJ ", - "Mendoza ", "AFIRMATIVO" }, - { "CATALAN MAGNI, Julio César ", "Frente para la Victoria - PJ ", - "Tierra del Fuego ", "AFIRMATIVO" }, - { "CEJAS, Jorge Alberto ", "Frente para la Victoria - PJ ", - "Rio Negro ", "AFIRMATIVO" }, - { "CHIENO, María Elena ", "Frente para la Victoria - PJ ", - "Corrientes ", "AFIRMATIVO" }, - { "CIAMPINI, José Alberto ", "Frente para la Victoria - PJ ", - "Neuquén ", "AFIRMATIVO" }, - { "CIGOGNA, Luis Francisco Jorge ", - "Frente para la Victoria - PJ ", "Buenos Aires ", - "AFIRMATIVO" }, - { 
"CLERI, Marcos ", "Frente para la Victoria - PJ ", "Santa Fe ", - "AFIRMATIVO" }, - { "COMELLI, Alicia Marcela ", "Movimiento Popular Neuquino ", - "Neuquén ", "AFIRMATIVO" }, - { "CONTI, Diana Beatriz ", "Frente para la Victoria - PJ ", - "Buenos Aires ", "AFIRMATIVO" }, - { "CORDOBA, Stella Maris ", "Frente para la Victoria - PJ ", - "Tucumán ", "AFIRMATIVO" }, - { "CURRILEN, Oscar Rubén ", "Frente para la Victoria - PJ ", - "Chubut ", "AFIRMATIVO" } }; - - private static final String[][] EXPECTED_COLUMN_EXTRACTION2 = { - {"","Austria","77","1","78"}, - {"","Belgium","159","2","161"}, - {"","Bulgaria","52","0","52"}, - {"","Croatia","144","0","144"}, - {"","Cyprus","43","2","45"}, - {"","Czech Republic","78","0","78"}, - {"","Denmark","151","2","153"}, - {"","Estonia","46","0","46"}, - {"","Finland","201","1","202"}, - {"","France","428","7","435"}, - {"","Germany","646","21","667"}, - {"","Greece","113","2","115"}, - {"","Hungary","187","0","187"}, - {"","Iceland","18","0","18"}, - {"","Ireland","213","4","217"}, - {"","Israel","25","0","25"}, - {"","Italy","627","12","639"}, - {"","Latvia","7","0","7"}, - {"","Lithuania","94","1","95"}, - {"","Luxembourg","22","0","22"}, - {"","Malta","18","0","18"}, - {"","Netherlands","104","1","105"}, - {"","Norway","195","0","195"}, - {"","Poland","120","1","121"}, - {"","Portugal","532","3","535"}, - {"","Romania","110","0","110"}, - {"","Slovakia","176","0","176"}, - {"","Slovenia","56","0","56"}, - {"","Spain","614","3","617"}, - {"","Sweden","122","3","125"}, - {"","Switzerland","64","0","64"}, - {"","Turkey","96","0","96"}, - {"","United Kingdom","572","14","586"} - }; - - private static final String[][] EXPECTED_TABLE_EXTRACTION = { - {"AANONSEN, DEBORAH, A ","","","STATEN ISLAND, NY ","MEALS ","$85.00"}, - {"TOTAL ","","","","","$85.00"}, - {"AARON, CAREN, T ","","","RICHMOND, VA ","EDUCATIONAL ITEMS ","$78.80"}, - {"AARON, CAREN, T ","","","RICHMOND, VA ","MEALS ","$392.45"}, - {"TOTAL ","","","","","$471.25"}, - 
{"AARON, JOHN ","","","CLARKSVILLE, TN ","MEALS ","$20.39"}, - {"TOTAL ","","","","","$20.39"}, - {"AARON, JOSHUA, N ","","","WEST GROVE, PA ","MEALS ","$310.33"}, - {"AARON , JOSHUA , N ","REGIONAL PULMONARY & SLEEPMEDICINE ","","WEST GROVE, PA ","SPEAKING FEES ","$4,700.00"}, - {"TOTAL ","","","","","$5,010.33"}, - {"AARON, MAUREEN, M ","","","MARTINSVILLE, VA ","MEALS ","$193.67"}, - {"TOTAL ","","","","","$193.67"}, - {"AARON, MICHAEL, L ","","","WEST ISLIP, NY ","MEALS ","$19.50"}, - {"TOTAL ","","","","","$19.50"}, - {"AARON, MICHAEL, R ","","","BROOKLYN, NY ","MEALS ","$65.92"} - }; + private static final String FRX_2012_DISCLOSURE_PDF = "src/test/resources/technology/tabula/frx_2012_disclosure.pdf"; + private static final String[][] FRX_2012_DISCLOSURE_EXPECTED = { + {"AANONSEN, DEBORAH, A", "", "STATEN ISLAND, NY", "MEALS", "$85.00"}, + {"TOTAL", "", "", "", "$85.00"}, + {"AARON, CAREN, T", "", "RICHMOND, VA", "EDUCATIONAL ITEMS", "$78.80"}, + {"AARON, CAREN, T", "", "RICHMOND, VA", "MEALS", "$392.45"}, + {"TOTAL", "", "", "", "$471.25"}, + {"AARON, JOHN", "", "CLARKSVILLE, TN", "MEALS", "$20.39"}, + {"TOTAL", "", "", "", "$20.39"}, + {"AARON, JOSHUA, N", "", "WEST GROVE, PA", "MEALS", "$310.33"}, + {"", "REGIONAL PULMONARY & SLEEP", "", "", ""}, + {"AARON, JOSHUA, N", "", "WEST GROVE, PA", "SPEAKING FEES", "$4,700.00"}, + {"", "MEDICINE", "", "", ""}, + {"TOTAL", "", "", "", "$5,010.33"}, + {"AARON, MAUREEN, M", "", "MARTINSVILLE, VA", "MEALS", "$193.67"}, + {"TOTAL", "", "", "", "$193.67"}, + {"AARON, MICHAEL, L", "", "WEST ISLIP, NY", "MEALS", "$19.50"}, + {"TOTAL", "", "", "", "$19.50"}, + {"AARON, MICHAEL, R", "", "BROOKLYN, NY", "MEALS", "$65.92"} + }; + private static final String[][] EXPECTED_EMPTY_TABLE = { /* actually empty! 
*/ }; - private static final Rectangle[] RECTANGLES_TEST_NATURAL_ORDER = { - new Rectangle(38.368214f, 405.48f, 6.5260315f, 6.1515007f), - new Rectangle(38.368214f, 412.02048f, 5.2577515f, 6.1515007f), - new Rectangle(38.368214f, 417.3023f, 4.8965454f, 6.1515007f), - new Rectangle(38.368214f, 422.26788f, 2.7211914f, 6.1515007f), - new Rectangle(38.368214f, 425.0292f, 2.4161682f, 6.1515007f), - new Rectangle(38.368214f, 427.5144f, 5.4825134f, 6.1515007f), - new Rectangle(38.368214f, 432.97363f, 5.1935425f, 6.1515007f), - new Rectangle(38.368214f, 438.2362f, 2.7211914f, 6.1515007f), - new Rectangle(38.368214f, 440.93732f, 2.2235107f, 6.1515007f), - new Rectangle(38.368214f, 443.15842f, 4.8965454f, 6.1515007f), - new Rectangle(38.368214f, 448.124f, 4.8804626f, 6.1515007f), - new Rectangle(38.368214f, 453.04462f, 7.529419f, 6.1515007f), - new Rectangle(38.368214f, 460.60452f, 5.4744873f, 6.1515007f), - new Rectangle(38.368214f, 466.18335f, 5.217621f, 6.1515007f), - new Rectangle(38.368214f, 471.40417f, 2.4161682f, 6.1515007f), - new Rectangle(38.368214f, 473.8661f, 2.2235107f, 6.1515007f), - new Rectangle(38.368214f, 476.0872f, 4.01355f, 6.1515007f), - new Rectangle(38.368214f, 480.10715f, 2.2235107f, 6.1515007f), - new Rectangle(38.368214f, 482.38846f, 4.8724365f, 6.1515007f), - new Rectangle(38.368214f, 487.32996f, 4.3025208f, 6.1515007f), - new Rectangle(38.368214f, 491.7015f, 7.376892f, 6.1515007f), - new Rectangle(38.368214f, 499.0816f, 2.2235107f, 6.1515007f), - new Rectangle(38.368214f, 501.3629f, 4.4470215f, 6.1515007f), - new Rectangle(38.368214f, 505.87897f, 4.4470215f, 6.1515007f), - new Rectangle(38.368214f, 510.31717f, 2.2234802f, 6.1515007f), - new Rectangle(49.408497f, 462.0598f, 2.2235107f, 6.1515007f), - new Rectangle(49.408497f, 464.2809f, 3.893158f, 6.1515007f), - new Rectangle(49.408497f, 468.1861f, 1.6054077f, 6.1515007f), - new Rectangle(49.408497f, 469.80356f, 4.8965454f, 6.1515007f), - new Rectangle(49.408497f, 474.78436f, 5.4825134f, 
6.1515007f), - new Rectangle(49.408497f, 480.2436f, 1.6054077f, 6.1515007f), - new Rectangle(49.408497f, 481.92206f, 2.2235107f, 6.1515007f), - new Rectangle(49.408497f, 484.14316f, 4.8724365f, 6.1515007f), - new Rectangle(49.408497f, 489.02765f, 5.217621f, 6.1515007f), - new Rectangle(49.408497f, 494.30865f, 5.4744873f, 6.1515007f), - new Rectangle(49.408497f, 499.79517f, 5.2577515f, 6.1515007f), - new Rectangle(49.408497f, 505.1372f, 2.4161682f, 6.1515007f), - new Rectangle(49.408497f, 507.5654f, 2.7211914f, 6.1515007f), - new Rectangle(49.408497f, 510.2986f, 2.2234802f, 6.1515007f), - new Rectangle(793.4082f, 85.08f, 7.3170013f, 6.1515007f), - new Rectangle(793.4082f, 92.3403f, 6.6600037f, 6.1515007f), - new Rectangle(793.4082f, 99.1227f, 5.4629974f, 6.1515007f), - new Rectangle(793.4082f, 104.58301f, 4.4820023f, 6.1515007f), - new Rectangle(793.4082f, 109.022705f, 6.6600037f, 6.1515007f), - new Rectangle(793.4082f, 115.744804f, 2.4930038f, 6.1515007f), - new Rectangle(793.4082f, 437.04028f, 5.328003f, 6.1515007f), - new Rectangle(793.4082f, 442.36377f, 6.147003f, 6.1515007f), - new Rectangle(793.4082f, 448.50626f, 6.057007f, 6.1515007f), - new Rectangle(793.4082f, 454.55875f, 5.850006f, 6.1515007f), - new Rectangle(793.4082f, 460.40424f, 2.4930115f, 6.1515007f), - new Rectangle(793.4082f, 462.92514f, 4.9859924f, 6.1515007f), - new Rectangle(793.4082f, 467.90665f, 4.9859924f, 6.1515007f), - new Rectangle(793.4082f, 472.88815f, 4.9859924f, 6.1515007f), - new Rectangle(793.4082f, 477.86966f, 2.4930115f, 6.1515007f), - new Rectangle(793.4082f, 480.39056f, 5.894989f, 6.1515007f), - new Rectangle(793.4082f, 486.28107f, 2.8259888f, 6.1515007f), - new Rectangle(793.4082f, 489.10257f, 2.4930115f, 6.1515007f), - new Rectangle(793.4082f, 491.62347f, 4.9859924f, 6.1515007f), - new Rectangle(793.4082f, 496.60498f, 4.9859924f, 6.1515007f), - new Rectangle(793.4082f, 501.5865f, 4.9859924f, 6.1515007f), - new Rectangle(793.4082f, 506.568f, 2.4930115f, 6.1515007f), - new 
Rectangle(805.94824f, 510.3f, 2.492981f, 6.1515007f), - new Rectangle(120.76954f, 155.28f, 5.7591705f, 5.5301404f), - new Rectangle(120.76954f, 161.03598f, 4.873764f, 5.5301404f), - new Rectangle(120.76954f, 165.90656f, 4.873764f, 5.5301404f), - new Rectangle(120.76954f, 170.77713f, 4.873764f, 5.5301404f), - new Rectangle(120.76954f, 175.6477f, 2.6562347f, 5.5301404f), - new Rectangle(120.76954f, 178.30075f, 3.1029358f, 5.5301404f), - new Rectangle(120.76954f, 181.4005f, 4.4350433f, 5.5301404f), - new Rectangle(115.60969f, 217.92023f, 5.320465f, 5.5301404f), - new Rectangle(115.60969f, 223.24467f, 4.4350433f, 5.5301404f), - new Rectangle(115.60969f, 227.68372f, 2.2175293f, 5.5301404f), - new Rectangle(115.60969f, 229.90523f, 2.2175293f, 5.5301404f), - new Rectangle(115.60969f, 232.12674f, 4.873764f, 5.5301404f), - new Rectangle(115.60969f, 237.00449f, 2.2175293f, 5.5301404f), - new Rectangle(115.60969f, 239.226f, 3.1029358f, 5.5301404f), - new Rectangle(115.60969f, 242.33292f, 4.4350433f, 5.5301404f), - new Rectangle(115.60969f, 246.77196f, 4.873764f, 5.5301404f), - new Rectangle(115.60969f, 251.6497f, 2.2175293f, 5.5301404f), - new Rectangle(115.60969f, 253.87122f, 2.2175293f, 5.5301404f), - new Rectangle(115.60969f, 256.0927f, 4.4350586f, 5.5301404f), - new Rectangle(115.60969f, 260.53174f, 4.4350586f, 5.5301404f), - new Rectangle(115.60969f, 264.97076f, 2.2175293f, 5.5301404f), - new Rectangle(125.989265f, 219.60011f, 4.873764f, 5.5301404f), - new Rectangle(125.989265f, 224.47467f, 4.4350433f, 5.5301404f), - new Rectangle(125.989265f, 228.9105f, 3.1029358f, 5.5301404f), - new Rectangle(125.989265f, 232.01424f, 2.2175293f, 5.5301404f), - new Rectangle(125.989265f, 234.23575f, 4.4350433f, 5.5301404f), - new Rectangle(125.989265f, 238.67159f, 4.873764f, 5.5301404f), - new Rectangle(125.989265f, 243.54614f, 4.873764f, 5.5301404f), - new Rectangle(125.989265f, 248.4207f, 4.873764f, 5.5301404f), - new Rectangle(125.989265f, 253.29526f, 2.6562347f, 5.5301404f), - new 
Rectangle(125.989265f, 255.9523f, 3.1029358f, 5.5301404f), - new Rectangle(125.989265f, 259.05603f, 4.4350586f, 5.5301404f), - new Rectangle(110.38917f, 285.3002f, 5.759186f, 5.5301404f), - new Rectangle(110.38917f, 291.05618f, 4.873749f, 5.5301404f), - new Rectangle(110.38917f, 295.92676f, 7.091278f, 5.5301404f), - new Rectangle(110.38917f, 303.01486f, 4.873749f, 5.5301404f), - new Rectangle(110.38917f, 307.88544f, 4.4350586f, 5.5301404f), - new Rectangle(110.38917f, 312.3173f, 3.1029358f, 5.5301404f), - new Rectangle(110.38917f, 315.41702f, 2.2175293f, 5.5301404f), - new Rectangle(110.38917f, 317.63852f, 4.873749f, 5.5301404f), - new Rectangle(110.38917f, 322.5091f, 2.65625f, 5.5301404f), - new Rectangle(110.38917f, 325.16214f, 2.2175293f, 5.5301404f), - new Rectangle(120.76954f, 275.22003f, 3.1029358f, 5.5301404f), - new Rectangle(120.76954f, 278.32855f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 282.7692f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 287.20984f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 291.65048f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 296.09113f, 3.1029358f, 5.5301404f), - new Rectangle(120.76954f, 299.19965f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 303.6403f, 4.873749f, 5.5301404f), - new Rectangle(120.76954f, 308.51962f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 312.96027f, 3.1029358f, 5.5301404f), - new Rectangle(120.76954f, 316.0688f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 320.50943f, 2.2175293f, 5.5301404f), - new Rectangle(120.76954f, 322.73093f, 4.873749f, 5.5301404f), - new Rectangle(120.76954f, 327.61026f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 332.0509f, 3.1029358f, 5.5301404f), - new Rectangle(120.76954f, 335.15942f, 2.2175293f, 5.5301404f), - new Rectangle(131.14992f, 284.70035f, 4.4350586f, 5.5301404f), - new Rectangle(131.14992f, 289.133f, 4.873749f, 5.5301404f), - new Rectangle(131.14992f, 294.00436f, 4.873749f, 5.5301404f), - new 
Rectangle(131.14992f, 298.87573f, 4.873749f, 5.5301404f), - new Rectangle(131.14992f, 303.7471f, 2.65625f, 5.5301404f), - new Rectangle(131.14992f, 306.40094f, 3.1029358f, 5.5301404f), - new Rectangle(131.14992f, 309.5015f, 4.4350586f, 5.5301404f), - new Rectangle(131.14992f, 313.76025f, 2.2175293f, 5.5301404f), - new Rectangle(131.14992f, 315.98175f, 2.65625f, 5.5301404f), - new Rectangle(131.14992f, 318.6356f, 4.4350586f, 5.5301404f), - new Rectangle(131.14992f, 323.06824f, 2.65625f, 5.5301404f), - new Rectangle(120.76954f, 360.60025f, 4.873749f, 5.5301404f), - new Rectangle(120.76954f, 365.4724f, 2.2175293f, 5.5301404f), - new Rectangle(120.76954f, 367.6939f, 2.65625f, 5.5301404f), - new Rectangle(120.76954f, 370.34854f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 374.78198f, 2.65625f, 5.5301404f), - new Rectangle(120.76954f, 403.82193f, 5.759186f, 5.5301404f), - new Rectangle(120.76954f, 409.3402f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 413.77365f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 418.2071f, 4.873749f, 5.5301404f), - new Rectangle(120.76954f, 423.07925f, 3.1029358f, 5.5301404f), - new Rectangle(120.76954f, 426.1806f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 430.61404f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 435.0475f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 439.30624f, 2.2175293f, 5.5301404f), - new Rectangle(120.76954f, 441.52774f, 4.873749f, 5.5301404f), - new Rectangle(120.76954f, 446.3999f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 450.83334f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 455.0921f, 4.4350586f, 5.5301404f), - new Rectangle(120.76954f, 459.53113f, 2.2175293f, 5.5301404f), - new Rectangle(148.37137f, 132.9f, 5.5688934f, 6.6683106f), - new Rectangle(148.37137f, 138.4699f, 5.5688934f, 6.6683106f), - new Rectangle(148.37137f, 144.0398f, 2.7844543f, 6.6683106f), - new Rectangle(148.37137f, 146.82524f, 5.0079956f, 6.6683106f), - new 
Rectangle(148.37137f, 151.83424f, 2.2235565f, 6.6683106f), - new Rectangle(148.37137f, 154.05879f, 5.5688934f, 6.6683106f), - new Rectangle(148.37137f, 238.71704f, 5.5688934f, 6.6683106f), - new Rectangle(148.37137f, 292.7153f, 5.5689087f, 6.6683106f), - new Rectangle(148.37137f, 298.2852f, 2.7844543f, 6.6683106f), - new Rectangle(148.37137f, 301.07065f, 5.5689087f, 6.6683106f), - new Rectangle(148.37137f, 306.64053f, 5.5689087f, 6.6683106f), - new Rectangle(148.37137f, 312.21042f, 5.5689087f, 6.6683106f), - new Rectangle(148.37137f, 350.90823f, 5.5689087f, 6.6683106f), - new Rectangle(148.37137f, 356.47812f, 2.7844543f, 6.6683106f), - new Rectangle(148.37137f, 359.26358f, 5.5689087f, 6.6683106f), - new Rectangle(148.37137f, 364.83347f, 5.5689087f, 6.6683106f), - new Rectangle(148.37137f, 370.40335f, 5.5689087f, 6.6683106f), - new Rectangle(148.37137f, 375.97324f, 5.5689087f, 6.6683106f), - new Rectangle(148.37137f, 381.54312f, 5.5689087f, 6.6683106f), - new Rectangle(160.85129f, 132.9f, 8.343323f, 6.6683106f), - new Rectangle(160.85129f, 141.24632f, 5.5688934f, 6.6683106f), - new Rectangle(160.85129f, 146.81822f, 2.2235565f, 6.6683106f), - new Rectangle(160.85129f, 149.04478f, 2.7844543f, 6.6683106f), - new Rectangle(160.85129f, 151.83224f, 5.5688934f, 6.6683106f), - new Rectangle(160.85129f, 235.89052f, 5.5688934f, 6.6683106f), - new Rectangle(160.85129f, 241.46942f, 5.5688934f, 6.6683106f), - new Rectangle(160.85129f, 296.908f, 5.5689087f, 6.6683106f), - new Rectangle(160.85129f, 302.4799f, 5.5689087f, 6.6683106f), - new Rectangle(160.85129f, 308.0518f, 5.5689087f, 6.6683106f), - new Rectangle(160.85129f, 350.89023f, 5.5689087f, 6.6683106f), - new Rectangle(160.85129f, 356.46213f, 2.7844543f, 6.6683106f), - new Rectangle(160.85129f, 359.24957f, 5.5689087f, 6.6683106f), - new Rectangle(160.85129f, 364.82147f, 5.5689087f, 6.6683106f), - new Rectangle(160.85129f, 370.39337f, 5.5689087f, 6.6683106f), - new Rectangle(160.85129f, 375.96527f, 5.5689087f, 6.6683106f), - 
new Rectangle(160.85129f, 381.53717f, 5.5689087f, 6.6683106f), - new Rectangle(173.3312f, 132.9f, 2.7844543f, 6.6683106f), - new Rectangle(173.3312f, 135.69044f, 5.0079956f, 6.6683106f), - new Rectangle(173.3312f, 140.70445f, 5.5688934f, 6.6683106f), - new Rectangle(173.3312f, 146.27936f, 2.2235565f, 6.6683106f), - new Rectangle(173.3312f, 148.50893f, 5.5688934f, 6.6683106f), - new Rectangle(173.3312f, 154.08383f, 5.5688934f, 6.6683106f), - new Rectangle(173.3312f, 159.65874f, 5.5688934f, 6.6683106f), - new Rectangle(173.3312f, 235.85646f, 5.5688934f, 6.6683106f), - new Rectangle(173.3312f, 241.43137f, 5.5688934f, 6.6683106f), - new Rectangle(173.3312f, 292.66922f, 5.5689087f, 6.6683106f), - new Rectangle(173.3312f, 298.24414f, 2.7844543f, 6.6683106f), - new Rectangle(173.3312f, 301.0346f, 5.5689087f, 6.6683106f), - new Rectangle(173.3312f, 306.60953f, 5.5689087f, 6.6683106f), - new Rectangle(173.3312f, 312.18445f, 5.5689087f, 6.6683106f), - new Rectangle(173.3312f, 350.88226f, 5.5689087f, 6.6683106f), - new Rectangle(173.3312f, 356.45718f, 2.7844543f, 6.6683106f), - new Rectangle(173.3312f, 359.24765f, 5.5689087f, 6.6683106f), - new Rectangle(173.3312f, 364.82257f, 5.5689087f, 6.6683106f), - new Rectangle(173.3312f, 370.3975f, 5.5689087f, 6.6683106f), - new Rectangle(173.3312f, 375.9724f, 5.5689087f, 6.6683106f), - new Rectangle(173.3312f, 381.54733f, 5.5689087f, 6.6683106f), - new Rectangle(185.81113f, 132.9f, 5.5688934f, 6.6683106f), - new Rectangle(185.81113f, 138.47389f, 5.5688934f, 6.6683106f), - new Rectangle(185.81113f, 144.04779f, 5.0079956f, 6.6683106f), - new Rectangle(185.81113f, 149.06079f, 5.5688934f, 6.6683106f), - new Rectangle(185.81113f, 154.63469f, 8.343323f, 6.6683106f), - new Rectangle(185.81113f, 162.98303f, 5.5688934f, 6.6683106f), - new Rectangle(185.81113f, 168.55693f, 5.5688934f, 6.6683106f), - new Rectangle(185.81113f, 174.13083f, 5.5688934f, 6.6683106f), - new Rectangle(185.81113f, 179.70473f, 3.3353271f, 6.6683106f), - new 
Rectangle(185.81113f, 183.04506f, 5.5688934f, 6.6683106f), - new Rectangle(185.81113f, 235.8434f, 5.5688934f, 6.6683106f), - new Rectangle(185.81113f, 241.4223f, 5.5688934f, 6.6683106f), - new Rectangle(185.81113f, 292.66016f, 5.5689087f, 6.6683106f), - new Rectangle(185.81113f, 298.23907f, 2.7844543f, 6.6683106f), - new Rectangle(185.81113f, 301.02853f, 5.5689087f, 6.6683106f), - new Rectangle(185.81113f, 306.60245f, 5.5689087f, 6.6683106f), - new Rectangle(185.81113f, 312.17636f, 5.5689087f, 6.6683106f), - new Rectangle(185.81113f, 350.87418f, 5.5689087f, 6.6683106f), - new Rectangle(185.81113f, 356.4481f, 2.7844543f, 6.6683106f), - new Rectangle(185.81113f, 359.23755f, 5.5689087f, 6.6683106f), - new Rectangle(185.81113f, 364.81146f, 5.5689087f, 6.6683106f), - new Rectangle(185.81113f, 370.38538f, 5.5689087f, 6.6683106f), - new Rectangle(185.81113f, 375.9593f, 5.5689087f, 6.6683106f), - new Rectangle(185.81113f, 381.5332f, 5.5689087f, 6.6683106f), - new Rectangle(198.05154f, 132.9f, 2.7844543f, 6.6683106f), - new Rectangle(198.05154f, 135.69145f, 5.0079956f, 6.6683106f), - new Rectangle(198.05154f, 140.70647f, 3.3353271f, 6.6683106f), - new Rectangle(198.05154f, 144.04881f, 5.5688934f, 6.6683106f), - new Rectangle(198.05154f, 149.62473f, 5.5688934f, 6.6683106f), - new Rectangle(198.05154f, 155.20064f, 2.2235565f, 6.6683106f), - new Rectangle(198.05154f, 235.83844f, 5.5688934f, 6.6683106f), - new Rectangle(198.05154f, 241.41435f, 5.5688934f, 6.6683106f), - new Rectangle(198.05154f, 289.8928f, 5.5689087f, 6.6683106f), - new Rectangle(198.05154f, 295.4687f, 5.5689087f, 6.6683106f), - new Rectangle(198.05154f, 301.0446f, 2.7844543f, 6.6683106f), - new Rectangle(198.05154f, 303.83606f, 5.5689087f, 6.6683106f), - new Rectangle(198.05154f, 309.41196f, 5.5689087f, 6.6683106f), - new Rectangle(198.05154f, 314.98785f, 5.5689087f, 6.6683106f), - new Rectangle(198.05154f, 350.86615f, 5.5689087f, 6.6683106f), - new Rectangle(198.05154f, 356.44205f, 2.7844543f, 6.6683106f), - 
new Rectangle(198.05154f, 359.23352f, 5.5689087f, 6.6683106f), - new Rectangle(198.05154f, 364.80942f, 5.5689087f, 6.6683106f), - new Rectangle(198.05154f, 370.3853f, 5.5689087f, 6.6683106f), - new Rectangle(198.05154f, 375.9612f, 5.5689087f, 6.6683106f), - new Rectangle(198.05154f, 381.5371f, 5.5689087f, 6.6683106f), - new Rectangle(211.49136f, 132.9f, 7.231552f, 6.6683106f), - new Rectangle(211.49136f, 140.13855f, 5.0079956f, 6.6683106f), - new Rectangle(211.49136f, 145.15356f, 5.5688934f, 6.6683106f), - new Rectangle(211.49136f, 150.72948f, 3.3353271f, 6.6683106f), - new Rectangle(211.49136f, 154.07182f, 5.5688934f, 6.6683106f), - new Rectangle(211.49136f, 159.64774f, 5.0079956f, 6.6683106f), - new Rectangle(211.49136f, 235.84846f, 5.5688934f, 6.6683106f), - new Rectangle(211.49136f, 241.42438f, 5.5688934f, 6.6683106f), - new Rectangle(211.49136f, 292.66324f, 5.5689087f, 6.6683106f), - new Rectangle(211.49136f, 298.23914f, 2.7844543f, 6.6683106f), - new Rectangle(211.49136f, 301.0306f, 5.5689087f, 6.6683106f), - new Rectangle(211.49136f, 306.6065f, 5.5689087f, 6.6683106f), - new Rectangle(211.49136f, 312.1824f, 5.5689087f, 6.6683106f), - new Rectangle(211.49136f, 350.88123f, 5.5689087f, 6.6683106f), - new Rectangle(211.49136f, 356.45712f, 2.7844543f, 6.6683106f), - new Rectangle(211.49136f, 359.2486f, 5.5689087f, 6.6683106f), - new Rectangle(211.49136f, 364.8245f, 5.5689087f, 6.6683106f), - new Rectangle(211.49136f, 370.4004f, 5.5689087f, 6.6683106f), - new Rectangle(211.49136f, 375.9763f, 5.5689087f, 6.6683106f), - new Rectangle(211.49136f, 381.5522f, 5.5689087f, 6.6683106f), - new Rectangle(223.97128f, 132.9f, 6.6806793f, 6.6683106f), - new Rectangle(223.97128f, 139.58366f, 5.0079956f, 6.6683106f), - new Rectangle(223.97128f, 144.59467f, 2.7844543f, 6.6683106f), - new Rectangle(223.97128f, 147.38213f, 5.5688934f, 6.6683106f), - new Rectangle(223.97128f, 152.95403f, 5.5688934f, 6.6683106f), - new Rectangle(223.97128f, 158.52592f, 2.2235565f, 6.6683106f), - new 
Rectangle(223.97128f, 160.75249f, 5.5688934f, 6.6683106f), - new Rectangle(223.97128f, 235.87048f, 5.5688934f, 6.6683106f), - new Rectangle(223.97128f, 241.44939f, 5.5688934f, 6.6683106f), - new Rectangle(223.97128f, 292.68723f, 5.5689087f, 6.6683106f), - new Rectangle(223.97128f, 298.26614f, 2.7844543f, 6.6683106f), - new Rectangle(223.97128f, 301.0536f, 5.5689087f, 6.6683106f), - new Rectangle(223.97128f, 306.6255f, 5.5689087f, 6.6683106f), - new Rectangle(223.97128f, 312.1974f, 5.5689087f, 6.6683106f), - new Rectangle(223.97128f, 350.8952f, 5.5689087f, 6.6683106f), - new Rectangle(223.97128f, 356.4671f, 2.7844543f, 6.6683106f), - new Rectangle(223.97128f, 359.25455f, 5.5689087f, 6.6683106f), - new Rectangle(223.97128f, 364.82645f, 5.5689087f, 6.6683106f), - new Rectangle(223.97128f, 370.39835f, 5.5689087f, 6.6683106f), - new Rectangle(223.97128f, 375.97025f, 5.5689087f, 6.6683106f), - new Rectangle(223.97128f, 381.54214f, 5.5689087f, 6.6683106f), - new Rectangle(236.4512f, 132.9f, 6.6806793f, 6.6683106f), - new Rectangle(236.4512f, 139.58366f, 5.5688934f, 6.6683106f), - new Rectangle(236.4512f, 145.15556f, 2.2235565f, 6.6683106f), - new Rectangle(236.4512f, 147.38213f, 5.5688934f, 6.6683106f), - new Rectangle(236.4512f, 152.95403f, 5.5688934f, 6.6683106f), - new Rectangle(236.4512f, 158.52592f, 3.3353271f, 6.6683106f), - new Rectangle(236.4512f, 161.86426f, 2.2235565f, 6.6683106f), - new Rectangle(236.4512f, 164.09082f, 5.5688934f, 6.6683106f), - new Rectangle(236.4512f, 235.84845f, 5.5688934f, 6.6683106f), - new Rectangle(236.4512f, 241.42035f, 5.5688934f, 6.6683106f), - new Rectangle(236.4512f, 289.8978f, 5.5689087f, 6.6683106f), - new Rectangle(236.4512f, 295.4697f, 5.5689087f, 6.6683106f), - new Rectangle(236.4512f, 301.0416f, 2.7844543f, 6.6683106f), - new Rectangle(236.4512f, 303.82904f, 5.5689087f, 6.6683106f), - new Rectangle(236.4512f, 309.40094f, 5.5689087f, 6.6683106f), - new Rectangle(236.4512f, 314.97284f, 5.5689087f, 6.6683106f), - new 
Rectangle(236.4512f, 350.85016f, 5.5689087f, 6.6683106f), - new Rectangle(236.4512f, 356.42206f, 2.7844543f, 6.6683106f), - new Rectangle(236.4512f, 359.2095f, 5.5689087f, 6.6683106f), - new Rectangle(236.4512f, 364.7814f, 5.5689087f, 6.6683106f), - new Rectangle(236.4512f, 370.3533f, 5.5689087f, 6.6683106f), - new Rectangle(236.4512f, 375.9252f, 5.5689087f, 6.6683106f), - new Rectangle(236.4512f, 381.4971f, 5.5689087f, 6.6683106f), - new Rectangle(248.93112f, 132.9f, 6.6806793f, 6.6683106f), - new Rectangle(248.93112f, 139.58267f, 2.2235565f, 6.6683106f), - new Rectangle(248.93112f, 141.80823f, 5.5688934f, 6.6683106f), - new Rectangle(248.93112f, 147.37912f, 5.0079956f, 6.6683106f), - new Rectangle(248.93112f, 152.38913f, 5.5688934f, 6.6683106f), - new Rectangle(248.93112f, 157.96002f, 5.5688934f, 6.6683106f), - new Rectangle(248.93112f, 163.53091f, 2.2235565f, 6.6683106f), - new Rectangle(248.93112f, 165.75647f, 5.5688934f, 6.6683106f), - new Rectangle(248.93112f, 235.8945f, 5.5688934f, 6.6683106f), - new Rectangle(248.93112f, 241.4734f, 5.5688934f, 6.6683106f), - new Rectangle(248.93112f, 289.95184f, 5.5689087f, 6.6683106f), - new Rectangle(248.93112f, 295.52274f, 5.5689087f, 6.6683106f), - new Rectangle(248.93112f, 301.09363f, 2.7844543f, 6.6683106f), - new Rectangle(248.93112f, 303.88007f, 5.5689087f, 6.6683106f), - new Rectangle(248.93112f, 309.45096f, 5.5689087f, 6.6683106f), - new Rectangle(248.93112f, 315.02185f, 5.5689087f, 6.6683106f), - new Rectangle(248.93112f, 350.90015f, 5.5689087f, 6.6683106f), - new Rectangle(248.93112f, 356.47104f, 2.7844543f, 6.6683106f), - new Rectangle(248.93112f, 359.25748f, 5.5689087f, 6.6683106f), - new Rectangle(248.93112f, 364.82837f, 5.5689087f, 6.6683106f), - new Rectangle(248.93112f, 370.39926f, 5.5689087f, 6.6683106f), - new Rectangle(248.93112f, 375.97015f, 5.5689087f, 6.6683106f), - new Rectangle(248.93112f, 381.54105f, 5.5689087f, 6.6683106f), - new Rectangle(261.41104f, 132.9f, 6.6806793f, 6.6683106f), - new 
Rectangle(261.41104f, 139.58667f, 7.231552f, 6.6683106f), - new Rectangle(261.41104f, 146.82423f, 2.2235565f, 6.6683106f), - new Rectangle(261.41104f, 149.0538f, 2.7844543f, 6.6683106f), - new Rectangle(261.41104f, 151.84425f, 5.0079956f, 6.6683106f), - new Rectangle(261.41104f, 156.85826f, 5.5688934f, 6.6683106f), - new Rectangle(261.41104f, 162.43317f, 3.3353271f, 6.6683106f), - new Rectangle(261.41104f, 165.7745f, 2.2235565f, 6.6683106f), - new Rectangle(261.41104f, 168.00407f, 5.5688934f, 6.6683106f), - new Rectangle(261.41104f, 173.57898f, 5.5688934f, 6.6683106f), - new Rectangle(261.41104f, 179.15388f, 5.5688934f, 6.6683106f), - new Rectangle(261.41104f, 235.85246f, 5.5688934f, 6.6683106f), - new Rectangle(261.41104f, 241.42737f, 5.5688934f, 6.6683106f), - new Rectangle(261.41104f, 289.90582f, 5.5689087f, 6.6683106f), - new Rectangle(261.41104f, 295.48074f, 5.5689087f, 6.6683106f), - new Rectangle(261.41104f, 301.05566f, 2.7844543f, 6.6683106f), - new Rectangle(261.41104f, 303.84613f, 5.5689087f, 6.6683106f), - new Rectangle(261.41104f, 309.42105f, 5.5689087f, 6.6683106f), - new Rectangle(261.41104f, 314.99597f, 5.5689087f, 6.6683106f), - new Rectangle(261.41104f, 350.8743f, 5.5689087f, 6.6683106f), - new Rectangle(261.41104f, 356.44922f, 2.7844543f, 6.6683106f), - new Rectangle(261.41104f, 359.2397f, 5.5689087f, 6.6683106f), - new Rectangle(261.41104f, 364.8146f, 5.5689087f, 6.6683106f), - new Rectangle(261.41104f, 370.38953f, 5.5689087f, 6.6683106f), - new Rectangle(261.41104f, 375.96445f, 5.5689087f, 6.6683106f), - new Rectangle(261.41104f, 381.53937f, 5.5689087f, 6.6683106f), - new Rectangle(273.89096f, 132.9f, 6.6806793f, 6.6683106f), - new Rectangle(273.89096f, 139.58467f, 5.5688934f, 6.6683106f), - new Rectangle(273.89096f, 145.15758f, 5.0079956f, 6.6683106f), - new Rectangle(273.89096f, 150.16959f, 2.7844543f, 6.6683106f), - new Rectangle(273.89096f, 152.95804f, 3.3353271f, 6.6683106f), - new Rectangle(273.89096f, 156.29738f, 2.2235565f, 6.6683106f), 
- new Rectangle(273.89096f, 158.52493f, 5.5688934f, 6.6683106f), - new Rectangle(273.89096f, 235.86348f, 5.5688934f, 6.6683106f), - new Rectangle(273.89096f, 241.4424f, 5.5688934f, 6.6683106f), - new Rectangle(273.89096f, 289.92084f, 5.5689087f, 6.6683106f), - new Rectangle(273.89096f, 295.49374f, 5.5689087f, 6.6683106f), - new Rectangle(273.89096f, 301.06665f, 2.7844543f, 6.6683106f), - new Rectangle(273.89096f, 303.8551f, 5.5689087f, 6.6683106f), - new Rectangle(273.89096f, 309.428f, 5.5689087f, 6.6683106f), - new Rectangle(273.89096f, 315.00092f, 5.5689087f, 6.6683106f), - new Rectangle(273.89096f, 350.87924f, 5.5689087f, 6.6683106f), - new Rectangle(273.89096f, 356.45215f, 2.7844543f, 6.6683106f), - new Rectangle(273.89096f, 359.2406f, 5.5689087f, 6.6683106f), - new Rectangle(273.89096f, 364.8135f, 5.5689087f, 6.6683106f), - new Rectangle(273.89096f, 370.3864f, 5.5689087f, 6.6683106f), - new Rectangle(273.89096f, 375.95932f, 5.5689087f, 6.6683106f), - new Rectangle(273.89096f, 381.53223f, 5.5689087f, 6.6683106f), - new Rectangle(286.37088f, 132.9f, 7.231552f, 6.6683106f), - new Rectangle(286.37088f, 140.14056f, 5.0079956f, 6.6683106f), - new Rectangle(286.37088f, 145.15758f, 5.5688934f, 6.6683106f), - new Rectangle(286.37088f, 150.73549f, 5.0079956f, 6.6683106f), - new Rectangle(286.37088f, 155.7525f, 5.5688934f, 6.6683106f), - new Rectangle(286.37088f, 161.33041f, 2.7844543f, 6.6683106f), - new Rectangle(286.37088f, 164.09183f, 7.231552f, 6.6683106f), - new Rectangle(286.37088f, 171.3324f, 5.5688934f, 6.6683106f), - new Rectangle(286.37088f, 176.91031f, 5.5688934f, 6.6683106f), - new Rectangle(286.37088f, 182.48822f, 5.5688934f, 6.6683106f), - new Rectangle(286.37088f, 188.06613f, 5.5688934f, 6.6683106f), - new Rectangle(286.37088f, 193.64404f, 2.2235565f, 6.6683106f), - new Rectangle(286.37088f, 195.8766f, 2.2235565f, 6.6683106f), - new Rectangle(286.37088f, 198.10916f, 5.0079956f, 6.6683106f), - new Rectangle(286.37088f, 235.78935f, 5.5688934f, 6.6683106f), 
- new Rectangle(286.37088f, 241.36726f, 5.5688934f, 6.6683106f), - new Rectangle(286.37088f, 289.8457f, 5.5689087f, 6.6683106f), - new Rectangle(286.37088f, 295.4236f, 5.5689087f, 6.6683106f), - new Rectangle(286.37088f, 301.00153f, 2.7844543f, 6.6683106f), - new Rectangle(286.37088f, 303.79498f, 5.5689087f, 6.6683106f), - new Rectangle(286.37088f, 309.3729f, 5.5689087f, 6.6683106f), - new Rectangle(286.37088f, 314.9508f, 5.5689087f, 6.6683106f), - new Rectangle(286.37088f, 350.82913f, 5.5689087f, 6.6683106f), - new Rectangle(286.37088f, 356.40704f, 2.7844543f, 6.6683106f), - new Rectangle(286.37088f, 359.2005f, 5.5689087f, 6.6683106f), - new Rectangle(286.37088f, 364.7784f, 5.5689087f, 6.6683106f), - new Rectangle(286.37088f, 370.35632f, 5.5689087f, 6.6683106f), - new Rectangle(286.37088f, 375.93423f, 5.5689087f, 6.6683106f), - new Rectangle(286.37088f, 381.51215f, 5.5689087f, 6.6683106f), - new Rectangle(298.6113f, 132.9f, 5.5688934f, 6.6683106f), - new Rectangle(298.6113f, 138.4729f, 2.2235565f, 6.6683106f), - new Rectangle(298.6113f, 140.70045f, 2.7844543f, 6.6683106f), - new Rectangle(298.6113f, 143.4889f, 5.5688934f, 6.6683106f), - new Rectangle(298.6113f, 149.06181f, 5.5688934f, 6.6683106f), - new Rectangle(298.6113f, 154.63472f, 5.5688934f, 6.6683106f), - new Rectangle(298.6113f, 160.20763f, 5.5688934f, 6.6683106f), - new Rectangle(298.6113f, 165.78053f, 2.2235565f, 6.6683106f), - new Rectangle(298.6113f, 168.00809f, 5.5688934f, 6.6683106f), - new Rectangle(298.6113f, 235.86649f, 5.5688934f, 6.6683106f), - new Rectangle(298.6113f, 241.4454f, 5.5688934f, 6.6683106f), - new Rectangle(298.6113f, 289.92386f, 5.5689087f, 6.6683106f), - new Rectangle(298.6113f, 295.49677f, 5.5689087f, 6.6683106f), - new Rectangle(298.6113f, 301.06967f, 2.7844543f, 6.6683106f), - new Rectangle(298.6113f, 303.85812f, 5.5689087f, 6.6683106f), - new Rectangle(298.6113f, 309.43103f, 5.5689087f, 6.6683106f), - new Rectangle(298.6113f, 315.00394f, 5.5689087f, 6.6683106f), - new 
Rectangle(298.6113f, 350.88226f, 5.5689087f, 6.6683106f), - new Rectangle(298.6113f, 356.45517f, 2.7844543f, 6.6683106f), - new Rectangle(298.6113f, 359.24362f, 5.5689087f, 6.6683106f), - new Rectangle(298.6113f, 364.81653f, 5.5689087f, 6.6683106f), - new Rectangle(298.6113f, 370.38943f, 5.5689087f, 6.6683106f), - new Rectangle(298.6113f, 375.96234f, 5.5689087f, 6.6683106f), - new Rectangle(298.6113f, 381.53525f, 5.5689087f, 6.6683106f), - new Rectangle(312.05112f, 132.9f, 6.1197815f, 6.6683106f), - new Rectangle(312.05112f, 139.02779f, 5.5688934f, 6.6683106f), - new Rectangle(312.05112f, 144.60469f, 3.3353271f, 6.6683106f), - new Rectangle(312.05112f, 147.94803f, 5.0079956f, 6.6683106f), - new Rectangle(312.05112f, 152.96404f, 5.5688934f, 6.6683106f), - new Rectangle(312.05112f, 158.54094f, 5.0079956f, 6.6683106f), - new Rectangle(312.05112f, 235.82037f, 5.5688934f, 6.6683106f), - new Rectangle(312.05112f, 241.39728f, 5.5688934f, 6.6683106f), - new Rectangle(312.05112f, 289.8757f, 5.5689087f, 6.6683106f), - new Rectangle(312.05112f, 295.4526f, 5.5689087f, 6.6683106f), - new Rectangle(312.05112f, 301.0295f, 2.7844543f, 6.6683106f), - new Rectangle(312.05112f, 303.82196f, 5.5689087f, 6.6683106f), - new Rectangle(312.05112f, 309.39886f, 5.5689087f, 6.6683106f), - new Rectangle(312.05112f, 314.97577f, 5.5689087f, 6.6683106f), - new Rectangle(312.05112f, 350.85406f, 5.5689087f, 6.6683106f), - new Rectangle(312.05112f, 356.43097f, 2.7844543f, 6.6683106f), - new Rectangle(312.05112f, 359.22342f, 5.5689087f, 6.6683106f), - new Rectangle(312.05112f, 364.80032f, 5.5689087f, 6.6683106f), - new Rectangle(312.05112f, 370.37723f, 5.5689087f, 6.6683106f), - new Rectangle(312.05112f, 375.95413f, 5.5689087f, 6.6683106f), - new Rectangle(312.05112f, 381.53104f, 5.5689087f, 6.6683106f), - new Rectangle(324.53104f, 132.9f, 7.231552f, 6.6683106f), - new Rectangle(324.53104f, 140.13855f, 5.5688934f, 6.6683106f), - new Rectangle(324.53104f, 145.71446f, 2.7844543f, 6.6683106f), - new 
Rectangle(324.53104f, 148.50592f, 5.5688934f, 6.6683106f), - new Rectangle(324.53104f, 154.08183f, 5.5688934f, 6.6683106f), - new Rectangle(324.53104f, 159.65775f, 3.3353271f, 6.6683106f), - new Rectangle(324.53104f, 163.00009f, 2.2235565f, 6.6683106f), - new Rectangle(324.53104f, 165.23065f, 5.5688934f, 6.6683106f), - new Rectangle(324.53104f, 170.80656f, 5.5688934f, 6.6683106f), - new Rectangle(324.53104f, 176.38248f, 5.5688934f, 6.6683106f), - new Rectangle(324.53104f, 181.95839f, 5.0079956f, 6.6683106f), - new Rectangle(324.53104f, 233.07806f, 5.5688934f, 6.6683106f), - new Rectangle(324.53104f, 238.65398f, 5.5688934f, 6.6683106f), - new Rectangle(324.53104f, 244.22989f, 5.5688934f, 6.6683106f), - new Rectangle(324.53104f, 287.06833f, 5.5689087f, 6.6683106f), - new Rectangle(324.53104f, 292.64423f, 5.5689087f, 6.6683106f), - new Rectangle(324.53104f, 298.22012f, 5.5689087f, 6.6683106f), - new Rectangle(324.53104f, 303.79602f, 2.7844543f, 6.6683106f), - new Rectangle(324.53104f, 306.5875f, 5.5689087f, 6.6683106f), - new Rectangle(324.53104f, 312.1634f, 5.5689087f, 6.6683106f), - new Rectangle(324.53104f, 317.7393f, 5.5689087f, 6.6683106f), - new Rectangle(324.53104f, 350.85718f, 5.5689087f, 6.6683106f), - new Rectangle(324.53104f, 356.43307f, 2.7844543f, 6.6683106f), - new Rectangle(324.53104f, 359.22455f, 5.5689087f, 6.6683106f), - new Rectangle(324.53104f, 364.80045f, 5.5689087f, 6.6683106f), - new Rectangle(324.53104f, 370.37634f, 5.5689087f, 6.6683106f), - new Rectangle(324.53104f, 375.95224f, 5.5689087f, 6.6683106f), - new Rectangle(324.53104f, 381.52814f, 5.5689087f, 6.6683106f), - new Rectangle(337.01096f, 132.9f, 7.231552f, 6.6683106f), - new Rectangle(337.01096f, 140.13855f, 5.5688934f, 6.6683106f), - new Rectangle(337.01096f, 145.71446f, 8.343323f, 6.6683106f), - new Rectangle(337.01096f, 154.0648f, 5.5688934f, 6.6683106f), - new Rectangle(337.01096f, 159.64072f, 5.5688934f, 6.6683106f), - new Rectangle(337.01096f, 165.21663f, 2.2235565f, 6.6683106f), 
- new Rectangle(337.01096f, 167.44719f, 5.5688934f, 6.6683106f), - new Rectangle(337.01096f, 233.08505f, 5.5688934f, 6.6683106f), - new Rectangle(337.01096f, 238.66096f, 5.5688934f, 6.6683106f), - new Rectangle(337.01096f, 244.23688f, 5.5688934f, 6.6683106f), - new Rectangle(337.01096f, 289.8948f, 5.5689087f, 6.6683106f), - new Rectangle(337.01096f, 295.4707f, 5.5689087f, 6.6683106f), - new Rectangle(337.01096f, 301.0466f, 2.7844543f, 6.6683106f), - new Rectangle(337.01096f, 303.83807f, 5.5689087f, 6.6683106f), - new Rectangle(337.01096f, 309.41397f, 5.5689087f, 6.6683106f), - new Rectangle(337.01096f, 314.98987f, 5.5689087f, 6.6683106f), - new Rectangle(337.01096f, 350.86816f, 5.5689087f, 6.6683106f), - new Rectangle(337.01096f, 356.44406f, 2.7844543f, 6.6683106f), - new Rectangle(337.01096f, 359.23553f, 5.5689087f, 6.6683106f), - new Rectangle(337.01096f, 364.81143f, 5.5689087f, 6.6683106f), - new Rectangle(337.01096f, 370.38733f, 5.5689087f, 6.6683106f), - new Rectangle(337.01096f, 375.96323f, 5.5689087f, 6.6683106f), - new Rectangle(337.01096f, 381.53912f, 5.5689087f, 6.6683106f), - new Rectangle(349.49088f, 132.9f, 7.79245f, 6.6683106f), - new Rectangle(349.49088f, 140.70146f, 3.3353271f, 6.6683106f), - new Rectangle(349.49088f, 144.0458f, 5.5688934f, 6.6683106f), - new Rectangle(349.49088f, 149.62372f, 5.5688934f, 6.6683106f), - new Rectangle(349.49088f, 155.20163f, 5.0079956f, 6.6683106f), - new Rectangle(349.49088f, 160.21864f, 5.5688934f, 6.6683106f), - new Rectangle(349.49088f, 233.05699f, 5.5688934f, 6.6683106f), - new Rectangle(349.49088f, 238.6349f, 5.5688934f, 6.6683106f), - new Rectangle(349.49088f, 244.21281f, 5.5688934f, 6.6683106f), - new Rectangle(349.49088f, 289.87076f, 5.5689087f, 6.6683106f), - new Rectangle(349.49088f, 295.44867f, 5.5689087f, 6.6683106f), - new Rectangle(349.49088f, 301.02658f, 2.7844543f, 6.6683106f), - new Rectangle(349.49088f, 303.82004f, 5.5689087f, 6.6683106f), - new Rectangle(349.49088f, 309.39795f, 5.5689087f, 
6.6683106f), - new Rectangle(349.49088f, 314.97586f, 5.5689087f, 6.6683106f), - new Rectangle(349.49088f, 350.8542f, 5.5689087f, 6.6683106f), - new Rectangle(349.49088f, 356.4321f, 2.7844543f, 6.6683106f), - new Rectangle(349.49088f, 359.22556f, 5.5689087f, 6.6683106f), - new Rectangle(349.49088f, 364.80347f, 5.5689087f, 6.6683106f), - new Rectangle(349.49088f, 370.38138f, 5.5689087f, 6.6683106f), - new Rectangle(349.49088f, 375.9593f, 5.5689087f, 6.6683106f), - new Rectangle(349.49088f, 381.5372f, 5.5689087f, 6.6683106f), - new Rectangle(361.9708f, 132.9f, 6.6806793f, 6.6683106f), - new Rectangle(361.9708f, 139.58568f, 5.5688934f, 6.6683106f), - new Rectangle(361.9708f, 145.15958f, 2.2235565f, 6.6683106f), - new Rectangle(361.9708f, 147.38814f, 5.5688934f, 6.6683106f), - new Rectangle(361.9708f, 152.96204f, 5.5688934f, 6.6683106f), - new Rectangle(361.9708f, 158.53593f, 5.5688934f, 6.6683106f), - new Rectangle(361.9708f, 233.11406f, 5.5688934f, 6.6683106f), - new Rectangle(361.9708f, 238.68796f, 5.5688934f, 6.6683106f), - new Rectangle(361.9708f, 244.26186f, 5.5688934f, 6.6683106f), - new Rectangle(361.9708f, 287.10028f, 5.5689087f, 6.6683106f), - new Rectangle(361.9708f, 292.6742f, 5.5689087f, 6.6683106f), - new Rectangle(361.9708f, 298.2481f, 5.5689087f, 6.6683106f), - new Rectangle(361.9708f, 303.82202f, 2.7844543f, 6.6683106f), - new Rectangle(361.9708f, 306.61148f, 5.5689087f, 6.6683106f), - new Rectangle(361.9708f, 312.1854f, 5.5689087f, 6.6683106f), - new Rectangle(361.9708f, 317.7593f, 5.5689087f, 6.6683106f), - new Rectangle(361.9708f, 350.87723f, 5.5689087f, 6.6683106f), - new Rectangle(361.9708f, 356.45114f, 2.7844543f, 6.6683106f), - new Rectangle(361.9708f, 359.2406f, 5.5689087f, 6.6683106f), - new Rectangle(361.9708f, 364.8145f, 5.5689087f, 6.6683106f), - new Rectangle(361.9708f, 370.38843f, 5.5689087f, 6.6683106f), - new Rectangle(361.9708f, 375.96234f, 5.5689087f, 6.6683106f), - new Rectangle(361.9708f, 381.53625f, 5.5689087f, 6.6683106f), - new 
Rectangle(374.4507f, 132.9f, 6.6806793f, 6.6683106f), - new Rectangle(374.4507f, 139.58768f, 7.231552f, 6.6683106f), - new Rectangle(374.4507f, 146.82623f, 5.5688934f, 6.6683106f), - new Rectangle(374.4507f, 152.40215f, 5.5688934f, 6.6683106f), - new Rectangle(374.4507f, 157.97806f, 5.5688934f, 6.6683106f), - new Rectangle(374.4507f, 163.55397f, 5.5688934f, 6.6683106f), - new Rectangle(374.4507f, 233.09206f, 5.5688934f, 6.6683106f), - new Rectangle(374.4507f, 238.66797f, 5.5688934f, 6.6683106f), - new Rectangle(374.4507f, 244.24388f, 5.5688934f, 6.6683106f), - new Rectangle(374.4507f, 287.0823f, 5.5689087f, 6.6683106f), - new Rectangle(374.4507f, 292.6582f, 5.5689087f, 6.6683106f), - new Rectangle(374.4507f, 298.2341f, 5.5689087f, 6.6683106f), - new Rectangle(374.4507f, 303.81f, 2.7844543f, 6.6683106f), - new Rectangle(374.4507f, 306.60147f, 5.5689087f, 6.6683106f), - new Rectangle(374.4507f, 312.17737f, 5.5689087f, 6.6683106f), - new Rectangle(374.4507f, 317.75327f, 5.5689087f, 6.6683106f), - new Rectangle(374.4507f, 350.87115f, 5.5689087f, 6.6683106f), - new Rectangle(374.4507f, 356.44705f, 2.7844543f, 6.6683106f), - new Rectangle(374.4507f, 359.23853f, 5.5689087f, 6.6683106f), - new Rectangle(374.4507f, 364.81442f, 5.5689087f, 6.6683106f), - new Rectangle(374.4507f, 370.39032f, 5.5689087f, 6.6683106f), - new Rectangle(374.4507f, 375.96622f, 5.5689087f, 6.6683106f), - new Rectangle(374.4507f, 381.5421f, 5.5689087f, 6.6683106f), - new Rectangle(386.93063f, 132.9f, 7.231552f, 6.6683106f), - new Rectangle(386.93063f, 140.13855f, 3.3353271f, 6.6683106f), - new Rectangle(386.93063f, 143.4809f, 5.5688934f, 6.6683106f), - new Rectangle(386.93063f, 149.05681f, 5.5688934f, 6.6683106f), - new Rectangle(386.93063f, 154.63272f, 2.7844543f, 6.6683106f), - new Rectangle(386.93063f, 157.42418f, 2.2235565f, 6.6683106f), - new Rectangle(386.93063f, 159.65474f, 5.5688934f, 6.6683106f), - new Rectangle(386.93063f, 233.09406f, 5.5688934f, 6.6683106f), - new Rectangle(386.93063f, 
238.66997f, 5.5688934f, 6.6683106f), - new Rectangle(386.93063f, 244.24588f, 5.5688934f, 6.6683106f), - new Rectangle(386.93063f, 289.90482f, 5.5689087f, 6.6683106f), - new Rectangle(386.93063f, 295.4807f, 5.5689087f, 6.6683106f), - new Rectangle(386.93063f, 301.0566f, 2.7844543f, 6.6683106f), - new Rectangle(386.93063f, 303.84808f, 5.5689087f, 6.6683106f), - new Rectangle(386.93063f, 309.42398f, 5.5689087f, 6.6683106f), - new Rectangle(386.93063f, 314.99988f, 5.5689087f, 6.6683106f), - new Rectangle(386.93063f, 350.87918f, 5.5689087f, 6.6683106f), - new Rectangle(386.93063f, 356.45508f, 2.7844543f, 6.6683106f), - new Rectangle(386.93063f, 359.24655f, 5.5689087f, 6.6683106f), - new Rectangle(386.93063f, 364.82245f, 5.5689087f, 6.6683106f), - new Rectangle(386.93063f, 370.39835f, 5.5689087f, 6.6683106f), - new Rectangle(386.93063f, 375.97424f, 5.5689087f, 6.6683106f), - new Rectangle(386.93063f, 381.55014f, 5.5689087f, 6.6683106f), - new Rectangle(399.41055f, 132.9f, 7.231552f, 6.6683106f), - new Rectangle(399.41055f, 140.13956f, 5.5688934f, 6.6683106f), - new Rectangle(399.41055f, 145.71646f, 5.5688934f, 6.6683106f), - new Rectangle(399.41055f, 151.29337f, 8.343323f, 6.6683106f), - new Rectangle(399.41055f, 159.64471f, 5.5688934f, 6.6683106f), - new Rectangle(399.41055f, 165.22162f, 3.3353271f, 6.6683106f), - new Rectangle(399.41055f, 168.56496f, 5.0079956f, 6.6683106f), - new Rectangle(399.41055f, 233.06499f, 5.5688934f, 6.6683106f), - new Rectangle(399.41055f, 238.64189f, 5.5688934f, 6.6683106f), - new Rectangle(399.41055f, 244.2188f, 5.5688934f, 6.6683106f), - new Rectangle(399.41055f, 289.8767f, 5.5689087f, 6.6683106f), - new Rectangle(399.41055f, 295.4536f, 5.5689087f, 6.6683106f), - new Rectangle(399.41055f, 301.03052f, 2.7844543f, 6.6683106f), - new Rectangle(399.41055f, 303.82297f, 5.5689087f, 6.6683106f), - new Rectangle(399.41055f, 309.39987f, 5.5689087f, 6.6683106f), - new Rectangle(399.41055f, 314.97678f, 5.5689087f, 6.6683106f), - new 
Rectangle(399.41055f, 350.85507f, 5.5689087f, 6.6683106f), - new Rectangle(399.41055f, 356.43198f, 2.7844543f, 6.6683106f), - new Rectangle(399.41055f, 359.22443f, 5.5689087f, 6.6683106f), - new Rectangle(399.41055f, 364.80133f, 5.5689087f, 6.6683106f), - new Rectangle(399.41055f, 370.37823f, 5.5689087f, 6.6683106f), - new Rectangle(399.41055f, 375.95514f, 5.5689087f, 6.6683106f), - new Rectangle(399.41055f, 381.53204f, 5.5689087f, 6.6683106f), - new Rectangle(411.89047f, 132.9f, 6.6806793f, 6.6683106f), - new Rectangle(411.89047f, 139.58467f, 5.5688934f, 6.6683106f), - new Rectangle(411.89047f, 145.15758f, 2.2235565f, 6.6683106f), - new Rectangle(411.89047f, 147.38513f, 5.5688934f, 6.6683106f), - new Rectangle(411.89047f, 152.95804f, 2.2235565f, 6.6683106f), - new Rectangle(411.89047f, 155.1856f, 5.5688934f, 6.6683106f), - new Rectangle(411.89047f, 160.7585f, 8.343323f, 6.6683106f), - new Rectangle(411.89047f, 233.11809f, 5.5688934f, 6.6683106f), - new Rectangle(411.89047f, 238.691f, 5.5688934f, 6.6683106f), - new Rectangle(411.89047f, 244.2639f, 5.5688934f, 6.6683106f), - new Rectangle(411.89047f, 289.92184f, 5.5689087f, 6.6683106f), - new Rectangle(411.89047f, 295.49475f, 5.5689087f, 6.6683106f), - new Rectangle(411.89047f, 301.06766f, 2.7844543f, 6.6683106f), - new Rectangle(411.89047f, 303.8561f, 5.5689087f, 6.6683106f), - new Rectangle(411.89047f, 309.42902f, 5.5689087f, 6.6683106f), - new Rectangle(411.89047f, 315.00192f, 5.5689087f, 6.6683106f), - new Rectangle(411.89047f, 350.88025f, 5.5689087f, 6.6683106f), - new Rectangle(411.89047f, 356.45316f, 2.7844543f, 6.6683106f), - new Rectangle(411.89047f, 359.2416f, 5.5689087f, 6.6683106f), - new Rectangle(411.89047f, 364.8145f, 5.5689087f, 6.6683106f), - new Rectangle(411.89047f, 370.38742f, 5.5689087f, 6.6683106f), - new Rectangle(411.89047f, 375.96033f, 5.5689087f, 6.6683106f), - new Rectangle(411.89047f, 381.53323f, 5.5689087f, 6.6683106f), - new Rectangle(424.3704f, 132.9f, 6.6806793f, 6.6683106f), - new 
Rectangle(424.3704f, 139.58467f, 2.2235565f, 6.6683106f), - new Rectangle(424.3704f, 141.81223f, 5.5688934f, 6.6683106f), - new Rectangle(424.3704f, 147.38513f, 5.0079956f, 6.6683106f), - new Rectangle(424.3704f, 152.39714f, 5.5688934f, 6.6683106f), - new Rectangle(424.3704f, 157.97005f, 5.0079956f, 6.6683106f), - new Rectangle(424.3704f, 162.98206f, 2.2235565f, 6.6683106f), - new Rectangle(424.3704f, 165.20961f, 5.5688934f, 6.6683106f), - new Rectangle(424.3704f, 233.1291f, 5.5688934f, 6.6683106f), - new Rectangle(424.3704f, 238.70201f, 5.5688934f, 6.6683106f), - new Rectangle(424.3704f, 244.27492f, 5.5688934f, 6.6683106f), - new Rectangle(424.3704f, 289.93387f, 5.5689087f, 6.6683106f), - new Rectangle(424.3704f, 295.50677f, 5.5689087f, 6.6683106f), - new Rectangle(424.3704f, 301.07968f, 2.7844543f, 6.6683106f), - new Rectangle(424.3704f, 303.86813f, 5.5689087f, 6.6683106f), - new Rectangle(424.3704f, 309.44104f, 5.5689087f, 6.6683106f), - new Rectangle(424.3704f, 315.01395f, 5.5689087f, 6.6683106f), - new Rectangle(424.3704f, 350.89325f, 5.5689087f, 6.6683106f), - new Rectangle(424.3704f, 356.46616f, 2.7844543f, 6.6683106f), - new Rectangle(424.3704f, 359.2546f, 5.5689087f, 6.6683106f), - new Rectangle(424.3704f, 364.8275f, 5.5689087f, 6.6683106f), - new Rectangle(424.3704f, 370.40042f, 5.5689087f, 6.6683106f), - new Rectangle(424.3704f, 375.97333f, 5.5689087f, 6.6683106f), - new Rectangle(424.3704f, 381.54623f, 5.5689087f, 6.6683106f), - new Rectangle(436.8503f, 132.9f, 7.231552f, 6.6683106f), - new Rectangle(436.8503f, 140.13956f, 5.5688934f, 6.6683106f), - new Rectangle(436.8503f, 145.71646f, 5.5688934f, 6.6683106f), - new Rectangle(436.8503f, 151.29337f, 5.5688934f, 6.6683106f), - new Rectangle(436.8503f, 156.87027f, 5.5688934f, 6.6683106f), - new Rectangle(436.8503f, 162.44717f, 3.3353271f, 6.6683106f), - new Rectangle(436.8503f, 165.79051f, 5.0079956f, 6.6683106f), - new Rectangle(436.8503f, 233.04994f, 5.5688934f, 6.6683106f), - new Rectangle(436.8503f, 
238.62685f, 5.5688934f, 6.6683106f), - new Rectangle(436.8503f, 244.20375f, 5.5688934f, 6.6683106f), - new Rectangle(436.8503f, 289.8617f, 5.5689087f, 6.6683106f), - new Rectangle(436.8503f, 295.4386f, 5.5689087f, 6.6683106f), - new Rectangle(436.8503f, 301.0155f, 2.7844543f, 6.6683106f), - new Rectangle(436.8503f, 303.80795f, 5.5689087f, 6.6683106f), - new Rectangle(436.8503f, 309.38486f, 5.5689087f, 6.6683106f), - new Rectangle(436.8503f, 314.96176f, 5.5689087f, 6.6683106f), - new Rectangle(436.8503f, 350.84006f, 5.5689087f, 6.6683106f), - new Rectangle(436.8503f, 356.41696f, 2.7844543f, 6.6683106f), - new Rectangle(436.8503f, 359.2094f, 5.5689087f, 6.6683106f), - new Rectangle(436.8503f, 364.78632f, 5.5689087f, 6.6683106f), - new Rectangle(436.8503f, 370.36322f, 5.5689087f, 6.6683106f), - new Rectangle(436.8503f, 375.94012f, 5.5689087f, 6.6683106f), - new Rectangle(436.8503f, 381.51703f, 5.5689087f, 6.6683106f), - new Rectangle(449.33023f, 132.9f, 7.231552f, 6.6683106f), - new Rectangle(449.33023f, 140.14056f, 5.5688934f, 6.6683106f), - new Rectangle(449.33023f, 145.71848f, 3.3353271f, 6.6683106f), - new Rectangle(449.33023f, 149.06282f, 7.231552f, 6.6683106f), - new Rectangle(449.33023f, 156.30339f, 5.5688934f, 6.6683106f), - new Rectangle(449.33023f, 161.8813f, 5.0079956f, 6.6683106f), - new Rectangle(449.33023f, 233.04099f, 5.5688934f, 6.6683106f), - new Rectangle(449.33023f, 238.6189f, 5.5688934f, 6.6683106f), - new Rectangle(449.33023f, 244.19681f, 5.5688934f, 6.6683106f), - new Rectangle(449.33023f, 289.85474f, 5.5689087f, 6.6683106f), - new Rectangle(449.33023f, 295.43265f, 5.5689087f, 6.6683106f), - new Rectangle(449.33023f, 301.01056f, 2.7844543f, 6.6683106f), - new Rectangle(449.33023f, 303.80402f, 5.5689087f, 6.6683106f), - new Rectangle(449.33023f, 309.38193f, 5.5689087f, 6.6683106f), - new Rectangle(449.33023f, 314.95984f, 5.5689087f, 6.6683106f), - new Rectangle(449.33023f, 350.83817f, 5.5689087f, 6.6683106f), - new Rectangle(449.33023f, 
356.41608f, 2.7844543f, 6.6683106f), - new Rectangle(449.33023f, 359.20953f, 5.5689087f, 6.6683106f), - new Rectangle(449.33023f, 364.78745f, 5.5689087f, 6.6683106f), - new Rectangle(449.33023f, 370.36536f, 5.5689087f, 6.6683106f), - new Rectangle(449.33023f, 375.94327f, 5.5689087f, 6.6683106f), - new Rectangle(449.33023f, 381.52118f, 5.5689087f, 6.6683106f), - new Rectangle(461.81015f, 132.9f, 6.1197815f, 6.6683106f), - new Rectangle(461.81015f, 139.02478f, 2.2235565f, 6.6683106f), - new Rectangle(461.81015f, 141.25334f, 5.5688934f, 6.6683106f), - new Rectangle(461.81015f, 146.82724f, 2.2235565f, 6.6683106f), - new Rectangle(461.81015f, 149.0558f, 5.5688934f, 6.6683106f), - new Rectangle(461.81015f, 154.6297f, 5.5688934f, 6.6683106f), - new Rectangle(461.81015f, 160.2036f, 5.5688934f, 6.6683106f), - new Rectangle(461.81015f, 233.10205f, 5.5688934f, 6.6683106f), - new Rectangle(461.81015f, 238.67595f, 5.5688934f, 6.6683106f), - new Rectangle(461.81015f, 244.24985f, 5.5688934f, 6.6683106f), - new Rectangle(461.81015f, 289.90778f, 5.5689087f, 6.6683106f), - new Rectangle(461.81015f, 295.4817f, 5.5689087f, 6.6683106f), - new Rectangle(461.81015f, 301.0556f, 2.7844543f, 6.6683106f), - new Rectangle(461.81015f, 303.84506f, 5.5689087f, 6.6683106f), - new Rectangle(461.81015f, 309.41898f, 5.5689087f, 6.6683106f), - new Rectangle(461.81015f, 314.9929f, 5.5689087f, 6.6683106f), - new Rectangle(461.81015f, 350.87122f, 5.5689087f, 6.6683106f), - new Rectangle(461.81015f, 356.44513f, 2.7844543f, 6.6683106f), - new Rectangle(461.81015f, 359.2346f, 5.5689087f, 6.6683106f), - new Rectangle(461.81015f, 364.8085f, 5.5689087f, 6.6683106f), - new Rectangle(461.81015f, 370.38242f, 5.5689087f, 6.6683106f), - new Rectangle(461.81015f, 375.95633f, 5.5689087f, 6.6683106f), - new Rectangle(461.81015f, 381.53024f, 5.5689087f, 6.6683106f), - new Rectangle(474.05057f, 132.9f, 2.7844543f, 6.6683106f), - new Rectangle(474.05057f, 135.69044f, 3.3353271f, 6.6683106f), - new Rectangle(474.05057f, 
139.03178f, 5.5688934f, 6.6683106f), - new Rectangle(474.05057f, 144.60669f, 2.2235565f, 6.6683106f), - new Rectangle(474.05057f, 146.83626f, 5.5688934f, 6.6683106f), - new Rectangle(474.05057f, 152.41116f, 5.5688934f, 6.6683106f), - new Rectangle(474.05057f, 157.98607f, 5.5688934f, 6.6683106f), - new Rectangle(474.05057f, 233.10406f, 5.5688934f, 6.6683106f), - new Rectangle(474.05057f, 238.67897f, 5.5688934f, 6.6683106f), - new Rectangle(474.05057f, 244.25388f, 5.5688934f, 6.6683106f), - new Rectangle(474.05057f, 289.9118f, 5.5689087f, 6.6683106f), - new Rectangle(474.05057f, 295.48672f, 5.5689087f, 6.6683106f), - new Rectangle(474.05057f, 301.06165f, 2.7844543f, 6.6683106f), - new Rectangle(474.05057f, 303.8521f, 5.5689087f, 6.6683106f), - new Rectangle(474.05057f, 309.42703f, 5.5689087f, 6.6683106f), - new Rectangle(474.05057f, 315.00195f, 5.5689087f, 6.6683106f), - new Rectangle(474.05057f, 350.88028f, 5.5689087f, 6.6683106f), - new Rectangle(474.05057f, 356.4552f, 2.7844543f, 6.6683106f), - new Rectangle(474.05057f, 359.24567f, 5.5689087f, 6.6683106f), - new Rectangle(474.05057f, 364.8206f, 5.5689087f, 6.6683106f), - new Rectangle(474.05057f, 370.3955f, 5.5689087f, 6.6683106f), - new Rectangle(474.05057f, 375.97043f, 5.5689087f, 6.6683106f), - new Rectangle(474.05057f, 381.54535f, 5.5689087f, 6.6683106f), - new Rectangle(487.4904f, 132.9f, 6.1197815f, 6.6683106f), - new Rectangle(487.4904f, 139.02779f, 3.3353271f, 6.6683106f), - new Rectangle(487.4904f, 142.37112f, 5.5688934f, 6.6683106f), - new Rectangle(487.4904f, 147.94803f, 5.5688934f, 6.6683106f), - new Rectangle(487.4904f, 153.52493f, 5.0079956f, 6.6683106f), - new Rectangle(487.4904f, 158.54094f, 5.5688934f, 6.6683106f), - new Rectangle(487.4904f, 233.05898f, 5.5688934f, 6.6683106f), - new Rectangle(487.4904f, 238.63588f, 5.5688934f, 6.6683106f), - new Rectangle(487.4904f, 244.21278f, 5.5688934f, 6.6683106f), - new Rectangle(487.4904f, 287.0512f, 5.5689087f, 6.6683106f), - new Rectangle(487.4904f, 
292.6281f, 5.5689087f, 6.6683106f), - new Rectangle(487.4904f, 298.20502f, 5.5689087f, 6.6683106f), - new Rectangle(487.4904f, 303.78192f, 2.7844543f, 6.6683106f), - new Rectangle(487.4904f, 306.57437f, 5.5689087f, 6.6683106f), - new Rectangle(487.4904f, 312.15128f, 5.5689087f, 6.6683106f), - new Rectangle(487.4904f, 317.72818f, 5.5689087f, 6.6683106f), - new Rectangle(487.4904f, 350.84607f, 5.5689087f, 6.6683106f), - new Rectangle(487.4904f, 356.42297f, 2.7844543f, 6.6683106f), - new Rectangle(487.4904f, 359.21542f, 5.5689087f, 6.6683106f), - new Rectangle(487.4904f, 364.79233f, 5.5689087f, 6.6683106f), - new Rectangle(487.4904f, 370.36923f, 5.5689087f, 6.6683106f), - new Rectangle(487.4904f, 375.94614f, 5.5689087f, 6.6683106f), - new Rectangle(487.4904f, 381.52304f, 5.5689087f, 6.6683106f), - new Rectangle(499.9703f, 132.9f, 6.6806793f, 6.6683106f), - new Rectangle(499.9703f, 139.58568f, 5.5688934f, 6.6683106f), - new Rectangle(499.9703f, 145.15958f, 3.3353271f, 6.6683106f), - new Rectangle(499.9703f, 148.49991f, 2.7844543f, 6.6683106f), - new Rectangle(499.9703f, 151.28937f, 5.5688934f, 6.6683106f), - new Rectangle(499.9703f, 156.86327f, 5.5688934f, 6.6683106f), - new Rectangle(499.9703f, 162.43716f, 5.5688934f, 6.6683106f), - new Rectangle(499.9703f, 168.01106f, 2.2235565f, 6.6683106f), - new Rectangle(499.9703f, 233.10905f, 5.5688934f, 6.6683106f), - new Rectangle(499.9703f, 238.68295f, 5.5688934f, 6.6683106f), - new Rectangle(499.9703f, 244.25685f, 5.5688934f, 6.6683106f), - new Rectangle(499.9703f, 289.9148f, 5.5689087f, 6.6683106f), - new Rectangle(499.9703f, 295.4887f, 5.5689087f, 6.6683106f), - new Rectangle(499.9703f, 301.06262f, 2.7844543f, 6.6683106f), - new Rectangle(499.9703f, 303.85208f, 5.5689087f, 6.6683106f), - new Rectangle(499.9703f, 309.426f, 5.5689087f, 6.6683106f), - new Rectangle(499.9703f, 314.9999f, 5.5689087f, 6.6683106f), - new Rectangle(499.9703f, 350.87823f, 5.5689087f, 6.6683106f), - new Rectangle(499.9703f, 356.45215f, 2.7844543f, 
6.6683106f), - new Rectangle(499.9703f, 359.2416f, 5.5689087f, 6.6683106f), - new Rectangle(499.9703f, 364.81552f, 5.5689087f, 6.6683106f), - new Rectangle(499.9703f, 370.38943f, 5.5689087f, 6.6683106f), - new Rectangle(499.9703f, 375.96335f, 5.5689087f, 6.6683106f), - new Rectangle(499.9703f, 381.53726f, 5.5689087f, 6.6683106f), - new Rectangle(512.4502f, 132.9f, 7.231552f, 6.6683106f), - new Rectangle(512.4502f, 140.13655f, 5.5688934f, 6.6683106f), - new Rectangle(512.4502f, 145.71045f, 2.2235565f, 6.6683106f), - new Rectangle(512.4502f, 147.93901f, 2.7844543f, 6.6683106f), - new Rectangle(512.4502f, 150.72847f, 5.5688934f, 6.6683106f), - new Rectangle(512.4502f, 156.30237f, 5.5688934f, 6.6683106f), - new Rectangle(512.4502f, 161.87627f, 2.7844543f, 6.6683106f), - new Rectangle(512.4502f, 164.63768f, 6.6806793f, 6.6683106f), - new Rectangle(512.4502f, 171.32336f, 2.2235565f, 6.6683106f), - new Rectangle(512.4502f, 173.55193f, 5.5688934f, 6.6683106f), - new Rectangle(512.4502f, 179.12582f, 5.5688934f, 6.6683106f), - new Rectangle(512.4502f, 184.69972f, 5.5688934f, 6.6683106f), - new Rectangle(512.4502f, 190.27362f, 5.5688934f, 6.6683106f), - new Rectangle(512.4502f, 195.84752f, 8.343323f, 6.6683106f), - new Rectangle(512.4502f, 233.10704f, 5.5688934f, 6.6683106f), - new Rectangle(512.4502f, 238.68094f, 5.5688934f, 6.6683106f), - new Rectangle(512.4502f, 244.25484f, 5.5688934f, 6.6683106f), - new Rectangle(512.4502f, 287.09326f, 5.5689087f, 6.6683106f), - new Rectangle(512.4502f, 292.66718f, 5.5689087f, 6.6683106f), - new Rectangle(512.4502f, 298.2411f, 5.5689087f, 6.6683106f), - new Rectangle(512.4502f, 303.815f, 2.7844543f, 6.6683106f), - new Rectangle(512.4502f, 306.60446f, 5.5689087f, 6.6683106f), - new Rectangle(512.4502f, 312.17838f, 5.5689087f, 6.6683106f), - new Rectangle(512.4502f, 317.7523f, 5.5689087f, 6.6683106f), - new Rectangle(512.4502f, 350.8702f, 5.5689087f, 6.6683106f), - new Rectangle(512.4502f, 356.44412f, 2.7844543f, 6.6683106f), - new 
Rectangle(512.4502f, 359.23358f, 5.5689087f, 6.6683106f), - new Rectangle(512.4502f, 364.8075f, 5.5689087f, 6.6683106f), - new Rectangle(512.4502f, 370.3814f, 5.5689087f, 6.6683106f), - new Rectangle(512.4502f, 375.95532f, 5.5689087f, 6.6683106f), - new Rectangle(512.4502f, 381.52924f, 5.5689087f, 6.6683106f), - new Rectangle(524.9301f, 132.9f, 6.6806793f, 6.6683106f), - new Rectangle(524.9301f, 139.58568f, 5.5688934f, 6.6683106f), - new Rectangle(524.9301f, 145.15958f, 5.5688934f, 6.6683106f), - new Rectangle(524.9301f, 150.73347f, 2.2235565f, 6.6683106f), - new Rectangle(524.9301f, 152.96204f, 5.5688934f, 6.6683106f), - new Rectangle(524.9301f, 233.12007f, 5.5688934f, 6.6683106f), - new Rectangle(524.9301f, 238.69397f, 5.5688934f, 6.6683106f), - new Rectangle(524.9301f, 244.26787f, 5.5688934f, 6.6683106f), - new Rectangle(524.9301f, 287.1063f, 5.5689087f, 6.6683106f), - new Rectangle(524.9301f, 292.6802f, 5.5689087f, 6.6683106f), - new Rectangle(524.9301f, 298.25412f, 5.5689087f, 6.6683106f), - new Rectangle(524.9301f, 303.82803f, 2.7844543f, 6.6683106f), - new Rectangle(524.9301f, 306.6175f, 5.5689087f, 6.6683106f), - new Rectangle(524.9301f, 312.1914f, 5.5689087f, 6.6683106f), - new Rectangle(524.9301f, 317.76532f, 5.5689087f, 6.6683106f), - new Rectangle(524.9301f, 350.88324f, 5.5689087f, 6.6683106f), - new Rectangle(524.9301f, 356.45715f, 2.7844543f, 6.6683106f), - new Rectangle(524.9301f, 359.2466f, 5.5689087f, 6.6683106f), - new Rectangle(524.9301f, 364.82053f, 5.5689087f, 6.6683106f), - new Rectangle(524.9301f, 370.39444f, 5.5689087f, 6.6683106f), - new Rectangle(524.9301f, 375.96835f, 5.5689087f, 6.6683106f), - new Rectangle(524.9301f, 381.54227f, 5.5689087f, 6.6683106f), - new Rectangle(537.41003f, 132.9f, 2.7844543f, 6.6683106f), - new Rectangle(537.41003f, 135.68745f, 2.7844543f, 6.6683106f), - new Rectangle(537.41003f, 138.47491f, 5.5688934f, 6.6683106f), - new Rectangle(537.41003f, 144.04681f, 2.2235565f, 6.6683106f), - new Rectangle(537.41003f, 
146.27338f, 5.0079956f, 6.6683106f), - new Rectangle(537.41003f, 233.15315f, 5.5688934f, 6.6683106f), - new Rectangle(537.41003f, 238.72505f, 5.5688934f, 6.6683106f), - new Rectangle(537.41003f, 244.29695f, 5.5688934f, 6.6683106f), - new Rectangle(537.41003f, 287.13538f, 5.5689087f, 6.6683106f), - new Rectangle(537.41003f, 292.70728f, 5.5689087f, 6.6683106f), - new Rectangle(537.41003f, 298.27917f, 5.5689087f, 6.6683106f), - new Rectangle(537.41003f, 303.85107f, 2.7844543f, 6.6683106f), - new Rectangle(537.41003f, 306.63852f, 5.5689087f, 6.6683106f), - new Rectangle(537.41003f, 312.21042f, 5.5689087f, 6.6683106f), - new Rectangle(537.41003f, 317.78232f, 5.5689087f, 6.6683106f), - new Rectangle(537.41003f, 350.9002f, 5.5689087f, 6.6683106f), - new Rectangle(537.41003f, 356.4721f, 2.7844543f, 6.6683106f), - new Rectangle(537.41003f, 359.25955f, 5.5689087f, 6.6683106f), - new Rectangle(537.41003f, 364.83145f, 5.5689087f, 6.6683106f), - new Rectangle(537.41003f, 370.40335f, 5.5689087f, 6.6683106f), - new Rectangle(537.41003f, 375.97525f, 5.5689087f, 6.6683106f), - new Rectangle(537.41003f, 381.54715f, 5.5689087f, 6.6683106f), - new Rectangle(549.65045f, 132.9f, 7.79245f, 6.6683106f), - new Rectangle(549.65045f, 140.69945f, 5.5688934f, 6.6683106f), - new Rectangle(549.65045f, 146.27536f, 3.3353271f, 6.6683106f), - new Rectangle(549.65045f, 149.6177f, 8.343323f, 6.6683106f), - new Rectangle(549.65045f, 157.96805f, 5.5688934f, 6.6683106f), - new Rectangle(549.65045f, 163.54396f, 5.5688934f, 6.6683106f), - new Rectangle(549.65045f, 169.11987f, 5.0079956f, 6.6683106f), - new Rectangle(549.65045f, 233.08005f, 5.5688934f, 6.6683106f), - new Rectangle(549.65045f, 238.65596f, 5.5688934f, 6.6683106f), - new Rectangle(549.65045f, 244.23187f, 5.5688934f, 6.6683106f), - new Rectangle(549.65045f, 287.0703f, 5.5689087f, 6.6683106f), - new Rectangle(549.65045f, 292.6462f, 5.5689087f, 6.6683106f), - new Rectangle(549.65045f, 298.2221f, 5.5689087f, 6.6683106f), - new 
Rectangle(549.65045f, 303.798f, 2.7844543f, 6.6683106f), - new Rectangle(549.65045f, 306.58948f, 5.5689087f, 6.6683106f), - new Rectangle(549.65045f, 312.16537f, 5.5689087f, 6.6683106f), - new Rectangle(549.65045f, 317.74127f, 5.5689087f, 6.6683106f), - new Rectangle(549.65045f, 350.85916f, 5.5689087f, 6.6683106f), - new Rectangle(549.65045f, 356.43506f, 2.7844543f, 6.6683106f), - new Rectangle(549.65045f, 359.22653f, 5.5689087f, 6.6683106f), - new Rectangle(549.65045f, 364.80243f, 5.5689087f, 6.6683106f), - new Rectangle(549.65045f, 370.37833f, 5.5689087f, 6.6683106f), - new Rectangle(549.65045f, 375.95422f, 5.5689087f, 6.6683106f), - new Rectangle(549.65045f, 381.53012f, 5.5689087f, 6.6683106f), - new Rectangle(518.2708f, 412.49863f, 6.680664f, 6.6683106f), - new Rectangle(518.2708f, 419.1803f, 5.0079956f, 6.6683106f), - new Rectangle(518.2708f, 424.1893f, 5.0079956f, 6.6683106f), - new Rectangle(518.2708f, 429.1983f, 5.5689087f, 6.6683106f), - new Rectangle(518.2708f, 434.7682f, 2.2235413f, 6.6683106f), - new Rectangle(518.2708f, 436.99274f, 2.2235413f, 6.6683106f), - new Rectangle(518.2708f, 439.2173f, 5.5689087f, 6.6683106f), - new Rectangle(518.2708f, 444.78717f, 5.5689087f, 6.6683106f), - new Rectangle(518.2708f, 450.35706f, 2.7844543f, 6.6683106f), - new Rectangle(76.27562f, 147.47928f, 7.231552f, 6.9438605f), - new Rectangle(76.27562f, 154.71384f, 5.5688934f, 6.9438605f), - new Rectangle(76.27562f, 160.28574f, 5.5688934f, 6.9438605f), - new Rectangle(76.27562f, 165.85764f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 171.98042f, 3.896225f, 6.9438605f), - new Rectangle(76.27562f, 175.87965f, 5.5688934f, 6.9438605f), - new Rectangle(76.27562f, 181.45155f, 5.5688934f, 6.9438605f), - new Rectangle(76.27562f, 187.02345f, 5.5688934f, 6.9438605f), - new Rectangle(76.27562f, 192.42207f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 195.18349f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 201.30627f, 3.3353271f, 6.9438605f), - new 
Rectangle(76.27562f, 204.6446f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 207.40602f, 3.3353271f, 6.9438605f), - new Rectangle(76.27562f, 210.74435f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 216.86714f, 5.5688934f, 6.9438605f), - new Rectangle(76.27562f, 222.43904f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 225.20045f, 3.896225f, 6.9438605f), - new Rectangle(76.27562f, 229.09969f, 5.5688934f, 6.9438605f), - new Rectangle(76.27562f, 234.67159f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 240.79437f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 243.58183f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 246.3693f, 5.5688934f, 6.9438605f), - new Rectangle(76.27562f, 251.9412f, 5.5689087f, 6.9438605f), - new Rectangle(76.27562f, 257.5131f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 260.2745f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 266.39728f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 272.52005f, 3.3353271f, 6.9438605f), - new Rectangle(76.27562f, 275.85837f, 5.5689087f, 6.9438605f), - new Rectangle(76.27562f, 281.43027f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 284.2177f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 290.34048f, 5.5689087f, 6.9438605f), - new Rectangle(76.27562f, 295.91238f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 302.03516f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 304.79657f, 3.3353271f, 6.9438605f), - new Rectangle(76.27562f, 308.1349f, 3.8962097f, 6.9438605f), - new Rectangle(76.27562f, 312.03412f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 318.1569f, 8.904236f, 6.9438605f), - new Rectangle(76.27562f, 327.06412f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 329.82553f, 3.3353271f, 6.9438605f), - new Rectangle(76.27562f, 333.16385f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 339.28662f, 5.5689087f, 6.9438605f), - new Rectangle(76.27562f, 344.85852f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 
347.61993f, 5.5689087f, 6.9438605f), - new Rectangle(76.27562f, 353.19183f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 359.3146f, 3.8962097f, 6.9438605f), - new Rectangle(76.27562f, 363.21384f, 5.5689087f, 6.9438605f), - new Rectangle(76.27562f, 368.67255f, 5.5689087f, 6.9438605f), - new Rectangle(76.27562f, 374.24445f, 5.5689087f, 6.9438605f), - new Rectangle(76.27562f, 379.64307f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 382.40448f, 5.5689087f, 6.9438605f), - new Rectangle(76.27562f, 387.97638f, 5.5689087f, 6.9438605f), - new Rectangle(76.27562f, 393.54828f, 3.8962097f, 6.9438605f), - new Rectangle(76.27562f, 397.4475f, 3.8962097f, 6.9438605f), - new Rectangle(76.27562f, 401.34674f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 404.1342f, 5.5689087f, 6.9438605f), - new Rectangle(76.27562f, 409.7061f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 415.82886f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 418.59027f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 424.71304f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 430.83582f, 3.3353271f, 6.9438605f), - new Rectangle(76.27562f, 434.17413f, 2.7844543f, 6.9438605f), - new Rectangle(76.27562f, 436.93555f, 6.1197815f, 6.9438605f), - new Rectangle(76.27562f, 443.05832f, 5.5689087f, 6.9438605f), - new Rectangle(76.27562f, 448.45694f, 2.7844543f, 6.9438605f), - new Rectangle(88.9359f, 186.0589f, 7.231552f, 6.9438605f), - new Rectangle(88.9359f, 193.29646f, 7.231552f, 6.9438605f), - new Rectangle(88.9359f, 200.53403f, 7.231552f, 6.9438605f), - new Rectangle(88.9359f, 207.77159f, 6.6806793f, 6.9438605f), - new Rectangle(88.9359f, 214.45827f, 7.231552f, 6.9438605f), - new Rectangle(88.9359f, 221.69583f, 2.7844543f, 6.9438605f), - new Rectangle(88.9359f, 224.45724f, 3.3353271f, 6.9438605f), - new Rectangle(88.9359f, 227.79858f, 5.5688934f, 6.9438605f), - new Rectangle(88.9359f, 233.37349f, 5.5688934f, 6.9438605f), - new Rectangle(88.9359f, 238.9484f, 5.5688934f, 
6.9438605f), - new Rectangle(88.9359f, 244.5233f, 5.5688934f, 6.9438605f), - new Rectangle(88.9359f, 250.0982f, 2.7844543f, 6.9438605f), - new Rectangle(88.9359f, 252.88866f, 2.7844543f, 6.9438605f), - new Rectangle(88.9359f, 255.65007f, 6.119766f, 6.9438605f), - new Rectangle(88.9359f, 261.77585f, 6.1197815f, 6.9438605f), - new Rectangle(88.9359f, 267.90164f, 3.3353271f, 6.9438605f), - new Rectangle(88.9359f, 271.24298f, 5.5689087f, 6.9438605f), - new Rectangle(88.9359f, 276.8179f, 2.7844543f, 6.9438605f), - new Rectangle(88.9359f, 279.60837f, 2.7844543f, 6.9438605f), - new Rectangle(88.9359f, 282.36978f, 5.5689087f, 6.9438605f), - new Rectangle(88.9359f, 287.9447f, 6.1197815f, 6.9438605f), - new Rectangle(88.9359f, 294.0705f, 3.8962097f, 6.9438605f), - new Rectangle(88.9359f, 297.97272f, 5.5689087f, 6.9438605f), - new Rectangle(88.9359f, 303.43146f, 5.5689087f, 6.9438605f), - new Rectangle(88.9359f, 309.00638f, 5.5689087f, 6.9438605f), - new Rectangle(88.9359f, 314.40503f, 2.7844543f, 6.9438605f), - new Rectangle(88.9359f, 317.16644f, 6.1197815f, 6.9438605f), - new Rectangle(88.9359f, 323.29224f, 6.1197815f, 6.9438605f), - new Rectangle(88.9359f, 329.41803f, 6.1197815f, 6.9438605f), - new Rectangle(88.9359f, 335.54382f, 6.1197815f, 6.9438605f), - new Rectangle(88.9359f, 341.66962f, 2.7844543f, 6.9438605f), - new Rectangle(88.9359f, 344.46008f, 5.5689087f, 6.9438605f), - new Rectangle(88.9359f, 350.035f, 3.3353271f, 6.9438605f), - new Rectangle(88.9359f, 353.37634f, 2.7844543f, 6.9438605f), - new Rectangle(88.9359f, 356.1668f, 6.1197815f, 6.9438605f), - new Rectangle(88.9359f, 362.2926f, 6.1197815f, 6.9438605f), - new Rectangle(88.9359f, 368.4184f, 2.7844543f, 6.9438605f), - new Rectangle(88.9359f, 371.1798f, 7.2315674f, 6.9438605f), - new Rectangle(88.9359f, 378.41736f, 5.8493347f, 6.9438605f), - new Rectangle(88.9359f, 384.2727f, 5.5689087f, 6.9438605f), - new Rectangle(88.9359f, 389.84763f, 5.5689087f, 6.9438605f), - new Rectangle(88.9359f, 395.42255f, 
5.5689087f, 6.9438605f), - new Rectangle(88.9359f, 400.99747f, 5.5689087f, 6.9438605f), - new Rectangle(88.9359f, 406.5724f, 3.3353271f, 6.9438605f), - new Rectangle(172.91153f, 423.59833f, 5.5689087f, 6.6683106f), - new Rectangle(172.91153f, 429.17725f, 5.5689087f, 6.6683106f), - new Rectangle(172.91153f, 434.75616f, 7.2315674f, 6.6683106f), - new Rectangle(254.75186f, 414.95853f, 8.343323f, 6.6683106f), - new Rectangle(254.75186f, 423.30685f, 5.5689087f, 6.6683106f), - new Rectangle(254.75186f, 428.88077f, 5.5689087f, 6.6683106f), - new Rectangle(254.75186f, 434.45468f, 2.2235413f, 6.6683106f), - new Rectangle(254.75186f, 436.68323f, 5.5689087f, 6.6683106f), - new Rectangle(254.75186f, 442.25714f, 8.343323f, 6.6683106f), - new Rectangle(392.75232f, 422.4585f, 7.2315674f, 6.6683106f), - new Rectangle(392.75232f, 429.70108f, 2.2235413f, 6.6683106f), - new Rectangle(392.75232f, 431.93564f, 5.5689087f, 6.6683106f), - new Rectangle(392.75232f, 437.51556f, 5.5689087f, 6.6683106f), - new Rectangle(554.5925f, 464.88f, 3.5270386f, 6.0470705f), - new Rectangle(576.26807f, 121.08f, 4.0859985f, 5.4315f), - new Rectangle(576.26807f, 125.1588f, 5.723999f, 5.4315f), - new Rectangle(576.26807f, 130.8756f, 4.0859985f, 5.4315f), - new Rectangle(576.26807f, 134.95439f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 138.198f, 6.9389954f, 5.4315f), - new Rectangle(576.26807f, 145.16219f, 5.4089966f, 5.4315f), - new Rectangle(576.26807f, 150.56578f, 3.5460052f, 5.4315f), - new Rectangle(576.26807f, 154.10458f, 5.4089966f, 5.4315f), - new Rectangle(576.26807f, 159.50638f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 162.74998f, 5.4629974f, 5.4315f), - new Rectangle(576.26807f, 168.20578f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 171.36658f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 174.61018f, 3.8430023f, 5.4315f), - new Rectangle(576.26807f, 178.44598f, 5.3639984f, 5.4315f), - new Rectangle(576.26807f, 183.80278f, 4.6889954f, 5.4315f), - new 
Rectangle(576.26807f, 188.48457f, 5.3639984f, 5.4315f), - new Rectangle(576.26807f, 193.84137f, 5.4089966f, 5.4315f), - new Rectangle(576.26807f, 199.24316f, 3.8430023f, 5.4315f), - new Rectangle(576.26807f, 203.07896f, 4.6889954f, 5.4315f), - new Rectangle(576.26807f, 207.76076f, 5.697006f, 5.4315f), - new Rectangle(576.26807f, 213.45056f, 5.3639984f, 5.4315f), - new Rectangle(576.26807f, 218.80736f, 3.8430023f, 5.4315f), - new Rectangle(576.26807f, 222.70976f, 4.6889954f, 5.4315f), - new Rectangle(576.26807f, 227.39156f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 230.63516f, 2.4660034f, 5.4315f), - new Rectangle(576.26807f, 233.15157f, 5.697006f, 5.4315f), - new Rectangle(576.26807f, 238.85307f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 242.09668f, 3.5460052f, 5.4315f), - new Rectangle(576.26807f, 245.63548f, 5.697006f, 5.4315f), - new Rectangle(576.26807f, 251.32529f, 5.3640137f, 5.4315f), - new Rectangle(576.26807f, 256.6821f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 259.9257f, 5.6069946f, 5.4315f), - new Rectangle(576.26807f, 265.52548f, 2.4660034f, 5.4315f), - new Rectangle(576.26807f, 268.04187f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 271.20267f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 274.36346f, 5.3640137f, 5.4315f), - new Rectangle(576.26807f, 279.72028f, 3.842987f, 5.4315f), - new Rectangle(576.26807f, 283.55606f, 5.3640137f, 5.4315f), - new Rectangle(576.26807f, 288.91287f, 5.696991f, 5.4315f), - new Rectangle(576.26807f, 294.60266f, 3.54599f, 5.4315f), - new Rectangle(576.26807f, 298.14145f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 301.38504f, 4.6889954f, 5.4315f), - new Rectangle(576.26807f, 306.06683f, 5.4630127f, 5.4315f), - new Rectangle(576.26807f, 311.52264f, 5.696991f, 5.4315f), - new Rectangle(576.26807f, 317.21243f, 5.696991f, 5.4315f), - new Rectangle(576.26807f, 322.90222f, 3.54599f, 5.4315f), - new Rectangle(576.26807f, 326.441f, 3.842987f, 5.4315f), - new Rectangle(576.26807f, 
330.2768f, 2.4660034f, 5.4315f), - new Rectangle(576.26807f, 332.79318f, 5.3640137f, 5.4315f), - new Rectangle(576.26807f, 338.15f, 4.6889954f, 5.4315f), - new Rectangle(576.26807f, 342.8318f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 346.07538f, 5.4089966f, 5.4315f), - new Rectangle(576.26807f, 351.47717f, 3.842987f, 5.4315f), - new Rectangle(576.26807f, 355.31296f, 5.3640137f, 5.4315f), - new Rectangle(576.26807f, 360.66977f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 363.91336f, 5.3640137f, 5.4315f), - new Rectangle(576.26807f, 369.27017f, 5.328003f, 5.4315f), - new Rectangle(576.26807f, 374.59097f, 3.54599f, 5.4315f), - new Rectangle(576.26807f, 378.12976f, 3.842987f, 5.4315f), - new Rectangle(576.26807f, 381.96555f, 5.4089966f, 5.4315f), - new Rectangle(576.26807f, 387.36734f, 4.6889954f, 5.4315f), - new Rectangle(576.26807f, 392.04913f, 3.54599f, 5.4315f), - new Rectangle(576.26807f, 395.58792f, 5.3640137f, 5.4315f), - new Rectangle(576.26807f, 400.99063f, 5.6069946f, 5.4315f), - new Rectangle(576.26807f, 406.59042f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 409.834f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 412.9948f, 3.842987f, 5.4315f), - new Rectangle(576.26807f, 416.8306f, 5.4630127f, 5.4315f), - new Rectangle(576.26807f, 422.2864f, 8.756989f, 5.4315f), - new Rectangle(576.26807f, 431.0362f, 3.1679993f, 5.4315f), - new Rectangle(576.26807f, 434.2798f, 5.6879883f, 5.4315f), - new Rectangle(576.26807f, 439.96057f, 5.696991f, 5.4315f), - new Rectangle(576.26807f, 445.65036f, 3.842987f, 5.4315f), - new Rectangle(576.26807f, 449.48615f, 5.4630127f, 5.4315f), - new Rectangle(576.26807f, 454.94196f, 4.6889954f, 5.4315f), - new Rectangle(576.26807f, 459.62375f, 3.54599f, 5.4315f), - new Rectangle(576.26807f, 463.16254f, 5.4089966f, 5.4315f), - new Rectangle(576.26807f, 468.56433f, 3.54599f, 5.4315f), - new Rectangle(576.26807f, 472.10312f, 3.1679993f, 5.4315f), - new Rectangle(587.18774f, 121.08f, 4.0859985f, 5.4315f), - new 
Rectangle(587.18774f, 125.154305f, 4.1310043f, 5.4315f), - new Rectangle(587.18774f, 129.2736f, 6.156006f, 5.4315f), - new Rectangle(587.18774f, 135.4575f, 4.6889954f, 5.4315f), - new Rectangle(587.18774f, 140.13751f, 2.4660034f, 5.4315f), - new Rectangle(587.18774f, 142.65392f, 5.3639984f, 5.4315f), - new Rectangle(587.18774f, 147.9963f, 5.697006f, 5.4315f), - new Rectangle(587.18774f, 153.68161f, 4.6889954f, 5.4315f), - new Rectangle(587.18774f, 158.35892f, 5.3639984f, 5.4315f), - new Rectangle(587.18774f, 163.71121f, 3.1679993f, 5.4315f), - new Rectangle(587.18774f, 166.89542f, 5.4089966f, 5.4315f), - new Rectangle(587.18774f, 172.29271f, 5.697006f, 5.4315f), - new Rectangle(587.18774f, 177.97801f, 5.6069946f, 5.4315f), - new Rectangle(587.18774f, 183.57332f, 3.1679993f, 5.4315f), - new Rectangle(587.18774f, 186.75752f, 3.5460052f, 5.4315f), - new Rectangle(587.18774f, 190.29182f, 5.3639984f, 5.4315f), - new Rectangle(587.18774f, 195.64412f, 4.6889954f, 5.4315f), - new Rectangle(587.18774f, 200.32143f, 5.697006f, 5.4315f), - new Rectangle(587.18774f, 206.00673f, 5.697006f, 5.4315f), - new Rectangle(587.18774f, 211.69203f, 5.4629974f, 5.4315f), - new Rectangle(587.18774f, 217.14333f, 2.4660034f, 5.4315f), - new Rectangle(587.18774f, 219.65973f, 5.4629974f, 5.4315f), - new Rectangle(587.18774f, 225.12003f, 5.6069946f, 5.4315f), - new Rectangle(587.18774f, 230.71533f, 5.328003f, 5.4315f), - new Rectangle(587.18774f, 236.03163f, 3.1679993f, 5.4315f), - new Rectangle(587.18774f, 239.21584f, 2.4660034f, 5.4315f), - new Rectangle(587.18774f, 241.73224f, 5.697006f, 5.4315f), - new Rectangle(587.18774f, 247.43375f, 3.1679993f, 5.4315f), - new Rectangle(587.18774f, 250.61795f, 5.6879883f, 5.4315f), - new Rectangle(587.18774f, 256.29425f, 5.696991f, 5.4315f), - new Rectangle(587.18774f, 261.97955f, 3.842987f, 5.4315f), - new Rectangle(587.18774f, 265.81085f, 5.4630127f, 5.4315f), - new Rectangle(587.18774f, 271.26215f, 5.6069946f, 5.4315f), - new Rectangle(587.18774f, 
276.85745f, 5.3640137f, 5.4315f), - new Rectangle(587.18774f, 282.20975f, 4.0859985f, 5.4315f), - new Rectangle(587.18774f, 286.28406f, 6.156006f, 5.4315f), - new Rectangle(587.18774f, 292.46796f, 3.54599f, 5.4315f), - new Rectangle(587.18774f, 296.00226f, 5.4089966f, 5.4315f), - new Rectangle(587.18774f, 301.39957f, 3.54599f, 5.4315f), - new Rectangle(587.18774f, 304.93387f, 2.4660034f, 5.4315f), - new Rectangle(587.18774f, 307.4503f, 4.6889954f, 5.4315f), - new Rectangle(587.18774f, 312.1276f, 3.54599f, 5.4315f), - new Rectangle(587.18774f, 315.6619f, 2.4660034f, 5.4315f), - new Rectangle(587.18774f, 318.1783f, 4.6889954f, 5.4315f), - new Rectangle(587.18774f, 322.8583f, 5.4089966f, 5.4315f), - new Rectangle(587.18774f, 328.2556f, 2.4660034f, 5.4315f), - new Rectangle(587.18774f, 330.77203f, 3.1679993f, 5.4315f), - new Rectangle(587.18774f, 333.95624f, 5.6069946f, 5.4315f), - new Rectangle(587.18774f, 339.55154f, 5.4630127f, 5.4315f), - new Rectangle(587.18774f, 345.00284f, 4.6889954f, 5.4315f), - new Rectangle(587.18774f, 349.68015f, 5.328003f, 5.4315f), - new Rectangle(587.18774f, 354.99646f, 5.3640137f, 5.4315f), - new Rectangle(587.18774f, 360.34875f, 3.54599f, 5.4315f), - new Rectangle(587.18774f, 363.88306f, 5.6069946f, 5.4315f), - new Rectangle(587.18774f, 369.47836f, 5.4630127f, 5.4315f), - new Rectangle(587.18774f, 374.92966f, 5.4630127f, 5.4315f), - new Rectangle(587.18774f, 380.38095f, 5.328003f, 5.4315f), - new Rectangle(587.18774f, 385.69727f, 4.131012f, 5.4315f), - new Rectangle(587.18774f, 389.81656f, 3.276001f, 5.4315f), - new Rectangle(587.18774f, 393.11868f, 3.1679993f, 5.4315f), - new Rectangle(587.18774f, 396.3029f, 5.723999f, 5.4315f), - new Rectangle(587.18774f, 402.0152f, 5.723999f, 5.4315f), - new Rectangle(587.18774f, 407.7275f, 5.723999f, 5.4315f), - new Rectangle(587.18774f, 413.49023f, 5.723999f, 5.4315f), - new Rectangle(587.18774f, 419.20255f, 4.0859985f, 5.4315f), - new Rectangle(587.18774f, 423.27686f, 3.276001f, 5.4315f), - new 
Rectangle(587.18774f, 426.57898f, 3.1679993f, 5.4315f), - new Rectangle(610.1675f, 121.08f, 4.0859985f, 5.4315f), - new Rectangle(610.1675f, 125.1597f, 5.723999f, 5.4315f), - new Rectangle(610.1675f, 130.8774f, 4.0859985f, 5.4315f), - new Rectangle(610.1675f, 134.95709f, 3.1679993f, 5.4315f), - new Rectangle(610.1675f, 142.16069f, 5.6069946f, 5.4315f), - new Rectangle(610.1675f, 147.76138f, 3.1679993f, 5.4315f), - new Rectangle(610.1675f, 154.96498f, 3.8430023f, 5.4315f), - new Rectangle(610.1675f, 158.80168f, 5.3639984f, 5.4315f), - new Rectangle(610.1675f, 164.15938f, 5.6069946f, 5.4315f), - new Rectangle(610.1675f, 169.76007f, 3.8430023f, 5.4315f), - new Rectangle(610.1675f, 173.66248f, 5.3639984f, 5.4315f), - new Rectangle(610.1675f, 179.02017f, 4.6889954f, 5.4315f), - new Rectangle(610.1675f, 183.70287f, 5.3639984f, 5.4315f), - new Rectangle(610.1675f, 189.06056f, 5.697006f, 5.4315f), - new Rectangle(610.1675f, 194.75127f, 3.5460052f, 5.4315f), - new Rectangle(610.1675f, 198.29097f, 4.6889954f, 5.4315f), - new Rectangle(610.1675f, 202.97366f, 3.1679993f, 5.4315f), - new Rectangle(610.1675f, 210.17726f, 3.5460052f, 5.4315f), - new Rectangle(610.1675f, 213.71696f, 5.697006f, 5.4315f), - new Rectangle(610.1675f, 219.40767f, 5.3639984f, 5.4315f), - new Rectangle(610.1675f, 224.76537f, 3.1679993f, 5.4315f), - new Rectangle(610.1675f, 231.96896f, 5.4089966f, 5.4315f), - new Rectangle(610.1675f, 237.43286f, 4.6889954f, 5.4315f), - new Rectangle(610.1675f, 242.11555f, 4.6889954f, 5.4315f), - new Rectangle(610.1675f, 246.79825f, 5.697006f, 5.4315f), - new Rectangle(610.1675f, 252.48895f, 3.842987f, 5.4315f), - new Rectangle(610.1675f, 256.32565f, 5.4089966f, 5.4315f), - new Rectangle(610.1675f, 261.72836f, 4.6889954f, 5.4315f), - new Rectangle(610.1675f, 266.41107f, 5.328003f, 5.4315f), - new Rectangle(610.1675f, 271.73276f, 3.1679993f, 5.4315f), - new Rectangle(610.1675f, 278.93637f, 4.6889954f, 5.4315f), - new Rectangle(610.1675f, 283.61908f, 5.4089966f, 5.4315f), - 
new Rectangle(610.1675f, 289.0218f, 2.4660034f, 5.4315f), - new Rectangle(610.1675f, 291.53818f, 4.6889954f, 5.4315f), - new Rectangle(610.1675f, 296.1579f, 5.696991f, 5.4315f), - new Rectangle(610.1675f, 301.8486f, 2.4660034f, 5.4315f), - new Rectangle(610.1675f, 304.365f, 5.4089966f, 5.4315f), - new Rectangle(610.1675f, 309.7677f, 3.54599f, 5.4315f), - new Rectangle(610.1675f, 313.3074f, 5.3640137f, 5.4315f), - new Rectangle(610.1675f, 318.6651f, 5.6069946f, 5.4315f), - new Rectangle(610.1675f, 324.2658f, 3.1679993f, 5.4315f), - new Rectangle(610.1675f, 331.46942f, 3.54599f, 5.4315f), - new Rectangle(610.1675f, 335.00912f, 5.696991f, 5.4315f), - new Rectangle(610.1675f, 340.69983f, 3.842987f, 5.4315f), - new Rectangle(610.1675f, 344.53653f, 5.4630127f, 5.4315f), - new Rectangle(610.1675f, 349.99323f, 5.696991f, 5.4315f), - new Rectangle(610.1675f, 355.68393f, 5.6069946f, 5.4315f), - new Rectangle(610.1675f, 361.28464f, 5.696991f, 5.4315f), - new Rectangle(610.1675f, 366.97534f, 3.1679993f, 5.4315f), - new Rectangle(610.1675f, 374.17896f, 3.54599f, 5.4315f), - new Rectangle(610.1675f, 377.71866f, 5.696991f, 5.4315f), - new Rectangle(610.1675f, 383.40936f, 5.3640137f, 5.4315f), - new Rectangle(610.1675f, 388.76706f, 3.1679993f, 5.4315f), - new Rectangle(610.1675f, 395.97067f, 3.1679993f, 5.4315f), - new Rectangle(610.1675f, 399.1324f, 5.4630127f, 5.4315f), - new Rectangle(610.1675f, 404.58908f, 3.842987f, 5.4315f), - new Rectangle(610.1675f, 408.42578f, 8.756989f, 5.4315f), - new Rectangle(610.1675f, 417.17648f, 5.696991f, 5.4315f), - new Rectangle(610.1675f, 422.8672f, 2.4660034f, 5.4315f), - new Rectangle(610.1675f, 425.38358f, 5.4089966f, 5.4315f), - new Rectangle(610.1675f, 430.7863f, 3.1679993f, 5.4315f), - new Rectangle(610.1675f, 437.9305f, 3.1679993f, 5.4315f), - new Rectangle(610.1675f, 441.09222f, 5.4630127f, 5.4315f), - new Rectangle(610.1675f, 446.54892f, 3.842987f, 5.4315f), - new Rectangle(610.1675f, 450.38562f, 3.1679993f, 5.4315f), - new 
Rectangle(610.1675f, 457.58923f, 3.54599f, 5.4315f), - new Rectangle(610.1675f, 461.12894f, 5.696991f, 5.4315f), - new Rectangle(610.1675f, 466.77103f, 5.3640137f, 5.4315f), - new Rectangle(610.1675f, 472.12872f, 3.1679993f, 5.4315f), - new Rectangle(621.08716f, 121.08f, 5.6070023f, 5.4315f), - new Rectangle(621.08716f, 126.659996f, 5.364006f, 5.4315f), - new Rectangle(621.08716f, 132.0024f, 3.5460052f, 5.4315f), - new Rectangle(621.08716f, 135.5421f, 5.3639984f, 5.4315f), - new Rectangle(621.08716f, 140.88449f, 3.8430023f, 5.4315f), - new Rectangle(621.08716f, 144.7266f, 8.757004f, 5.4315f), - new Rectangle(621.08716f, 153.4899f, 2.4660034f, 5.4315f), - new Rectangle(621.08716f, 155.9901f, 5.697006f, 5.4315f), - new Rectangle(621.08716f, 161.6916f, 5.4089966f, 5.4315f), - new Rectangle(621.08716f, 167.09521f, 3.5460052f, 5.4315f), - new Rectangle(621.08716f, 170.63492f, 2.4660034f, 5.4315f), - new Rectangle(621.08716f, 173.13512f, 5.4629974f, 5.4315f), - new Rectangle(621.08716f, 178.59543f, 5.697006f, 5.4315f), - new Rectangle(621.08716f, 184.29694f, 3.1679993f, 5.4315f), - new Rectangle(621.08716f, 187.48114f, 5.4629974f, 5.4315f), - new Rectangle(621.08716f, 192.94144f, 3.1679993f, 5.4315f), - new Rectangle(621.08716f, 196.12563f, 3.1679993f, 5.4315f), - new Rectangle(621.08716f, 199.30983f, 3.5460052f, 5.4315f), - new Rectangle(621.08716f, 202.84953f, 5.697006f, 5.4315f), - new Rectangle(621.08716f, 208.55104f, 5.3639984f, 5.4315f), - new Rectangle(621.08716f, 213.89343f, 3.1679993f, 5.4315f), - new Rectangle(621.08716f, 217.07764f, 4.6889954f, 5.4315f), - new Rectangle(621.08716f, 221.75764f, 5.4089966f, 5.4315f), - new Rectangle(621.08716f, 227.16125f, 8.757004f, 5.4315f), - new Rectangle(621.08716f, 235.86426f, 5.6069946f, 5.4315f), - new Rectangle(621.08716f, 241.44426f, 2.4660034f, 5.4315f), - new Rectangle(621.08716f, 243.94446f, 5.3639984f, 5.4315f), - new Rectangle(621.08716f, 249.28685f, 2.4210052f, 5.4315f), - new Rectangle(621.08716f, 251.69075f, 
4.6890106f, 5.4315f), - new Rectangle(621.08716f, 256.37073f, 3.1679993f, 5.4315f), - new Rectangle(621.08716f, 259.55493f, 4.6889954f, 5.4315f), - new Rectangle(621.08716f, 264.23492f, 2.4660034f, 5.4315f), - new Rectangle(621.08716f, 266.75134f, 4.725006f, 5.4315f), - new Rectangle(621.08716f, 271.51053f, 5.3640137f, 5.4315f), - new Rectangle(621.08716f, 276.85294f, 3.276001f, 5.4315f), - new Rectangle(621.08716f, 280.16315f, 3.1679993f, 5.4315f), - new Rectangle(644.00745f, 121.08f, 6.155998f, 5.4315f), - new Rectangle(644.00745f, 127.2324f, 7.083f, 5.4315f), - new Rectangle(644.00745f, 134.3118f, 6.5879974f, 5.4315f), - new Rectangle(644.00745f, 140.8962f, 6.255005f, 5.4315f), - new Rectangle(644.00745f, 147.1476f, 6.2819977f, 5.4315f), - new Rectangle(644.00745f, 153.426f, 5.6880035f, 5.4315f), - new Rectangle(644.00745f, 159.1104f, 6.156006f, 5.4315f), - new Rectangle(644.00745f, 165.2628f, 4.0859985f, 5.4315f), - new Rectangle(644.00745f, 169.3452f, 3.1679993f, 5.4315f), - new Rectangle(644.00745f, 176.4894f, 6.9389954f, 5.4315f), - new Rectangle(644.00745f, 183.42479f, 5.4089966f, 5.4315f), - new Rectangle(644.00745f, 188.83018f, 3.5460052f, 5.4315f), - new Rectangle(644.00745f, 192.37259f, 5.4089966f, 5.4315f), - new Rectangle(644.00745f, 197.77798f, 3.1679993f, 5.4315f), - new Rectangle(644.00745f, 204.98158f, 3.1679993f, 5.4315f), - new Rectangle(644.00745f, 208.14598f, 5.4629974f, 5.4315f), - new Rectangle(644.00745f, 213.60538f, 3.8430023f, 5.4315f), - new Rectangle(644.00745f, 217.44478f, 3.1679993f, 5.4315f), - new Rectangle(644.00745f, 224.58897f, 6.156006f, 5.4315f), - new Rectangle(644.00745f, 230.74138f, 7.3619995f, 5.4315f), - new Rectangle(644.00745f, 238.09978f, 2.4660034f, 5.4315f), - new Rectangle(644.00745f, 240.61618f, 3.5460052f, 5.4315f), - new Rectangle(644.00745f, 244.15588f, 4.725006f, 5.4315f), - new Rectangle(644.00745f, 248.87729f, 5.3639984f, 5.4315f), - new Rectangle(644.00745f, 254.23769f, 3.8430023f, 5.4315f), - new 
Rectangle(644.00745f, 258.0195f, 2.4660034f, 5.4315f), - new Rectangle(644.00745f, 260.5359f, 5.4089966f, 5.4315f), - new Rectangle(644.00745f, 265.94128f, 5.696991f, 5.4315f), - new Rectangle(644.00745f, 271.63467f, 5.6069946f, 5.4315f), - new Rectangle(644.00745f, 277.23807f, 3.1679993f, 5.4315f), - new Rectangle(644.00745f, 284.44168f, 5.4089966f, 5.4315f), - new Rectangle(644.00745f, 289.84708f, 3.842987f, 5.4315f), - new Rectangle(644.00745f, 293.68646f, 5.3640137f, 5.4315f), - new Rectangle(644.00745f, 299.04688f, 3.1679993f, 5.4315f), - new Rectangle(644.00745f, 306.2505f, 5.3640137f, 5.4315f), - new Rectangle(644.00745f, 311.6109f, 5.328003f, 5.4315f), - new Rectangle(644.00745f, 316.9353f, 3.54599f, 5.4315f), - new Rectangle(644.00745f, 320.4777f, 3.842987f, 5.4315f), - new Rectangle(644.00745f, 324.31708f, 5.4089966f, 5.4315f), - new Rectangle(644.00745f, 329.72247f, 4.6889954f, 5.4315f), - new Rectangle(644.00745f, 334.40787f, 3.54599f, 5.4315f), - new Rectangle(644.00745f, 337.95026f, 5.3640137f, 5.4315f), - new Rectangle(644.00745f, 343.31067f, 5.6069946f, 5.4315f), - new Rectangle(644.00745f, 348.91406f, 3.1679993f, 5.4315f), - new Rectangle(644.00745f, 356.11768f, 3.1679993f, 5.4315f), - new Rectangle(644.00745f, 359.28207f, 3.842987f, 5.4315f), - new Rectangle(644.00745f, 363.12146f, 5.4630127f, 5.4315f), - new Rectangle(644.00745f, 368.58087f, 8.756989f, 5.4315f), - new Rectangle(644.00745f, 377.33426f, 3.1679993f, 5.4315f), - new Rectangle(644.00745f, 384.53787f, 3.54599f, 5.4315f), - new Rectangle(644.00745f, 388.08026f, 5.696991f, 5.4315f), - new Rectangle(644.00745f, 393.77365f, 5.3640137f, 5.4315f), - new Rectangle(644.00745f, 399.13406f, 3.1679993f, 5.4315f), - new Rectangle(644.00745f, 406.2783f, 6.156006f, 5.4315f), - new Rectangle(644.00745f, 412.4307f, 7.3619995f, 5.4315f), - new Rectangle(644.00745f, 419.7891f, 2.4660034f, 5.4315f), - new Rectangle(644.00745f, 422.30548f, 4.6889954f, 5.4315f), - new Rectangle(644.00745f, 426.99088f, 
4.6889954f, 5.4315f), - new Rectangle(644.00745f, 431.67627f, 3.1679993f, 5.4315f), - new Rectangle(644.00745f, 438.87988f, 5.174988f, 5.4315f), - new Rectangle(644.00745f, 444.05127f, 5.3640137f, 5.4315f), - new Rectangle(644.00745f, 449.41168f, 5.6069946f, 5.4315f), - new Rectangle(644.00745f, 455.01508f, 5.3640137f, 5.4315f), - new Rectangle(644.00745f, 460.3755f, 3.842987f, 5.4315f), - new Rectangle(644.00745f, 464.27786f, 5.4089966f, 5.4315f), - new Rectangle(644.00745f, 469.68146f, 2.4660034f, 5.4315f), - new Rectangle(644.00745f, 472.14386f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 121.08f, 6.155998f, 5.4315f), - new Rectangle(654.9874f, 127.245f, 3.5459976f, 5.4315f), - new Rectangle(654.9874f, 130.8f, 5.4089966f, 5.4315f), - new Rectangle(654.9874f, 136.218f, 3.5460052f, 5.4315f), - new Rectangle(654.9874f, 139.773f, 2.4660034f, 5.4315f), - new Rectangle(654.9874f, 142.248f, 4.6889954f, 5.4315f), - new Rectangle(654.9874f, 146.946f, 3.5460052f, 5.4315f), - new Rectangle(654.9874f, 150.42538f, 2.4660034f, 5.4315f), - new Rectangle(654.9874f, 152.94179f, 4.6889954f, 5.4315f), - new Rectangle(654.9874f, 157.63979f, 5.4089966f, 5.4315f), - new Rectangle(654.9874f, 163.05779f, 2.4660034f, 5.4315f), - new Rectangle(654.9874f, 165.53279f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 170.99669f, 7.083008f, 5.4315f), - new Rectangle(654.9874f, 178.08868f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 181.21259f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 184.38959f, 2.4660034f, 5.4315f), - new Rectangle(654.9874f, 186.8646f, 4.6889954f, 5.4315f), - new Rectangle(654.9874f, 191.56259f, 5.3639984f, 5.4315f), - new Rectangle(654.9874f, 196.9356f, 3.276001f, 5.4315f), - new Rectangle(654.9874f, 200.2206f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 205.6845f, 5.6069946f, 5.4315f), - new Rectangle(654.9874f, 211.30049f, 5.4089966f, 5.4315f), - new Rectangle(654.9874f, 216.71849f, 3.5460052f, 5.4315f), - new Rectangle(654.9874f, 220.27348f, 
5.4089966f, 5.4315f), - new Rectangle(654.9874f, 225.69148f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 231.15538f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 234.33238f, 5.4629974f, 5.4315f), - new Rectangle(654.9874f, 239.80438f, 3.8430023f, 5.4315f), - new Rectangle(654.9874f, 243.65639f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 249.12029f, 3.7890015f, 5.4315f), - new Rectangle(654.9874f, 252.91829f, 4.6890106f, 5.4315f), - new Rectangle(654.9874f, 257.6163f, 3.842987f, 5.4315f), - new Rectangle(654.9874f, 261.4683f, 5.4089966f, 5.4315f), - new Rectangle(654.9874f, 266.8863f, 5.3640137f, 5.4315f), - new Rectangle(654.9874f, 272.25928f, 2.4660034f, 5.4315f), - new Rectangle(654.9874f, 274.73428f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 280.19818f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 283.37518f, 3.842987f, 5.4315f), - new Rectangle(654.9874f, 287.22717f, 5.4630127f, 5.4315f), - new Rectangle(654.9874f, 292.69916f, 8.756989f, 5.4315f), - new Rectangle(654.9874f, 301.3203f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 306.7842f, 3.54599f, 5.4315f), - new Rectangle(654.9874f, 310.3275f, 5.696991f, 5.4315f), - new Rectangle(654.9874f, 316.02182f, 5.3640137f, 5.4315f), - new Rectangle(654.9874f, 321.38312f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 326.84702f, 6.282013f, 5.4315f), - new Rectangle(654.9874f, 333.1263f, 5.3640137f, 5.4315f), - new Rectangle(654.9874f, 338.4876f, 5.696991f, 5.4315f), - new Rectangle(654.9874f, 344.18192f, 3.54599f, 5.4315f), - new Rectangle(654.9874f, 347.72522f, 3.842987f, 5.4315f), - new Rectangle(654.9874f, 351.62762f, 5.4089966f, 5.4315f), - new Rectangle(654.9874f, 357.03125f, 2.4660034f, 5.4315f), - new Rectangle(654.9874f, 359.54764f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 365.01154f, 6.174011f, 5.4315f), - new Rectangle(654.9874f, 371.18283f, 5.696991f, 5.4315f), - new Rectangle(654.9874f, 376.87714f, 3.842987f, 5.4315f), - new Rectangle(654.9874f, 380.71744f, 
5.3640137f, 5.4315f), - new Rectangle(654.9874f, 386.07874f, 5.4089966f, 5.4315f), - new Rectangle(654.9874f, 391.48505f, 5.696991f, 5.4315f), - new Rectangle(654.9874f, 397.17935f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 402.64325f, 5.4630127f, 5.4315f), - new Rectangle(654.9874f, 408.10355f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 411.26886f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 416.73276f, 6.156006f, 5.4315f), - new Rectangle(654.9874f, 422.88605f, 3.54599f, 5.4315f), - new Rectangle(654.9874f, 426.42935f, 5.4089966f, 5.4315f), - new Rectangle(654.9874f, 431.83566f, 3.54599f, 5.4315f), - new Rectangle(654.9874f, 435.37897f, 2.4660034f, 5.4315f), - new Rectangle(654.9874f, 437.89536f, 4.6889954f, 5.4315f), - new Rectangle(654.9874f, 442.57538f, 3.54599f, 5.4315f), - new Rectangle(654.9874f, 446.11868f, 2.4660034f, 5.4315f), - new Rectangle(654.9874f, 448.63507f, 4.6889954f, 5.4315f), - new Rectangle(654.9874f, 453.32138f, 4.6889954f, 5.4315f), - new Rectangle(654.9874f, 458.0077f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 463.4716f, 5.4630127f, 5.4315f), - new Rectangle(654.9874f, 468.9319f, 3.1679993f, 5.4315f), - new Rectangle(654.9874f, 472.0972f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 121.08002f, 3.7890015f, 5.4315f), - new Rectangle(665.9071f, 124.86272f, 4.6889954f, 5.4315f), - new Rectangle(665.9071f, 129.54541f, 3.8430023f, 5.4315f), - new Rectangle(665.9071f, 133.38211f, 5.4089966f, 5.4315f), - new Rectangle(665.9071f, 138.7848f, 5.3639984f, 5.4315f), - new Rectangle(665.9071f, 144.1425f, 2.4660034f, 5.4315f), - new Rectangle(665.9071f, 146.6589f, 3.276001f, 5.4315f), - new Rectangle(665.9071f, 149.961f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 153.1452f, 5.4089966f, 5.4315f), - new Rectangle(665.9071f, 158.5479f, 5.697006f, 5.4315f), - new Rectangle(665.9071f, 164.2386f, 5.6069946f, 5.4315f), - new Rectangle(665.9071f, 169.8393f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 173.0235f, 
5.6069946f, 5.4315f), - new Rectangle(665.9071f, 178.6242f, 5.4089966f, 5.4315f), - new Rectangle(665.9071f, 184.0269f, 3.5460052f, 5.4315f), - new Rectangle(665.9071f, 187.5666f, 5.4089966f, 5.4315f), - new Rectangle(665.9071f, 192.9693f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 196.1535f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 199.3152f, 5.4629974f, 5.4315f), - new Rectangle(665.9071f, 204.7719f, 3.8430023f, 5.4315f), - new Rectangle(665.9071f, 208.6086f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 211.7928f, 6.5879974f, 5.4315f), - new Rectangle(665.9071f, 218.3745f, 5.697006f, 5.4315f), - new Rectangle(665.9071f, 224.0652f, 2.4660034f, 5.4315f), - new Rectangle(665.9071f, 226.5816f, 3.5460052f, 5.4315f), - new Rectangle(665.9071f, 230.1213f, 5.3639984f, 5.4315f), - new Rectangle(665.9071f, 235.479f, 5.6069946f, 5.4315f), - new Rectangle(665.9071f, 241.07971f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 244.26392f, 6.2369995f, 5.4315f), - new Rectangle(665.9071f, 250.49461f, 2.4660034f, 5.4315f), - new Rectangle(665.9071f, 253.01102f, 5.696991f, 5.4315f), - new Rectangle(665.9071f, 258.70172f, 5.6069946f, 5.4315f), - new Rectangle(665.9071f, 264.30243f, 5.6069946f, 5.4315f), - new Rectangle(665.9071f, 269.90314f, 5.4630127f, 5.4315f), - new Rectangle(665.9071f, 275.35983f, 8.756989f, 5.4315f), - new Rectangle(665.9071f, 284.11053f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 287.29474f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 290.45645f, 3.842987f, 5.4315f), - new Rectangle(665.9071f, 294.29315f, 5.4630127f, 5.4315f), - new Rectangle(665.9071f, 299.74985f, 8.756989f, 5.4315f), - new Rectangle(665.9071f, 308.39972f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 311.58392f, 3.54599f, 5.4315f), - new Rectangle(665.9071f, 315.12363f, 5.696991f, 5.4315f), - new Rectangle(665.9071f, 320.81433f, 5.3640137f, 5.4315f), - new Rectangle(665.9071f, 326.17203f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 329.35623f, 
7.083008f, 5.4315f), - new Rectangle(665.9071f, 336.43292f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 339.59464f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 342.75635f, 2.4660034f, 5.4315f), - new Rectangle(665.9071f, 345.27182f, 4.6889954f, 5.4315f), - new Rectangle(665.9071f, 349.95453f, 5.3640137f, 5.4315f), - new Rectangle(665.9071f, 355.31223f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 358.49643f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 361.65814f, 5.4630127f, 5.4315f), - new Rectangle(665.9071f, 367.11484f, 3.842987f, 5.4315f), - new Rectangle(665.9071f, 370.95154f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 374.13574f, 6.7319946f, 5.4315f), - new Rectangle(665.9071f, 380.86145f, 5.4089966f, 5.4315f), - new Rectangle(665.9071f, 386.26416f, 3.54599f, 5.4315f), - new Rectangle(665.9071f, 389.80386f, 2.4660034f, 5.4315f), - new Rectangle(665.9071f, 392.31934f, 5.4630127f, 5.4315f), - new Rectangle(665.9071f, 397.77603f, 5.696991f, 5.4315f), - new Rectangle(665.9071f, 403.46674f, 5.4089966f, 5.4315f), - new Rectangle(665.9071f, 408.80914f, 2.4660034f, 5.4315f), - new Rectangle(665.9071f, 411.32462f, 3.1679993f, 5.4315f), - new Rectangle(665.9071f, 414.44852f, 6.156006f, 5.4315f), - new Rectangle(665.9071f, 420.5982f, 3.54599f, 5.4315f), - new Rectangle(665.9071f, 424.1379f, 5.4089966f, 5.4315f), - new Rectangle(665.9071f, 429.54062f, 3.54599f, 5.4315f), - new Rectangle(665.9071f, 433.08032f, 2.4660034f, 5.4315f), - new Rectangle(665.9071f, 435.5958f, 4.6889954f, 5.4315f), - new Rectangle(665.9071f, 440.2749f, 3.54599f, 5.4315f), - new Rectangle(665.9071f, 443.75342f, 2.4660034f, 5.4315f), - new Rectangle(665.9071f, 446.2689f, 4.6889954f, 5.4315f), - new Rectangle(665.9071f, 450.9516f, 4.6889954f, 5.4315f), - new Rectangle(665.9071f, 455.6343f, 3.276001f, 5.4315f), - new Rectangle(665.9071f, 458.90402f, 3.1679993f, 5.4315f), - new Rectangle(688.82745f, 137.10002f, 5.0130005f, 5.4315f), - new Rectangle(688.82745f, 142.10312f, 
2.4660034f, 5.4315f), - new Rectangle(688.82745f, 144.61952f, 5.3639984f, 5.4315f), - new Rectangle(688.82745f, 149.97362f, 4.6889954f, 5.4315f), - new Rectangle(688.82745f, 154.65271f, 5.697006f, 5.4315f), - new Rectangle(688.82745f, 160.33981f, 3.5460052f, 5.4315f), - new Rectangle(688.82745f, 163.87592f, 5.3639984f, 5.4315f), - new Rectangle(688.82745f, 169.23001f, 5.697006f, 5.4315f), - new Rectangle(688.82745f, 174.91711f, 4.6889954f, 5.4315f), - new Rectangle(688.82745f, 179.59622f, 3.5460052f, 5.4315f), - new Rectangle(688.82745f, 183.13232f, 5.3639984f, 5.4315f), - new Rectangle(688.82745f, 188.48642f, 2.4660034f, 5.4315f), - new Rectangle(688.82745f, 191.00282f, 5.697006f, 5.4315f), - new Rectangle(688.82745f, 196.68993f, 3.1679993f, 5.4315f), - new Rectangle(688.82745f, 199.87413f, 5.697006f, 5.4315f), - new Rectangle(688.82745f, 205.56123f, 5.4089966f, 5.4315f), - new Rectangle(688.82745f, 210.96033f, 4.6889954f, 5.4315f), - new Rectangle(688.82745f, 215.63943f, 3.1679993f, 5.4315f), - new Rectangle(688.82745f, 218.82364f, 5.697006f, 5.4315f), - new Rectangle(688.82745f, 224.51074f, 5.4629974f, 5.4315f), - new Rectangle(688.82745f, 229.96384f, 3.5460052f, 5.4315f), - new Rectangle(688.82745f, 233.49994f, 3.1679993f, 5.4315f), - new Rectangle(688.82745f, 236.68414f, 5.6069946f, 5.4315f), - new Rectangle(688.82745f, 242.28125f, 5.3639984f, 5.4315f), - new Rectangle(688.82745f, 247.63535f, 5.3639984f, 5.4315f), - new Rectangle(688.82745f, 253.03804f, 5.697006f, 5.4315f), - new Rectangle(688.82745f, 258.73953f, 3.1679993f, 5.4315f), - new Rectangle(688.82745f, 261.92374f, 2.4660034f, 5.4315f), - new Rectangle(688.82745f, 264.44012f, 5.696991f, 5.4315f), - new Rectangle(688.82745f, 270.14163f, 4.6889954f, 5.4315f), - new Rectangle(688.82745f, 274.76135f, 2.4660034f, 5.4315f), - new Rectangle(688.82745f, 277.27774f, 5.696991f, 5.4315f), - new Rectangle(688.82745f, 282.96484f, 5.6069946f, 5.4315f), - new Rectangle(688.82745f, 288.56195f, 5.3640137f, 5.4315f), - 
new Rectangle(688.82745f, 293.91605f, 5.6069946f, 5.4315f), - new Rectangle(688.82745f, 299.51315f, 3.1679993f, 5.4315f), - new Rectangle(688.82745f, 302.69736f, 2.4660034f, 5.4315f), - new Rectangle(688.82745f, 305.21375f, 5.696991f, 5.4315f), - new Rectangle(688.82745f, 310.91525f, 3.1679993f, 5.4315f), - new Rectangle(688.82745f, 314.03915f, 3.54599f, 5.4315f), - new Rectangle(688.82745f, 317.57526f, 5.696991f, 5.4315f), - new Rectangle(688.82745f, 323.26236f, 5.3640137f, 5.4315f), - new Rectangle(688.82745f, 328.61646f, 3.1679993f, 5.4315f), - new Rectangle(688.82745f, 331.80066f, 3.54599f, 5.4315f), - new Rectangle(688.82745f, 335.33676f, 5.4089966f, 5.4315f), - new Rectangle(688.82745f, 340.73587f, 5.6069946f, 5.4315f), - new Rectangle(688.82745f, 346.33298f, 2.4660034f, 5.4315f), - new Rectangle(688.82745f, 348.84937f, 5.3640137f, 5.4315f), - new Rectangle(688.82745f, 354.20346f, 4.0859985f, 5.4315f), - new Rectangle(688.82745f, 358.27957f, 3.1679993f, 5.4315f), - new Rectangle(688.82745f, 361.46378f, 6.7319946f, 5.4315f), - new Rectangle(688.82745f, 368.18588f, 5.4630127f, 5.4315f), - new Rectangle(688.82745f, 373.63898f, 3.1679993f, 5.4315f), - new Rectangle(688.82745f, 376.82318f, 5.4089966f, 5.4315f), - new Rectangle(688.82745f, 382.2223f, 5.696991f, 5.4315f), - new Rectangle(688.82745f, 387.9094f, 4.6889954f, 5.4315f), - new Rectangle(688.82745f, 392.5885f, 7.3619995f, 5.4315f), - new Rectangle(688.82745f, 399.9406f, 5.3640137f, 5.4315f), - new Rectangle(688.82745f, 405.2947f, 3.842987f, 5.4315f), - new Rectangle(688.82745f, 409.1278f, 4.6889954f, 5.4315f), - new Rectangle(688.82745f, 413.80692f, 3.1679993f, 5.4315f), - new Rectangle(688.82745f, 416.99112f, 3.842987f, 5.4315f), - new Rectangle(688.82745f, 420.82422f, 5.3640137f, 5.4315f), - new Rectangle(688.82745f, 426.1783f, 4.6889954f, 5.4315f), - new Rectangle(688.82745f, 430.85742f, 5.3640137f, 5.4315f), - new Rectangle(688.82745f, 436.21152f, 2.4660034f, 5.4315f), - new Rectangle(688.82745f, 
438.7279f, 5.328003f, 5.4315f), - new Rectangle(688.82745f, 444.06943f, 5.3640137f, 5.4315f), - new Rectangle(688.82745f, 449.42352f, 5.6069946f, 5.4315f), - new Rectangle(688.82745f, 455.02063f, 3.276001f, 5.4315f), - new Rectangle(688.82745f, 458.32275f, 3.1679993f, 5.4315f), - new Rectangle(711.8071f, 203.70001f, 5.5440063f, 5.4315f), - new Rectangle(711.8071f, 209.23412f, 5.4089966f, 5.4315f), - new Rectangle(711.8071f, 214.63321f, 5.6069946f, 5.4315f), - new Rectangle(711.8071f, 220.23032f, 2.4660034f, 5.4315f), - new Rectangle(711.8071f, 222.74582f, 5.3639984f, 5.4315f), - new Rectangle(711.8071f, 228.09991f, 3.1679993f, 5.4315f), - new Rectangle(711.8071f, 231.28322f, 5.723999f, 5.4315f), - new Rectangle(711.8071f, 236.99731f, 5.723999f, 5.4315f), - new Rectangle(711.8071f, 242.71141f, 3.1679993f, 5.4315f), - new Rectangle(711.8071f, 245.89471f, 5.723999f, 5.4315f), - new Rectangle(711.8071f, 251.60881f, 3.1679993f, 5.4315f), - new Rectangle(711.8071f, 254.79211f, 6.156006f, 5.4315f), - new Rectangle(711.8071f, 260.9751f, 4.6889954f, 5.4315f), - new Rectangle(711.8071f, 265.6542f, 4.6889954f, 5.4315f), - new Rectangle(711.8071f, 270.3333f, 5.696991f, 5.4315f), - new Rectangle(711.8071f, 276.02042f, 3.842987f, 5.4315f), - new Rectangle(711.8071f, 279.85352f, 5.4089966f, 5.4315f), - new Rectangle(711.8071f, 285.25262f, 4.6889954f, 5.4315f), - new Rectangle(711.8071f, 289.93173f, 5.328003f, 5.4315f), - new Rectangle(711.8071f, 295.24982f, 3.1679993f, 5.4315f), - new Rectangle(711.8071f, 298.4331f, 5.4630127f, 5.4315f), - new Rectangle(711.8071f, 303.8862f, 3.1679993f, 5.4315f), - new Rectangle(711.8071f, 307.0443f, 3.1679993f, 5.4315f), - new Rectangle(711.8071f, 310.2276f, 3.54599f, 5.4315f), - new Rectangle(711.8071f, 313.7637f, 5.696991f, 5.4315f), - new Rectangle(711.8071f, 319.4508f, 5.3640137f, 5.4315f), - new Rectangle(711.8071f, 324.8049f, 3.1679993f, 5.4315f), - new Rectangle(711.8071f, 327.9882f, 4.6889954f, 5.4315f), - new Rectangle(711.8071f, 
332.6673f, 5.696991f, 5.4315f), - new Rectangle(711.8071f, 338.3544f, 3.842987f, 5.4315f), - new Rectangle(711.8071f, 342.1875f, 5.328003f, 5.4315f), - new Rectangle(711.8071f, 347.5056f, 5.3640137f, 5.4315f), - new Rectangle(711.8071f, 352.8597f, 5.328003f, 5.4315f), - new Rectangle(711.8071f, 358.1778f, 3.1679993f, 5.4315f), - new Rectangle(711.8071f, 361.36108f, 3.842987f, 5.4315f), - new Rectangle(711.8071f, 365.19418f, 5.3640137f, 5.4315f), - new Rectangle(711.8071f, 370.54828f, 4.6889954f, 5.4315f), - new Rectangle(711.8071f, 375.2274f, 5.696991f, 5.4315f), - new Rectangle(711.8071f, 380.9145f, 2.4660034f, 5.4315f), - new Rectangle(711.8071f, 383.42996f, 3.54599f, 5.4315f), - new Rectangle(711.8071f, 386.96875f, 4.6889954f, 5.4315f), - new Rectangle(711.8071f, 391.64786f, 3.1679993f, 5.4315f) }; @Test public void testRemoveSequentialSpaces() throws IOException { - Page page = UtilsForTesting.getAreaFromFirstPage( - "src/test/resources/technology/tabula/m27.pdf", 79.2f, - 28.28f, 103.04f, 732.6f); + Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/m27.pdf", 79.2f, 28.28f, 103.04f, 732.6f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); List firstRow = table.getRows().get(0); @@ -1796,19 +139,16 @@ public void testRemoveSequentialSpaces() throws IOException { @Test public void testColumnRecognition() throws IOException { - Page page = UtilsForTesting - .getAreaFromFirstPage( - "src/test/resources/technology/tabula/argentina_diputados_voting_record.pdf", - 269.875f, 12.75f, 790.5f, 561f); + Page page = UtilsForTesting.getAreaFromFirstPage(ARGENTINA_DIPUTADOS_VOTING_RECORD_PDF, 269.875f, 12.75f, 790.5f, 561f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); - UtilsForTesting.assertTableEquals(table, EXPECTED_COLUMN_RECOGNITION); + assertArrayEquals(ARGENTINA_DIPUTADOS_VOTING_RECORD_EXPECTED, 
UtilsForTesting.tableToArrayOfRows(table)); } - + @Test public void testVerticalRulingsPreventMergingOfColumns() throws IOException { - List rulings = new ArrayList(); - Float[] rulingsVerticalPositions = { 147f, 256f, 310f, 375f, 431f, 504f }; + List rulings = new ArrayList<>(); + Float[] rulingsVerticalPositions = {147f, 256f, 310f, 375f, 431f, 504f}; for (int i = 0; i < 6; i++) { rulings.add(new Ruling(255.57f, rulingsVerticalPositions[i], 0, 398.76f - 255.57f)); } @@ -1821,48 +161,43 @@ public void testVerticalRulingsPreventMergingOfColumns() throws IOException { List sixthRow = table.getRows().get(5); assertTrue(sixthRow.get(0).getText().equals("VALSANGIACOMO BLANC")); - assertTrue(sixthRow.get(1).getText().equals("OFERNANDO JORGE ")); + assertTrue(sixthRow.get(1).getText().equals("OFERNANDO JORGE")); } @Test public void testExtractColumnsCorrectly() throws IOException { - Page page = UtilsForTesting.getAreaFromPage( - "src/test/resources/technology/tabula/eu-002.pdf", 1, - 115.0f, 70.0f, 233.0f, 510.0f); + Page page = UtilsForTesting.getAreaFromPage(EU_002_PDF, 1, 115.0f, 70.0f, 233.0f, 510.0f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); - UtilsForTesting.assertTableEquals(table, EXPECTED_CORRECT_COLUMNS); + assertArrayEquals(EU_002_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); } - + @Test public void testExtractColumnsCorrectly2() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/eu-017.pdf", 3); + Page page = UtilsForTesting.getPage(EU_017_PDF, 3); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(page.getVerticalRulings()); Table table = bea.extract(page.getArea(299.625f, 148.44f, 711.875f, 452.32f)).get(0); - UtilsForTesting.assertTableEquals(table, EXPECTED_COLUMN_EXTRACTION2); + assertArrayEquals(EU_017_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); } - + @Test public void testExtractColumnsCorrectly3() throws IOException { 
- Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/frx_2012_disclosure.pdf", - 106.01f, 48.09f, 227.31f, 551.89f); + Page page = UtilsForTesting.getAreaFromFirstPage(FRX_2012_DISCLOSURE_PDF, 106.01f, 48.09f, 227.31f, 551.89f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); - - UtilsForTesting.assertTableEquals(table, EXPECTED_TABLE_EXTRACTION); - + assertArrayEquals(FRX_2012_DISCLOSURE_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); } - + @Test public void testCheckSqueezeDoesntBreak() throws IOException { - Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/12s0324.pdf", + Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/12s0324.pdf", 99.0f, 17.25f, 316.5f, 410.25f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); List> rows = table.getRows(); List firstRow = rows.get(0); List lastRow = rows.get(rows.size() - 1); - assertTrue(firstRow.get(0).getText().equals("Violent crime . . . . . . . . . . . . . . . . . . ")); + assertTrue(firstRow.get(0).getText().equals("Violent crime . . . . . . . . . . . . . . . . . 
.")); assertTrue(lastRow.get(lastRow.size() - 1).getText().equals("(X)")); } @@ -1874,88 +209,133 @@ public void testNaturalOrderOfRectangles() throws IOException { BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm( page.getVerticalRulings()); Table table = bea.extract(page).get(0); - - List cells = table.getCells(); + + List cells = new ArrayList<>(table.cells.values()); for (RectangularTextContainer rectangularTextContainer : cells) { - System.out.println(rectangularTextContainer.getText()); - } + System.out.println(rectangularTextContainer.getText()); + } //Column headers assertEquals("Project", cells.get(0).getText()); assertEquals("Agency", cells.get(1).getText()); - assertEquals(" Institution", cells.get(2).getText()); + assertEquals("Institution", cells.get(2).getText()); //First row assertEquals("Nanotechnology and its publics", cells.get(3).getText()); assertEquals("NSF", cells.get(4).getText()); assertEquals("Pennsylvania State Universit", cells.get(5).getText()); - + //Second row - assertEquals("Public information and deliberation in nanoscience and ", cells.get(6).getText()); - assertEquals("North Carolina State ", cells.get(7).getText()); + assertEquals("Public information and deliberation in nanoscience and", cells.get(6).getText()); + assertEquals("North Carolina State", cells.get(7).getText()); assertEquals("Interagency", cells.get(8).getText()); assertEquals("nanotechnology policy (SGER)", cells.get(9).getText()); assertEquals("University", cells.get(10).getText()); //Third row - assertEquals("Social and ethical research and education in agrifood ", cells.get(11).getText()); + assertEquals("Social and ethical research and education in agrifood", cells.get(11).getText()); assertEquals("NSF", cells.get(12).getText()); assertEquals("Michigan State University", cells.get(13).getText()); assertEquals("nanotechnology (NIRT)", cells.get(14).getText()); - + //Fourth row - assertEquals("From laboratory to society: developing an informed ", 
cells.get(15).getText()); + assertEquals("From laboratory to society: developing an informed", cells.get(15).getText()); assertEquals("NSF", cells.get(16).getText()); assertEquals("University of South Carolina", cells.get(17).getText()); assertEquals("approach to nanoscale science and engineering (NIRT)", cells.get(18).getText()); - + //Fifth row assertEquals("Database and innovation timeline for nanotechnology", cells.get(19).getText()); assertEquals("NSF", cells.get(20).getText()); assertEquals("UCLA", cells.get(21).getText()); - + //Sixth row assertEquals("Social and ethical dimensions of nanotechnology", cells.get(22).getText()); assertEquals("NSF", cells.get(23).getText()); assertEquals("University of Virginia", cells.get(24).getText()); - + //Seventh row - assertEquals("Undergraduate exploration of nanoscience, ", cells.get(25).getText()); - assertEquals("Michigan Technological ", cells.get(26).getText()); + assertEquals("Undergraduate exploration of nanoscience,", cells.get(25).getText()); + assertEquals("Michigan Technological", cells.get(26).getText()); assertEquals("NSF", cells.get(27).getText()); assertEquals("applications and societal implications (NUE)", cells.get(28).getText()); assertEquals("University", cells.get(29).getText()); - + //Eighth row - assertEquals("Ethics and belief inside the development of ", cells.get(30).getText()); + assertEquals("Ethics and belief inside the development of", cells.get(30).getText()); assertEquals("NSF", cells.get(31).getText()); assertEquals("University of Virginia", cells.get(32).getText()); assertEquals("nanotechnology (CAREER)", cells.get(33).getText()); - + //Ninth row - assertEquals("All centers, NNIN and NCN have a societal ", cells.get(34).getText()); - assertEquals("NSF, DOE, ", cells.get(35).getText()); - assertEquals("All nanotechnology centers ", cells.get(36).getText()); - assertEquals("implications components ", cells.get(37).getText()); + assertEquals("All centers, NNIN and NCN have a societal", 
cells.get(34).getText()); + assertEquals("NSF, DOE,", cells.get(35).getText()); + assertEquals("All nanotechnology centers", cells.get(36).getText()); + assertEquals("implications components", cells.get(37).getText()); assertEquals("DOD, and NIH", cells.get(38).getText()); assertEquals("and networks", cells.get(39).getText()); - + } - + @Test public void testNaturalOrderOfRectanglesOneMoreTime() throws IOException { - - List rectangles = Arrays.asList(RECTANGLES_TEST_NATURAL_ORDER); - Utils.sort(rectangles); - - for (int i = 0; i < (rectangles.size() - 1); i++) { - Rectangle rectangle = rectangles.get(i); - Rectangle nextRectangle = rectangles.get(i + 1); - - assertTrue(rectangle.compareTo(nextRectangle) < 0); - - - } + CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File("src/test/resources/technology/tabula/csv/TestBasicExtractor-RECTANGLE_TEST_NATURAL_ORDER.csv"), + Charset.forName("utf-8"), + CSVFormat.DEFAULT); + + List rectangles = new ArrayList<>(); + + for (CSVRecord record : parse) { + rectangles.add(new Rectangle(Float.parseFloat(record.get(0)), + Float.parseFloat(record.get(1)), + Float.parseFloat(record.get(2)), + Float.parseFloat(record.get(3)))); + } + + + //List rectangles = Arrays.asList(RECTANGLES_TEST_NATURAL_ORDER); + Utils.sort(rectangles, Rectangle.ILL_DEFINED_ORDER); + + for (int i = 0; i < (rectangles.size() - 1); i++) { + Rectangle rectangle = rectangles.get(i); + Rectangle nextRectangle = rectangles.get(i + 1); + + assertTrue(rectangle.compareTo(nextRectangle) < 0); + } + } + + @Test + public void testRealLifeRTL2() throws IOException { + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/indictb1h_14.csv"); + Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/indictb1h_14.pdf", 1, + 205.0f, 120.0f, 622.82f, 459.9f); + BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); + Table table = bea.extract(page).get(0); + + StringBuilder sb = new 
StringBuilder(); + (new CSVWriter()).write(sb, table); + assertEquals(expectedCsv, sb.toString()); + } + + + @Test + public void testEmptyRegion() throws IOException { + Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/indictb1h_14.pdf", 1, 0, 0, 80.82f, 100.9f); // an empty area + BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); + Table table = bea.extract(page).get(0); + assertArrayEquals(EXPECTED_EMPTY_TABLE, UtilsForTesting.tableToArrayOfRows(table)); + } + + + @Test + public void testTableWithMultilineHeader() throws IOException { + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/us-020.csv"); + Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/us-020.pdf", 2, 103.0f, 35.0f, 641.0f, 560.0f); + BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); + Table table = bea.extract(page).get(0); + StringBuilder sb = new StringBuilder(); + (new CSVWriter()).write(sb, table); + assertEquals(expectedCsv, sb.toString()); } } diff --git a/src/test/java/technology/tabula/TestCell.java b/src/test/java/technology/tabula/TestCell.java new file mode 100644 index 00000000..de1b8cb8 --- /dev/null +++ b/src/test/java/technology/tabula/TestCell.java @@ -0,0 +1,45 @@ +package technology.tabula; + +import static org.junit.Assert.*; + +import java.util.List; +import java.util.ArrayList; + +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.junit.Test; + +public class TestCell { + + @Test + public void testIsSpanning() { + Cell cell = new Cell(0, 0, 0, 0); + assertFalse(cell.isSpanning()); + cell.setSpanning(true); + assertTrue(cell.isSpanning()); + } + + @Test + public void testIsPlaceholder() { + Cell cell = new Cell(0, 0, 0, 0); + assertFalse(cell.isPlaceholder()); + cell.setPlaceholder(true); + assertTrue(cell.isPlaceholder()); + } + + @Test + public void testGetTextElements() { + Cell cell = new Cell(0, 0, 0, 0); + 
assertTrue(cell.getTextElements().isEmpty()); + + TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextChunk tChunk = new TextChunk(tElement); + List tList = new ArrayList<>(); + tList.add(tChunk); + cell.setTextElements(tList); + + assertEquals("test", cell.getTextElements().get(0).getText()); + + + } + +} diff --git a/src/test/java/technology/tabula/TestCommandLineApp.java b/src/test/java/technology/tabula/TestCommandLineApp.java index c385c4af..a430063c 100644 --- a/src/test/java/technology/tabula/TestCommandLineApp.java +++ b/src/test/java/technology/tabula/TestCommandLineApp.java @@ -2,55 +2,218 @@ import static org.junit.Assert.*; +import java.io.File; import java.io.IOException; +import java.nio.file.*; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.ParseException; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; public class TestCommandLineApp { - private String csvFromCommandLineArgs(String[] args) throws ParseException { - CommandLineParser parser = new GnuParser(); - CommandLine cmd = parser.parse(CommandLineApp.buildOptions(), args); - - StringBuilder stringBuilder = new StringBuilder(); - new CommandLineApp(stringBuilder).extractTables(cmd); - - return stringBuilder.toString(); - } - - @Test - public void testExtractSpreadsheetWithArea() throws ParseException, IOException { - - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv"); - - assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[] { - "src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf", - "-p", "1", "-a", - "150.56,58.9,654.7,536.12", "-f", - "CSV" - })); - } - - @Test - public void testGuessOption() throws ParseException, IOException { - String 
expectedCsvNoGuessing = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_no_guessing.csv"); - assertEquals(expectedCsvNoGuessing, this.csvFromCommandLineArgs(new String[] { - "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.pdf", - "-p", "1", - "-f", "CSV" - })); - - String expectedCsvWithGuessing = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_with_guessing.csv"); - assertEquals(expectedCsvWithGuessing, this.csvFromCommandLineArgs(new String[] { - "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.pdf", - "-p", "1", - "-f", "CSV", - "-g" - })); - } + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + private String csvFromCommandLineArgs(String[] args) throws ParseException { + CommandLineParser parser = new DefaultParser(); + CommandLine cmd = parser.parse(CommandLineApp.buildOptions(), args); + + StringBuilder stringBuilder = new StringBuilder(); + new CommandLineApp(stringBuilder, cmd).extractTables(cmd); + + return stringBuilder.toString(); + } + + @Test + public void testExtractSpreadsheetWithArea() throws ParseException, IOException { + + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv"); + + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf", + "-p", "1", "-a", + "150.56,58.9,654.7,536.12", "-f", + "CSV" + })); + } + + @Test + public void testExtractBatchSpreadsheetWithArea() throws ParseException, IOException { + FileSystem fs = FileSystems.getDefault(); + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv"); + Path tmpFolder = Files.createTempDirectory("tabula-java-batch-test"); + tmpFolder.toFile().deleteOnExit(); + + Path copiedPDF 
= tmpFolder.resolve(fs.getPath("spreadsheet.pdf")); + Path sourcePDF = fs.getPath("src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf"); + Files.copy(sourcePDF, copiedPDF); + copiedPDF.toFile().deleteOnExit(); + + this.csvFromCommandLineArgs(new String[]{ + "-m", tmpFolder.toString(), + "-p", "1", "-a", + "150.56,58.9,654.7,536.12", "-f", + "CSV" + }); + + Path csvPath = tmpFolder.resolve(fs.getPath("spreadsheet.csv")); + assertTrue(csvPath.toFile().exists()); + assertArrayEquals(expectedCsv.getBytes(), Files.readAllBytes(csvPath)); + } + + @Test + public void testExtractSpreadsheetWithAreaAndNewFile() throws ParseException, IOException { + + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv"); + + File newFile = folder.newFile(); + this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf", + "-p", "1", "-a", + "150.56,58.9,654.7,536.12", "-f", + "CSV", "-o", newFile.getAbsolutePath() + }); + + assertArrayEquals(expectedCsv.getBytes(), Files.readAllBytes(Paths.get(newFile.getAbsolutePath()))); + } + + + @Test + public void testExtractJSONWithArea() throws ParseException, IOException { + + String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/spanning_cells_basic.json"); + + assertEquals(expectedJson, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/spanning_cells.pdf", + "-p", "1", "-a", + "150.56,58.9,654.7,536.12", "-f", + "JSON", "-l" + })); + } + + @Test + public void testExtractCSVWithArea() throws ParseException, IOException { + + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spanning_cells.csv"); + + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/spanning_cells.pdf", + "-p", "1", "-a", + "150.56,58.9,654.7,536.12", "-f", + "CSV", "-l" + })); + 
} + + @Test + public void testGuessOption() throws ParseException, IOException { + String expectedCsvNoGuessing = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_no_guessing.csv"); + assertEquals(expectedCsvNoGuessing, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.pdf", + "-p", "1", + "-f", "CSV" + })); + +/* String expectedCsvWithGuessing = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_with_guessing.csv"); + assertEquals(expectedCsvWithGuessing, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.pdf", + "-p", "1", + "-f", "CSV", + "-g" + }));*/ + } + + @Test + public void testEncryptedPasswordSupplied() throws ParseException { + String s = this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/encrypted.pdf", + "-s", "userpassword", + "-p", "1", + "-f", "CSV", "-g" + }); + assertEquals("FLA Audit Profile,", s.split("\\r?\\n")[0]); + } + + @Test(expected=org.apache.commons.cli.ParseException.class) + public void testEncryptedWrongPassword() throws ParseException { + String s = this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/encrypted.pdf", + "-s", "wrongpassword", + "-p", "1", + "-f", "CSV" + }); + } + + @Test + public void testExtractWithMultiplePercentArea() throws ParseException, IOException { + + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/MultiColumn.csv"); + + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/MultiColumn.pdf", + "-p", "1", "-a", + "%0,0,100,50", "-a", + "%0,50,100,100", "-f", + "CSV" + })); + } + + @Test + public void testExtractWithMultipleAbsoluteArea() throws ParseException, IOException { + + String expectedCsv = 
UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/MultiColumn.csv"); + + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/MultiColumn.pdf", + "-p", "1", "-a", + "0,0,451,212", "-a", + "0,212,451,425", "-f", + "CSV" + })); + } + + @Test + public void testExtractWithPercentAndAbsoluteArea() throws ParseException, IOException { + + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/MultiColumn.csv"); + + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/MultiColumn.pdf", + "-p", "1", "-a", + "%0,0,100,50", "-a", + "0,212,451,425", "-f", + "CSV" + })); + } + + @Test + public void testLatticeModeWithColumnOption() throws ParseException, IOException { + + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/AnimalSounds.csv"); + + assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/AnimalSounds.pdf", + "-p", "1", "-c", + "59,218,331,551", + "-r", "-f", "CSV" + })); + } + + @Test + public void testLatticeModeWithColumnAndMultipleAreasOption() throws ParseException, IOException { + + String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/AnimalSounds1.json"); + String resultJson = this.csvFromCommandLineArgs(new String[]{ + "src/test/resources/technology/tabula/AnimalSounds1.pdf", + "-p", "1", "-c", "57,136,197,296,314,391,457,553", + "-a", "%0,0,100,50", "-a", "%0,50,100,100", + "-r", "-f", "JSON" + }); + assertEquals(expectedJson, resultJson); + } } diff --git a/src/test/java/technology/tabula/TestDebug.java b/src/test/java/technology/tabula/TestDebug.java index febbbd15..2e8de98c 100644 --- a/src/test/java/technology/tabula/TestDebug.java +++ b/src/test/java/technology/tabula/TestDebug.java @@ -1,13 +1,5 @@ package technology.tabula; -import static org.junit.Assert.*; - -import 
java.io.File; -import java.io.IOException; - -import org.junit.Test; -import technology.tabula.debug.Debug; - public class TestDebug { private final static String PATH = "src/test/resources/technology/tabula/spanning_cells.pdf"; diff --git a/src/test/java/technology/tabula/TestLine.java b/src/test/java/technology/tabula/TestLine.java new file mode 100644 index 00000000..90df0e31 --- /dev/null +++ b/src/test/java/technology/tabula/TestLine.java @@ -0,0 +1,71 @@ +package technology.tabula; + +import static org.junit.Assert.*; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.junit.Test; + +public class TestLine { + + @Test + public void testSetTextElements() { + Line line = new Line(); + + TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextChunk tChunk = new TextChunk(tElement); + List tList = new ArrayList<>(); + tList.add(tChunk); + line.setTextElements(tList); + + assertEquals("test", line.getTextElements().get(0).getText()); + + } + + @Test + public void testAddTextChunkIntTextChunk() { + Line line = new Line(); + + TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextChunk tChunk = new TextChunk(tElement); + line.addTextChunk(3, tChunk); + + assertEquals("test", line.getTextElements().get(3).getText()); + } + + @Test + public void testLessThanAddTextChunkIntTextChunk() { + Line line = new Line(); + + TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextChunk tChunk = new TextChunk(tElement); + line.addTextChunk(0, tChunk); + line.addTextChunk(0, tChunk); + + assertEquals("testtest", line.getTextElements().get(0).getText()); + } + + @Test(expected = IllegalArgumentException.class) + public void testErrorAddTextChunkIntTextChunk() { + Line line = new Line(); + + TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 
5); + TextChunk tChunk = new TextChunk(tElement); + line.addTextChunk(-1, tChunk); + } + + @Test + public void testToString() { + Line line = new Line(); + + TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5); + TextChunk tChunk = new TextChunk(tElement); + line.addTextChunk(0, tChunk); + line.addTextChunk(0, tChunk); + + assertEquals("technology.tabula.Line[x=0.0,y=0.0,w=0.0,h=0.0,bottom=0.000000,right=0.000000,chunks='testtest', ]", line.toString()); + } + +} diff --git a/src/test/java/technology/tabula/TestObjectExtractor.java b/src/test/java/technology/tabula/TestObjectExtractor.java index 150ea8ab..fe458b87 100644 --- a/src/test/java/technology/tabula/TestObjectExtractor.java +++ b/src/test/java/technology/tabula/TestObjectExtractor.java @@ -12,23 +12,23 @@ public class TestObjectExtractor { - @Test(expected=IOException.class) + /*@Test(expected=IOException.class) public void testWrongPasswordRaisesException() throws IOException { - PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/encrypted.pdf"); + PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf")); ObjectExtractor oe = new ObjectExtractor(pdf_document, "wrongpass"); oe.extract().next(); - } - - @Test(expected=IOException.class) + }*/ + + @Test(expected = IOException.class) public void testEmptyOnEncryptedFileRaisesException() throws IOException { - PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/encrypted.pdf"); - ObjectExtractor oe = new ObjectExtractor(pdf_document); + PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf")); + ObjectExtractor oe = new ObjectExtractor(pdf_document); oe.extract().next(); } - + @Test public void testCanReadPDFWithOwnerEncryption() throws IOException { - PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"); + PDDocument 
pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); ObjectExtractor oe = new ObjectExtractor(pdf_document); PageIterator pi = oe.extract(); int i = 0; @@ -39,78 +39,107 @@ public void testCanReadPDFWithOwnerEncryption() throws IOException { assertEquals(2, i); } + @Test public void testGoodPassword() throws IOException { - PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/encrypted.pdf"); - ObjectExtractor oe = new ObjectExtractor(pdf_document, "userpassword"); - List pages = new ArrayList(); + PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword"); + ObjectExtractor oe = new ObjectExtractor(pdf_document); + List pages = new ArrayList<>(); PageIterator pi = oe.extract(); while (pi.hasNext()) { pages.add(pi.next()); } assertEquals(1, pages.size()); } - + + @Test public void testTextExtractionDoesNotRaise() throws IOException { - PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/rotated_page.pdf"); + PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/rotated_page.pdf")); ObjectExtractor oe = new ObjectExtractor(pdf_document); PageIterator pi = oe.extract(); - + assertTrue(pi.hasNext()); assertNotNull(pi.next()); assertFalse(pi.hasNext()); - + } - + @Test public void testShouldDetectRulings() throws IOException { - PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/should_detect_rulings.pdf"); + PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf")); ObjectExtractor oe = new ObjectExtractor(pdf_document); PageIterator pi = oe.extract(); - - while (pi.hasNext()) { - assertNotEquals(0, pi.next().getRulings().size()); + + Page page = pi.next(); + List rulings = page.getRulings(); + + for (Ruling r: rulings) { + assertTrue(page.contains(r.getBounds())); } } - + @Test 
public void testDontThrowNPEInShfill() throws IOException { - PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/labor.pdf"); + PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/labor.pdf")); ObjectExtractor oe = new ObjectExtractor(pdf_document); PageIterator pi = oe.extract(); assertTrue(pi.hasNext()); try { Page p = pi.next(); assertNotNull(p); - } - catch (NullPointerException e) { + } catch (NullPointerException e) { fail("NPE in ObjectExtractor " + e.toString()); } } - + @Test - public void testExtractOnePage() throws IOException{ - PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"); + public void testExtractOnePage() throws IOException { + PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); assertEquals(2, pdf_document.getNumberOfPages()); - + ObjectExtractor oe = new ObjectExtractor(pdf_document); Page page = oe.extract(2); - + assertNotNull(page); - + } - + @Test(expected = IndexOutOfBoundsException.class) - public void testExtractWrongPageNumber() throws IOException{ - PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"); + public void testExtractWrongPageNumber() throws IOException { + PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); assertEquals(2, pdf_document.getNumberOfPages()); - + ObjectExtractor oe = new ObjectExtractor(pdf_document); oe.extract(3); - + + } + + @Test + public void testTextElementsContainedInPage() throws IOException { + PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf")); + ObjectExtractor oe = new ObjectExtractor(pdf_document); + + Page page = oe.extractPage(1); + + for (TextElement te: page.getText()) { + assertTrue(page.contains(te)); + } + } + + @Test public void 
testDoNotNPEInPointComparator() throws IOException { + PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/npe_issue_206.pdf")); + ObjectExtractor oe = new ObjectExtractor(pdf_document); + + try { + Page p = oe.extractPage(1); + assertNotNull(p); + } catch (NullPointerException e) { + fail("NPE in ObjectExtractor " + e.toString()); + } } + /* @Test public void testExtractWithoutExtractingRulings() throws IOException { PDDocument pdf_document = PDDocument.load("src/test/resources/technology/tabula/should_detect_rulings.pdf"); @@ -123,5 +152,6 @@ public void testExtractWithoutExtractingRulings() throws IOException { assertEquals(0, page.getRulings().size()); assertFalse(pi.hasNext()); } - + */ + } diff --git a/src/test/java/technology/tabula/TestProjectionProfile.java b/src/test/java/technology/tabula/TestProjectionProfile.java new file mode 100644 index 00000000..4a3462f9 --- /dev/null +++ b/src/test/java/technology/tabula/TestProjectionProfile.java @@ -0,0 +1,101 @@ +package technology.tabula; + +import static org.junit.Assert.*; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.junit.Before; +import org.junit.Test; + +public class TestProjectionProfile { + + ProjectionProfile pProfile; + Page page; + + @Before + public void setUpProjectionProfile() { + PDPage pdPage = new PDPage(); + PDDocument pdDocument = new PDDocument(); + + TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f); + TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f); + List textList = new ArrayList<>(); + textList.add(textElement); + textList.add(textElement2); + + Ruling ruling = new Ruling(0, 0, 10, 10); + List rulingList = new ArrayList<>(); + rulingList.add(ruling); + + + page = new Page(0, 0, 1, 1, 0, 1, 
pdPage, pdDocument, textList, rulingList); + + List rectangles = new ArrayList<>(); + rectangles.add(new Rectangle(0f, 0f, 500f, 5f)); + + pProfile = new ProjectionProfile(page, rectangles, 5, 5); + } + + @Test + public void testGetVerticalProjection() { + float[] projection = pProfile.getVerticalProjection(); + assertTrue(projection.length == 10); + } + + @Test + public void testGetHorizontalProjection() { + float[] projection = pProfile.getHorizontalProjection(); + assertTrue(projection.length == 10); + } + + @Test + public void testFindVerticalSeparators() { + float[] seperators = pProfile.findVerticalSeparators(page.getText().size() * 2.5f); + assertTrue(seperators.length == 0); + } + + @Test + public void testFindHorizontalSeparators() { + float[] seperators = pProfile.findHorizontalSeparators(page.getText().size() * 2.5f); + assertTrue(seperators.length == 0); + } + + @Test + public void testSmooth() { + float[] data = {0, 1, 2}; + float[] rv = ProjectionProfile.smooth(data, 3); + + assertEquals(1f, rv[2], 1e-5); + } + + @Test + public void testFilter() { + float[] data = {0, 1, 2}; + float[] rv = ProjectionProfile.filter(data, 3); + + assertEquals(3f, rv[1], 1e-5); + } + + @Test + public void testGetAutocorrelation() { + float[] projection = {0, 1, 2}; + float[] rv = ProjectionProfile.getAutocorrelation(projection); + + assertEquals(0f, rv[0], 1e-5); + assertTrue(rv.length == 2); + + } + + @Test + public void testGetFirstDeriv() { +// float[] +// float[] projection = pProfile.getFirstDeriv(new float[]{0.0, 0.0) +// System.out.println(Arrays.toString(projection)); +// assertEquals(10, projection[0], 1e-15); + } + +} diff --git a/src/test/java/technology/tabula/TestRectangle.java b/src/test/java/technology/tabula/TestRectangle.java index e9dadd42..7fa66f7a 100644 --- a/src/test/java/technology/tabula/TestRectangle.java +++ b/src/test/java/technology/tabula/TestRectangle.java @@ -1,239 +1,291 @@ package technology.tabula; -import static 
org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; import java.awt.geom.Point2D; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; public class TestRectangle { - - + + @Test public void testCompareEqualsRectangles() { Rectangle first = new Rectangle(); Rectangle second = new Rectangle(); - + assertTrue(first.equals(second)); assertTrue(second.equals(first)); } - + @Test public void testCompareAlignedHorizontalRectangle() { Rectangle lower = new Rectangle(0f, 10f, 10f, 10f); Rectangle upper = new Rectangle(0f,20f, 10f, 10f); - + assertTrue(lower.compareTo(upper) < 0); } - + @Test public void testCompareAlignedVerticalRectangle() { Rectangle lower = new Rectangle(10f, 0f, 10f, 10f); Rectangle upper = new Rectangle(20f,0f, 10f, 10f); - + assertTrue(lower.compareTo(upper) < 0); } - + @Test public void testCompareVerticalOverlapRectangle() { Rectangle lower = new Rectangle(5f, 0f, 10f, 10f); Rectangle upper = new Rectangle(0f, 10f, 10f, 10f); - + assertTrue(lower.compareTo(upper) < 0); } - + @Test public void testCompareVerticalOverlapLessThresholdRectangle() { Rectangle lower = new Rectangle(0f, 10f, 10f, 10f); Rectangle upper = new Rectangle(9.8f, 0f, 10f, 10f); - + assertTrue(lower.compareTo(upper) < 0); } - - - + + + @Test public void testQuickSortOneUpperThanOther() { - + Rectangle lower = new Rectangle(175.72f, 72.72f, 1.67f, 1.52f); //, (Comma after AARON) Rectangle upper = new Rectangle(169.21f, 161.16f, 4.33f, 4.31f); // R (REGIONAL PULMONARY) - + assertTrue(lower.compareTo(upper) > 0); - + } - + @Test public void testQuickSortRectangleList() { - + //Testing wrong sorting // Expected: AARON, JOSHUA, N // but was: AARON JOSHUA N , , - Rectangle first = new Rectangle(172.92999267578125f, 51.47999954223633f, 4.0f, 4.309999942779541f); //A + Rectangle first = new 
Rectangle(172.92999267578125f, 51.47999954223633f, 4.0f, 4.309999942779541f); //A Rectangle second = new Rectangle(175.72000122070312f, 72.72000122070312f, 1.6699999570846558f, 1.5199999809265137f); //, Rectangle third = new Rectangle(172.92999267578125f, 96.36000061035156f, 4.0f, 4.309999942779541f); //A Rectangle fourth = new Rectangle(175.72000122070312f, 100.31999969482422f, 1.6699999570846558f, 1.5199999809265137f); //, Rectangle fifth = new Rectangle(172.92999267578125f, 103.68000030517578f, 4.329999923706055f, 4.309999942779541f); //N - Rectangle six = new Rectangle(169.2100067138672f, 161.16000366210938f, 4.329999923706055f, 4.309999942779541f); //R - - List expectedList = new ArrayList(); + Rectangle sixth = new Rectangle(169.2100067138672f, 161.16000366210938f, 4.329999923706055f, 4.309999942779541f); //R + + List expectedList = new ArrayList<>(); expectedList.add(first); - expectedList.add(six); + expectedList.add(sixth); expectedList.add(second); expectedList.add(third); expectedList.add(fourth); expectedList.add(fifth); - List toSortList = new ArrayList(); - toSortList.add(six); + List toSortList = new ArrayList<>(); + toSortList.add(sixth); toSortList.add(second); toSortList.add(third); toSortList.add(fifth); toSortList.add(first); toSortList.add(fourth); - - Collections.sort(toSortList); - + + Collections.sort(toSortList, Rectangle.ILL_DEFINED_ORDER); + assertEquals(expectedList, toSortList); } - + @Test public void testGetVerticalOverlapShouldReturnZero() { - + Rectangle lower = new Rectangle(10f, 0f, 10f, 10f); Rectangle upper = new Rectangle(20f,0f, 10f, 10f); - + float overlap = lower.verticalOverlap(upper); - + assertEquals(0f, overlap, 0); assertTrue(!lower.verticallyOverlaps(upper)); assertEquals(0f, lower.verticalOverlapRatio(upper), 0); assertEquals(0f, lower.overlapRatio(upper), 0); - + } - + @Test public void testGetVerticalOverlapShouldReturnMoreThanZero() { - + Rectangle lower = new Rectangle(15f, 10f, 10f, 10f); Rectangle upper = new 
Rectangle(20f, 0f, 10f, 10f); - + float overlap = lower.verticalOverlap(upper); - + assertEquals(5f, overlap, 0); assertTrue(lower.verticallyOverlaps(upper)); assertEquals(0.5f, lower.verticalOverlapRatio(upper), 0); assertEquals(0f, lower.overlapRatio(upper), 0); - + } - + @Test public void testGetHorizontalOverlapShouldReturnZero() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(10f, 10f, 10f, 10f); - + assertTrue(!one.horizontallyOverlaps(two)); assertEquals(0f, one.overlapRatio(two), 0); - + } - + @Test public void testGetHorizontalOverlapShouldReturnMoreThanZero() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(10f, 5f, 10f, 10f); - + assertTrue(one.horizontallyOverlaps(two)); assertEquals(5f, one.horizontalOverlap(two), 0); assertEquals(0f, one.overlapRatio(two), 0); - + } - + @Test public void testGetOverlapShouldReturnMoreThanZero() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(5f, 5f, 10f, 10f); - + assertTrue(one.horizontallyOverlaps(two)); assertTrue(one.verticallyOverlaps(two)); assertEquals(5f, one.horizontalOverlap(two), 0); assertEquals(5f, one.verticalOverlap(two), 0); assertEquals((25f/175), one.overlapRatio(two), 0); - + } - + @Test public void testMergeNoOverlappingRectangles() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(0f, 10f, 10f, 10f); - + one.merge(two); - + assertEquals(20f, one.getWidth(), 0); assertEquals(10f, one.getHeight(), 0); assertEquals(0f, one.getLeft(), 0); assertEquals(0f, one.getTop(), 0); assertEquals(10f, one.getBottom(), 0); assertEquals(20f * 10f, one.getArea(), 0); - + } - + @Test public void testMergeOverlappingRectangles() { - + Rectangle one = new Rectangle(0f, 0f, 10f, 10f); Rectangle two = new Rectangle(5f, 5f, 10f, 10f); - + one.merge(two); - + assertEquals(15f, one.getWidth(), 0); assertEquals(15f, one.getHeight(), 0); assertEquals(0f, one.getLeft(), 0); 
assertEquals(0f, one.getTop(), 0); - + } - + @Test public void testRectangleGetPoints() { - + Rectangle one = new Rectangle(10f, 20f, 30f, 40f); - + Point2D[] points = one.getPoints(); - - Point2D[] expectedPoints = new Point2D[]{ + + Point2D[] expectedPoints = new Point2D[]{ new Point2D.Float(20f, 10f), new Point2D.Float(50f, 10f), new Point2D.Float(50f, 50f), new Point2D.Float(20f, 50f) - + }; - + Assert.assertArrayEquals(expectedPoints, points); - + } - + @Test public void testGetBoundingBox() { - - List rectangles = new ArrayList(); + + List rectangles = new ArrayList<>(); rectangles.add(new Rectangle(0f, 0f, 10f, 10f)); rectangles.add(new Rectangle(20f, 30f, 10f, 10f)); - + Rectangle boundingBoxOf = Rectangle.boundingBoxOf(rectangles); - + assertEquals(new Rectangle(0f, 0f, 40f, 30f), boundingBoxOf); - - - - + + + + } - - + + @Test + public void testTransitiveComparison1() { + // +-------+ + // | | + // | A | +-------+ + // | | | | + // +-------+ | B | +-------+ + // | | | | + // +-------+ | C | + // | | + // +-------+ + Rectangle a = new Rectangle(0,0,2,2); + Rectangle b = new Rectangle(1,1,2,2); + Rectangle c = new Rectangle(2,2,2,2); + assertTrue(a.compareTo(b) < 0); + assertTrue(b.compareTo(c) < 0); + assertTrue(a.compareTo(c) < 0); + } + + @Test @Ignore + public void testTransitiveComparison2() { + // +-------+ + // | | + // +-------+ | C | + // | | | | + // +-------+ | B | +-------+ + // | | | | + // | A | +-------+ + // | | + // +-------+ + Rectangle a = new Rectangle(2,0,2,2); + Rectangle b = new Rectangle(1,1,2,2); + Rectangle c = new Rectangle(0,2,2,2); + assertTrue(a.compareTo(b) < 0); + assertTrue(b.compareTo(c) < 0); + assertTrue(a.compareTo(c) < 0); + } + + @Test @Ignore + public void testWellDefinedComparison1() { + Rectangle a = new Rectangle(2,0,2,2); + Rectangle b = new Rectangle(1,1,2,2); + Rectangle c = new Rectangle(0,2,2,2); + List l1 = new ArrayList<>(Arrays.asList(b, a, c)); + List l2 = new ArrayList<>(Arrays.asList(c, b, a)); + 
QuickSort.sort(l1, Rectangle.ILL_DEFINED_ORDER); + QuickSort.sort(l2, Rectangle.ILL_DEFINED_ORDER); + assertEquals(l1.get(0), l2.get(0)); + assertEquals(l1.get(1), l2.get(1)); + assertEquals(l1.get(2), l2.get(2)); + } + } diff --git a/src/test/java/technology/tabula/TestRectangleSpatialIndex.java b/src/test/java/technology/tabula/TestRectangleSpatialIndex.java new file mode 100644 index 00000000..46eb1ea3 --- /dev/null +++ b/src/test/java/technology/tabula/TestRectangleSpatialIndex.java @@ -0,0 +1,21 @@ +package technology.tabula; + +import static org.junit.Assert.*; + +import org.junit.Test; + +public class TestRectangleSpatialIndex { + + @Test + public void testIntersects() { + + Rectangle r = new Rectangle(0, 0, 0, 0); + + RectangleSpatialIndex rSpatialIndex = new RectangleSpatialIndex<>(); + rSpatialIndex.add(r); + + assertTrue(rSpatialIndex.intersects(r).size() > 0); + + } + +} diff --git a/src/test/java/technology/tabula/TestRuling.java b/src/test/java/technology/tabula/TestRuling.java new file mode 100644 index 00000000..e21e3a27 --- /dev/null +++ b/src/test/java/technology/tabula/TestRuling.java @@ -0,0 +1,107 @@ +package technology.tabula; + +import static org.junit.Assert.*; + +import org.junit.Before; +import org.junit.Test; + +public class TestRuling { + + Ruling ruling; + + @Before + public void setUpRuling() { + ruling = new Ruling(0, 0, 10, 10); + } + + @Test + public void testGetWidth() { + assertEquals(10f, ruling.getWidth(), 1e-5); + } + + @Test + public void testGetHeight() { + assertEquals(10f, ruling.getHeight(), 1e-5); + } + + @Test + public void testToString() { + assertEquals("class technology.tabula.Ruling[x1=0.000000 y1=0.000000 x2=10.000000 y2=10.000000]",ruling.toString()); + } + + @Test + public void testEqualsOther() { + Ruling other = new Ruling(0, 0, 11, 10); + assertTrue(ruling.equals(ruling)); + } + + @Test + public void testEqualsDifferentInstance() { + assertFalse(ruling.equals("test")); + } + + @Test + public void 
testNearlyIntersects(){ + Ruling another = new Ruling(0, 0, 11, 10); + + assertTrue(ruling.nearlyIntersects(another)); + } + + @Test(expected = UnsupportedOperationException.class) + public void testGetPositionError(){ + Ruling other = new Ruling(0, 0, 1, 1); + other.getPosition(); + fail(); + } + + @Test(expected = UnsupportedOperationException.class) + public void testSetPositionError(){ + Ruling other = new Ruling(0, 0, 1, 1); + other.setPosition(5f); + fail(); + } + + @Test(expected = UnsupportedOperationException.class) + public void testsetPosition(){ + ruling.setPosition(0); + } + + @Test(expected = UnsupportedOperationException.class) + public void testGetStartError(){ + Ruling other = new Ruling(0, 0, 1, 1); + other.getStart(); + fail(); + } + + @Test(expected = UnsupportedOperationException.class) + public void testGetEndError(){ + Ruling other = new Ruling(0, 0, 1, 1); + other.getEnd(); + fail(); + } + + @Test(expected = UnsupportedOperationException.class) + public void testSetEndError(){ + Ruling other = new Ruling(0, 0, 1, 1); + other.setEnd(5f); + fail(); + } + + + @Test + public void testColinear(){ +// Ruling another = new Ruling(0, 0, 500, 5); + java.awt.geom.Point2D.Float float1 = new java.awt.geom.Point2D.Float(20, 20); + java.awt.geom.Point2D.Float float2 = new java.awt.geom.Point2D.Float(0, 0); + java.awt.geom.Point2D.Float float3 = new java.awt.geom.Point2D.Float(20, 0); + java.awt.geom.Point2D.Float float4 = new java.awt.geom.Point2D.Float(0, 20); + + assertFalse(ruling.colinear(float1)); + assertTrue(ruling.colinear(float2)); + assertFalse(ruling.colinear(float3)); + assertFalse(ruling.colinear(float4)); + + + } + +} diff --git a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java b/src/test/java/technology/tabula/TestSpreadsheetExtractor.java index 3e1fbd09..fb5cd9bc 100644 --- a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java +++ b/src/test/java/technology/tabula/TestSpreadsheetExtractor.java @@ -1,165 +1,41 @@ 
package technology.tabula; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import java.awt.geom.Point2D; +import java.io.File; import java.io.IOException; +import java.nio.charset.Charset; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVRecord; import org.junit.Test; -import technology.tabula.Cell; -import technology.tabula.Page; -import technology.tabula.Rectangle; -import technology.tabula.Ruling; import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; import technology.tabula.writers.CSVWriter; -import technology.tabula.UtilsForTesting; import technology.tabula.writers.JSONWriter; public class TestSpreadsheetExtractor { - private static final Cell[] CELLS = new Cell[] { - new Cell(40.0f, 18.0f, 208.0f, 4.0f), - new Cell(44.0f, 18.0f, 52.0f, 6.0f), - new Cell(50.0f, 18.0f, 52.0f, 4.0f), - new Cell(54.0f, 18.0f, 52.0f, 6.0f), - new Cell(60.0f, 18.0f, 52.0f, 4.0f), - new Cell(64.0f, 18.0f, 52.0f, 6.0f), - new Cell(70.0f, 18.0f, 52.0f, 4.0f), - new Cell(74.0f, 18.0f, 52.0f, 6.0f), - new Cell(90.0f, 18.0f, 52.0f, 4.0f), - new Cell(94.0f, 18.0f, 52.0f, 6.0f), - new Cell(100.0f, 18.0f, 52.0f, 28.0f), - new Cell(128.0f, 18.0f, 52.0f, 4.0f), - new Cell(132.0f, 18.0f, 52.0f, 64.0f), - new Cell(196.0f, 18.0f, 52.0f, 66.0f), - new Cell(262.0f, 18.0f, 52.0f, 4.0f), - new Cell(266.0f, 18.0f, 52.0f, 84.0f), - new Cell(350.0f, 18.0f, 52.0f, 4.0f), - new Cell(354.0f, 18.0f, 52.0f, 32.0f), - new Cell(386.0f, 18.0f, 52.0f, 38.0f), - new Cell(424.0f, 18.0f, 52.0f, 18.0f), - new Cell(442.0f, 18.0f, 52.0f, 74.0f), - new Cell(516.0f, 18.0f, 52.0f, 28.0f), - new Cell(544.0f, 18.0f, 52.0f, 4.0f), - new Cell(44.0f, 70.0f, 156.0f, 6.0f), - new Cell(50.0f, 70.0f, 
156.0f, 4.0f), - new Cell(54.0f, 70.0f, 156.0f, 6.0f), - new Cell(60.0f, 70.0f, 156.0f, 4.0f), - new Cell(64.0f, 70.0f, 156.0f, 6.0f), - new Cell(70.0f, 70.0f, 156.0f, 4.0f), - new Cell(74.0f, 70.0f, 156.0f, 6.0f), - new Cell(84.0f, 70.0f, 2.0f, 6.0f), - new Cell(90.0f, 70.0f, 156.0f, 4.0f), - new Cell(94.0f, 70.0f, 156.0f, 6.0f), - new Cell(100.0f, 70.0f, 156.0f, 28.0f), - new Cell(128.0f, 70.0f, 156.0f, 4.0f), - new Cell(132.0f, 70.0f, 156.0f, 64.0f), - new Cell(196.0f, 70.0f, 156.0f, 66.0f), - new Cell(262.0f, 70.0f, 156.0f, 4.0f), - new Cell(266.0f, 70.0f, 156.0f, 84.0f), - new Cell(350.0f, 70.0f, 156.0f, 4.0f), - new Cell(354.0f, 70.0f, 156.0f, 32.0f), - new Cell(386.0f, 70.0f, 156.0f, 38.0f), - new Cell(424.0f, 70.0f, 156.0f, 18.0f), - new Cell(442.0f, 70.0f, 156.0f, 74.0f), - new Cell(516.0f, 70.0f, 156.0f, 28.0f), - new Cell(544.0f, 70.0f, 156.0f, 4.0f), - new Cell(84.0f, 72.0f, 446.0f, 6.0f), - new Cell(90.0f, 226.0f, 176.0f, 4.0f), - new Cell(94.0f, 226.0f, 176.0f, 6.0f), - new Cell(100.0f, 226.0f, 176.0f, 28.0f), - new Cell(128.0f, 226.0f, 176.0f, 4.0f), - new Cell(132.0f, 226.0f, 176.0f, 64.0f), - new Cell(196.0f, 226.0f, 176.0f, 66.0f), - new Cell(262.0f, 226.0f, 176.0f, 4.0f), - new Cell(266.0f, 226.0f, 176.0f, 84.0f), - new Cell(350.0f, 226.0f, 176.0f, 4.0f), - new Cell(354.0f, 226.0f, 176.0f, 32.0f), - new Cell(386.0f, 226.0f, 176.0f, 38.0f), - new Cell(424.0f, 226.0f, 176.0f, 18.0f), - new Cell(442.0f, 226.0f, 176.0f, 74.0f), - new Cell(516.0f, 226.0f, 176.0f, 28.0f), - new Cell(544.0f, 226.0f, 176.0f, 4.0f), - new Cell(90.0f, 402.0f, 116.0f, 4.0f), - new Cell(94.0f, 402.0f, 116.0f, 6.0f), - new Cell(100.0f, 402.0f, 116.0f, 28.0f), - new Cell(128.0f, 402.0f, 116.0f, 4.0f), - new Cell(132.0f, 402.0f, 116.0f, 64.0f), - new Cell(196.0f, 402.0f, 116.0f, 66.0f), - new Cell(262.0f, 402.0f, 116.0f, 4.0f), - new Cell(266.0f, 402.0f, 116.0f, 84.0f), - new Cell(350.0f, 402.0f, 116.0f, 4.0f), - new Cell(354.0f, 402.0f, 116.0f, 32.0f), - new Cell(386.0f, 
402.0f, 116.0f, 38.0f), - new Cell(424.0f, 402.0f, 116.0f, 18.0f), - new Cell(442.0f, 402.0f, 116.0f, 74.0f), - new Cell(516.0f, 402.0f, 116.0f, 28.0f), - new Cell(544.0f, 402.0f, 116.0f, 4.0f), - new Cell(84.0f, 518.0f, 246.0f, 6.0f), - new Cell(90.0f, 518.0f, 186.0f, 4.0f), - new Cell(94.0f, 518.0f, 186.0f, 6.0f), - new Cell(100.0f, 518.0f, 186.0f, 28.0f), - new Cell(128.0f, 518.0f, 186.0f, 4.0f), - new Cell(132.0f, 518.0f, 186.0f, 64.0f), - new Cell(196.0f, 518.0f, 186.0f, 66.0f), - new Cell(262.0f, 518.0f, 186.0f, 4.0f), - new Cell(266.0f, 518.0f, 186.0f, 84.0f), - new Cell(350.0f, 518.0f, 186.0f, 4.0f), - new Cell(354.0f, 518.0f, 186.0f, 32.0f), - new Cell(386.0f, 518.0f, 186.0f, 38.0f), - new Cell(424.0f, 518.0f, 186.0f, 18.0f), - new Cell(442.0f, 518.0f, 186.0f, 74.0f), - new Cell(516.0f, 518.0f, 186.0f, 28.0f), - new Cell(544.0f, 518.0f, 186.0f, 4.0f), - new Cell(90.0f, 704.0f, 60.0f, 4.0f), - new Cell(94.0f, 704.0f, 60.0f, 6.0f), - new Cell(100.0f, 704.0f, 60.0f, 28.0f), - new Cell(128.0f, 704.0f, 60.0f, 4.0f), - new Cell(132.0f, 704.0f, 60.0f, 64.0f), - new Cell(196.0f, 704.0f, 60.0f, 66.0f), - new Cell(262.0f, 704.0f, 60.0f, 4.0f), - new Cell(266.0f, 704.0f, 60.0f, 84.0f), - new Cell(350.0f, 704.0f, 60.0f, 4.0f), - new Cell(354.0f, 704.0f, 60.0f, 32.0f), - new Cell(386.0f, 704.0f, 60.0f, 38.0f), - new Cell(424.0f, 704.0f, 60.0f, 18.0f), - new Cell(442.0f, 704.0f, 60.0f, 74.0f), - new Cell(516.0f, 704.0f, 60.0f, 28.0f), - new Cell(544.0f, 704.0f, 60.0f, 4.0f), - new Cell(84.0f, 764.0f, 216.0f, 6.0f), - new Cell(90.0f, 764.0f, 216.0f, 4.0f), - new Cell(94.0f, 764.0f, 216.0f, 6.0f), - new Cell(100.0f, 764.0f, 216.0f, 28.0f), - new Cell(128.0f, 764.0f, 216.0f, 4.0f), - new Cell(132.0f, 764.0f, 216.0f, 64.0f), - new Cell(196.0f, 764.0f, 216.0f, 66.0f), - new Cell(262.0f, 764.0f, 216.0f, 4.0f), - new Cell(266.0f, 764.0f, 216.0f, 84.0f), - new Cell(350.0f, 764.0f, 216.0f, 4.0f), - new Cell(354.0f, 764.0f, 216.0f, 32.0f), - new Cell(386.0f, 764.0f, 216.0f, 
38.0f), - new Cell(424.0f, 764.0f, 216.0f, 18.0f), - new Cell(442.0f, 764.0f, 216.0f, 74.0f), - new Cell(516.0f, 764.0f, 216.0f, 28.0f), - new Cell(544.0f, 764.0f, 216.0f, 4.0f) }; - + public static final Rectangle[] EXPECTED_RECTANGLES = { - new Rectangle(40.0f, 18.0f, 208.0f, 40.0f), - new Rectangle(84.0f, 18.0f, 962.0f, 464.0f) + new Rectangle(40.0f, 18.0f, 208.0f, 40.0f), + new Rectangle(84.0f, 18.0f, 962.0f, 464.0f) }; - - private static final Ruling[] VERTICAL_RULING_LINES = { + + private static final Ruling[] VERTICAL_RULING_LINES = { new Ruling(40.0f, 18.0f, 0.0f, 40.0f), new Ruling(44.0f, 70.0f, 0.0f, 36.0f), - new Ruling(40.0f, 226.0f, 0.0f, 40.0f) - }; + new Ruling(40.0f, 226.0f, 0.0f, 40.0f) + }; private static final Ruling[] HORIZONTAL_RULING_LINES = { new Ruling(40.0f, 18.0f, 208.0f, 0.0f), @@ -170,10 +46,10 @@ public class TestSpreadsheetExtractor { new Ruling(64.0f, 18.0f, 208.0f, 0.0f), new Ruling(70.0f, 18.0f, 208.0f, 0.0f), new Ruling(74.0f, 18.0f, 208.0f, 0.0f), - new Ruling(80.0f, 18.0f, 208.0f, 0.0f) + new Ruling(80.0f, 18.0f, 208.0f, 0.0f) }; - - private static final Cell[] EXPECTED_CELLS = { + + private static final Cell[] EXPECTED_CELLS = { new Cell(40.0f, 18.0f, 208.0f, 4.0f), new Cell(44.0f, 18.0f, 52.0f, 6.0f), new Cell(50.0f, 18.0f, 52.0f, 4.0f), @@ -188,34 +64,34 @@ public class TestSpreadsheetExtractor { new Cell(60.0f, 70.0f, 156.0f, 4.0f), new Cell(64.0f, 70.0f, 156.0f, 6.0f), new Cell(70.0f, 70.0f, 156.0f, 4.0f), - new Cell(74.0f, 70.0f, 156.0f, 6.0f) }; - + new Cell(74.0f, 70.0f, 156.0f, 6.0f)}; + private static final Ruling[][] SINGLE_CELL_RULINGS = { - { - new Ruling(new Point2D.Float(151.653545f, 185.66929f), new Point2D.Float(380.73438f, 185.66929f)), - new Ruling(new Point2D.Float(151.653545f, 314.64567f), new Point2D.Float(380.73438f, 314.64567f)) - }, - { - new Ruling(new Point2D.Float(151.653545f, 185.66929f), new Point2D.Float(151.653545f, 314.64567f)), - new Ruling(new Point2D.Float(380.73438f, 185.66929f), new 
Point2D.Float(380.73438f, 314.64567f)) - } + { + new Ruling(new Point2D.Float(151.653545f, 185.66929f), new Point2D.Float(380.73438f, 185.66929f)), + new Ruling(new Point2D.Float(151.653545f, 314.64567f), new Point2D.Float(380.73438f, 314.64567f)) + }, + { + new Ruling(new Point2D.Float(151.653545f, 185.66929f), new Point2D.Float(151.653545f, 314.64567f)), + new Ruling(new Point2D.Float(380.73438f, 185.66929f), new Point2D.Float(380.73438f, 314.64567f)) + } }; - + private static final Ruling[][] TWO_SINGLE_CELL_RULINGS = { - { - new Ruling(new Point2D.Float(151.653545f, 185.66929f), new Point2D.Float(287.4074f, 185.66929f)), - new Ruling(new Point2D.Float(151.653545f, 262.101f), new Point2D.Float(287.4074f, 262.101f)), - new Ruling(new Point2D.Float(232.44095f, 280.62992f), new Point2D.Float(368.1948f, 280.62992f)), - new Ruling(new Point2D.Float(232.44095f, 357.06164f), new Point2D.Float(368.1948f, 357.06164f)) - }, - { - new Ruling(new Point2D.Float(151.653545f, 185.66929f), new Point2D.Float(151.653545f, 262.101f)), - new Ruling(new Point2D.Float(287.4074f, 185.66929f), new Point2D.Float(287.4074f, 262.101f)), - new Ruling(new Point2D.Float(232.44095f, 280.62992f), new Point2D.Float(232.44095f, 357.06164f)), - new Ruling(new Point2D.Float(368.1948f, 280.62992f), new Point2D.Float(368.1948f, 357.06164f)) - } + { + new Ruling(new Point2D.Float(151.653545f, 185.66929f), new Point2D.Float(287.4074f, 185.66929f)), + new Ruling(new Point2D.Float(151.653545f, 262.101f), new Point2D.Float(287.4074f, 262.101f)), + new Ruling(new Point2D.Float(232.44095f, 280.62992f), new Point2D.Float(368.1948f, 280.62992f)), + new Ruling(new Point2D.Float(232.44095f, 357.06164f), new Point2D.Float(368.1948f, 357.06164f)) + }, + { + new Ruling(new Point2D.Float(151.653545f, 185.66929f), new Point2D.Float(151.653545f, 262.101f)), + new Ruling(new Point2D.Float(287.4074f, 185.66929f), new Point2D.Float(287.4074f, 262.101f)), + new Ruling(new Point2D.Float(232.44095f, 280.62992f), new 
Point2D.Float(232.44095f, 357.06164f)), + new Ruling(new Point2D.Float(368.1948f, 280.62992f), new Point2D.Float(368.1948f, 357.06164f)) + } }; - + private static final Ruling[] EXTERNALLY_DEFINED_RULINGS = { new Ruling(new Point2D.Float(320.0f, 285.0f), new Point2D.Float(564.4409f, 285.0f)), new Ruling(new Point2D.Float(320.0f, 457.0f), new Point2D.Float(564.4409f, 457.0f)), @@ -232,33 +108,33 @@ public class TestSpreadsheetExtractor { new Ruling(new Point2D.Float(565.0f, 285.0f), new Point2D.Float(564.4409f, 457.0f)), new Ruling(new Point2D.Float(470.5542f, 285.0f), new Point2D.Float(470.36865f, 457.0f)) }; - + private static final Ruling[] EXTERNALLY_DEFINED_RULINGS2 = { - new Ruling(new Point2D.Float(51.796964f, 180.0f), new Point2D.Float(560.20312f, 180.0f)), - new Ruling(new Point2D.Float(51.797017f, 219.0f), new Point2D.Float(560.2031f, 219.0f)), - new Ruling(new Point2D.Float(51.797f, 239.0f), new Point2D.Float(560.2031f, 239.0f)), - new Ruling(new Point2D.Float(51.797f, 262.0f), new Point2D.Float(560.20312f, 262.0f)), - new Ruling(new Point2D.Float(51.797f, 283.50247f), new Point2D.Float(560.05024f, 283.50247f)), - new Ruling(new Point2D.Float(51.796964f, 309.0f), new Point2D.Float(560.20312f, 309.0f)), - new Ruling(new Point2D.Float(51.796982f, 333.0f), new Point2D.Float(560.20312f, 333.0f)), - new Ruling(new Point2D.Float(51.797f, 366.0f), new Point2D.Float(560.20312f, 366.0f)), - - - new Ruling(new Point2D.Float(52.0f, 181.0f), new Point2D.Float(51.797f, 366.0f)), - new Ruling(new Point2D.Float(208.62891f, 181.0f), new Point2D.Float(208.62891f, 366.0f)), - new Ruling(new Point2D.Float(357.11328f, 180.0f), new Point2D.Float(357.0f, 366.0f)), - new Ruling(new Point2D.Float(560.11328f, 180.0f), new Point2D.Float(560.0f, 366.0f)) + new Ruling(new Point2D.Float(51.796964f, 180.0f), new Point2D.Float(560.20312f, 180.0f)), + new Ruling(new Point2D.Float(51.797017f, 219.0f), new Point2D.Float(560.2031f, 219.0f)), + new Ruling(new Point2D.Float(51.797f, 239.0f), 
new Point2D.Float(560.2031f, 239.0f)), + new Ruling(new Point2D.Float(51.797f, 262.0f), new Point2D.Float(560.20312f, 262.0f)), + new Ruling(new Point2D.Float(51.797f, 283.50247f), new Point2D.Float(560.05024f, 283.50247f)), + new Ruling(new Point2D.Float(51.796964f, 309.0f), new Point2D.Float(560.20312f, 309.0f)), + new Ruling(new Point2D.Float(51.796982f, 333.0f), new Point2D.Float(560.20312f, 333.0f)), + new Ruling(new Point2D.Float(51.797f, 366.0f), new Point2D.Float(560.20312f, 366.0f)), + + + new Ruling(new Point2D.Float(52.0f, 181.0f), new Point2D.Float(51.797f, 366.0f)), + new Ruling(new Point2D.Float(208.62891f, 181.0f), new Point2D.Float(208.62891f, 366.0f)), + new Ruling(new Point2D.Float(357.11328f, 180.0f), new Point2D.Float(357.0f, 366.0f)), + new Ruling(new Point2D.Float(560.11328f, 180.0f), new Point2D.Float(560.0f, 366.0f)) }; - + @Test public void testLinesToCells() { List cells = SpreadsheetExtractionAlgorithm.findCells(Arrays.asList(HORIZONTAL_RULING_LINES), Arrays.asList(VERTICAL_RULING_LINES)); - Collections.sort(cells); + Collections.sort(cells, Rectangle.ILL_DEFINED_ORDER); List expected = Arrays.asList(EXPECTED_CELLS); - Collections.sort(expected); + Collections.sort(expected, Rectangle.ILL_DEFINED_ORDER); assertTrue(cells.equals(expected)); } - + @Test public void testDetectSingleCell() { List cells = SpreadsheetExtractionAlgorithm.findCells(Arrays.asList(SINGLE_CELL_RULINGS[0]), @@ -279,18 +155,31 @@ public void testDetectTwoSingleCells() { // should not overlap assertFalse(cells.get(0).intersects(cells.get(1))); } - + @Test - public void testFindSpreadsheetsFromCells() { - SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); - List cells = Arrays.asList(CELLS); + public void testFindSpreadsheetsFromCells() throws IOException { + + CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File("src/test/resources/technology/tabula/csv/TestSpreadsheetExtractor-CELLS.csv"), + Charset.forName("utf-8"), + 
CSVFormat.DEFAULT); + + List cells = new ArrayList<>(); + + for (CSVRecord record : parse) { + cells.add(new Cell(Float.parseFloat(record.get(0)), + Float.parseFloat(record.get(1)), + Float.parseFloat(record.get(2)), + Float.parseFloat(record.get(3)))); + } + + List expected = Arrays.asList(EXPECTED_RECTANGLES); - Collections.sort(expected); - List foundRectangles = se.findSpreadsheetsFromCells(cells); - Collections.sort(foundRectangles); + Collections.sort(expected, Rectangle.ILL_DEFINED_ORDER); + List foundRectangles = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells); + Collections.sort(foundRectangles, Rectangle.ILL_DEFINED_ORDER); assertTrue(foundRectangles.equals(expected)); } - + // TODO Add assertions @Test public void testSpreadsheetExtraction() throws IOException { @@ -298,7 +187,7 @@ public void testSpreadsheetExtraction() throws IOException { .getAreaFromFirstPage( "src/test/resources/technology/tabula/argentina_diputados_voting_record.pdf", 269.875f, 12.75f, 790.5f, 561f); - + SpreadsheetExtractionAlgorithm.findCells(page.getHorizontalRulings(), page.getVerticalRulings()); } @@ -308,16 +197,32 @@ public void testSpanningCells() throws IOException { .getPage("src/test/resources/technology/tabula/spanning_cells.pdf", 1); String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/spanning_cells.json"); SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); - List tables = se.extract(page); + List
tables = se.extract(page); assertEquals(2, tables.size()); - + StringBuilder sb = new StringBuilder(); - (new JSONWriter()).write(sb, (List
) tables); + (new JSONWriter()).write(sb, tables); assertEquals(expectedJson, sb.toString()); } - + + @Test + public void testSpanningCellsToCsv() throws IOException { + Page page = UtilsForTesting + .getPage("src/test/resources/technology/tabula/spanning_cells.pdf", 1); + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spanning_cells.csv"); + SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); + List
tables = se.extract(page); + assertEquals(2, tables.size()); + + + StringBuilder sb = new StringBuilder(); + (new CSVWriter()).write(sb, tables); + assertEquals(expectedCsv, sb.toString()); + + } + @Test public void testIncompleteGrid() throws IOException { Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/china.pdf", 1); @@ -325,7 +230,7 @@ public void testIncompleteGrid() throws IOException { List tables = se.extract(page); assertEquals(2, tables.size()); } - + @Test public void testNaturalOrderOfRectanglesDoesNotBreakContract() throws IOException { Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/us-017.pdf", 2); @@ -334,31 +239,31 @@ public void testNaturalOrderOfRectanglesDoesNotBreakContract() throws IOExceptio StringBuilder sb = new StringBuilder(); (new CSVWriter()).write(sb, tables.get(0)); - + String result = sb.toString(); - String expected = "Project,Agency,Institution\r\nNanotechnology and its publics,NSF,Pennsylvania State University\r\n\"Public information and deliberation in nanoscience and \rnanotechnology policy (SGER)\",Interagency,\"North Carolina State \rUniversity\"\r\n\"Social and ethical research and education in agrifood \rnanotechnology (NIRT)\",NSF,Michigan State University\r\n\"From laboratory to society: developing an informed \rapproach to nanoscale science and engineering (NIRT)\",NSF,University of South Carolina\r\nDatabase and innovation timeline for nanotechnology,NSF,UCLA\r\nSocial and ethical dimensions of nanotechnology,NSF,University of Virginia\r\n\"Undergraduate exploration of nanoscience, \rapplications and societal implications (NUE)\",NSF,\"Michigan Technological \rUniversity\"\r\n\"Ethics and belief inside the development of \rnanotechnology (CAREER)\",NSF,University of Virginia\r\n\"All centers, NNIN and NCN have a societal \rimplications components\",\"NSF, DOE, \rDOD, and NIH\",\"All nanotechnology centers \rand networks\"\r\n"; - + String expected = 
"Project,Agency,Institution\r\nNanotechnology and its publics,NSF,Pennsylvania State University\r\n\"Public information and deliberation in nanoscience and\rnanotechnology policy (SGER)\",Interagency,\"North Carolina State\rUniversity\"\r\n\"Social and ethical research and education in agrifood\rnanotechnology (NIRT)\",NSF,Michigan State University\r\n\"From laboratory to society: developing an informed\rapproach to nanoscale science and engineering (NIRT)\",NSF,University of South Carolina\r\nDatabase and innovation timeline for nanotechnology,NSF,UCLA\r\nSocial and ethical dimensions of nanotechnology,NSF,University of Virginia\r\n\"Undergraduate exploration of nanoscience,\rapplications and societal implications (NUE)\",NSF,\"Michigan Technological\rUniversity\"\r\n\"Ethics and belief inside the development of\rnanotechnology (CAREER)\",NSF,University of Virginia\r\n\"All centers, NNIN and NCN have a societal\rimplications components\",\"NSF, DOE,\rDOD, and NIH\",\"All nanotechnology centers\rand networks\"\r\n"; + assertEquals(expected, result); } - + @Test public void testMergeLinesCloseToEachOther() throws IOException { Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/20.pdf", 1); List rulings = page.getVerticalRulings(); - float[] expectedRulings = new float[] { 105.549774f,107.52332f,160.58167f,377.1792f,434.95804f,488.21783f }; + float[] expectedRulings = new float[]{105.549774f, 160.58167f, 377.1792f, 434.95804f, 488.21783f}; for (int i = 0; i < rulings.size(); i++) { assertEquals(expectedRulings[i], rulings.get(i).getLeft(), 0.1); } - assertEquals(6, rulings.size()); + assertEquals(5, rulings.size()); + - } @Test public void testSpreadsheetWithNoBoundingFrameShouldBeSpreadsheet() throws IOException { Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf", 1, - 150.56f,58.9f,654.7f,536.12f); - + 150.56f, 58.9f, 654.7f, 536.12f); + String expectedCsv = 
UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv"); SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); @@ -367,11 +272,11 @@ public void testSpreadsheetWithNoBoundingFrameShouldBeSpreadsheet() throws IOExc List tables = se.extract(page); StringBuilder sb = new StringBuilder(); (new CSVWriter()).write(sb, tables.get(0)); - + assertEquals(expectedCsv, sb.toString()); - + } - + @Test public void testExtractSpreadsheetWithinAnArea() throws IOException { Page page = UtilsForTesting.getAreaFromPage( @@ -382,40 +287,40 @@ public void testExtractSpreadsheetWithinAnArea() throws IOException { List tables = se.extract(page); Table table = tables.get(0); assertEquals(15, table.getRows().size()); - - String expected = "\"\",TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM\n" + - "Peces vivos,1,25,1,23,2,38,1,37,2,67,2,89,1\n" + - "\"Pescado fresco\n" + - "o refrigerado.\n" + - "exc. filetes\",7.704,7.175,8.931,6.892,12.635,10.255,16.742,13.688,14.357,11.674,13.035,13.429,9.727\n" + - "\"Pescado congelado\n" + - "exc. filetes\",90.560,105.950,112.645,108.416,132.895,115.874,152.767,133.765,148.882,134.847,156.619,165.134,137.179\n" + - "\"Filetes y demás car-\n" + - "nes de pescado\",105.434,200.563,151.142,218.389,152.174,227.780,178.123,291.863,169.422,313.735,176.427,381.640,144.814\n" + - "\"Pescado sec./sal./\n" + - "en salm. har./pol./\n" + - "pell. aptos\n" + - "p/c humano\",6.837,14.493,6.660,9.167,14.630,17.579,18.150,21.302,18.197,25.739,13.460,23.549,11.709\n" + - "Crustáceos,61.691,375.798,52.488,251.043,47.635,387.783,27.815,217.443,7.123,86.019,39.488,373.583,45.191\n" + - "Moluscos,162.027,174.507,109.436,111.443,90.834,104.741,57.695,109.141,98.182,206.304,187.023,251.352,157.531\n" + - "\"Prod. no exp. 
en\n" + - "otros capítulos.\n" + - "No apto p/c humano\",203,328,7,35,521,343,\"1,710\",\"1,568\",125,246,124,263,131\n" + - "\"Grasas y aceites de\n" + - "pescado y mamíferos\n" + - "marinos\",913,297,\"1,250\",476,\"1,031\",521,\"1,019\",642,690,483,489,710,959\n" + - "\"Extractos y jugos de\n" + - "pescado y mariscos\",5,25,1,3,4,4,31,93,39,117,77,230,80\n" + - "\"Preparaciones y con-\n" + - "servas de pescado\",846,\"3,737\",\"1,688\",\"4,411\",\"1,556\",\"3,681\",\"2,292\",\"5,474\",\"2,167\",\"7,494\",\"2,591\",\"8,833\",\"2,795\"\n" + - "\"Preparaciones y con-\n" + - "servas de mariscos\",348,\"3,667\",345,\"1,771\",738,\"3,627\",561,\"2,620\",607,\"3,928\",314,\"2,819\",250\n" + - "\"Harina, polvo y pe-\n" + - "llets de pescado.No\n" + - "aptos p/c humano\",\"16,947\",\"8,547\",\"11,867\",\"6,315\",\"32,528\",\"13,985\",\"37,313\",\"18,989\",\"35,787\",\"19,914\",\"37,821\",\"27,174\",\"30,000\"\n" + + + String expected = "\"\",TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM\n" + + "Peces vivos,1,25,1,23,2,38,1,37,2,67,2,89,1\n" + + "\"Pescado fresco\n" + + "o refrigerado.\n" + + "exc. filetes\",7.704,7.175,8.931,6.892,12.635,10.255,16.742,13.688,14.357,11.674,13.035,13.429,9.727\n" + + "\"Pescado congelado\n" + + "exc. filetes\",90.560,105.950,112.645,108.416,132.895,115.874,152.767,133.765,148.882,134.847,156.619,165.134,137.179\n" + + "\"Filetes y demás car-\n" + + "nes de pescado\",105.434,200.563,151.142,218.389,152.174,227.780,178.123,291.863,169.422,313.735,176.427,381.640,144.814\n" + + "\"Pescado sec./sal./\n" + + "en salm. har./pol./\n" + + "pell. aptos\n" + + "p/c humano\",6.837,14.493,6.660,9.167,14.630,17.579,18.150,21.302,18.197,25.739,13.460,23.549,11.709\n" + + "Crustáceos,61.691,375.798,52.488,251.043,47.635,387.783,27.815,217.443,7.123,86.019,39.488,373.583,45.191\n" + + "Moluscos,162.027,174.507,109.436,111.443,90.834,104.741,57.695,109.141,98.182,206.304,187.023,251.352,157.531\n" + + "\"Prod. no exp. 
en\n" + + "otros capítulos.\n" + + "No apto p/c humano\",203,328,7,35,521,343,\"1,710\",\"1,568\",125,246,124,263,131\n" + + "\"Grasas y aceites de\n" + + "pescado y mamíferos\n" + + "marinos\",913,297,\"1,250\",476,\"1,031\",521,\"1,019\",642,690,483,489,710,959\n" + + "\"Extractos y jugos de\n" + + "pescado y mariscos\",5,25,1,3,4,4,31,93,39,117,77,230,80\n" + + "\"Preparaciones y con-\n" + + "servas de pescado\",846,\"3,737\",\"1,688\",\"4,411\",\"1,556\",\"3,681\",\"2,292\",\"5,474\",\"2,167\",\"7,494\",\"2,591\",\"8,833\",\"2,795\"\n" + + "\"Preparaciones y con-\n" + + "servas de mariscos\",348,\"3,667\",345,\"1,771\",738,\"3,627\",561,\"2,620\",607,\"3,928\",314,\"2,819\",250\n" + + "\"Harina, polvo y pe-\n" + + "llets de pescado.No\n" + + "aptos p/c humano\",\"16,947\",\"8,547\",\"11,867\",\"6,315\",\"32,528\",\"13,985\",\"37,313\",\"18,989\",\"35,787\",\"19,914\",\"37,821\",\"27,174\",\"30,000\"\n" + "TOTAL,\"453,515\",\"895,111\",\"456,431\",\"718,382\",\"487,183\",\"886,211\",\"494,220\",\"816,623\",\"495,580\",\"810,565\",\"627,469\",\"1,248,804\",\"540,367\"\n"; - + // TODO add better assertions StringBuilder sb = new StringBuilder(); (new CSVWriter()).write(sb, tables.get(0)); @@ -423,22 +328,22 @@ public void testExtractSpreadsheetWithinAnArea() throws IOException { List parsedExpected = org.apache.commons.csv.CSVParser.parse(expected, CSVFormat.EXCEL).getRecords(); List parsedResult = org.apache.commons.csv.CSVParser.parse(result, CSVFormat.EXCEL).getRecords(); - + assertEquals(parsedResult.size(), parsedExpected.size()); - for (int i = 0; i < parsedResult.size(); i ++) { + for (int i = 0; i < parsedResult.size(); i++) { assertEquals(parsedResult.get(i).size(), parsedExpected.get(i).size()); } - + } - + @Test public void testAlmostIntersectingRulingsShouldIntersect() { Ruling v = new Ruling(new Point2D.Float(555.960876f, 271.569641f), new Point2D.Float(555.960876f, 786.899902f)); Ruling h = new Ruling(new Point2D.Float(25.620499f, 786.899902f), new 
Point2D.Float(555.960754f, 786.899902f)); - Map m = Ruling.findIntersections(Arrays.asList(new Ruling[] { h }), Arrays.asList(new Ruling[] { v })); + Map m = Ruling.findIntersections(Arrays.asList(new Ruling[]{h}), Arrays.asList(new Ruling[]{v})); assertEquals(m.values().size(), 1); } - + // TODO add assertions @Test public void testDontRaiseSortException() throws IOException { @@ -448,9 +353,9 @@ public void testDontRaiseSortException() throws IOException { 446.0f, 97.0f, 685.0f, 520.0f); page.getText(); SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - Table table = bea.extract(page).get(0); + bea.extract(page).get(0); } - + @Test public void testShouldDetectASingleSpreadsheet() throws IOException { Page page = UtilsForTesting.getAreaFromPage( @@ -458,47 +363,64 @@ public void testShouldDetectASingleSpreadsheet() throws IOException { 1, 68.08f, 16.44f, 680.85f, 597.84f); SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) bea.extract(page); + List
tables = bea.extract(page); assertEquals(1, tables.size()); } - + @Test public void testExtractTableWithExternallyDefinedRulings() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/us-007.pdf", + Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/us-007.pdf", 1); SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) bea.extract(page, + List
tables = bea.extract(page, Arrays.asList(EXTERNALLY_DEFINED_RULINGS)); assertEquals(1, tables.size()); Table table = tables.get(0); + assertEquals("Payroll Period", table.getRows().get(0).get(0).getText()); + assertEquals("One Withholding\rAllowance", table.getRows().get(0).get(1).getText()); + assertEquals("Weekly", table.getRows().get(1).get(0).getText()); + assertEquals("$71.15", table.getRows().get(1).get(1).getText()); + assertEquals("Biweekly", table.getRows().get(2).get(0).getText()); + assertEquals("142.31", table.getRows().get(2).get(1).getText()); + assertEquals("Semimonthly", table.getRows().get(3).get(0).getText()); assertEquals("154.17", table.getRows().get(3).get(1).getText()); - + assertEquals("Monthly", table.getRows().get(4).get(0).getText()); + assertEquals("308.33", table.getRows().get(4).get(1).getText()); + assertEquals("Quarterly", table.getRows().get(5).get(0).getText()); + assertEquals("925.00", table.getRows().get(5).get(1).getText()); + assertEquals("Semiannually", table.getRows().get(6).get(0).getText()); + assertEquals("1,850.00", table.getRows().get(6).get(1).getText()); + assertEquals("Annually", table.getRows().get(7).get(0).getText()); + assertEquals("3,700.00", table.getRows().get(7).get(1).getText()); + assertEquals("Daily or Miscellaneous\r(each day of the payroll period)", table.getRows().get(8).get(0).getText()); + assertEquals("14.23", table.getRows().get(8).get(1).getText()); + } - + @Test public void testAnotherExtractTableWithExternallyDefinedRulings() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/us-024.pdf", + Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/us-024.pdf", 1); SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) bea.extract(page, + List
tables = bea.extract(page, Arrays.asList(EXTERNALLY_DEFINED_RULINGS2)); assertEquals(1, tables.size()); Table table = tables.get(0); - + assertEquals("Total Supply", table.getRows().get(4).get(0).getText()); assertEquals("6.6", table.getRows().get(6).get(2).getText()); } - + @Test public void testSpreadsheetsSortedByTopAndRight() throws IOException { Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/sydney_disclosure_contract.pdf", 1); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) sea.extract(page); + List
tables = sea.extract(page); for (int i = 1; i < tables.size(); i++) { - assert(tables.get(i-1).getTop() <= tables.get(i).getTop()); + assert (tables.get(i - 1).getTop() <= tables.get(i).getTop()); } } @@ -508,10 +430,105 @@ public void testDontStackOverflowQuicksort() throws IOException { 1); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = (List
) sea.extract(page); + List
tables = sea.extract(page); for (int i = 1; i < tables.size(); i++) { - assert(tables.get(i-1).getTop() <= tables.get(i).getTop()); + assert (tables.get(i - 1).getTop() <= tables.get(i).getTop()); } } + + @Test + public void testRTL() throws IOException { + Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/arabic.pdf", + 1); + SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); + List
tables = sea.extract(page); + // assertEquals(1, tables.size()); + Table table = tables.get(0); + + + assertEquals("اسمي سلطان", table.getRows().get(1).get(1).getText()); + assertEquals("من اين انت؟", table.getRows().get(2).get(1).getText()); + assertEquals("1234", table.getRows().get(3).get(0).getText()); + assertEquals("هل انت شباك؟", table.getRows().get(4).get(0).getText()); + assertEquals("انا من ولاية كارولينا الشمال", table.getRows().get(2).get(0).getText()); // conjoined lam-alif gets missed + assertEquals("اسمي Jeremy في الانجليزية", table.getRows().get(4).get(1).getText()); // conjoined lam-alif gets missed + assertEquals("عندي 47 قطط", table.getRows().get(3).get(1).getText()); // the real right answer is 47. + assertEquals("Jeremy is جرمي in Arabic", table.getRows().get(5).get(0).getText()); // the real right answer is 47. + assertEquals("مرحباً", table.getRows().get(1).get(0).getText()); // really ought to be ً, but this is forgiveable for now + + // there is one remaining problems that are not yet addressed + // - diacritics (e.g. Arabic's tanwinً and probably Hebrew nekudot) are put in the wrong place. + // this should get fixed, but this is a good first stab at the problem. + + // these (commented-out) tests reflect the theoretical correct answer, + // which is not currently possible because of the two problems listed above + // assertEquals("مرحباً", table.getRows().get(0).get(0).getText()); // really ought to be ً, but this is forgiveable for now + + } + + + @Test + public void testRealLifeRTL() throws IOException { + Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/mednine.pdf", + 1); + SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); + List
tables = sea.extract(page); + // assertEquals(1, tables.size()); + Table table = tables.get(0); + + assertEquals("الانتخابات التشريعية 2014", table.getRows().get(0).get(0).getText()); // the doubled spaces might be a bug in my implementation. + assertEquals("ورقة كشف نتائج دائرة مدنين", table.getRows().get(1).get(0).getText()); + assertEquals("426", table.getRows().get(4).get(0).getText()); + assertEquals("63", table.getRows().get(4).get(1).getText()); + assertEquals("43", table.getRows().get(4).get(2).getText()); + assertEquals("56", table.getRows().get(4).get(3).getText()); + assertEquals("58", table.getRows().get(4).get(4).getText()); + assertEquals("49", table.getRows().get(4).get(5).getText()); + assertEquals("55", table.getRows().get(4).get(6).getText()); + assertEquals("33", table.getRows().get(4).get(7).getText()); + assertEquals("32", table.getRows().get(4).get(8).getText()); + assertEquals("37", table.getRows().get(4).get(9).getText()); + assertEquals("قائمة من أجل تحقيق سلطة الشعب", table.getRows().get(4).get(10).getText()); + + // there is one remaining problems that are not yet addressed + // - diacritics (e.g. Arabic's tanwinً and probably Hebrew nekudot) are put in the wrong place. + // this should get fixed, but this is a good first stab at the problem. 
+ + // these (commented-out) tests reflect the theoretical correct answer, + // which is not currently possible because of the two problems listed above + // assertEquals("مرحباً", table.getRows().get(0).get(0).getText()); // really ought to be ً, but this is forgiveable for now + + } + + @Test + public void testExtractColumnsCorrectly3() throws IOException { + + Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/frx_2012_disclosure.pdf", + 106.01f, 48.09f, 227.31f, 551.89f); + SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); + Table table = sea.extract(page).get(0); + + assertEquals("REGIONAL PULMONARY & SLEEP\rMEDICINE", table.getRows().get(8).get(1).getText()); + + } + @Test + public void testSpreadsheetExtractionIssue656() throws IOException { + Page page = UtilsForTesting + .getAreaFromFirstPage( + "src/test/resources/technology/tabula/Publication_of_award_of_Bids_for_Transport_Sector__August_2016.pdf", + 56.925f,24.255f,549.945f,786.555f); + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/Publication_of_award_of_Bids_for_Transport_Sector__August_2016.csv"); + + SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); + List
tables = sea.extract(page); + assertEquals(1, tables.size()); + Table table = tables.get(0); + + StringBuilder sb = new StringBuilder(); + (new CSVWriter()).write(sb, table); + String result = sb.toString(); + assertEquals(expectedCsv, result); + } + } diff --git a/src/test/java/technology/tabula/TestTableDetection.java b/src/test/java/technology/tabula/TestTableDetection.java index 6e90a08c..3d937d60 100644 --- a/src/test/java/technology/tabula/TestTableDetection.java +++ b/src/test/java/technology/tabula/TestTableDetection.java @@ -13,7 +13,6 @@ import com.google.gson.Gson; import org.junit.AfterClass; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -48,10 +47,6 @@ private static final class TestStatus { private transient boolean firstRun; private transient String pdfFilename; - public TestStatus() { - this(null); - } - public TestStatus(String pdfFilename) { this.numExpectedTables = 0; this.numCorrectlyDetectedTables = 0; @@ -75,12 +70,12 @@ public static TestStatus load(String pdfFilename) { } public void save() { - try { - FileWriter w = new FileWriter(jsonFilename(this.pdfFilename)); + try (FileWriter w = new FileWriter(jsonFilename(this.pdfFilename))) { Gson gson = new Gson(); w.write(gson.toJson(this)); w.close(); } catch (Exception e) { + throw new Error(e); } } @@ -109,7 +104,7 @@ public static void enableLogging() { public static Collection data() { String[] regionCodes = {"eu", "us"}; - ArrayList data = new ArrayList(); + ArrayList data = new ArrayList<>(); for (String regionCode : regionCodes) { String directoryName = "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-" + regionCode + "/"; @@ -127,6 +122,24 @@ public boolean accept(File dir, String name) { } } + Collections.sort(data, new Comparator() { + @Override + public int compare(Object[] t0, Object[] t1) { + String f0 = ((File)t0[0]).getPath(); + String f1 = 
((File)t1[0]).getPath(); + + return f0.compareTo(f1); + } + }); + // src/test/resources/technology/tabula/ + + //data = new ArrayList<>(); + //data.add(new Object[] {new File("src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.pdf")}); + //data.add(new Object[] {new File("src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.pdf")}); + //data.add(new Object[] {new File("src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.pdf")}); + //data.add(new Object[] {new File("src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.pdf")}); + //data.add(new Object[] {new File("src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.pdf")}); + return data; } @@ -145,6 +158,7 @@ public TestTableDetection(File pdf) { try { this.builder = factory.newDocumentBuilder(); } catch (Exception e) { + // ignored } } @@ -170,7 +184,7 @@ public void testDetectionOfTables() throws Exception { ObjectExtractor extractor = new ObjectExtractor(pdfDocument); // parse expected tables from the ground truth dataset - Map> expectedTables = new HashMap>(); + Map> expectedTables = new HashMap<>(); int numExpectedTables = 0; @@ -189,7 +203,7 @@ public void testDetectionOfTables() throws Exception { List pageTables = expectedTables.get(page); if (pageTables == null) { - pageTables = new ArrayList(); + pageTables = new ArrayList<>(); expectedTables.put(page, pageTables); } @@ -208,7 +222,7 @@ public void testDetectionOfTables() throws Exception { } // now find tables detected by tabula-java - Map> detectedTables = new HashMap>(); + Map> detectedTables = new HashMap<>(); // the algorithm we're going to be testing NurminenDetectionAlgorithm detectionAlgorithm = new NurminenDetectionAlgorithm(); @@ -225,7 +239,7 @@ public void testDetectionOfTables() throws Exception { // now compare System.out.println("Testing " + this.pdf.getName()); - List errors = 
new ArrayList(); + List errors = new ArrayList<>(); this.status.numExpectedTables = numExpectedTables; totalExpectedTables += numExpectedTables; @@ -290,7 +304,7 @@ public void testDetectionOfTables() throws Exception { } private List comparePages(Integer page, List detected, List expected) { - ArrayList errors = new ArrayList(); + ArrayList errors = new ArrayList<>(); // go through the detected tables and try to match them with expected tables // from http://www.orsigiorgio.net/wp-content/papercite-data/pdf/gho*12.pdf (comparing regions): diff --git a/src/test/java/technology/tabula/TestTextElement.java b/src/test/java/technology/tabula/TestTextElement.java index e6ced932..feaaa5e6 100644 --- a/src/test/java/technology/tabula/TestTextElement.java +++ b/src/test/java/technology/tabula/TestTextElement.java @@ -1,6 +1,5 @@ package technology.tabula; -import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -12,7 +11,7 @@ public class TestTextElement { @Test - public void createTextElement() throws IOException { + public void createTextElement() { TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f); @@ -31,7 +30,7 @@ public void createTextElement() throws IOException { } @Test - public void createTextElementWithDirection() throws IOException { + public void createTextElementWithDirection() { TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f); @@ -52,7 +51,7 @@ public void createTextElementWithDirection() throws IOException { @Test public void mergeFourElementsIntoFourWords() { - List elements = new ArrayList(); + List elements = new ArrayList<>(); elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); elements.add(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); elements.add(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); @@ -60,7 +59,7 @@ public void 
mergeFourElementsIntoFourWords() { List words = TextElement.mergeWords(elements); - List expectedWords = new ArrayList(); + List expectedWords = new ArrayList<>(); expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f))); expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f))); expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f))); @@ -73,7 +72,7 @@ public void mergeFourElementsIntoFourWords() { @Test public void mergeFourElementsIntoOneWord() { - List elements = new ArrayList(); + List elements = new ArrayList<>(); elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); @@ -81,7 +80,7 @@ public void mergeFourElementsIntoOneWord() { List words = TextElement.mergeWords(elements); - List expectedWords = new ArrayList(); + List expectedWords = new ArrayList<>(); TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); @@ -92,10 +91,28 @@ public void mergeFourElementsIntoOneWord() { } + @Test + public void mergeElementsShouldBeIdempotent() { + /* + * a bug in TextElement.merge_words would delete the first TextElement in the array + * it was called with. 
Discussion here: https://github.com/tabulapdf/tabula-java/issues/78 + */ + + List elements = new ArrayList<>(); + elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); + elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); + elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); + elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)); + + List words = TextElement.mergeWords(elements); + List words2 = TextElement.mergeWords(elements); + Assert.assertEquals(words, words2); + } + @Test public void mergeElementsWithSkippingRules() { - List elements = new ArrayList(); + List elements = new ArrayList<>(); elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); elements.add(new TextElement(0f, 17f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); @@ -105,7 +122,7 @@ public void mergeElementsWithSkippingRules() { List words = TextElement.mergeWords(elements); - List expectedWords = new ArrayList(); + List expectedWords = new ArrayList<>(); TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)); textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)); textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)); @@ -119,7 +136,7 @@ public void mergeElementsWithSkippingRules() { @Test public void mergeTenElementsIntoTwoWords() { - List elements = new ArrayList(); + List elements = new ArrayList<>(); elements.add(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); @@ -132,7 +149,7 @@ public 
void mergeTenElementsIntoTwoWords() { List words = TextElement.mergeWords(elements); - List expectedWords = new ArrayList(); + List expectedWords = new ArrayList<>(); TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); textChunk.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); @@ -154,7 +171,7 @@ public void mergeTenElementsIntoTwoWords() { @Test public void mergeTenElementsIntoTwoLines() { - List elements = new ArrayList(); + List elements = new ArrayList<>(); elements.add(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); @@ -167,7 +184,7 @@ public void mergeTenElementsIntoTwoLines() { List words = TextElement.mergeWords(elements); - List expectedWords = new ArrayList(); + List expectedWords = new ArrayList<>(); TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f)); textChunk.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f)); textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f)); diff --git a/src/test/java/technology/tabula/TestUtils.java b/src/test/java/technology/tabula/TestUtils.java index 72967df0..e68411df 100644 --- a/src/test/java/technology/tabula/TestUtils.java +++ b/src/test/java/technology/tabula/TestUtils.java @@ -5,22 +5,26 @@ import static org.junit.Assert.assertNull; import java.awt.geom.Point2D; +import java.io.File; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; +import org.apache.pdfbox.rendering.ImageType; import org.apache.commons.cli.ParseException; 
-import org.junit.Before; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; import org.junit.Test; public class TestUtils { - - public static final Ruling[] RULINGS = { + + public static final Ruling[] RULINGS = { new Ruling(new Point2D.Float(0, 0), new Point2D.Float(1,1)), - new Ruling(new Point2D.Float(2, 2), new Point2D.Float(3,3)) + new Ruling(new Point2D.Float(2, 2), new Point2D.Float(3,3)) }; - + public static final Rectangle[] RECTANGLES = { new Rectangle(), new Rectangle(0, 0, 2, 4) @@ -35,29 +39,37 @@ public void testBoundsOfTwoRulings() { assertEquals(3, r.getWidth(), 0); assertEquals(3, r.getHeight(), 0); } - + @Test public void testBoundsOfOneEmptyRectangleAndAnotherNonEmpty() { Rectangle r = Utils.bounds(Arrays.asList(RECTANGLES)); assertEquals(r, RECTANGLES[1]); } - + + @Test + public void testBoundsOfOneRectangle() { + ArrayList shapes = new ArrayList<>(); + shapes.add(new Rectangle(0, 0, 20, 40)); + Rectangle r = Utils.bounds(shapes); + assertEquals(r, shapes.get(0)); + } + @Test public void testParsePagesOption() throws ParseException { - + List rv = Utils.parsePagesOption("1"); assertArrayEquals(new Integer[] { 1 }, rv.toArray()); - + rv = Utils.parsePagesOption("1-4"); assertArrayEquals(new Integer[] { 1,2,3,4 }, rv.toArray()); - + rv = Utils.parsePagesOption("1-4,20-24"); assertArrayEquals(new Integer[] { 1,2,3,4,20,21,22,23,24 }, rv.toArray()); - + rv = Utils.parsePagesOption("all"); assertNull(rv); } - + @Test(expected=ParseException.class) public void testExceptionInParsePages() throws ParseException { Utils.parsePagesOption("1-4,24-22"); @@ -70,42 +82,49 @@ public void testAnotherExceptionInParsePages() throws ParseException { @Test public void testQuickSortEmptyList() { - List numbers = new ArrayList(); + List numbers = new ArrayList<>(); QuickSort.sort(numbers); - + assertEquals(Collections.emptyList(), numbers); } - + @Test public void testQuickSortOneElementList() { List numbers = Arrays.asList(5); 
QuickSort.sort(numbers); - + assertEquals(Arrays.asList(5), numbers); } - + @Test public void testQuickSortShortList() { List numbers = Arrays.asList(4, 5, 6, 8, 7, 1, 2, 3); QuickSort.sort(numbers); - + assertEquals(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), numbers); } - + @Test public void testQuickSortLongList() { - - List numbers = new ArrayList(); - List expectedNumbers = new ArrayList(); - + + List numbers = new ArrayList<>(); + List expectedNumbers = new ArrayList<>(); + for(int i = 0; i <= 12000; i++){ numbers.add(12000 - i); expectedNumbers.add(i); } - + QuickSort.sort(numbers); - + assertEquals(expectedNumbers, numbers); } + @Test + public void testJPEG2000DoesNotRaise() throws IOException { + PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/jpeg2000.pdf")); + PDPage page = pdf_document.getPage(0); + Utils.pageConvertToImage(pdf_document, page, 360, ImageType.RGB); + } + } diff --git a/src/test/java/technology/tabula/TestWriters.java b/src/test/java/technology/tabula/TestWriters.java index e9bbdfbf..961d57af 100644 --- a/src/test/java/technology/tabula/TestWriters.java +++ b/src/test/java/technology/tabula/TestWriters.java @@ -1,12 +1,15 @@ package technology.tabula; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; import java.io.IOException; import java.util.List; import org.junit.Test; +import com.google.gson.Gson; +import com.google.gson.JsonArray; + import technology.tabula.extractors.BasicExtractionAlgorithm; import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; import technology.tabula.writers.CSVWriter; @@ -14,31 +17,35 @@ import technology.tabula.writers.TSVWriter; public class TestWriters { - - private static final String EXPECTED_CSV_WRITER_OUTPUT = "\"ABDALA de MATARAZZO, Norma Amanda \",\"Frente Cívico por Santiago \",\"Santiago del Estero \",AFIRMATIVO"; + + private static final String EXPECTED_CSV_WRITER_OUTPUT = "\"ABDALA de MATARAZZO, Norma Amanda\",Frente 
Cívico por Santiago,Santiago del Estero,AFIRMATIVO"; + private Table getTable() throws IOException { Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/argentina_diputados_voting_record.pdf", 269.875f, 12.75f, 790.5f, 561f); BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); Table table = bea.extract(page).get(0); return table; } + private List
getTables() throws IOException { - - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/twotables.pdf", 1); + + Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/twotables.pdf", 1); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - return (List
) sea.extract(page); + return sea.extract(page); } @Test public void testCSVWriter() throws IOException { + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/argentina_diputados_voting_record.csv"); Table table = this.getTable(); StringBuilder sb = new StringBuilder(); (new CSVWriter()).write(sb, table); String s = sb.toString(); String[] lines = s.split("\\r?\\n"); assertEquals(lines[0], EXPECTED_CSV_WRITER_OUTPUT); + assertEquals(expectedCsv, s); } - + // TODO Add assertions @Test public void testTSVWriter() throws IOException { @@ -50,39 +57,80 @@ public void testTSVWriter() throws IOException { //String[] lines = s.split("\\r?\\n"); //assertEquals(lines[0], EXPECTED_CSV_WRITER_OUTPUT); } - + @Test public void testJSONWriter() throws IOException { - String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json"); + String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json"); Table table = this.getTable(); StringBuilder sb = new StringBuilder(); (new JSONWriter()).write(sb, table); String s = sb.toString(); assertEquals(expectedJson, s); } - + @Test public void testJSONSerializeInfinity() throws IOException { - String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/schools.json"); + String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/schools.json"); Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/schools.pdf", 53.74f, 16.97f, 548.74f, 762.3f); SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); Table table = sea.extract(page).get(0); - + StringBuilder sb = new StringBuilder(); (new JSONWriter()).write(sb, table); String s = sb.toString(); assertEquals(expectedJson, s); } - + + @Test + public void testCSVSerializeInfinity() throws IOException { + String 
expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/schools.csv"); + Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/schools.pdf", 53.74f, 16.97f, 548.74f, 762.3f); + SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); + Table table = sea.extract(page).get(0); + + StringBuilder sb = new StringBuilder(); + (new CSVWriter()).write(sb, table); + String s = sb.toString(); + assertEquals(expectedCsv, s); + } + @Test public void testJSONSerializeTwoTables() throws IOException { - String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/twotables.json"); + String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/twotables.json"); List
tables = this.getTables(); StringBuilder sb = new StringBuilder(); (new JSONWriter()).write(sb, tables); + String s = sb.toString(); assertEquals(expectedJson, s); + + Gson gson = new Gson(); + JsonArray json = gson.fromJson(s, JsonArray.class); + assertEquals(2, json.size()); + } + + @Test + public void testCSVSerializeTwoTables() throws IOException { + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/twotables.csv"); + List
tables = this.getTables(); + StringBuilder sb = new StringBuilder(); + (new CSVWriter()).write(sb, tables); + + String s = sb.toString(); + assertEquals(expectedCsv, s); + } + + @Test + public void testCSVMultilineRow() throws IOException { + String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/frx_2012_disclosure.csv"); + Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/frx_2012_disclosure.pdf", 53.0f, 49.0f, 735.0f, 550.0f); + SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); + Table table = sea.extract(page).get(0); + + StringBuilder sb = new StringBuilder(); + (new CSVWriter()).write(sb, table); + String s = sb.toString(); + assertEquals(expectedCsv, s); } - } diff --git a/src/test/java/technology/tabula/UtilsForTesting.java b/src/test/java/technology/tabula/UtilsForTesting.java index 54d844fc..3ee8efde 100644 --- a/src/test/java/technology/tabula/UtilsForTesting.java +++ b/src/test/java/technology/tabula/UtilsForTesting.java @@ -1,11 +1,6 @@ package technology.tabula; -import static org.junit.Assert.assertEquals; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; +import java.io.*; import java.nio.charset.Charset; import java.util.List; @@ -13,22 +8,23 @@ import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVPrinter; import org.apache.pdfbox.pdmodel.PDDocument; +import org.junit.Assert; public class UtilsForTesting { - + public static Page getAreaFromFirstPage(String path, float top, float left, float bottom, float right) throws IOException { return getAreaFromPage(path, 1, top, left, bottom, right); } - + public static Page getAreaFromPage(String path, int page, float top, float left, float bottom, float right) throws IOException { return getPage(path, page).getArea(top, left, bottom, right); } - + public static Page getPage(String path, int pageNumber) throws IOException { 
ObjectExtractor oe = null; try { PDDocument document = PDDocument - .load(path); + .load(new File(path)); oe = new ObjectExtractor(document); Page page = oe.extract(pageNumber); return page; @@ -37,46 +33,60 @@ public static Page getPage(String path, int pageNumber) throws IOException { oe.close(); } } - - public static void assertTableEquals(Table table, String[][] arrayOfRows) { + + public static String[][] tableToArrayOfRows(Table table) { List> tableRows = table.getRows(); - assertEquals(arrayOfRows.length, tableRows.size()); - for (int i = 0; i < arrayOfRows.length; i++) { - String[] row = arrayOfRows[i]; - assertEquals(row.length, tableRows.get(i).size()); - for (int j = 0; j < row.length; j++) { - assertEquals(row[j].trim(), table.getCell(i, j).getText().trim()); + + int maxColCount = 0; + + for (int i = 0; i < tableRows.size(); i++) { + List row = tableRows.get(i); + if (maxColCount < row.size()) { + maxColCount = row.size(); + } + } + + Assert.assertEquals(maxColCount, table.getColCount()); + + String[][] rv = new String[tableRows.size()][maxColCount]; + + for (int i = 0; i < tableRows.size(); i++) { + List row = tableRows.get(i); + for (int j = 0; j < row.size(); j++) { + rv[i][j] = table.getCell(i, j).getText(); } } + + return rv; } - + public static String loadJson(String path) throws IOException { - - BufferedReader reader = new BufferedReader( new FileReader (path)); - StringBuilder stringBuilder = new StringBuilder(); - String line = null; - - while( ( line = reader.readLine() ) != null ) { - stringBuilder.append( line ); - } + + StringBuilder stringBuilder = new StringBuilder(); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"))) { + String line = null; + while ((line = reader.readLine()) != null) { + stringBuilder.append(line); + } + } return stringBuilder.toString(); - + } - + public static String loadCsv(String path) throws IOException { - - StringBuilder out = new StringBuilder(); - 
CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File(path), Charset.forName("utf-8"), CSVFormat.EXCEL); - - CSVPrinter printer = new CSVPrinter(out, CSVFormat.EXCEL); + + StringBuilder out = new StringBuilder(); + CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File(path), Charset.forName("utf-8"), CSVFormat.EXCEL); + + CSVPrinter printer = new CSVPrinter(out, CSVFormat.EXCEL); printer.printRecords(parse); printer.close(); String csv = out.toString().replaceAll("(? - +
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.json index 074b6f59..11be9878 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.json +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.json @@ -1 +1 @@ -{"numExpectedTables":4,"numCorrectlyDetectedTables":4,"numErroneouslyDetectedTables":1,"expectedFailure":true} \ No newline at end of file +{"numExpectedTables":4,"numCorrectlyDetectedTables":4,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-reg.xml index ac5c31cc..66b9caa3 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-reg.xml +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-reg.xml @@ -201,7 +201,7 @@ - + diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001.json index a2697933..41e37c7d 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001.json +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001.json @@ -1 +1 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":2,"expectedFailure":true} \ No newline at end of file +{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":3,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002-reg.xml 
b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002-reg.xml index fffd80d2..a5bb01c0 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002-reg.xml +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002-reg.xml @@ -1229,7 +1229,7 @@ - + diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002.json index 1bf5fd29..b5ff463b 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002.json +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002.json @@ -1 +1 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file +{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":1,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020-reg.xml index 549c35d2..3d2802d1 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020-reg.xml +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020-reg.xml @@ -1567,7 +1567,7 @@ - + diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020.json index 11be9878..6922285b 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020.json +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020.json @@ -1 +1 @@ 
-{"numExpectedTables":4,"numCorrectlyDetectedTables":4,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file +{"numExpectedTables":4,"numCorrectlyDetectedTables":3,"numErroneouslyDetectedTables":1,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023-reg.xml index 16fccc3c..ecafe609 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023-reg.xml +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023-reg.xml @@ -217,7 +217,7 @@ xsi:noNamespaceSchemaLocation="competition-entry-region-model.xsd" filename='bm_ - +
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023.json index 79301b7d..4ae79b84 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023.json +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023.json @@ -1 +1 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":4,"expectedFailure":true} \ No newline at end of file +{"numExpectedTables":1,"numCorrectlyDetectedTables":0,"numErroneouslyDetectedTables":5,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024-reg.xml index f0fb7c73..7893ccb0 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024-reg.xml +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024-reg.xml @@ -1026,7 +1026,7 @@ xsi:noNamespaceSchemaLocation="competition-entry-region-model.xsd" filename='bm_ - + diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024.json index e700926c..11be9878 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024.json +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024.json @@ -1 +1 @@ -{"numExpectedTables":4,"numCorrectlyDetectedTables":4,"numErroneouslyDetectedTables":2,"expectedFailure":true} \ No newline at end of file +{"numExpectedTables":4,"numCorrectlyDetectedTables":4,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git 
a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025.json index f783d0e7..1bf5fd29 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025.json +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025.json @@ -1 +1 @@ -{"numExpectedTables":6,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":3,"expectedFailure":true} \ No newline at end of file +{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034.json index eb903a1d..54ceef3a 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034.json +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034.json @@ -1 +1 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":0,"numErroneouslyDetectedTables":1,"expectedFailure":true} \ No newline at end of file +{"numExpectedTables":2,"numCorrectlyDetectedTables":0,"numErroneouslyDetectedTables":2,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a-reg.xml index 5a3876ac..944026db 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a-reg.xml +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a-reg.xml @@ -55,7 +55,7 @@ xsi:noNamespaceSchemaLocation="competition-entry-region-model.xsd" filename='bm_ - +
@@ -297,7 +297,7 @@ xsi:noNamespaceSchemaLocation="competition-entry-region-model.xsd" filename='bm_ - +
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037.json index 8dd87c33..a55497df 100644 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037.json +++ b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037.json @@ -1 +1 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":0,"numErroneouslyDetectedTables":3,"expectedFailure":true} \ No newline at end of file +{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/indictb1h_14.pdf b/src/test/resources/technology/tabula/indictb1h_14.pdf new file mode 100644 index 00000000..8850058b Binary files /dev/null and b/src/test/resources/technology/tabula/indictb1h_14.pdf differ diff --git a/src/test/resources/technology/tabula/jpeg2000.pdf b/src/test/resources/technology/tabula/jpeg2000.pdf new file mode 100644 index 00000000..815a5010 Binary files /dev/null and b/src/test/resources/technology/tabula/jpeg2000.pdf differ diff --git a/src/test/resources/technology/tabula/json/AnimalSounds1.json b/src/test/resources/technology/tabula/json/AnimalSounds1.json new file mode 100644 index 00000000..c13c6759 --- /dev/null +++ b/src/test/resources/technology/tabula/json/AnimalSounds1.json @@ -0,0 +1 @@ 
+[{"extraction_method":"lattice","top":0.006499578,"left":56.8,"width":241.1999969482422,"height":315.36407470703125,"right":298.0,"bottom":315.37057,"data":[[{"top":0.006499578,"left":56.8,"width":79.19999694824219,"height":95.31405639648438,"text":"Animal"},{"top":0.006499578,"left":136.0,"width":61.0,"height":95.31405639648438,"text":"Action"},{"top":0.006499578,"left":197.0,"width":101.0,"height":95.31405639648438,"text":"Result"}],[{"top":95.32056,"left":56.8,"width":79.19999694824219,"height":23.050010681152344,"text":"Cat"},{"top":95.32056,"left":136.0,"width":61.0,"height":23.050010681152344,"text":"Says"},{"top":95.32056,"left":197.0,"width":101.0,"height":23.050010681152344,"text":"Meow"}],[{"top":118.37057,"left":56.8,"width":79.19999694824219,"height":63.99999237060547,"text":"Parastratiosph\recomyiastratio\rsphecomyioid\res"},{"top":118.37057,"left":136.0,"width":61.0,"height":63.99999237060547,"text":"Says"},{"top":118.37057,"left":197.0,"width":101.0,"height":63.99999237060547,"text":"bzzzzzzz"}],[{"top":182.37056,"left":56.8,"width":79.19999694824219,"height":133.00001525878906,"text":"Fox"},{"top":182.37056,"left":136.0,"width":61.0,"height":133.00001525878906,"text":"Says"},{"top":182.37056,"left":197.0,"width":101.0,"height":133.00001525878906,"text":"Ring-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding"}]]},{"extraction_method":"lattice","top":0.006499578,"left":313.35715,"width":241.55941772460938,"height":259.2640380859375,"right":554.91656,"bottom":259.27054,"data":[[{"top":0.006499578,"left":313.35715,"width":77.64285278320312,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":391.0,"width":66.0,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":457.0,"width":97.91656494140625,"height":72.26405334472656,"text":""}],[{"top":72.27055,"left":313.35715,"width":77.64285278320312,"height":23.050003051757812,"text":"Animal"},{"top":72.27055,"left":391.0,"width":66
.0,"height":23.050003051757812,"text":"Action"},{"top":72.27055,"left":457.0,"width":97.91656494140625,"height":23.050003051757812,"text":"Result"}],[{"top":95.32056,"left":313.35715,"width":77.64285278320312,"height":35.94999694824219,"text":"Dogs/wolves/\rMore dogs"},{"top":95.32056,"left":391.0,"width":66.0,"height":35.94999694824219,"text":"Says"},{"top":95.32056,"left":457.0,"width":97.91656494140625,"height":35.94999694824219,"text":"Bow-wow/\rruff-ruff"}],[{"top":131.27055,"left":313.35715,"width":77.64285278320312,"height":36.40000915527344,"text":"Donkey"},{"top":131.27055,"left":391.0,"width":66.0,"height":36.40000915527344,"text":"Says"},{"top":131.27055,"left":457.0,"width":97.91656494140625,"height":36.40000915527344,"text":"Hee-Haw Hee-\rHaw"}],[{"top":167.67056,"left":313.35715,"width":77.64285278320312,"height":91.5999755859375,"text":"Fox"},{"top":167.67056,"left":391.0,"width":66.0,"height":91.5999755859375,"text":"Says"},{"top":167.67056,"left":457.0,"width":97.91656494140625,"height":91.5999755859375,"text":"Wa-pa-pa-pa-\rpa-pa-pow\rWa-pa-pa-pa-\rpa-pow\rWa-pa-pa-pa-\rpa-pa-pow"}]]}] diff --git a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json index e995b434..21ad2880 100644 --- a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json +++ b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json @@ -1 +1 @@ -{"extraction_method":"basic","top":0.0,"left":0.0,"width":549.0399780273438,"height":782.0400390625,"data":[[{"top":279.11,"left":28.56,"width":175.21029663085938,"height":7.210000038146973,"text":"ABDALA de MATARAZZO, Norma Amanda "},{"top":279.11,"left":213.72,"width":108.25027465820312,"height":7.210000038146973,"text":"Frente Cívico por Santiago "},{"top":279.83,"left":397.56,"width":79.69027709960938,"height":7.210000038146973,"text":"Santiago del Estero 
"},{"top":279.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":295.55,"left":28.56,"width":141.71029663085938,"height":7.210000038146973,"text":"ALBRIEU, Oscar Edmundo Nicolas "},{"top":295.55,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":296.39,"left":397.56,"width":41.6602783203125,"height":7.210000038146973,"text":"Rio Negro "},{"top":295.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":312.11,"left":28.56,"width":83.75028228759766,"height":7.210000038146973,"text":"ALONSO, María Luz "},{"top":312.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":312.83,"left":397.56,"width":42.250274658203125,"height":7.210000038146973,"text":"La Pampa "},{"top":312.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":328.55,"left":28.56,"width":84.25028228759766,"height":7.210000038146973,"text":"ARENA, Celia Isabel "},{"top":328.55,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":329.39,"left":397.56,"width":37.690277099609375,"height":7.210000038146973,"text":"Santa Fe "},{"top":328.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":345.11,"left":28.56,"width":110.29029846191406,"height":7.210000038146973,"text":"ARREGUI, Andrés Roberto "},{"top":345.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":345.83,"left":397.56,"width":54.71026611328125,"height":7.210000038146973,"text":"Buenos Aires "},{"top":345.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":361.55,"left":28.56,"width":115.69029235839844,"height":7.210000038146973,"text":"AVOSCAN, Herman Horacio 
"},{"top":361.55,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":362.39,"left":397.56,"width":41.6602783203125,"height":7.210000038146973,"text":"Rio Negro "},{"top":361.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":378.11,"left":28.56,"width":95.6902847290039,"height":7.210000038146973,"text":"BALCEDO, María Ester "},{"top":378.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":378.83,"left":397.56,"width":54.71026611328125,"height":7.210000038146973,"text":"Buenos Aires "},{"top":378.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":394.55,"left":28.56,"width":127.69029235839844,"height":7.210000038146973,"text":"BARRANDEGUY, Raúl Enrique "},{"top":394.55,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":395.39,"left":397.56,"width":43.6702880859375,"height":7.210000038146973,"text":"Entre Ríos "},{"top":394.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":411.11,"left":28.56,"width":106.69029235839844,"height":7.210000038146973,"text":"BASTERRA, Luis Eugenio "},{"top":411.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":411.83,"left":397.56,"width":36.73028564453125,"height":7.210000038146973,"text":"Formosa "},{"top":411.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":427.55,"left":28.56,"width":92.21028137207031,"height":7.210000038146973,"text":"BEDANO, Nora Esther "},{"top":427.55,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":428.39,"left":397.56,"width":35.6602783203125,"height":7.210000038146973,"text":"Córdoba 
"},{"top":427.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":444.11,"left":28.56,"width":100.69029235839844,"height":7.210000038146973,"text":"BERNAL, María Eugenia "},{"top":444.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":444.83,"left":397.56,"width":22.200286865234375,"height":7.210000038146973,"text":"Jujuy "},{"top":444.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":460.55,"left":28.56,"width":112.21029663085938,"height":7.210000038146973,"text":"BERTONE, Rosana Andrea "},{"top":460.55,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":461.39,"left":397.56,"width":67.21026611328125,"height":7.210000038146973,"text":"Tierra del Fuego "},{"top":460.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":477.11,"left":28.56,"width":114.73028564453125,"height":7.210000038146973,"text":"BIANCHI, María del Carmen "},{"top":477.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":477.83,"left":397.56,"width":74.6702880859375,"height":7.210000038146973,"text":"Cdad. Aut. Bs. As. 
"},{"top":477.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":493.55,"left":28.56,"width":115.19029235839844,"height":7.210000038146973,"text":"BIDEGAIN, Gloria Mercedes "},{"top":493.55,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":494.39,"left":397.56,"width":54.71026611328125,"height":7.210000038146973,"text":"Buenos Aires "},{"top":493.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":510.11,"left":28.56,"width":66.25028228759766,"height":7.210000038146973,"text":"BRAWER, Mara "},{"top":510.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":510.83,"left":397.56,"width":74.6702880859375,"height":7.210000038146973,"text":"Cdad. Aut. Bs. As. "},{"top":510.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":526.55,"left":28.56,"width":90.73028564453125,"height":7.210000038146973,"text":"BRILLO, José Ricardo "},{"top":526.55,"left":213.72,"width":121.81027221679688,"height":7.210000038146973,"text":"Movimiento Popular Neuquino "},{"top":527.39,"left":397.56,"width":37.700286865234375,"height":7.210000038146973,"text":"Neuquén "},{"top":526.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":543.11,"left":28.56,"width":120.73028564453125,"height":7.210000038146973,"text":"BROMBERG, Isaac Benjamín "},{"top":543.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":543.83,"left":397.56,"width":38.6602783203125,"height":7.210000038146973,"text":"Tucumán "},{"top":543.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":559.55,"left":28.56,"width":89.7702865600586,"height":7.210000038146973,"text":"BRUE, Daniel Agustín 
"},{"top":559.55,"left":213.72,"width":108.25027465820312,"height":7.210000038146973,"text":"Frente Cívico por Santiago "},{"top":560.39,"left":397.56,"width":79.69027709960938,"height":7.210000038146973,"text":"Santiago del Estero "},{"top":559.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":576.11,"left":28.56,"width":72.23028564453125,"height":7.210000038146973,"text":"CALCAGNO, Eric "},{"top":576.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":576.83,"left":397.56,"width":54.71026611328125,"height":7.210000038146973,"text":"Buenos Aires "},{"top":576.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":592.55,"left":28.56,"width":114.73028564453125,"height":7.210000038146973,"text":"CARLOTTO, Remo Gerardo "},{"top":592.55,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":593.39,"left":397.56,"width":54.71026611328125,"height":7.210000038146973,"text":"Buenos Aires "},{"top":592.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":609.11,"left":28.56,"width":122.1702880859375,"height":7.210000038146973,"text":"CARMONA, Guillermo Ramón "},{"top":609.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":609.83,"left":397.56,"width":38.1602783203125,"height":7.210000038146973,"text":"Mendoza "},{"top":609.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":625.55,"left":28.56,"width":124.73028564453125,"height":7.210000038146973,"text":"CATALAN MAGNI, Julio César "},{"top":625.55,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ 
"},{"top":626.39,"left":397.56,"width":67.21026611328125,"height":7.210000038146973,"text":"Tierra del Fuego "},{"top":625.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":642.11,"left":28.56,"width":88.6902847290039,"height":7.210000038146973,"text":"CEJAS, Jorge Alberto "},{"top":642.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":642.83,"left":397.56,"width":41.6602783203125,"height":7.210000038146973,"text":"Rio Negro "},{"top":642.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":658.55,"left":28.56,"width":89.7702865600586,"height":7.210000038146973,"text":"CHIENO, María Elena "},{"top":658.55,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":659.39,"left":397.56,"width":42.72027587890625,"height":7.210000038146973,"text":"Corrientes "},{"top":658.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":675.11,"left":28.56,"width":96.25028228759766,"height":7.210000038146973,"text":"CIAMPINI, José Alberto "},{"top":675.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":675.83,"left":397.56,"width":37.700286865234375,"height":7.210000038146973,"text":"Neuquén "},{"top":675.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":691.55,"left":28.56,"width":131.77029418945312,"height":7.210000038146973,"text":"CIGOGNA, Luis Francisco Jorge "},{"top":691.55,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":692.39,"left":397.56,"width":54.71026611328125,"height":7.210000038146973,"text":"Buenos Aires 
"},{"top":691.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":708.11,"left":28.56,"width":62.15028381347656,"height":7.210000038146973,"text":"CLERI, Marcos "},{"top":708.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":708.83,"left":397.56,"width":37.690277099609375,"height":7.210000038146973,"text":"Santa Fe "},{"top":708.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":724.55,"left":28.56,"width":101.77029418945312,"height":7.210000038146973,"text":"COMELLI, Alicia Marcela "},{"top":724.55,"left":213.72,"width":121.81027221679688,"height":7.210000038146973,"text":"Movimiento Popular Neuquino "},{"top":725.39,"left":397.56,"width":37.700286865234375,"height":7.210000038146973,"text":"Neuquén "},{"top":724.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":741.11,"left":28.56,"width":88.1902847290039,"height":7.210000038146973,"text":"CONTI, Diana Beatriz "},{"top":741.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":741.83,"left":397.56,"width":54.71026611328125,"height":7.210000038146973,"text":"Buenos Aires "},{"top":741.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":757.55,"left":28.56,"width":98.75028228759766,"height":7.210000038146973,"text":"CORDOBA, Stella Maris "},{"top":757.55,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":758.39,"left":397.56,"width":38.6602783203125,"height":7.210000038146973,"text":"Tucumán "},{"top":757.55,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}],[{"top":774.11,"left":28.56,"width":105.25028991699219,"height":7.210000038146973,"text":"CURRILEN, Oscar Rubén 
"},{"top":774.11,"left":213.72,"width":108.23028564453125,"height":7.210000038146973,"text":"Frente para la Victoria - PJ "},{"top":774.83,"left":397.56,"width":30.2802734375,"height":7.210000038146973,"text":"Chubut "},{"top":774.11,"left":494.04,"width":54.999969482421875,"height":7.210000038146973,"text":"AFIRMATIVO"}]]} +{"extraction_method":"stream","top":269.875,"left":12.75,"width":548.25,"height":520.625,"right":561.0,"bottom":790.5,"data":[[{"top":281.82,"left":28.56,"width":175.21029663085938,"height":4.5,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":281.82,"left":213.72,"width":108.25027465820312,"height":4.5,"text":"Frente Cívico por Santiago"},{"top":282.54,"left":397.56,"width":79.69027709960938,"height":4.5,"text":"Santiago del Estero"},{"top":281.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":298.26,"left":28.56,"width":141.71029663085938,"height":4.5,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":298.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":299.1,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio Negro"},{"top":298.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":314.82,"left":28.56,"width":83.75028228759766,"height":4.5,"text":"ALONSO, María Luz"},{"top":314.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":315.54,"left":397.56,"width":42.250274658203125,"height":4.5,"text":"La Pampa"},{"top":314.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":331.26,"left":28.56,"width":84.25028228759766,"height":4.5,"text":"ARENA, Celia Isabel"},{"top":331.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":332.1,"left":397.56,"width":37.690277099609375,"height":4.5,"text":"Santa 
Fe"},{"top":331.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":347.82,"left":28.56,"width":110.29029846191406,"height":4.5,"text":"ARREGUI, Andrés Roberto"},{"top":347.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":348.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":347.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":364.26,"left":28.56,"width":115.69029235839844,"height":4.5,"text":"AVOSCAN, Herman Horacio"},{"top":364.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":365.1,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio Negro"},{"top":364.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":380.82,"left":28.56,"width":95.6902847290039,"height":4.5,"text":"BALCEDO, María Ester"},{"top":380.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":381.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":380.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":397.26,"left":28.56,"width":127.69029235839844,"height":4.5,"text":"BARRANDEGUY, Raúl Enrique"},{"top":397.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":398.1,"left":397.56,"width":43.6702880859375,"height":4.5,"text":"Entre Ríos"},{"top":397.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":413.82,"left":28.56,"width":106.69029235839844,"height":4.5,"text":"BASTERRA, Luis Eugenio"},{"top":413.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - 
PJ"},{"top":414.54,"left":397.56,"width":36.73028564453125,"height":4.5,"text":"Formosa"},{"top":413.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":430.26,"left":28.56,"width":92.21028137207031,"height":4.5,"text":"BEDANO, Nora Esther"},{"top":430.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":431.1,"left":397.56,"width":35.6602783203125,"height":4.5,"text":"Córdoba"},{"top":430.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":446.82,"left":28.56,"width":100.69029235839844,"height":4.5,"text":"BERNAL, María Eugenia"},{"top":446.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":447.54,"left":397.56,"width":22.200286865234375,"height":4.5,"text":"Jujuy"},{"top":446.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":463.26,"left":28.56,"width":112.21029663085938,"height":4.5,"text":"BERTONE, Rosana Andrea"},{"top":463.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":464.1,"left":397.56,"width":67.21026611328125,"height":4.5,"text":"Tierra del Fuego"},{"top":463.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":479.82,"left":28.56,"width":114.73028564453125,"height":4.5,"text":"BIANCHI, María del Carmen"},{"top":479.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":480.54,"left":397.56,"width":74.6702880859375,"height":4.5,"text":"Cdad. Aut. Bs. 
As."},{"top":479.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":496.26,"left":28.56,"width":115.19029235839844,"height":4.5,"text":"BIDEGAIN, Gloria Mercedes"},{"top":496.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":497.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":496.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":512.82,"left":28.56,"width":66.25028228759766,"height":4.5,"text":"BRAWER, Mara"},{"top":512.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":513.54,"left":397.56,"width":74.6702880859375,"height":4.5,"text":"Cdad. Aut. Bs. As."},{"top":512.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":529.26,"left":28.56,"width":90.73028564453125,"height":4.5,"text":"BRILLO, José Ricardo"},{"top":529.26,"left":213.72,"width":121.81027221679688,"height":4.5,"text":"Movimiento Popular Neuquino"},{"top":530.1,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":529.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":545.82,"left":28.56,"width":120.73028564453125,"height":4.5,"text":"BROMBERG, Isaac Benjamín"},{"top":545.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":546.54,"left":397.56,"width":38.6602783203125,"height":4.5,"text":"Tucumán"},{"top":545.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":562.26,"left":28.56,"width":89.7702865600586,"height":4.5,"text":"BRUE, Daniel Agustín"},{"top":562.26,"left":213.72,"width":108.25027465820312,"height":4.5,"text":"Frente Cívico por Santiago"},{"top":563.1,"left":397.56,"width":79.69027709960938,"height":4.5,"text":"Santiago del 
Estero"},{"top":562.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":578.82,"left":28.56,"width":72.23028564453125,"height":4.5,"text":"CALCAGNO, Eric"},{"top":578.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":579.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":578.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":595.26,"left":28.56,"width":114.73028564453125,"height":4.5,"text":"CARLOTTO, Remo Gerardo"},{"top":595.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":596.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":595.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":611.82,"left":28.56,"width":122.1702880859375,"height":4.5,"text":"CARMONA, Guillermo Ramón"},{"top":611.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":612.54,"left":397.56,"width":38.1602783203125,"height":4.5,"text":"Mendoza"},{"top":611.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":628.26,"left":28.56,"width":124.73028564453125,"height":4.5,"text":"CATALAN MAGNI, Julio César"},{"top":628.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":629.1,"left":397.56,"width":67.21026611328125,"height":4.5,"text":"Tierra del Fuego"},{"top":628.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":644.82,"left":28.56,"width":88.6902847290039,"height":4.5,"text":"CEJAS, Jorge Alberto"},{"top":644.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":645.54,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio 
Negro"},{"top":644.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":661.26,"left":28.56,"width":89.7702865600586,"height":4.5,"text":"CHIENO, María Elena"},{"top":661.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":662.1,"left":397.56,"width":42.72027587890625,"height":4.5,"text":"Corrientes"},{"top":661.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":677.82,"left":28.56,"width":96.25028228759766,"height":4.5,"text":"CIAMPINI, José Alberto"},{"top":677.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":678.54,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":677.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":694.26,"left":28.56,"width":131.77029418945312,"height":4.5,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":694.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":695.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":694.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":710.82,"left":28.56,"width":62.15028381347656,"height":4.5,"text":"CLERI, Marcos"},{"top":710.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":711.54,"left":397.56,"width":37.690277099609375,"height":4.5,"text":"Santa Fe"},{"top":710.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":727.26,"left":28.56,"width":101.77029418945312,"height":4.5,"text":"COMELLI, Alicia Marcela"},{"top":727.26,"left":213.72,"width":121.81027221679688,"height":4.5,"text":"Movimiento Popular 
Neuquino"},{"top":728.1,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":727.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":743.82,"left":28.56,"width":88.1902847290039,"height":4.5,"text":"CONTI, Diana Beatriz"},{"top":743.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":744.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":743.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":760.26,"left":28.56,"width":98.75028228759766,"height":4.5,"text":"CORDOBA, Stella Maris"},{"top":760.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":761.1,"left":397.56,"width":38.6602783203125,"height":4.5,"text":"Tucumán"},{"top":760.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":776.82,"left":28.56,"width":105.25028991699219,"height":4.5,"text":"CURRILEN, Oscar Rubén"},{"top":776.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":777.54,"left":397.56,"width":30.2802734375,"height":4.5,"text":"Chubut"},{"top":776.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}]]} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/json/schools.json b/src/test/resources/technology/tabula/json/schools.json index 154dc0e2..ad21614c 100644 --- a/src/test/resources/technology/tabula/json/schools.json +++ b/src/test/resources/technology/tabula/json/schools.json @@ -1 +1 @@ -{"extraction_method":"spreadsheet","top":54.315777,"left":16.97,"width":745.3303833007812,"height":483.9442443847656,"data":[[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":54.315777,"left":18.588728,"width":153.91127014160156,"height":8.744216918945312,"text":"Last 
Name"},{"top":54.315777,"left":172.5,"width":69.72000122070312,"height":8.744216918945312,"text":"First Name"},{"top":54.315777,"left":242.22,"width":104.760009765625,"height":8.744216918945312,"text":"Address"},{"top":54.315777,"left":346.98,"width":70.80001831054688,"height":8.744216918945312,"text":"City"},{"top":54.315777,"left":417.78003,"width":20.87994384765625,"height":8.744216918945312,"text":"State"},{"top":54.315777,"left":438.65997,"width":49.91998291015625,"height":8.744216918945312,"text":"Zip"},{"top":54.315777,"left":488.57996,"width":78.47998046875,"height":8.744216918945312,"text":"Occupation"},{"top":54.315777,"left":567.05994,"width":106.4400634765625,"height":8.744216918945312,"text":"Employer"},{"top":54.315777,"left":673.5,"width":45.52001953125,"height":8.744216918945312,"text":"Date"},{"top":54.315777,"left":719.02,"width":43.28033447265625,"height":8.744216918945312,"text":"Amount"}],[{"top":63.059994,"left":16.97,"width":1.6187286376953125,"height":10.799991607666016,"text":""},{"top":63.059994,"left":18.588728,"width":153.91127014160156,"height":10.799991607666016,"text":"Lidstad"},{"top":63.059994,"left":172.5,"width":69.72000122070312,"height":10.799991607666016,"text":"Dick \u0026 Peg"},{"top":63.059994,"left":242.22,"width":104.760009765625,"height":10.799991607666016,"text":"62 Mississippi River Blvd N"},{"top":63.059994,"left":346.98,"width":70.80001831054688,"height":10.799991607666016,"text":"Saint 
Paul"},{"top":63.059994,"left":417.78003,"width":20.87994384765625,"height":10.799991607666016,"text":"MN"},{"top":63.059994,"left":438.65997,"width":49.91998291015625,"height":10.799991607666016,"text":"55104"},{"top":63.059994,"left":488.57996,"width":78.47998046875,"height":10.799991607666016,"text":"retired"},{"top":63.059994,"left":567.05994,"width":106.4400634765625,"height":10.799991607666016,"text":""},{"top":63.059994,"left":673.5,"width":45.52001953125,"height":10.799991607666016,"text":"10/12/2012"},{"top":63.059994,"left":719.02,"width":43.28033447265625,"height":10.799991607666016,"text":"60.00"}],[{"top":73.859985,"left":16.97,"width":1.6187286376953125,"height":10.800025939941406,"text":""},{"top":73.859985,"left":18.588728,"width":153.91127014160156,"height":10.800025939941406,"text":"Strom"},{"top":73.859985,"left":172.5,"width":69.72000122070312,"height":10.800025939941406,"text":"Pam"},{"top":73.859985,"left":242.22,"width":104.760009765625,"height":10.800025939941406,"text":"1229 Hague Ave"},{"top":73.859985,"left":346.98,"width":70.80001831054688,"height":10.800025939941406,"text":"St. 
Paul"},{"top":73.859985,"left":417.78003,"width":20.87994384765625,"height":10.800025939941406,"text":"MN"},{"top":73.859985,"left":438.65997,"width":49.91998291015625,"height":10.800025939941406,"text":"55104"},{"top":73.859985,"left":488.57996,"width":78.47998046875,"height":10.800025939941406,"text":""},{"top":73.859985,"left":567.05994,"width":106.4400634765625,"height":10.800025939941406,"text":""},{"top":73.859985,"left":673.5,"width":45.52001953125,"height":10.800025939941406,"text":"9/12/2012"},{"top":73.859985,"left":719.02,"width":43.28033447265625,"height":10.800025939941406,"text":"60.00"}],[{"top":84.66001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":84.66001,"left":18.588728,"width":153.91127014160156,"height":10.799995422363281,"text":"Seeba"},{"top":84.66001,"left":172.5,"width":69.72000122070312,"height":10.799995422363281,"text":"Louise \u0026 Paul"},{"top":84.66001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"1399 Sheldon St"},{"top":84.66001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":84.66001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":84.66001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55108"},{"top":84.66001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"BOE"},{"top":84.66001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"City of Saint Paul"},{"top":84.66001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/12/2012"},{"top":84.66001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":95.46001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":95.46001,"left":18.588728,"width":153.91127014160156,"height":10.799995422363281,"text":"Schumacher / 
Bales"},{"top":95.46001,"left":172.5,"width":69.72000122070312,"height":10.799995422363281,"text":"Douglas L. / Patricia"},{"top":95.46001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"948 County Rd. D W"},{"top":95.46001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":95.46001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":95.46001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55126"},{"top":95.46001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":""},{"top":95.46001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":""},{"top":95.46001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/13/2012"},{"top":95.46001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":106.26,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":106.26,"left":18.588728,"width":153.91127014160156,"height":10.799995422363281,"text":"Abrams"},{"top":106.26,"left":172.5,"width":69.72000122070312,"height":10.799995422363281,"text":"Marjorie"},{"top":106.26,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"238 8th St east"},{"top":106.26,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"St 
Paul"},{"top":106.26,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":106.26,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55101"},{"top":106.26,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"8/8/2012"},{"top":106.26,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"75.00"}],[{"top":117.06,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":117.06,"left":18.588728,"width":153.91127014160156,"height":10.79998779296875,"text":"Crouse / Schroeder"},{"top":117.06,"left":172.5,"width":69.72000122070312,"height":10.79998779296875,"text":"Abigail / Jonathan"},{"top":117.06,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1545 Branston St."},{"top":117.06,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":117.06,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":117.06,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55108"},{"top":117.06,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":""},{"top":117.06,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":117.06,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":117.06,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"75.00"}],[{"top":127.859985,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":127.859985,"left":18.588728,"width":153.91127014160156,"height":10.800003051757812,"text":"O\u0027Connell"},{"top":127.859985,"left":172.5,"width":69.72000122070312,"height":10.800003051757812,"text":"Jean"},{"top":127.859985,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"888 Ivy Ave W."},{"top":127.859985,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Saint 
Paul"},{"top":127.859985,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":127.859985,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55117"},{"top":127.859985,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":""},{"top":127.859985,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":127.859985,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/30/2012"},{"top":127.859985,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":138.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":138.65999,"left":18.588728,"width":153.91127014160156,"height":10.800018310546875,"text":"Reese"},{"top":138.65999,"left":172.5,"width":69.72000122070312,"height":10.800018310546875,"text":"Cheri A"},{"top":138.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"981 Davern St."},{"top":138.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":138.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":138.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55116"},{"top":138.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Public Relations"},{"top":138.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Far North 
Spirits"},{"top":138.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/3/2012"},{"top":138.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"75.00"}],[{"top":149.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":149.46,"left":18.588728,"width":153.91127014160156,"height":10.800003051757812,"text":"Serrano"},{"top":149.46,"left":172.5,"width":69.72000122070312,"height":10.800003051757812,"text":"Luz Maria"},{"top":149.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"5559 Park Place Drive"},{"top":149.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Shoreview"},{"top":149.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":149.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55126"},{"top":149.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"retired"},{"top":149.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":149.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":149.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":160.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":160.26001,"left":18.588728,"width":153.91127014160156,"height":10.79998779296875,"text":"Alsiddiqui"},{"top":160.26001,"left":172.5,"width":69.72000122070312,"height":10.79998779296875,"text":"Jaber"},{"top":160.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"13056 Euclid Ave"},{"top":160.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Apple 
Valley"},{"top":160.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":160.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55124"},{"top":160.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"SPPS budget analyst"},{"top":160.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":160.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":160.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":171.06,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":171.06,"left":18.588728,"width":153.91127014160156,"height":10.800003051757812,"text":"Andrastek"},{"top":171.06,"left":172.5,"width":69.72000122070312,"height":10.800003051757812,"text":"John"},{"top":171.06,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"774 Ashland Ave"},{"top":171.06,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"St Paul"},{"top":171.06,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":171.06,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55104"},{"top":171.06,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"asst 
principal"},{"top":171.06,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":171.06,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"8/22/2012"},{"top":171.06,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":181.86,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":181.86,"left":18.588728,"width":153.91127014160156,"height":10.79998779296875,"text":"Anfang"},{"top":181.86,"left":172.5,"width":69.72000122070312,"height":10.79998779296875,"text":"Heather L. \u0026 Matt"},{"top":181.86,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1635 Bayard Ave"},{"top":181.86,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":181.86,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":181.86,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55116"},{"top":181.86,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"Ex Director"},{"top":181.86,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"BOMA"},{"top":181.86,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":181.86,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":192.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":192.65999,"left":18.588728,"width":153.91127014160156,"height":10.800018310546875,"text":"Aronson"},{"top":192.65999,"left":172.5,"width":69.72000122070312,"height":10.800018310546875,"text":"Roger J."},{"top":192.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4852 Emerson Ave. 
S."},{"top":192.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Minneapolis"},{"top":192.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":192.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55419"},{"top":192.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Attorney at Law"},{"top":192.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":""},{"top":192.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/20/2012"},{"top":192.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":203.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":203.46,"left":18.588728,"width":153.91127014160156,"height":10.800003051757812,"text":"Aronson"},{"top":203.46,"left":172.5,"width":69.72000122070312,"height":10.800003051757812,"text":"Roger J."},{"top":203.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"4852 Emerson Ave. 
S."},{"top":203.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Minneapolis"},{"top":203.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":203.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55419"},{"top":203.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"Attorney at Law"},{"top":203.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":203.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":203.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":214.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":214.26001,"left":18.588728,"width":153.91127014160156,"height":10.79998779296875,"text":"Banks"},{"top":214.26001,"left":172.5,"width":69.72000122070312,"height":10.79998779296875,"text":"Michael or Patricia"},{"top":214.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1664 Van Buren Ave."},{"top":214.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":214.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":214.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":214.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"retired social 
worker"},{"top":214.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":214.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":214.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":225.06,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":225.06,"left":18.588728,"width":153.91127014160156,"height":10.800018310546875,"text":"Bennett"},{"top":225.06,"left":172.5,"width":69.72000122070312,"height":10.800018310546875,"text":"David"},{"top":225.06,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"25 Birchwood Rd."},{"top":225.06,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Mahtomedi"},{"top":225.06,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":225.06,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":225.06,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/3/2012"},{"top":225.06,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":235.86002,"left":16.97,"width":1.6187286376953125,"height":10.799972534179688,"text":""},{"top":235.86002,"left":18.588728,"width":153.91127014160156,"height":10.799972534179688,"text":"Berry"},{"top":235.86002,"left":172.5,"width":69.72000122070312,"height":10.799972534179688,"text":"Margaret"},{"top":235.86002,"left":242.22,"width":104.760009765625,"height":10.799972534179688,"text":"1267 Pike Lake Dr"},{"top":235.86002,"left":346.98,"width":70.80001831054688,"height":10.799972534179688,"text":"New 
Brighton"},{"top":235.86002,"left":417.78003,"width":20.87994384765625,"height":10.799972534179688,"text":"MN"},{"top":235.86002,"left":438.65997,"width":49.91998291015625,"height":10.799972534179688,"text":"55112"},{"top":235.86002,"left":488.57996,"width":78.47998046875,"height":10.799972534179688,"text":"Asst Principal"},{"top":235.86002,"left":567.05994,"width":106.4400634765625,"height":10.799972534179688,"text":"Saint Paul Public Schools"},{"top":235.86002,"left":673.5,"width":45.52001953125,"height":10.799972534179688,"text":"8/8/2012"},{"top":235.86002,"left":719.02,"width":43.28033447265625,"height":10.799972534179688,"text":"100.00"}],[{"top":246.65999,"left":16.97,"width":1.6187286376953125,"height":10.800033569335938,"text":""},{"top":246.65999,"left":18.588728,"width":153.91127014160156,"height":10.800033569335938,"text":"Boyle"},{"top":246.65999,"left":172.5,"width":69.72000122070312,"height":10.800033569335938,"text":"Matthew C. \u0026 Eliza"},{"top":246.65999,"left":242.22,"width":104.760009765625,"height":10.800033569335938,"text":"2165 Princeton Ave"},{"top":246.65999,"left":346.98,"width":70.80001831054688,"height":10.800033569335938,"text":"Saint 
Paul"},{"top":246.65999,"left":417.78003,"width":20.87994384765625,"height":10.800033569335938,"text":"MN"},{"top":246.65999,"left":438.65997,"width":49.91998291015625,"height":10.800033569335938,"text":"55105"},{"top":246.65999,"left":488.57996,"width":78.47998046875,"height":10.800033569335938,"text":""},{"top":246.65999,"left":567.05994,"width":106.4400634765625,"height":10.800033569335938,"text":""},{"top":246.65999,"left":673.5,"width":45.52001953125,"height":10.800033569335938,"text":"10/6/2012"},{"top":246.65999,"left":719.02,"width":43.28033447265625,"height":10.800033569335938,"text":"100.00"}],[{"top":257.46002,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":257.46002,"left":18.588728,"width":153.91127014160156,"height":10.79998779296875,"text":"Brodrick"},{"top":257.46002,"left":172.5,"width":69.72000122070312,"height":10.79998779296875,"text":"John"},{"top":257.46002,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles"},{"top":257.46002,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"St. 
Paul"},{"top":257.46002,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":257.46002,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":257.46002,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BOE"},{"top":257.46002,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"retired"},{"top":257.46002,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"8/26/2012"},{"top":257.46002,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":268.26,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":268.26,"left":18.588728,"width":153.91127014160156,"height":10.79998779296875,"text":"Brodrick"},{"top":268.26,"left":172.5,"width":69.72000122070312,"height":10.79998779296875,"text":"John F."},{"top":268.26,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles Ave"},{"top":268.26,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":268.26,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":268.26,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":268.26,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BoE"},{"top":268.26,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":268.26,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/4/2012"},{"top":268.26,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":279.06,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":279.06,"left":18.588728,"width":153.91127014160156,"height":10.779998779296875,"text":"Brown"},{"top":279.06,"left":172.5,"width":69.72000122070312,"height":10.779998779296875,"text":"Mike"},{"top":279.06,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1440 Goodrich Ave"},{"top":279.06,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":279.06,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":279.06,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":279.06,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":279.06,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":279.06,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"9/23/2012"},{"top":279.06,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":289.84,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":289.84,"left":18.588728,"width":153.91127014160156,"height":10.819976806640625,"text":"Cacy"},{"top":289.84,"left":172.5,"width":69.72000122070312,"height":10.819976806640625,"text":"Jill"},{"top":289.84,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"1409 Smith Ave So"},{"top":289.84,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"West St. 
Paul"},{"top":289.84,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":289.84,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55118"},{"top":289.84,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Administrator"},{"top":289.84,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"SPPS"},{"top":289.84,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"9/12/2012"},{"top":289.84,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":300.65997,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":300.65997,"left":18.588728,"width":153.91127014160156,"height":10.800018310546875,"text":"Cardwell"},{"top":300.65997,"left":172.5,"width":69.72000122070312,"height":10.800018310546875,"text":"Eileen"},{"top":300.65997,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4172 Bridgewater Ct"},{"top":300.65997,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Vadnais 
Height"},{"top":300.65997,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":300.65997,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55127"},{"top":300.65997,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/3/2012"},{"top":300.65997,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":311.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":311.46,"left":18.588728,"width":153.91127014160156,"height":10.800018310546875,"text":"Carlstrom"},{"top":311.46,"left":172.5,"width":69.72000122070312,"height":10.800018310546875,"text":"Cheryl"},{"top":311.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4244 Oakmede Ln"},{"top":311.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"St Paul"},{"top":311.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":311.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":311.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Satff"},{"top":311.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public 
Schools"},{"top":311.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/8/2012"},{"top":311.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":322.26,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":322.26,"left":18.588728,"width":153.91127014160156,"height":10.800018310546875,"text":"Carter"},{"top":322.26,"left":172.5,"width":69.72000122070312,"height":10.800018310546875,"text":"Melvin W. \u0026 Willet"},{"top":322.26,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"405 Western Ave N"},{"top":322.26,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":322.26,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":322.26,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55103"},{"top":322.26,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Cty Commissioner"},{"top":322.26,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Ramsey Cty"},{"top":322.26,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/8/2012"},{"top":322.26,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":333.06003,"left":16.97,"width":1.6187286376953125,"height":10.8199462890625,"text":""},{"top":333.06003,"left":18.588728,"width":153.91127014160156,"height":10.8199462890625,"text":"Caruso"},{"top":333.06003,"left":172.5,"width":69.72000122070312,"height":10.8199462890625,"text":"Sarah"},{"top":333.06003,"left":242.22,"width":104.760009765625,"height":10.8199462890625,"text":"2615 Newton Ave 
S"},{"top":333.06003,"left":346.98,"width":70.80001831054688,"height":10.8199462890625,"text":"Minneapolis"},{"top":333.06003,"left":417.78003,"width":20.87994384765625,"height":10.8199462890625,"text":"MN"},{"top":333.06003,"left":438.65997,"width":49.91998291015625,"height":10.8199462890625,"text":"55405"},{"top":333.06003,"left":488.57996,"width":78.47998046875,"height":10.8199462890625,"text":"CEO"},{"top":333.06003,"left":567.05994,"width":106.4400634765625,"height":10.8199462890625,"text":"United Way"},{"top":333.06003,"left":673.5,"width":45.52001953125,"height":10.8199462890625,"text":"9/12/2012"},{"top":333.06003,"left":719.02,"width":43.28033447265625,"height":10.8199462890625,"text":"100.00"}],[{"top":343.87997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":343.87997,"left":18.588728,"width":153.91127014160156,"height":10.779998779296875,"text":"Casey /Trewartha"},{"top":343.87997,"left":172.5,"width":69.72000122070312,"height":10.779998779296875,"text":"Kerry F. 
/ Kelly A."},{"top":343.87997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"2266 Goodrich Ave"},{"top":343.87997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":343.87997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":343.87997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":343.87997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":343.87997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":343.87997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/4/2012"},{"top":343.87997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":354.65997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":354.65997,"left":18.588728,"width":153.91127014160156,"height":10.779998779296875,"text":"Cassidy"},{"top":354.65997,"left":172.5,"width":69.72000122070312,"height":10.779998779296875,"text":"Paul D."},{"top":354.65997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1990 Dayton Ave"},{"top":354.65997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":354.65997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":354.65997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55104"},{"top":354.65997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":354.65997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":354.65997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/6/2012"},{"top":354.65997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":365.43997,"left":16.97,"width":1.6187286376953125,"height":10.820037841796875,"text":""},{"top":365.43997,"left":18.588728,"width":153.91127014160156,"height":10.820037841796875,"text":"Causey"},{"top":365.43997,"left":172.5,"width":69.72000122070312,"height":10.820037841796875,"text":"Christopher"},{"top":365.43997,"left":242.22,"width":104.760009765625,"height":10.820037841796875,"text":"2181 Doswell Avenue"},{"top":365.43997,"left":346.98,"width":70.80001831054688,"height":10.820037841796875,"text":"Saint 
Paul"},{"top":365.43997,"left":417.78003,"width":20.87994384765625,"height":10.820037841796875,"text":"MN"},{"top":365.43997,"left":438.65997,"width":49.91998291015625,"height":10.820037841796875,"text":"55108"},{"top":365.43997,"left":488.57996,"width":78.47998046875,"height":10.820037841796875,"text":"finance"},{"top":365.43997,"left":567.05994,"width":106.4400634765625,"height":10.820037841796875,"text":""},{"top":365.43997,"left":673.5,"width":45.52001953125,"height":10.820037841796875,"text":"9/3/2012"},{"top":365.43997,"left":719.02,"width":43.28033447265625,"height":10.820037841796875,"text":"100.00"}],[{"top":376.26,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":376.26,"left":18.588728,"width":153.91127014160156,"height":10.819976806640625,"text":"Christiansen"},{"top":376.26,"left":172.5,"width":69.72000122070312,"height":10.819976806640625,"text":"Lena"},{"top":376.26,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"2453 Como Ave"},{"top":376.26,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"St Paul"},{"top":376.26,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":376.26,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55108"},{"top":376.26,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Principal"},{"top":376.26,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"Saint Paul Public 
Schools"},{"top":376.26,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"8/8/2012"},{"top":376.26,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":387.08,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":387.08,"left":18.588728,"width":153.91127014160156,"height":10.779998779296875,"text":"Clapp"},{"top":387.08,"left":172.5,"width":69.72000122070312,"height":10.779998779296875,"text":"Agustus (Bill)"},{"top":387.08,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"757 Osceola Ave #1"},{"top":387.08,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":387.08,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":387.08,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":387.08,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"retired"},{"top":387.08,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":387.08,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/2/2012"},{"top":387.08,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":397.86,"left":16.97,"width":1.6187286376953125,"height":10.780029296875,"text":""},{"top":397.86,"left":18.588728,"width":153.91127014160156,"height":10.780029296875,"text":"Cohen"},{"top":397.86,"left":172.5,"width":69.72000122070312,"height":10.780029296875,"text":"Brad A."},{"top":397.86,"left":242.22,"width":104.760009765625,"height":10.780029296875,"text":"1460 Raymond Ave"},{"top":397.86,"left":346.98,"width":70.80001831054688,"height":10.780029296875,"text":"Saint 
Paul"},{"top":397.86,"left":417.78003,"width":20.87994384765625,"height":10.780029296875,"text":"MN"},{"top":397.86,"left":438.65997,"width":49.91998291015625,"height":10.780029296875,"text":"55108"},{"top":397.86,"left":488.57996,"width":78.47998046875,"height":10.780029296875,"text":"academic technology"},{"top":397.86,"left":567.05994,"width":106.4400634765625,"height":10.780029296875,"text":"UMN"},{"top":397.86,"left":673.5,"width":45.52001953125,"height":10.780029296875,"text":"10/17/2012"},{"top":397.86,"left":719.02,"width":43.28033447265625,"height":10.780029296875,"text":"100.00"}],[{"top":408.64,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":408.64,"left":18.588728,"width":153.91127014160156,"height":10.819976806640625,"text":"Commers"},{"top":408.64,"left":172.5,"width":69.72000122070312,"height":10.819976806640625,"text":"Beth"},{"top":408.64,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"2294 Commonwealth Ave"},{"top":408.64,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"St Paul"},{"top":408.64,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":408.64,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55108"},{"top":408.64,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Self 
Employed"},{"top":408.64,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"Homemaker"},{"top":408.64,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"8/1/2012"},{"top":408.64,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":419.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":419.46,"left":18.588728,"width":153.91127014160156,"height":10.800018310546875,"text":"Couture"},{"top":419.46,"left":172.5,"width":69.72000122070312,"height":10.800018310546875,"text":"Steven"},{"top":419.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"7950 Victoria Way"},{"top":419.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Louise Park"},{"top":419.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":419.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55426"},{"top":419.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Principal"},{"top":419.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public Schools"},{"top":419.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/10/2012"},{"top":419.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":430.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":430.26,"left":18.588728,"width":153.91127014160156,"height":10.800048828125,"text":"Crawford"},{"top":430.26,"left":172.5,"width":69.72000122070312,"height":10.800048828125,"text":"Lydia P."},{"top":430.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"1140 Edgcumbe Rd"},{"top":430.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint 
Paul"},{"top":430.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":430.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":430.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":430.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":430.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"9/15/2012"},{"top":430.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":441.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":441.06006,"left":18.588728,"width":153.91127014160156,"height":10.799896240234375,"text":"Cudahy / Ricker"},{"top":441.06006,"left":172.5,"width":69.72000122070312,"height":10.799896240234375,"text":"Robert \u0026 Mary C"},{"top":441.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"616 Cherokee Ave."},{"top":441.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint Paul"},{"top":441.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":441.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55107"},{"top":441.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"Teacher/Union Presid"},{"top":441.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"SPPS/SPFT"},{"top":441.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"9/18/2012"},{"top":441.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":451.85995,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":451.85995,"left":18.588728,"width":153.91127014160156,"height":10.800018310546875,"text":"Cudahy / 
Ricker"},{"top":451.85995,"left":172.5,"width":69.72000122070312,"height":10.800018310546875,"text":"Robert \u0026 Mary C"},{"top":451.85995,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"616 Cherokee Ave."},{"top":451.85995,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":451.85995,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":451.85995,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55107"},{"top":451.85995,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Teacher/Union Presid"},{"top":451.85995,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"SPPS/SPFT"},{"top":451.85995,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/6/2012"},{"top":451.85995,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":462.65997,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":462.65997,"left":18.588728,"width":153.91127014160156,"height":10.82000732421875,"text":"Currie"},{"top":462.65997,"left":172.5,"width":69.72000122070312,"height":10.82000732421875,"text":"Elisabeth"},{"top":462.65997,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2274 Hillside Ave"},{"top":462.65997,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St. 
Paul"},{"top":462.65997,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":462.65997,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":462.65997,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":""},{"top":462.65997,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":""},{"top":462.65997,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"9/12/2012"},{"top":462.65997,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":473.47998,"left":16.97,"width":1.6187286376953125,"height":10.780029296875,"text":""},{"top":473.47998,"left":18.588728,"width":153.91127014160156,"height":10.780029296875,"text":"Doane"},{"top":473.47998,"left":172.5,"width":69.72000122070312,"height":10.780029296875,"text":"Paul V \u0026 Helen R"},{"top":473.47998,"left":242.22,"width":104.760009765625,"height":10.780029296875,"text":"444 Portland Ave"},{"top":473.47998,"left":346.98,"width":70.80001831054688,"height":10.780029296875,"text":"Sant Paul"},{"top":473.47998,"left":417.78003,"width":20.87994384765625,"height":10.780029296875,"text":"MN"},{"top":473.47998,"left":438.65997,"width":49.91998291015625,"height":10.780029296875,"text":"55102"},{"top":473.47998,"left":488.57996,"width":78.47998046875,"height":10.780029296875,"text":"Ex director"},{"top":473.47998,"left":567.05994,"width":106.4400634765625,"height":10.780029296875,"text":"St Paul Teachers\u0027 Retirement 
A"},{"top":473.47998,"left":673.5,"width":45.52001953125,"height":10.780029296875,"text":"10/3/2012"},{"top":473.47998,"left":719.02,"width":43.28033447265625,"height":10.780029296875,"text":"100.00"}],[{"top":484.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":484.26,"left":18.588728,"width":153.91127014160156,"height":10.800048828125,"text":"Dougherty"},{"top":484.26,"left":172.5,"width":69.72000122070312,"height":10.800048828125,"text":"Richards S \u0026 Patrici"},{"top":484.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"812 Goodrich Ave"},{"top":484.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint Paul"},{"top":484.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":484.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":484.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":484.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":484.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"10/4/2012"},{"top":484.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":495.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":495.06006,"left":18.588728,"width":153.91127014160156,"height":10.799896240234375,"text":"Driscoll"},{"top":495.06006,"left":172.5,"width":69.72000122070312,"height":10.799896240234375,"text":"Joe \u0026 Jill"},{"top":495.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"2383 Bourne Ave"},{"top":495.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint 
Paul"},{"top":495.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":495.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55108"},{"top":495.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"HR Manager"},{"top":495.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"LexisNexis"},{"top":495.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"10/18/2012"},{"top":495.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":505.85995,"left":16.97,"width":1.6187286376953125,"height":10.780059814453125,"text":""},{"top":505.85995,"left":18.588728,"width":153.91127014160156,"height":10.780059814453125,"text":"Dubaille / Haugee"},{"top":505.85995,"left":172.5,"width":69.72000122070312,"height":10.780059814453125,"text":"Florence M. /Eric"},{"top":505.85995,"left":242.22,"width":104.760009765625,"height":10.780059814453125,"text":"1009 Edmund Ave"},{"top":505.85995,"left":346.98,"width":70.80001831054688,"height":10.780059814453125,"text":"Saint Paul"},{"top":505.85995,"left":417.78003,"width":20.87994384765625,"height":10.780059814453125,"text":"MN"},{"top":505.85995,"left":438.65997,"width":49.91998291015625,"height":10.780059814453125,"text":"55104"},{"top":505.85995,"left":488.57996,"width":78.47998046875,"height":10.780059814453125,"text":"Florence 
teacher"},{"top":505.85995,"left":567.05994,"width":106.4400634765625,"height":10.780059814453125,"text":"SPPS"},{"top":505.85995,"left":673.5,"width":45.52001953125,"height":10.780059814453125,"text":"10/3/2012"},{"top":505.85995,"left":719.02,"width":43.28033447265625,"height":10.780059814453125,"text":"100.00"}],[{"top":516.64,"left":16.97,"width":1.6187286376953125,"height":10.83990478515625,"text":""},{"top":516.64,"left":18.588728,"width":153.91127014160156,"height":10.83990478515625,"text":"Eaton"},{"top":516.64,"left":172.5,"width":69.72000122070312,"height":10.83990478515625,"text":"Jim"},{"top":516.64,"left":242.22,"width":104.760009765625,"height":10.83990478515625,"text":"2133 Berkeley Ave"},{"top":516.64,"left":346.98,"width":70.80001831054688,"height":10.83990478515625,"text":"St Paul"},{"top":516.64,"left":417.78003,"width":20.87994384765625,"height":10.83990478515625,"text":"MN"},{"top":516.64,"left":438.65997,"width":49.91998291015625,"height":10.83990478515625,"text":"55105"},{"top":516.64,"left":488.57996,"width":78.47998046875,"height":10.83990478515625,"text":"Principal"},{"top":516.64,"left":567.05994,"width":106.4400634765625,"height":10.83990478515625,"text":"Saint Paul Public Schools"},{"top":516.64,"left":673.5,"width":45.52001953125,"height":10.83990478515625,"text":"8/23/2012"},{"top":516.64,"left":719.02,"width":43.28033447265625,"height":10.83990478515625,"text":"100.00"}],[{"top":527.4799,"left":16.97,"width":1.6187286376953125,"height":10.78009033203125,"text":""},{"top":527.4799,"left":18.588728,"width":153.91127014160156,"height":10.78009033203125,"text":"Eaves /Alger"},{"top":527.4799,"left":172.5,"width":69.72000122070312,"height":10.78009033203125,"text":"Patricia / Stuart"},{"top":527.4799,"left":242.22,"width":104.760009765625,"height":10.78009033203125,"text":"1143 Portladn Ave."},{"top":527.4799,"left":346.98,"width":70.80001831054688,"height":10.78009033203125,"text":"Saint 
Paul"},{"top":527.4799,"left":417.78003,"width":20.87994384765625,"height":10.78009033203125,"text":"MN"},{"top":527.4799,"left":438.65997,"width":49.91998291015625,"height":10.78009033203125,"text":"55104"},{"top":527.4799,"left":488.57996,"width":78.47998046875,"height":10.78009033203125,"text":""},{"top":527.4799,"left":567.05994,"width":106.4400634765625,"height":10.78009033203125,"text":""},{"top":527.4799,"left":673.5,"width":45.52001953125,"height":10.78009033203125,"text":"10/3/2012"},{"top":527.4799,"left":719.02,"width":43.28033447265625,"height":10.78009033203125,"text":"100.00"}]]} +{"extraction_method":"lattice","top":54.315777,"left":16.97,"width":745.3303833007812,"height":483.9442443847656,"right":762.30035,"bottom":538.26,"data":[[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":54.315777,"left":172.50002,"width":69.71998596191406,"height":8.744216918945312,"text":"First Name"},{"top":54.315777,"left":242.22,"width":104.760009765625,"height":8.744216918945312,"text":"Address"},{"top":54.315777,"left":346.98,"width":70.80001831054688,"height":8.744216918945312,"text":"City"},{"top":54.315777,"left":417.78003,"width":20.87994384765625,"height":8.744216918945312,"text":"State"},{"top":54.315777,"left":438.65997,"width":49.91998291015625,"height":8.744216918945312,"text":"Zip"},{"top":54.315777,"left":488.57996,"width":78.47998046875,"height":8.744216918945312,"text":"Occupation"},{"top":54.315777,"left":567.05994,"width":106.4400634765625,"height":8.744216918945312,"text":"Employer"},{"top":54.315777,"left":673.5,"width":45.52001953125,"height":8.744216918945312,"text":"Date"},{"top":54.315777,"left":719.02,"width":43.28033447265625,"height":8.744216918945312,"text":"Amount"}],[{"top":63.059994,"left":16.97,"width":155.53001403808594,"height":10.799991607666016,"text":"Lidstad"},{"top":63.059994,"left":172.50002,"width":69.71998596191406,"height":10.799991607666016,"text":"Dick \u0026 
Peg"},{"top":63.059994,"left":242.22,"width":104.760009765625,"height":10.799991607666016,"text":"62 Mississippi River Blvd N"},{"top":63.059994,"left":346.98,"width":70.80001831054688,"height":10.799991607666016,"text":"Saint Paul"},{"top":63.059994,"left":417.78003,"width":20.87994384765625,"height":10.799991607666016,"text":"MN"},{"top":63.059994,"left":438.65997,"width":49.91998291015625,"height":10.799991607666016,"text":"55104"},{"top":63.059994,"left":488.57996,"width":78.47998046875,"height":10.799991607666016,"text":"retired"},{"top":63.059994,"left":567.05994,"width":106.4400634765625,"height":10.799991607666016,"text":""},{"top":63.059994,"left":673.5,"width":45.52001953125,"height":10.799991607666016,"text":"10/12/2012"},{"top":63.059994,"left":719.02,"width":43.28033447265625,"height":10.799991607666016,"text":"60.00"}],[{"top":73.859985,"left":16.97,"width":155.53001403808594,"height":10.800025939941406,"text":"Strom"},{"top":73.859985,"left":172.50002,"width":69.71998596191406,"height":10.800025939941406,"text":"Pam"},{"top":73.859985,"left":242.22,"width":104.760009765625,"height":10.800025939941406,"text":"1229 Hague Ave"},{"top":73.859985,"left":346.98,"width":70.80001831054688,"height":10.800025939941406,"text":"St. 
Paul"},{"top":73.859985,"left":417.78003,"width":20.87994384765625,"height":10.800025939941406,"text":"MN"},{"top":73.859985,"left":438.65997,"width":49.91998291015625,"height":10.800025939941406,"text":"55104"},{"top":73.859985,"left":488.57996,"width":78.47998046875,"height":10.800025939941406,"text":""},{"top":73.859985,"left":567.05994,"width":106.4400634765625,"height":10.800025939941406,"text":""},{"top":73.859985,"left":673.5,"width":45.52001953125,"height":10.800025939941406,"text":"9/12/2012"},{"top":73.859985,"left":719.02,"width":43.28033447265625,"height":10.800025939941406,"text":"60.00"}],[{"top":84.66001,"left":16.97,"width":155.53001403808594,"height":10.799995422363281,"text":"Seeba"},{"top":84.66001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Louise \u0026 Paul"},{"top":84.66001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"1399 Sheldon St"},{"top":84.66001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":84.66001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":84.66001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55108"},{"top":84.66001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"BOE"},{"top":84.66001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"City of Saint Paul"},{"top":84.66001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/12/2012"},{"top":84.66001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":95.46001,"left":16.97,"width":155.53001403808594,"height":10.799995422363281,"text":"Schumacher / Bales"},{"top":95.46001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Douglas L. / Patricia"},{"top":95.46001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"948 County Rd. 
D W"},{"top":95.46001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":95.46001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":95.46001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55126"},{"top":95.46001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":""},{"top":95.46001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":""},{"top":95.46001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/13/2012"},{"top":95.46001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":106.26,"left":16.97,"width":155.53001403808594,"height":10.799995422363281,"text":"Abrams"},{"top":106.26,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Marjorie"},{"top":106.26,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"238 8th St east"},{"top":106.26,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"St Paul"},{"top":106.26,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":106.26,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55101"},{"top":106.26,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"8/8/2012"},{"top":106.26,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"75.00"}],[{"top":117.06,"left":16.97,"width":155.53001403808594,"height":10.79998779296875,"text":"Crouse / Schroeder"},{"top":117.06,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Abigail / 
Jonathan"},{"top":117.06,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1545 Branston St."},{"top":117.06,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":117.06,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":117.06,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55108"},{"top":117.06,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":""},{"top":117.06,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":117.06,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":117.06,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"75.00"}],[{"top":127.859985,"left":16.97,"width":155.53001403808594,"height":10.800003051757812,"text":"O\u0027Connell"},{"top":127.859985,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Jean"},{"top":127.859985,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"888 Ivy Ave W."},{"top":127.859985,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Saint 
Paul"},{"top":127.859985,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":127.859985,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55117"},{"top":127.859985,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":""},{"top":127.859985,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":127.859985,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/30/2012"},{"top":127.859985,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":138.65999,"left":16.97,"width":155.53001403808594,"height":10.800018310546875,"text":"Reese"},{"top":138.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheri A"},{"top":138.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"981 Davern St."},{"top":138.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":138.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":138.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55116"},{"top":138.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Public Relations"},{"top":138.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Far North Spirits"},{"top":138.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/3/2012"},{"top":138.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"75.00"}],[{"top":149.46,"left":16.97,"width":155.53001403808594,"height":10.800003051757812,"text":"Serrano"},{"top":149.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Luz Maria"},{"top":149.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"5559 Park Place 
Drive"},{"top":149.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Shoreview"},{"top":149.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":149.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55126"},{"top":149.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"retired"},{"top":149.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":149.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":149.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":160.26001,"left":16.97,"width":155.53001403808594,"height":10.79998779296875,"text":"Alsiddiqui"},{"top":160.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Jaber"},{"top":160.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"13056 Euclid Ave"},{"top":160.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Apple Valley"},{"top":160.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":160.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55124"},{"top":160.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"SPPS budget 
analyst"},{"top":160.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":160.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":160.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":171.06,"left":16.97,"width":155.53001403808594,"height":10.800003051757812,"text":"Andrastek"},{"top":171.06,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"John"},{"top":171.06,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"774 Ashland Ave"},{"top":171.06,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"St Paul"},{"top":171.06,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":171.06,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55104"},{"top":171.06,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"asst principal"},{"top":171.06,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":171.06,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"8/22/2012"},{"top":171.06,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":181.86,"left":16.97,"width":155.53001403808594,"height":10.79998779296875,"text":"Anfang"},{"top":181.86,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Heather L. 
\u0026 Matt"},{"top":181.86,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1635 Bayard Ave"},{"top":181.86,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":181.86,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":181.86,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55116"},{"top":181.86,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"Ex Director"},{"top":181.86,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"BOMA"},{"top":181.86,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":181.86,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":192.65999,"left":16.97,"width":155.53001403808594,"height":10.800018310546875,"text":"Aronson"},{"top":192.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Roger J."},{"top":192.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4852 Emerson Ave. 
S."},{"top":192.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Minneapolis"},{"top":192.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":192.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55419"},{"top":192.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Attorney at Law"},{"top":192.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":""},{"top":192.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/20/2012"},{"top":192.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":203.46,"left":16.97,"width":155.53001403808594,"height":10.800003051757812,"text":"Aronson"},{"top":203.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Roger J."},{"top":203.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"4852 Emerson Ave. 
S."},{"top":203.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Minneapolis"},{"top":203.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":203.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55419"},{"top":203.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"Attorney at Law"},{"top":203.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":203.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":203.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":214.26001,"left":16.97,"width":155.53001403808594,"height":10.79998779296875,"text":"Banks"},{"top":214.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Michael or Patricia"},{"top":214.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1664 Van Buren Ave."},{"top":214.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":214.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":214.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":214.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"retired social 
worker"},{"top":214.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":214.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":214.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":225.06,"left":16.97,"width":155.53001403808594,"height":10.800018310546875,"text":"Bennett"},{"top":225.06,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"David"},{"top":225.06,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"25 Birchwood Rd."},{"top":225.06,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Mahtomedi"},{"top":225.06,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":225.06,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":225.06,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/3/2012"},{"top":225.06,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":235.86002,"left":16.97,"width":155.53001403808594,"height":10.799972534179688,"text":"Berry"},{"top":235.86002,"left":172.50002,"width":69.71998596191406,"height":10.799972534179688,"text":"Margaret"},{"top":235.86002,"left":242.22,"width":104.760009765625,"height":10.799972534179688,"text":"1267 Pike Lake Dr"},{"top":235.86002,"left":346.98,"width":70.80001831054688,"height":10.799972534179688,"text":"New 
Brighton"},{"top":235.86002,"left":417.78003,"width":20.87994384765625,"height":10.799972534179688,"text":"MN"},{"top":235.86002,"left":438.65997,"width":49.91998291015625,"height":10.799972534179688,"text":"55112"},{"top":235.86002,"left":488.57996,"width":78.47998046875,"height":10.799972534179688,"text":"Asst Principal"},{"top":235.86002,"left":567.05994,"width":106.4400634765625,"height":10.799972534179688,"text":"Saint Paul Public Schools"},{"top":235.86002,"left":673.5,"width":45.52001953125,"height":10.799972534179688,"text":"8/8/2012"},{"top":235.86002,"left":719.02,"width":43.28033447265625,"height":10.799972534179688,"text":"100.00"}],[{"top":246.65999,"left":16.97,"width":155.53001403808594,"height":10.800033569335938,"text":"Boyle"},{"top":246.65999,"left":172.50002,"width":69.71998596191406,"height":10.800033569335938,"text":"Matthew C. \u0026 Eliza"},{"top":246.65999,"left":242.22,"width":104.760009765625,"height":10.800033569335938,"text":"2165 Princeton Ave"},{"top":246.65999,"left":346.98,"width":70.80001831054688,"height":10.800033569335938,"text":"Saint 
Paul"},{"top":246.65999,"left":417.78003,"width":20.87994384765625,"height":10.800033569335938,"text":"MN"},{"top":246.65999,"left":438.65997,"width":49.91998291015625,"height":10.800033569335938,"text":"55105"},{"top":246.65999,"left":488.57996,"width":78.47998046875,"height":10.800033569335938,"text":""},{"top":246.65999,"left":567.05994,"width":106.4400634765625,"height":10.800033569335938,"text":""},{"top":246.65999,"left":673.5,"width":45.52001953125,"height":10.800033569335938,"text":"10/6/2012"},{"top":246.65999,"left":719.02,"width":43.28033447265625,"height":10.800033569335938,"text":"100.00"}],[{"top":257.46002,"left":16.97,"width":155.53001403808594,"height":10.79998779296875,"text":"Brodrick"},{"top":257.46002,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John"},{"top":257.46002,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles"},{"top":257.46002,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"St. 
Paul"},{"top":257.46002,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":257.46002,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":257.46002,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BOE"},{"top":257.46002,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"retired"},{"top":257.46002,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"8/26/2012"},{"top":257.46002,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":268.26,"left":16.97,"width":155.53001403808594,"height":10.79998779296875,"text":"Brodrick"},{"top":268.26,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John F."},{"top":268.26,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles Ave"},{"top":268.26,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":268.26,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":268.26,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":268.26,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BoE"},{"top":268.26,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":268.26,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/4/2012"},{"top":268.26,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":279.06,"left":16.97,"width":155.53001403808594,"height":10.779998779296875,"text":"Brown"},{"top":279.06,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Mike"},{"top":279.06,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1440 Goodrich 
Ave"},{"top":279.06,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":279.06,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":279.06,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":279.06,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":279.06,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":279.06,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"9/23/2012"},{"top":279.06,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":289.84,"left":16.97,"width":155.53001403808594,"height":10.819976806640625,"text":"Cacy"},{"top":289.84,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Jill"},{"top":289.84,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"1409 Smith Ave So"},{"top":289.84,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"West St. 
Paul"},{"top":289.84,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":289.84,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55118"},{"top":289.84,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Administrator"},{"top":289.84,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"SPPS"},{"top":289.84,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"9/12/2012"},{"top":289.84,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":300.65997,"left":16.97,"width":155.53001403808594,"height":10.800018310546875,"text":"Cardwell"},{"top":300.65997,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Eileen"},{"top":300.65997,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4172 Bridgewater Ct"},{"top":300.65997,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Vadnais Height"},{"top":300.65997,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":300.65997,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55127"},{"top":300.65997,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/3/2012"},{"top":300.65997,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":311.46,"left":16.97,"width":155.53001403808594,"height":10.800018310546875,"text":"Carlstrom"},{"top":311.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheryl"},{"top":311.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4244 Oakmede 
Ln"},{"top":311.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"St Paul"},{"top":311.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":311.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":311.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Satff"},{"top":311.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public Schools"},{"top":311.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/8/2012"},{"top":311.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":322.26,"left":16.97,"width":155.53001403808594,"height":10.800018310546875,"text":"Carter"},{"top":322.26,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Melvin W. \u0026 Willet"},{"top":322.26,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"405 Western Ave N"},{"top":322.26,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":322.26,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":322.26,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55103"},{"top":322.26,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Cty Commissioner"},{"top":322.26,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Ramsey 
Cty"},{"top":322.26,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/8/2012"},{"top":322.26,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":333.06003,"left":16.97,"width":155.53001403808594,"height":10.8199462890625,"text":"Caruso"},{"top":333.06003,"left":172.50002,"width":69.71998596191406,"height":10.8199462890625,"text":"Sarah"},{"top":333.06003,"left":242.22,"width":104.760009765625,"height":10.8199462890625,"text":"2615 Newton Ave S"},{"top":333.06003,"left":346.98,"width":70.80001831054688,"height":10.8199462890625,"text":"Minneapolis"},{"top":333.06003,"left":417.78003,"width":20.87994384765625,"height":10.8199462890625,"text":"MN"},{"top":333.06003,"left":438.65997,"width":49.91998291015625,"height":10.8199462890625,"text":"55405"},{"top":333.06003,"left":488.57996,"width":78.47998046875,"height":10.8199462890625,"text":"CEO"},{"top":333.06003,"left":567.05994,"width":106.4400634765625,"height":10.8199462890625,"text":"United Way"},{"top":333.06003,"left":673.5,"width":45.52001953125,"height":10.8199462890625,"text":"9/12/2012"},{"top":333.06003,"left":719.02,"width":43.28033447265625,"height":10.8199462890625,"text":"100.00"}],[{"top":343.87997,"left":16.97,"width":155.53001403808594,"height":10.779998779296875,"text":"Casey /Trewartha"},{"top":343.87997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Kerry F. 
/ Kelly A."},{"top":343.87997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"2266 Goodrich Ave"},{"top":343.87997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":343.87997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":343.87997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":343.87997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":343.87997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":343.87997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/4/2012"},{"top":343.87997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":354.65997,"left":16.97,"width":155.53001403808594,"height":10.779998779296875,"text":"Cassidy"},{"top":354.65997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Paul D."},{"top":354.65997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1990 Dayton Ave"},{"top":354.65997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":354.65997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":354.65997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55104"},{"top":354.65997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":354.65997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":354.65997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/6/2012"},{"top":354.65997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":365.43997,"left":16.97,"width":155.53001403808594,"height":10.820037841796875,"text":"Causey"},{"top":365.43997,"left":172.50002,"width":69.71998596191406,"height":10.820037841796875,"text":"Christopher"},{"top":365.43997,"left":242.22,"width":104.760009765625,"height":10.820037841796875,"text":"2181 Doswell Avenue"},{"top":365.43997,"left":346.98,"width":70.80001831054688,"height":10.820037841796875,"text":"Saint Paul"},{"top":365.43997,"left":417.78003,"width":20.87994384765625,"height":10.820037841796875,"text":"MN"},{"top":365.43997,"left":438.65997,"width":49.91998291015625,"height":10.820037841796875,"text":"55108"},{"top":365.43997,"left":488.57996,"width":78.47998046875,"height":10.820037841796875,"text":"finance"},{"top":365.43997,"left":567.05994,"width":106.4400634765625,"height":10.820037841796875,"text":""},{"top":365.43997,"left":673.5,"width":45.52001953125,"height":10.820037841796875,"text":"9/3/2012"},{"top":365.43997,"left":719.02,"width":43.28033447265625,"height":10.820037841796875,"text":"100.00"}],[{"top":376.26,"left":16.97,"width":155.53001403808594,"height":10.819976806640625,"text":"Christiansen"},{"top":376.26,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Lena"},{"top":376.26,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"2453 Como 
Ave"},{"top":376.26,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"St Paul"},{"top":376.26,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":376.26,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55108"},{"top":376.26,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Principal"},{"top":376.26,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"Saint Paul Public Schools"},{"top":376.26,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"8/8/2012"},{"top":376.26,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":387.08,"left":16.97,"width":155.53001403808594,"height":10.779998779296875,"text":"Clapp"},{"top":387.08,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Agustus (Bill)"},{"top":387.08,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"757 Osceola Ave #1"},{"top":387.08,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":387.08,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":387.08,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":387.08,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"retired"},{"top":387.08,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":387.08,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/2/2012"},{"top":387.08,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":397.86,"left":16.97,"width":155.53001403808594,"height":10.779998779296875,"text":"Cohen"},{"top":397.86,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Brad 
A."},{"top":397.86,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1460 Raymond Ave"},{"top":397.86,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":397.86,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":397.86,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55108"},{"top":397.86,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"academic technology"},{"top":397.86,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":"UMN"},{"top":397.86,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/17/2012"},{"top":397.86,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":408.63998,"left":16.97,"width":155.53001403808594,"height":10.82000732421875,"text":"Commers"},{"top":408.63998,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Beth"},{"top":408.63998,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2294 Commonwealth Ave"},{"top":408.63998,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St Paul"},{"top":408.63998,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":408.63998,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":408.63998,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":"Self 
Employed"},{"top":408.63998,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":"Homemaker"},{"top":408.63998,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"8/1/2012"},{"top":408.63998,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":419.46,"left":16.97,"width":155.53001403808594,"height":10.800018310546875,"text":"Couture"},{"top":419.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Steven"},{"top":419.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"7950 Victoria Way"},{"top":419.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Louise Park"},{"top":419.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":419.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55426"},{"top":419.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Principal"},{"top":419.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public Schools"},{"top":419.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/10/2012"},{"top":419.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":430.26,"left":16.97,"width":155.53001403808594,"height":10.800048828125,"text":"Crawford"},{"top":430.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Lydia P."},{"top":430.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"1140 Edgcumbe Rd"},{"top":430.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint 
Paul"},{"top":430.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":430.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":430.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":430.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":430.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"9/15/2012"},{"top":430.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":441.06006,"left":16.97,"width":155.53001403808594,"height":10.799896240234375,"text":"Cudahy / Ricker"},{"top":441.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Robert \u0026 Mary C"},{"top":441.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"616 Cherokee Ave."},{"top":441.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint Paul"},{"top":441.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":441.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55107"},{"top":441.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"Teacher/Union Presid"},{"top":441.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"SPPS/SPFT"},{"top":441.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"9/18/2012"},{"top":441.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":451.85995,"left":16.97,"width":155.53001403808594,"height":10.800018310546875,"text":"Cudahy / Ricker"},{"top":451.85995,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Robert \u0026 Mary C"},{"top":451.85995,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"616 Cherokee 
Ave."},{"top":451.85995,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":451.85995,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":451.85995,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55107"},{"top":451.85995,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Teacher/Union Presid"},{"top":451.85995,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"SPPS/SPFT"},{"top":451.85995,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/6/2012"},{"top":451.85995,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":462.65997,"left":16.97,"width":155.53001403808594,"height":10.82000732421875,"text":"Currie"},{"top":462.65997,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Elisabeth"},{"top":462.65997,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2274 Hillside Ave"},{"top":462.65997,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St. 
Paul"},{"top":462.65997,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":462.65997,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":462.65997,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":""},{"top":462.65997,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":""},{"top":462.65997,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"9/12/2012"},{"top":462.65997,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":473.47998,"left":16.97,"width":155.53001403808594,"height":10.780029296875,"text":"Doane"},{"top":473.47998,"left":172.50002,"width":69.71998596191406,"height":10.780029296875,"text":"Paul V \u0026 Helen R"},{"top":473.47998,"left":242.22,"width":104.760009765625,"height":10.780029296875,"text":"444 Portland Ave"},{"top":473.47998,"left":346.98,"width":70.80001831054688,"height":10.780029296875,"text":"Sant Paul"},{"top":473.47998,"left":417.78003,"width":20.87994384765625,"height":10.780029296875,"text":"MN"},{"top":473.47998,"left":438.65997,"width":49.91998291015625,"height":10.780029296875,"text":"55102"},{"top":473.47998,"left":488.57996,"width":78.47998046875,"height":10.780029296875,"text":"Ex director"},{"top":473.47998,"left":567.05994,"width":106.4400634765625,"height":10.780029296875,"text":"St Paul Teachers\u0027 Retirement A"},{"top":473.47998,"left":673.5,"width":45.52001953125,"height":10.780029296875,"text":"10/3/2012"},{"top":473.47998,"left":719.02,"width":43.28033447265625,"height":10.780029296875,"text":"100.00"}],[{"top":484.26,"left":16.97,"width":155.53001403808594,"height":10.800048828125,"text":"Dougherty"},{"top":484.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Richards S \u0026 Patrici"},{"top":484.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"812 Goodrich 
Ave"},{"top":484.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint Paul"},{"top":484.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":484.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":484.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":484.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":484.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"10/4/2012"},{"top":484.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":495.06006,"left":16.97,"width":155.53001403808594,"height":10.799896240234375,"text":"Driscoll"},{"top":495.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Joe \u0026 Jill"},{"top":495.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"2383 Bourne Ave"},{"top":495.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint Paul"},{"top":495.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":495.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55108"},{"top":495.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"HR Manager"},{"top":495.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"LexisNexis"},{"top":495.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"10/18/2012"},{"top":495.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":505.85995,"left":16.97,"width":155.53001403808594,"height":10.780059814453125,"text":"Dubaille / Haugee"},{"top":505.85995,"left":172.50002,"width":69.71998596191406,"height":10.780059814453125,"text":"Florence M. 
/Eric"},{"top":505.85995,"left":242.22,"width":104.760009765625,"height":10.780059814453125,"text":"1009 Edmund Ave"},{"top":505.85995,"left":346.98,"width":70.80001831054688,"height":10.780059814453125,"text":"Saint Paul"},{"top":505.85995,"left":417.78003,"width":20.87994384765625,"height":10.780059814453125,"text":"MN"},{"top":505.85995,"left":438.65997,"width":49.91998291015625,"height":10.780059814453125,"text":"55104"},{"top":505.85995,"left":488.57996,"width":78.47998046875,"height":10.780059814453125,"text":"Florence‐teacher"},{"top":505.85995,"left":567.05994,"width":106.4400634765625,"height":10.780059814453125,"text":"SPPS"},{"top":505.85995,"left":673.5,"width":45.52001953125,"height":10.780059814453125,"text":"10/3/2012"},{"top":505.85995,"left":719.02,"width":43.28033447265625,"height":10.780059814453125,"text":"100.00"}],[{"top":516.64,"left":16.97,"width":155.53001403808594,"height":10.83990478515625,"text":"Eaton"},{"top":516.64,"left":172.50002,"width":69.71998596191406,"height":10.83990478515625,"text":"Jim"},{"top":516.64,"left":242.22,"width":104.760009765625,"height":10.83990478515625,"text":"2133 Berkeley Ave"},{"top":516.64,"left":346.98,"width":70.80001831054688,"height":10.83990478515625,"text":"St Paul"},{"top":516.64,"left":417.78003,"width":20.87994384765625,"height":10.83990478515625,"text":"MN"},{"top":516.64,"left":438.65997,"width":49.91998291015625,"height":10.83990478515625,"text":"55105"},{"top":516.64,"left":488.57996,"width":78.47998046875,"height":10.83990478515625,"text":"Principal"},{"top":516.64,"left":567.05994,"width":106.4400634765625,"height":10.83990478515625,"text":"Saint Paul Public Schools"},{"top":516.64,"left":673.5,"width":45.52001953125,"height":10.83990478515625,"text":"8/23/2012"},{"top":516.64,"left":719.02,"width":43.28033447265625,"height":10.83990478515625,"text":"100.00"}],[{"top":527.4799,"left":16.97,"width":155.53001403808594,"height":10.78009033203125,"text":"Eaves 
/Alger"},{"top":527.4799,"left":172.50002,"width":69.71998596191406,"height":10.78009033203125,"text":"Patricia / Stuart"},{"top":527.4799,"left":242.22,"width":104.760009765625,"height":10.78009033203125,"text":"1143 Portladn Ave."},{"top":527.4799,"left":346.98,"width":70.80001831054688,"height":10.78009033203125,"text":"Saint Paul"},{"top":527.4799,"left":417.78003,"width":20.87994384765625,"height":10.78009033203125,"text":"MN"},{"top":527.4799,"left":438.65997,"width":49.91998291015625,"height":10.78009033203125,"text":"55104"},{"top":527.4799,"left":488.57996,"width":78.47998046875,"height":10.78009033203125,"text":""},{"top":527.4799,"left":567.05994,"width":106.4400634765625,"height":10.78009033203125,"text":""},{"top":527.4799,"left":673.5,"width":45.52001953125,"height":10.78009033203125,"text":"10/3/2012"},{"top":527.4799,"left":719.02,"width":43.28033447265625,"height":10.78009033203125,"text":"100.00"}]]} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/json/spanning_cells.json b/src/test/resources/technology/tabula/json/spanning_cells.json index 46984857..89bb9707 100644 --- a/src/test/resources/technology/tabula/json/spanning_cells.json +++ b/src/test/resources/technology/tabula/json/spanning_cells.json @@ -1 +1 @@ -[{"extraction_method":"spreadsheet","top":146.51932,"left":119.78943,"width":355.312255859375,"height":270.5516052246094,"data":[[{"top":146.51932,"left":119.78943,"width":355.312255859375,"height":12.938674926757812,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.458,"left":119.78943,"width":121.92680358886719,"height":12.785385131835938,"text":"Volume servers 
in:"},{"top":159.458,"left":241.71623,"width":46.66426086425781,"height":12.785385131835938,"text":"2007"},{"top":159.458,"left":288.3805,"width":46.663482666015625,"height":12.785385131835938,"text":"2008"},{"top":159.458,"left":335.04398,"width":46.663818359375,"height":12.785385131835938,"text":"2009"},{"top":159.458,"left":381.7078,"width":46.663360595703125,"height":12.785385131835938,"text":"2010"},{"top":159.458,"left":428.37115,"width":46.73052978515625,"height":12.785385131835938,"text":"2011"}],[{"top":172.24338,"left":119.78943,"width":121.92680358886719,"height":12.832305908203125,"text":"Server closets"},{"top":172.24338,"left":241.71623,"width":46.66426086425781,"height":12.832305908203125,"text":"1,505"},{"top":172.24338,"left":288.3805,"width":46.663482666015625,"height":12.832305908203125,"text":"1,580"},{"top":172.24338,"left":335.04398,"width":46.663818359375,"height":12.832305908203125,"text":"1,643"},{"top":172.24338,"left":381.7078,"width":46.663360595703125,"height":12.832305908203125,"text":"1,673"},{"top":172.24338,"left":428.37115,"width":46.73052978515625,"height":12.832305908203125,"text":"1,689"}],[{"top":185.07568,"left":119.78943,"width":121.92680358886719,"height":12.891036987304688,"text":"Server rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663360595703125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37115,"width":46.73052978515625,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.78943,"width":121.92680358886719,"height":12.890426635742188,"text":"Localized data 
centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890426635742188,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890426635742188,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890426635742188,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663360595703125,"height":12.890426635742188,"text":"1,677"},{"top":197.96672,"left":428.37115,"width":46.73052978515625,"height":12.890426635742188,"text":"1,693"}],[{"top":210.85715,"left":119.78943,"width":121.92680358886719,"height":12.890701293945312,"text":"Mid-tier data centers"},{"top":210.85715,"left":241.71623,"width":46.66426086425781,"height":12.890701293945312,"text":"1,512"},{"top":210.85715,"left":288.3805,"width":46.663482666015625,"height":12.890701293945312,"text":"1,586"},{"top":210.85715,"left":335.04398,"width":46.663818359375,"height":12.890701293945312,"text":"1,646"},{"top":210.85715,"left":381.7078,"width":46.663360595703125,"height":12.890701293945312,"text":"1,677"},{"top":210.85715,"left":428.37115,"width":46.73052978515625,"height":12.890701293945312,"text":"1,693"}],[{"top":223.74785,"left":119.78943,"width":121.92680358886719,"height":12.785049438476562,"text":"Enterprise-class data centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785049438476562,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785049438476562,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785049438476562,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663360595703125,"height":12.785049438476562,"text":"1,677"},{"top":223.74785,"left":428.37115,"width":46.73052978515625,"height":12.785049438476562,"text":"1,693"}],[{"top":236.5329,"left":119.78943,"width":355.312255859375,"height":13.102508544921875,"text":"Best practice 
scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.6354,"left":119.78943,"width":121.92680358886719,"height":12.72686767578125,"text":"Volume servers in:"},{"top":249.6354,"left":241.71623,"width":46.66426086425781,"height":12.72686767578125,"text":"2007"},{"top":249.6354,"left":288.3805,"width":46.663482666015625,"height":12.72686767578125,"text":"2008"},{"top":249.6354,"left":335.04398,"width":46.663818359375,"height":12.72686767578125,"text":"2009"},{"top":249.6354,"left":381.7078,"width":46.663360595703125,"height":12.72686767578125,"text":"2010"},{"top":249.6354,"left":428.37115,"width":46.73052978515625,"height":12.72686767578125,"text":"2011"}],[{"top":262.36227,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Server closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.78943,"width":121.92680358886719,"height":12.891510009765625,"text":"Server 
rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.891510009765625,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.891510009765625,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.891510009765625,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663360595703125,"height":12.891510009765625,"text":"1,334"},{"top":275.25275,"left":428.37115,"width":46.73052978515625,"height":12.891510009765625,"text":"1,371"}],[{"top":288.14426,"left":119.78943,"width":121.92680358886719,"height":12.89013671875,"text":"Localized data centers"},{"top":288.14426,"left":241.71623,"width":46.66426086425781,"height":12.89013671875,"text":"1,465"},{"top":288.14426,"left":288.3805,"width":46.663482666015625,"height":12.89013671875,"text":"1,471"},{"top":288.14426,"left":335.04398,"width":46.663818359375,"height":12.89013671875,"text":"1,426"},{"top":288.14426,"left":381.7078,"width":46.663360595703125,"height":12.89013671875,"text":"1,334"},{"top":288.14426,"left":428.37115,"width":46.73052978515625,"height":12.89013671875,"text":"1,371"}],[{"top":301.0344,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Mid-tier data centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.78943,"width":121.92680358886719,"height":12.7855224609375,"text":"Enterprise-class data 
centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.7855224609375,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.7855224609375,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.7855224609375,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663360595703125,"height":12.7855224609375,"text":"1,334"},{"top":313.92487,"left":428.37115,"width":46.73052978515625,"height":12.7855224609375,"text":"1,371"}],[{"top":326.7104,"left":119.78943,"width":355.312255859375,"height":13.0440673828125,"text":"State-of-the-art scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75446,"left":119.78943,"width":121.92680358886719,"height":12.784912109375,"text":"Volume servers in:"},{"top":339.75446,"left":241.71623,"width":46.66426086425781,"height":12.784912109375,"text":"2007"},{"top":339.75446,"left":288.3805,"width":46.663482666015625,"height":12.784912109375,"text":"2008"},{"top":339.75446,"left":335.04398,"width":46.663818359375,"height":12.784912109375,"text":"2009"},{"top":339.75446,"left":381.7078,"width":46.663360595703125,"height":12.784912109375,"text":"2010"},{"top":339.75446,"left":428.37115,"width":46.73052978515625,"height":12.784912109375,"text":"2011"}],[{"top":352.53937,"left":119.78943,"width":121.92680358886719,"height":12.89111328125,"text":"Server 
closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663360595703125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37115,"width":46.73052978515625,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.78943,"width":121.92680358886719,"height":12.8902587890625,"text":"Server rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663360595703125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37115,"width":46.73052978515625,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.78943,"width":121.92680358886719,"height":12.891143798828125,"text":"Localized data centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663360595703125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37115,"width":46.73052978515625,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.78943,"width":121.92680358886719,"height":12.89068603515625,"text":"Mid-tier data 
centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663360595703125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37115,"width":46.73052978515625,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.78943,"width":121.92680358886719,"height":12.968353271484375,"text":"Enterprise-class data centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968353271484375,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968353271484375,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968353271484375,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663360595703125,"height":12.968353271484375,"text":"1,424"},{"top":404.10257,"left":428.37115,"width":46.73052978515625,"height":12.968353271484375,"text":"1,485"}]]},{"extraction_method":"spreadsheet","top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.52508544921875,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.880615234375,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.14026,"left":130.8136,"width":99.93739318847656,"height":12.78485107421875,"text":"Server 
class"},{"top":551.14026,"left":230.75099,"width":46.66310119628906,"height":12.78485107421875,"text":"2007"},{"top":551.14026,"left":277.4141,"width":46.663787841796875,"height":12.78485107421875,"text":"2008"},{"top":551.14026,"left":324.07788,"width":46.663970947265625,"height":12.78485107421875,"text":"2009"},{"top":551.14026,"left":370.74185,"width":46.663177490234375,"height":12.78485107421875,"text":"2010"},{"top":551.14026,"left":417.40503,"width":46.712371826171875,"height":12.78485107421875,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66310119628906,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.4141,"width":46.663787841796875,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.9691162109375,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66310119628906,"height":12.9691162109375,"text":"76,295"},{"top":576.8156,"left":277.4141,"width":46.663787841796875,"height":12.9691162109375,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.9691162109375,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.9691162109375,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.9691162109375,"text":"98,864"}]]}] \ No newline at end of file 
+[{"extraction_method":"lattice","top":146.51932,"left":119.78943,"width":355.312255859375,"height":270.5516052246094,"right":475.10168,"bottom":417.07092,"data":[[{"top":146.51932,"left":119.78943,"width":355.312255859375,"height":12.938491821289062,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.78943,"width":121.92680358886719,"height":12.785446166992188,"text":"Volume servers in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663360595703125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37115,"width":46.73052978515625,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.78943,"width":121.92680358886719,"height":12.832427978515625,"text":"Server closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663360595703125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37115,"width":46.73052978515625,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.78943,"width":121.92680358886719,"height":12.891036987304688,"text":"Server 
rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663360595703125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37115,"width":46.73052978515625,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.78943,"width":121.92680358886719,"height":12.890411376953125,"text":"Localized data centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663360595703125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37115,"width":46.73052978515625,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.78943,"width":121.92680358886719,"height":12.890716552734375,"text":"Mid-tier data centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663360595703125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37115,"width":46.73052978515625,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.78943,"width":121.92680358886719,"height":12.785018920898438,"text":"Enterprise-class data 
centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785018920898438,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785018920898438,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785018920898438,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663360595703125,"height":12.785018920898438,"text":"1,677"},{"top":223.74785,"left":428.37115,"width":46.73052978515625,"height":12.785018920898438,"text":"1,693"}],[{"top":236.53287,"left":119.78943,"width":355.312255859375,"height":13.1025390625,"text":"Best practice scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.6354,"left":119.78943,"width":121.92680358886719,"height":12.72686767578125,"text":"Volume servers in:"},{"top":249.6354,"left":241.71623,"width":46.66426086425781,"height":12.72686767578125,"text":"2007"},{"top":249.6354,"left":288.3805,"width":46.663482666015625,"height":12.72686767578125,"text":"2008"},{"top":249.6354,"left":335.04398,"width":46.663818359375,"height":12.72686767578125,"text":"2009"},{"top":249.6354,"left":381.7078,"width":46.663360595703125,"height":12.72686767578125,"text":"2010"},{"top":249.6354,"left":428.37115,"width":46.73052978515625,"height":12.72686767578125,"text":"2011"}],[{"top":262.36227,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Server 
closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.78943,"width":121.92680358886719,"height":12.8914794921875,"text":"Server rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663360595703125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37115,"width":46.73052978515625,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.78943,"width":121.92680358886719,"height":12.890167236328125,"text":"Localized data centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663360595703125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37115,"width":46.73052978515625,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Mid-tier data 
centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.78943,"width":121.92680358886719,"height":12.7855224609375,"text":"Enterprise-class data centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.7855224609375,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.7855224609375,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.7855224609375,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663360595703125,"height":12.7855224609375,"text":"1,334"},{"top":313.92487,"left":428.37115,"width":46.73052978515625,"height":12.7855224609375,"text":"1,371"}],[{"top":326.7104,"left":119.78943,"width":355.312255859375,"height":13.0440673828125,"text":"State-of-the-art scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75446,"left":119.78943,"width":121.92680358886719,"height":12.784912109375,"text":"Volume servers 
in:"},{"top":339.75446,"left":241.71623,"width":46.66426086425781,"height":12.784912109375,"text":"2007"},{"top":339.75446,"left":288.3805,"width":46.663482666015625,"height":12.784912109375,"text":"2008"},{"top":339.75446,"left":335.04398,"width":46.663818359375,"height":12.784912109375,"text":"2009"},{"top":339.75446,"left":381.7078,"width":46.663360595703125,"height":12.784912109375,"text":"2010"},{"top":339.75446,"left":428.37115,"width":46.73052978515625,"height":12.784912109375,"text":"2011"}],[{"top":352.53937,"left":119.78943,"width":121.92680358886719,"height":12.89111328125,"text":"Server closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663360595703125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37115,"width":46.73052978515625,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.78943,"width":121.92680358886719,"height":12.8902587890625,"text":"Server rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663360595703125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37115,"width":46.73052978515625,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.78943,"width":121.92680358886719,"height":12.891143798828125,"text":"Localized data 
centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663360595703125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37115,"width":46.73052978515625,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.78943,"width":121.92680358886719,"height":12.89068603515625,"text":"Mid-tier data centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663360595703125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37115,"width":46.73052978515625,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.78943,"width":121.92680358886719,"height":12.968353271484375,"text":"Enterprise-class data 
centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968353271484375,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968353271484375,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968353271484375,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663360595703125,"height":12.968353271484375,"text":"1,424"},{"top":404.10257,"left":428.37115,"width":46.73052978515625,"height":12.968353271484375,"text":"1,485"}]]},{"extraction_method":"lattice","top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.52508544921875,"right":464.1174,"bottom":589.7847,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.880615234375,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.14026,"left":130.8136,"width":99.93739318847656,"height":12.78485107421875,"text":"Server 
class"},{"top":551.14026,"left":230.75099,"width":46.66316223144531,"height":12.78485107421875,"text":"2007"},{"top":551.14026,"left":277.41415,"width":46.663726806640625,"height":12.78485107421875,"text":"2008"},{"top":551.14026,"left":324.07788,"width":46.663970947265625,"height":12.78485107421875,"text":"2009"},{"top":551.14026,"left":370.74185,"width":46.663177490234375,"height":12.78485107421875,"text":"2010"},{"top":551.14026,"left":417.40503,"width":46.712371826171875,"height":12.78485107421875,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.9691162109375,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.9691162109375,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.9691162109375,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.9691162109375,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.9691162109375,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.9691162109375,"text":"98,864"}]]}] diff --git a/src/test/resources/technology/tabula/json/spanning_cells_basic.json b/src/test/resources/technology/tabula/json/spanning_cells_basic.json new file mode 100644 index 00000000..d0164280 --- /dev/null +++ 
b/src/test/resources/technology/tabula/json/spanning_cells_basic.json @@ -0,0 +1 @@ +[{"extraction_method":"lattice","top":150.56,"left":119.789444,"width":355.31939697265625,"height":266.5108642578125,"right":475.10883,"bottom":417.07086,"data":[[{"top":150.56,"left":119.789444,"width":355.31939697265625,"height":8.897811889648438,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.789444,"width":121.92678833007812,"height":12.785446166992188,"text":"Volume servers in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663330078125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37112,"width":46.73773193359375,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.789444,"width":121.92678833007812,"height":12.832427978515625,"text":"Server 
closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663330078125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37112,"width":46.73773193359375,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.789444,"width":121.92678833007812,"height":12.891036987304688,"text":"Server rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663330078125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37112,"width":46.73773193359375,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.789444,"width":121.92678833007812,"height":12.890411376953125,"text":"Localized data centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663330078125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37112,"width":46.73773193359375,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.789444,"width":121.92678833007812,"height":12.890716552734375,"text":"Mid-tier data 
centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663330078125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37112,"width":46.73773193359375,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.789444,"width":121.92678833007812,"height":12.785003662109375,"text":"Enterprise-class data centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785003662109375,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785003662109375,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785003662109375,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663330078125,"height":12.785003662109375,"text":"1,677"},{"top":223.74785,"left":428.37112,"width":46.73773193359375,"height":12.785003662109375,"text":"1,693"}],[{"top":236.53285,"left":119.789444,"width":355.31939697265625,"height":13.1025390625,"text":"Best practice scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.63539,"left":119.789444,"width":121.92678833007812,"height":12.726882934570312,"text":"Volume servers 
in:"},{"top":249.63539,"left":241.71623,"width":46.66426086425781,"height":12.726882934570312,"text":"2007"},{"top":249.63539,"left":288.3805,"width":46.663482666015625,"height":12.726882934570312,"text":"2008"},{"top":249.63539,"left":335.04398,"width":46.663818359375,"height":12.726882934570312,"text":"2009"},{"top":249.63539,"left":381.7078,"width":46.663330078125,"height":12.726882934570312,"text":"2010"},{"top":249.63539,"left":428.37112,"width":46.73773193359375,"height":12.726882934570312,"text":"2011"}],[{"top":262.36227,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Server closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.789444,"width":121.92678833007812,"height":12.8914794921875,"text":"Server rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663330078125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37112,"width":46.73773193359375,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.789444,"width":121.92678833007812,"height":12.890167236328125,"text":"Localized data 
centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663330078125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37112,"width":46.73773193359375,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Mid-tier data centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.789444,"width":121.92678833007812,"height":12.785552978515625,"text":"Enterprise-class data centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.785552978515625,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.785552978515625,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.785552978515625,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663330078125,"height":12.785552978515625,"text":"1,334"},{"top":313.92487,"left":428.37112,"width":46.73773193359375,"height":12.785552978515625,"text":"1,371"}],[{"top":326.71042,"left":119.789444,"width":355.31939697265625,"height":13.04400634765625,"text":"State-of-the-art 
scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75443,"left":119.789444,"width":121.92678833007812,"height":12.784942626953125,"text":"Volume servers in:"},{"top":339.75443,"left":241.71623,"width":46.66426086425781,"height":12.784942626953125,"text":"2007"},{"top":339.75443,"left":288.3805,"width":46.663482666015625,"height":12.784942626953125,"text":"2008"},{"top":339.75443,"left":335.04398,"width":46.663818359375,"height":12.784942626953125,"text":"2009"},{"top":339.75443,"left":381.7078,"width":46.663330078125,"height":12.784942626953125,"text":"2010"},{"top":339.75443,"left":428.37112,"width":46.73773193359375,"height":12.784942626953125,"text":"2011"}],[{"top":352.53937,"left":119.789444,"width":121.92678833007812,"height":12.89111328125,"text":"Server closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663330078125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37112,"width":46.73773193359375,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.789444,"width":121.92678833007812,"height":12.8902587890625,"text":"Server 
rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663330078125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37112,"width":46.73773193359375,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.789444,"width":121.92678833007812,"height":12.891143798828125,"text":"Localized data centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663330078125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37112,"width":46.73773193359375,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.789444,"width":121.92678833007812,"height":12.89068603515625,"text":"Mid-tier data centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663330078125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37112,"width":46.73773193359375,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.789444,"width":121.92678833007812,"height":12.968292236328125,"text":"Enterprise-class data 
centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968292236328125,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968292236328125,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968292236328125,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663330078125,"height":12.968292236328125,"text":"1,424"},{"top":404.10257,"left":428.37112,"width":46.73773193359375,"height":12.968292236328125,"text":"1,485"}]]},{"extraction_method":"lattice","top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.5423583984375,"right":464.1174,"bottom":589.802,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.88055419921875,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.1402,"left":130.8136,"width":99.93739318847656,"height":12.784912109375,"text":"Server 
class"},{"top":551.1402,"left":230.75099,"width":46.66316223144531,"height":12.784912109375,"text":"2007"},{"top":551.1402,"left":277.41415,"width":46.663726806640625,"height":12.784912109375,"text":"2008"},{"top":551.1402,"left":324.07788,"width":46.663970947265625,"height":12.784912109375,"text":"2009"},{"top":551.1402,"left":370.74185,"width":46.663177490234375,"height":12.784912109375,"text":"2010"},{"top":551.1402,"left":417.40503,"width":46.712371826171875,"height":12.784912109375,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.98638916015625,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.98638916015625,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.98638916015625,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.98638916015625,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.98638916015625,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.98638916015625,"text":"98,864"}]]}] diff --git a/src/test/resources/technology/tabula/json/twotables.json b/src/test/resources/technology/tabula/json/twotables.json index 00211f47..f6bd4f36 100644 --- a/src/test/resources/technology/tabula/json/twotables.json +++ 
b/src/test/resources/technology/tabula/json/twotables.json @@ -1 +1 @@ -[{"extraction_method":"spreadsheet","top":111.18451,"left":67.41156,"width":342.44476318359375,"height":174.07479858398438,"data":[[{"top":111.18451,"left":67.41156,"width":85.61141967773438,"height":28.53582763671875,"text":" "},{"top":111.18451,"left":153.02298,"width":256.8333435058594,"height":14.266975402832031,"text":"株主資本"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":125.451485,"left":153.02298,"width":51.367401123046875,"height":14.268852233886719,"text":"資本金"},{"top":125.451485,"left":204.39038,"width":51.36669921875,"height":14.268852233886719,"text":"資本剰余金"},{"top":125.451485,"left":255.75708,"width":51.3656005859375,"height":14.268852233886719,"text":"利益剰余金"},{"top":125.451485,"left":307.12268,"width":51.366668701171875,"height":14.268852233886719,"text":"自己株式"},{"top":125.451485,"left":358.48935,"width":51.366973876953125,"height":14.268852233886719,"text":"株主資本合計"}],[{"top":139.72034,"left":67.41156,"width":85.61141967773438,"height":14.268722534179688,"text":"当期首残高"},{"top":139.72034,"left":153.02298,"width":51.367401123046875,"height":14.268722534179688,"text":"5,664"},{"top":139.72034,"left":204.39038,"width":51.36669921875,"height":14.268722534179688,"text":"749"},{"top":139.72034,"left":255.75708,"width":51.3656005859375,"height":14.268722534179688,"text":"12,017"},{"top":139.72034,"left":307.12268,"width":51.366668701171875,"height":14.268722534179688,"text":"△747"},{"top":139.72034,"left":358.48935,"width":51.366973876953125,"height":14.268722534179688,"text":"17,683"}],[{"top":153.98906,"left":67.41156,"width":85.61141967773438,"height":14.268753051757812,"text":"当期変動額"},{"top":153.98906,"left":153.02298,"width":51.36740112
3046875,"height":14.268753051757812,"text":" "},{"top":153.98906,"left":204.39038,"width":51.36669921875,"height":14.268753051757812,"text":" "},{"top":153.98906,"left":255.75708,"width":51.3656005859375,"height":14.268753051757812,"text":" "},{"top":153.98906,"left":307.12268,"width":51.366668701171875,"height":14.268753051757812,"text":" "},{"top":153.98906,"left":358.48935,"width":51.366973876953125,"height":14.268753051757812,"text":" "}],[{"top":168.25781,"left":67.41156,"width":85.61141967773438,"height":14.268936157226562,"text":"剰余金の配当"},{"top":168.25781,"left":153.02298,"width":51.367401123046875,"height":14.268936157226562,"text":" "},{"top":168.25781,"left":204.39038,"width":51.36669921875,"height":14.268936157226562,"text":" "},{"top":168.25781,"left":255.75708,"width":51.3656005859375,"height":14.268936157226562,"text":"△525"},{"top":168.25781,"left":307.12268,"width":51.366668701171875,"height":14.268936157226562,"text":" "},{"top":168.25781,"left":358.48935,"width":51.366973876953125,"height":14.268936157226562,"text":"△525"}],[{"top":182.52675,"left":67.41156,"width":85.61141967773438,"height":14.265869140625,"text":"当期純利益"},{"top":182.52675,"left":153.02298,"width":51.367401123046875,"height":14.265869140625,"text":" "},{"top":182.52675,"left":204.39038,"width":51.36669921875,"height":14.265869140625,"text":" "},{"top":182.52675,"left":255.75708,"width":51.3656005859375,"height":14.265869140625,"text":"1,269"},{"top":182.52675,"left":307.12268,"width":51.366668701171875,"height":14.265869140625,"text":" "},{"top":182.52675,"left":358.48935,"width":51.366973876953125,"height":14.265869140625,"text":"1,269"}],[{"top":196.79262,"left":67.41156,"width":85.61141967773438,"height":14.268966674804688,"text":"自己株式の取得"},{"top":196.79262,"left":153.02298,"width":51.367401123046875,"height":14.268966674804688,"text":" "},{"top":196.79262,"left":204.39038,"width":51.36669921875,"height":14.268966674804688,"text":" 
"},{"top":196.79262,"left":255.75708,"width":51.3656005859375,"height":14.268966674804688,"text":" "},{"top":196.79262,"left":307.12268,"width":51.366668701171875,"height":14.268966674804688,"text":"△0"},{"top":196.79262,"left":358.48935,"width":51.366973876953125,"height":14.268966674804688,"text":"△0"}],[{"top":211.06158,"left":67.41156,"width":85.61141967773438,"height":22.830032348632812,"text":"持分法の適用範囲\rの変動"},{"top":211.06158,"left":153.02298,"width":51.367401123046875,"height":22.830032348632812,"text":" "},{"top":211.06158,"left":204.39038,"width":51.36669921875,"height":22.830032348632812,"text":" "},{"top":211.06158,"left":255.75708,"width":51.3656005859375,"height":22.830032348632812,"text":"85"},{"top":211.06158,"left":307.12268,"width":51.366668701171875,"height":22.830032348632812,"text":" "},{"top":211.06158,"left":358.48935,"width":51.366973876953125,"height":22.830032348632812,"text":"85"}],[{"top":233.89162,"left":67.41156,"width":85.61141967773438,"height":22.829788208007812,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":233.89162,"left":153.02298,"width":51.367401123046875,"height":22.829788208007812,"text":" "},{"top":233.89162,"left":204.39038,"width":51.36669921875,"height":22.829788208007812,"text":" "},{"top":233.89162,"left":255.75708,"width":51.3656005859375,"height":22.829788208007812,"text":" "},{"top":233.89162,"left":307.12268,"width":51.366668701171875,"height":22.829788208007812,"text":" "},{"top":233.89162,"left":358.48935,"width":51.366973876953125,"height":22.829788208007812,"text":" 
"}],[{"top":256.7214,"left":67.41156,"width":85.61141967773438,"height":14.268798828125,"text":"当期変動額合計"},{"top":256.7214,"left":153.02298,"width":51.367401123046875,"height":14.268798828125,"text":"―"},{"top":256.7214,"left":204.39038,"width":51.36669921875,"height":14.268798828125,"text":"―"},{"top":256.7214,"left":255.75708,"width":51.3656005859375,"height":14.268798828125,"text":"829"},{"top":256.7214,"left":307.12268,"width":51.366668701171875,"height":14.268798828125,"text":"△0"},{"top":256.7214,"left":358.48935,"width":51.366973876953125,"height":14.268798828125,"text":"829"}],[{"top":270.9902,"left":67.41156,"width":85.61141967773438,"height":14.26910400390625,"text":"当期末残高"},{"top":270.9902,"left":153.02298,"width":51.367401123046875,"height":14.26910400390625,"text":"5,664"},{"top":270.9902,"left":204.39038,"width":51.36669921875,"height":14.26910400390625,"text":"749"},{"top":270.9902,"left":255.75708,"width":51.3656005859375,"height":14.26910400390625,"text":"12,846"},{"top":270.9902,"left":307.12268,"width":51.366668701171875,"height":14.26910400390625,"text":"△747"},{"top":270.9902,"left":358.48935,"width":51.366973876953125,"height":14.26910400390625,"text":"18,512"}]]},{"extraction_method":"spreadsheet","top":311.64725,"left":67.41156,"width":445.17803955078125,"height":191.19354248046875,"data":[[{"top":311.64725,"left":67.41156,"width":85.61141967773438,"height":45.658660888671875,"text":" 
"},{"top":311.64725,"left":153.02298,"width":256.8333435058594,"height":14.266693115234375,"text":"その他の包括利益累計額"},{"top":311.64725,"left":409.85632,"width":51.365631103515625,"height":45.658660888671875,"text":"少数株主持分"},{"top":311.64725,"left":461.22195,"width":51.367645263671875,"height":45.658660888671875,"text":"純資産合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":325.91394,"left":153.02298,"width":51.367401123046875,"height":31.3919677734375,"text":"その他有価証券\r評価差額金"},{"top":325.91394,"left":204.39038,"width":51.36669921875,"height":31.3919677734375,"text":"繰延ヘッジ\r損益"},{"top":325.91394,"left":255.75708,"width":51.3656005859375,"height":31.3919677734375,"text":"為替換算\r調整勘定"},{"top":325.91394,"left":307.12268,"width":51.366668701171875,"height":31.3919677734375,"text":"退職給付に係る\r調整累計額"},{"top":325.91394,"left":358.48935,"width":51.366973876953125,"height":31.3919677734375,"text":"その他の\r包括利益\r累計額合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":357.3059,"left":67.41156,"width":85.61141967773438,"height":14.266754150390625,"text":"当期首残高"},{"top":357.3059,"left":153.02298,"width":51.367401123046875,"height":14.266754150390625,"text":"△669"},{"top":357.3059,"left":204.39038,"width":51.36669921875,"height":14.266754150390625,"text":"61"},{"top":357.3059,"left":255.75708,"width":51.3656005859375,"height":14.266754150390625,"text":"△109"},{"top":357.3059,"left":307.12268,"width":51.366668701171875,"height":14.266754150390625,"text":"―"},{"top":357.3059,"left":358.48935,"width":51.366973876953125,"height":14.266754150390625,"text":"△717"},{"top":357.3059,"left":409.85632,"width":51.365631103515625,"height":14.266754150390625,"text":"246"},{"top":357
.3059,"left":461.22195,"width":51.367645263671875,"height":14.266754150390625,"text":"17,212"}],[{"top":371.57266,"left":67.41156,"width":85.61141967773438,"height":14.269073486328125,"text":"当期変動額"},{"top":371.57266,"left":153.02298,"width":51.367401123046875,"height":14.269073486328125,"text":" "},{"top":371.57266,"left":204.39038,"width":51.36669921875,"height":14.269073486328125,"text":" "},{"top":371.57266,"left":255.75708,"width":51.3656005859375,"height":14.269073486328125,"text":" "},{"top":371.57266,"left":307.12268,"width":51.366668701171875,"height":14.269073486328125,"text":""},{"top":371.57266,"left":358.48935,"width":51.366973876953125,"height":14.269073486328125,"text":" "},{"top":371.57266,"left":409.85632,"width":51.365631103515625,"height":14.269073486328125,"text":" "},{"top":371.57266,"left":461.22195,"width":51.367645263671875,"height":14.269073486328125,"text":" "}],[{"top":385.84174,"left":67.41156,"width":85.61141967773438,"height":14.268402099609375,"text":"剰余金の配当"},{"top":385.84174,"left":153.02298,"width":51.367401123046875,"height":14.268402099609375,"text":" "},{"top":385.84174,"left":204.39038,"width":51.36669921875,"height":14.268402099609375,"text":" "},{"top":385.84174,"left":255.75708,"width":51.3656005859375,"height":14.268402099609375,"text":" "},{"top":385.84174,"left":307.12268,"width":51.366668701171875,"height":14.268402099609375,"text":""},{"top":385.84174,"left":358.48935,"width":51.366973876953125,"height":14.268402099609375,"text":" "},{"top":385.84174,"left":409.85632,"width":51.365631103515625,"height":14.268402099609375,"text":" "},{"top":385.84174,"left":461.22195,"width":51.367645263671875,"height":14.268402099609375,"text":"△525"}],[{"top":400.11014,"left":67.41156,"width":85.61141967773438,"height":14.26885986328125,"text":"当期純利益"},{"top":400.11014,"left":153.02298,"width":51.367401123046875,"height":14.26885986328125,"text":" 
"},{"top":400.11014,"left":204.39038,"width":51.36669921875,"height":14.26885986328125,"text":" "},{"top":400.11014,"left":255.75708,"width":51.3656005859375,"height":14.26885986328125,"text":" "},{"top":400.11014,"left":307.12268,"width":51.366668701171875,"height":14.26885986328125,"text":""},{"top":400.11014,"left":358.48935,"width":51.366973876953125,"height":14.26885986328125,"text":" "},{"top":400.11014,"left":409.85632,"width":51.365631103515625,"height":14.26885986328125,"text":" "},{"top":400.11014,"left":461.22195,"width":51.367645263671875,"height":14.26885986328125,"text":"1,269"}],[{"top":414.379,"left":67.41156,"width":85.61141967773438,"height":14.269195556640625,"text":"自己株式の取得"},{"top":414.379,"left":153.02298,"width":51.367401123046875,"height":14.269195556640625,"text":" "},{"top":414.379,"left":204.39038,"width":51.36669921875,"height":14.269195556640625,"text":" "},{"top":414.379,"left":255.75708,"width":51.3656005859375,"height":14.269195556640625,"text":" "},{"top":414.379,"left":307.12268,"width":51.366668701171875,"height":14.269195556640625,"text":""},{"top":414.379,"left":358.48935,"width":51.366973876953125,"height":14.269195556640625,"text":" "},{"top":414.379,"left":409.85632,"width":51.365631103515625,"height":14.269195556640625,"text":" "},{"top":414.379,"left":461.22195,"width":51.367645263671875,"height":14.269195556640625,"text":"△0"}],[{"top":428.6482,"left":67.41156,"width":85.61141967773438,"height":22.829620361328125,"text":"持分法の適用範囲\rの変動"},{"top":428.6482,"left":153.02298,"width":51.367401123046875,"height":22.829620361328125,"text":" "},{"top":428.6482,"left":204.39038,"width":51.36669921875,"height":22.829620361328125,"text":" "},{"top":428.6482,"left":255.75708,"width":51.3656005859375,"height":22.829620361328125,"text":" "},{"top":428.6482,"left":307.12268,"width":51.366668701171875,"height":22.829620361328125,"text":""},{"top":428.6482,"left":358.48935,"width":51.366973876953125,"height":22.829620361328125,"text":" 
"},{"top":428.6482,"left":409.85632,"width":51.365631103515625,"height":22.829620361328125,"text":" "},{"top":428.6482,"left":461.22195,"width":51.367645263671875,"height":22.829620361328125,"text":"85"}],[{"top":451.4778,"left":67.41156,"width":85.61141967773438,"height":22.82977294921875,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":451.4778,"left":153.02298,"width":51.367401123046875,"height":22.82977294921875,"text":"556"},{"top":451.4778,"left":204.39038,"width":51.36669921875,"height":22.82977294921875,"text":"80"},{"top":451.4778,"left":255.75708,"width":51.3656005859375,"height":22.82977294921875,"text":"5"},{"top":451.4778,"left":307.12268,"width":51.366668701171875,"height":22.82977294921875,"text":"―"},{"top":451.4778,"left":358.48935,"width":51.366973876953125,"height":22.82977294921875,"text":"642"},{"top":451.4778,"left":409.85632,"width":51.365631103515625,"height":22.82977294921875,"text":"△0"},{"top":451.4778,"left":461.22195,"width":51.367645263671875,"height":22.82977294921875,"text":"642"}],[{"top":474.3076,"left":67.41156,"width":85.61141967773438,"height":14.268341064453125,"text":"当期変動額合計"},{"top":474.3076,"left":153.02298,"width":51.367401123046875,"height":14.268341064453125,"text":"556"},{"top":474.3076,"left":204.39038,"width":51.36669921875,"height":14.268341064453125,"text":"80"},{"top":474.3076,"left":255.75708,"width":51.3656005859375,"height":14.268341064453125,"text":"5"},{"top":474.3076,"left":307.12268,"width":51.366668701171875,"height":14.268341064453125,"text":"―"},{"top":474.3076,"left":358.48935,"width":51.366973876953125,"height":14.268341064453125,"text":"642"},{"top":474.3076,"left":409.85632,"width":51.365631103515625,"height":14.268341064453125,"text":"△0"},{"top":474.3076,"left":461.22195,"width":51.367645263671875,"height":14.268341064453125,"text":"1,471"}],[{"top":488.57593,"left":67.41156,"width":85.61141967773438,"height":14.264862060546875,"text":"当期末残高"},{"top":488.57593,"left":153.02298,"width":51.367401123046875,"heigh
t":14.264862060546875,"text":"△113"},{"top":488.57593,"left":204.39038,"width":51.36669921875,"height":14.264862060546875,"text":"142"},{"top":488.57593,"left":255.75708,"width":51.3656005859375,"height":14.264862060546875,"text":"△104"},{"top":488.57593,"left":307.12268,"width":51.366668701171875,"height":14.264862060546875,"text":"―"},{"top":488.57593,"left":358.48935,"width":51.366973876953125,"height":14.264862060546875,"text":"△75"},{"top":488.57593,"left":409.85632,"width":51.365631103515625,"height":14.264862060546875,"text":"245"},{"top":488.57593,"left":461.22195,"width":51.367645263671875,"height":14.264862060546875,"text":"18,683"}]]}] +[{"extraction_method":"lattice","top":111.79087,"left":67.41156,"width":342.44476318359375,"height":174.0704345703125,"right":409.85632,"bottom":285.8613,"data":[[{"top":111.79087,"left":67.41156,"width":85.61141967773438,"height":28.536293029785156,"text":""},{"top":111.79087,"left":153.02298,"width":256.8333435058594,"height":14.267105102539062,"text":"株主資本"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":126.057976,"left":153.02298,"width":51.367401123046875,"height":14.269187927246094,"text":"資本金"},{"top":126.057976,"left":204.39038,"width":51.36669921875,"height":14.269187927246094,"text":"資本剰余金"},{"top":126.057976,"left":255.75708,"width":51.3656005859375,"height":14.269187927246094,"text":"利益剰余金"},{"top":126.057976,"left":307.12268,"width":51.366668701171875,"height":14.269187927246094,"text":"自己株式"},{"top":126.057976,"left":358.48935,"width":51.366973876953125,"height":14.269187927246094,"text":"株主資本合計"}],[{"top":140.32716,"left":67.41156,"width":85.61141967773438,"height":14.265777587890625,"text":"当期首残高"},{"top":140.32716,"left":153.02298,"width":51.367401123046875,
"height":14.265777587890625,"text":"5,664"},{"top":140.32716,"left":204.39038,"width":51.36669921875,"height":14.265777587890625,"text":"749"},{"top":140.32716,"left":255.75708,"width":51.3656005859375,"height":14.265777587890625,"text":"12,017"},{"top":140.32716,"left":307.12268,"width":51.366668701171875,"height":14.265777587890625,"text":"△747"},{"top":140.32716,"left":358.48935,"width":51.366973876953125,"height":14.265777587890625,"text":"17,683"}],[{"top":154.59294,"left":67.41156,"width":85.61141967773438,"height":14.26910400390625,"text":"当期変動額"},{"top":154.59294,"left":153.02298,"width":51.367401123046875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":204.39038,"width":51.36669921875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":255.75708,"width":51.3656005859375,"height":14.26910400390625,"text":""},{"top":154.59294,"left":307.12268,"width":51.366668701171875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":358.48935,"width":51.366973876953125,"height":14.26910400390625,"text":""}],[{"top":168.86205,"left":67.41156,"width":85.61141967773438,"height":14.268997192382812,"text":"剰余金の配当"},{"top":168.86205,"left":153.02298,"width":51.367401123046875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":204.39038,"width":51.36669921875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":255.75708,"width":51.3656005859375,"height":14.268997192382812,"text":"△525"},{"top":168.86205,"left":307.12268,"width":51.366668701171875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":358.48935,"width":51.366973876953125,"height":14.268997192382812,"text":"△525"}],[{"top":183.13104,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期純利益"},{"top":183.13104,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":255.75708,"width":51.36
56005859375,"height":14.2691650390625,"text":"1,269"},{"top":183.13104,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":"1,269"}],[{"top":197.4002,"left":67.41156,"width":85.61141967773438,"height":14.268936157226562,"text":"自己株式の取得"},{"top":197.4002,"left":153.02298,"width":51.367401123046875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":204.39038,"width":51.36669921875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":255.75708,"width":51.3656005859375,"height":14.268936157226562,"text":""},{"top":197.4002,"left":307.12268,"width":51.366668701171875,"height":14.268936157226562,"text":"△0"},{"top":197.4002,"left":358.48935,"width":51.366973876953125,"height":14.268936157226562,"text":"△0"}],[{"top":211.66914,"left":67.41156,"width":85.61141967773438,"height":22.82952880859375,"text":"持分法の適用範囲\rの変動"},{"top":211.66914,"left":153.02298,"width":51.367401123046875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":204.39038,"width":51.36669921875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":255.75708,"width":51.3656005859375,"height":22.82952880859375,"text":"85"},{"top":211.66914,"left":307.12268,"width":51.366668701171875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":358.48935,"width":51.366973876953125,"height":22.82952880859375,"text":"85"}],[{"top":234.49867,"left":67.41156,"width":85.61141967773438,"height":22.829910278320312,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":234.49867,"left":153.02298,"width":51.367401123046875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":204.39038,"width":51.36669921875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":255.75708,"width":51.3656005859375,"height":22.829910278320312,"text":""},{"top":234.49867,"left":307.12268,"width":51.366668701171875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":35
8.48935,"width":51.366973876953125,"height":22.829910278320312,"text":""}],[{"top":257.32858,"left":67.41156,"width":85.61141967773438,"height":14.2686767578125,"text":"当期変動額合計"},{"top":257.32858,"left":153.02298,"width":51.367401123046875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":204.39038,"width":51.36669921875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":255.75708,"width":51.3656005859375,"height":14.2686767578125,"text":"829"},{"top":257.32858,"left":307.12268,"width":51.366668701171875,"height":14.2686767578125,"text":"△0"},{"top":257.32858,"left":358.48935,"width":51.366973876953125,"height":14.2686767578125,"text":"829"}],[{"top":271.59726,"left":67.41156,"width":85.61141967773438,"height":14.2640380859375,"text":"当期末残高"},{"top":271.59726,"left":153.02298,"width":51.367401123046875,"height":14.2640380859375,"text":"5,664"},{"top":271.59726,"left":204.39038,"width":51.36669921875,"height":14.2640380859375,"text":"749"},{"top":271.59726,"left":255.75708,"width":51.3656005859375,"height":14.2640380859375,"text":"12,846"},{"top":271.59726,"left":307.12268,"width":51.366668701171875,"height":14.2640380859375,"text":"△747"},{"top":271.59726,"left":358.48935,"width":51.366973876953125,"height":14.2640380859375,"text":"18,512"}]]},{"extraction_method":"lattice","top":312.25272,"left":67.41156,"width":445.17803955078125,"height":191.19696044921875,"right":512.5896,"bottom":503.44968,"data":[[{"top":312.25272,"left":67.41156,"width":85.61141967773438,"height":45.658233642578125,"text":""},{"top":312.25272,"left":153.02298,"width":256.8333435058594,"height":14.267730712890625,"text":"その他の包括利益累計額"},{"top":312.25272,"left":409.85632,"width":51.365631103515625,"height":45.658233642578125,"text":"少数株主持分"},{"top":312.25272,"left":461.22195,"width":51.367645263671875,"height":45.658233642578125,"text":"純資産合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"lef
t":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":326.52045,"left":153.02298,"width":51.367401123046875,"height":31.3905029296875,"text":"その他有価証券\r評価差額金"},{"top":326.52045,"left":204.39038,"width":51.36669921875,"height":31.3905029296875,"text":"繰延ヘッジ\r損益"},{"top":326.52045,"left":255.75708,"width":51.3656005859375,"height":31.3905029296875,"text":"為替換算\r調整勘定"},{"top":326.52045,"left":307.12268,"width":51.366668701171875,"height":31.3905029296875,"text":"退職給付に係る\r調整累計額"},{"top":326.52045,"left":358.48935,"width":51.366973876953125,"height":31.3905029296875,"text":"その他の\r包括利益\r累計額合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":357.91095,"left":67.41156,"width":85.61141967773438,"height":14.268768310546875,"text":"当期首残高"},{"top":357.91095,"left":153.02298,"width":51.367401123046875,"height":14.268768310546875,"text":"△669"},{"top":357.91095,"left":204.39038,"width":51.36669921875,"height":14.268768310546875,"text":"61"},{"top":357.91095,"left":255.75708,"width":51.3656005859375,"height":14.268768310546875,"text":"△109"},{"top":357.91095,"left":307.12268,"width":51.366668701171875,"height":14.268768310546875,"text":"―"},{"top":357.91095,"left":358.48935,"width":51.366973876953125,"height":14.268768310546875,"text":"△717"},{"top":357.91095,"left":409.85632,"width":51.365631103515625,"height":14.268768310546875,"text":"246"},{"top":357.91095,"left":461.22195,"width":51.367645263671875,"height":14.268768310546875,"text":"17,212"}],[{"top":372.17972,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期変動額"},{"top":372.17972,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":255.75708,"width":51.365600585937
5,"height":14.2691650390625,"text":""},{"top":372.17972,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":""},{"top":372.17972,"left":409.85632,"width":51.365631103515625,"height":14.2691650390625,"text":""},{"top":372.17972,"left":461.22195,"width":51.367645263671875,"height":14.2691650390625,"text":""}],[{"top":386.44888,"left":67.41156,"width":85.61141967773438,"height":14.268646240234375,"text":"剰余金の配当"},{"top":386.44888,"left":153.02298,"width":51.367401123046875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":204.39038,"width":51.36669921875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":255.75708,"width":51.3656005859375,"height":14.268646240234375,"text":""},{"top":386.44888,"left":307.12268,"width":51.366668701171875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":358.48935,"width":51.366973876953125,"height":14.268646240234375,"text":""},{"top":386.44888,"left":409.85632,"width":51.365631103515625,"height":14.268646240234375,"text":""},{"top":386.44888,"left":461.22195,"width":51.367645263671875,"height":14.268646240234375,"text":"△525"}],[{"top":400.71753,"left":67.41156,"width":85.61141967773438,"height":14.26812744140625,"text":"当期純利益"},{"top":400.71753,"left":153.02298,"width":51.367401123046875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":204.39038,"width":51.36669921875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":255.75708,"width":51.3656005859375,"height":14.26812744140625,"text":""},{"top":400.71753,"left":307.12268,"width":51.366668701171875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":358.48935,"width":51.366973876953125,"height":14.26812744140625,"text":""},{"top":400.71753,"left":409.85632,"width":51.365631103515625,"height":14.26812744140625,"text":""},{"top":400.71753,"left":461.22195,"width":51.367645263671875,"height":14.
26812744140625,"text":"1,269"}],[{"top":414.98566,"left":67.41156,"width":85.61141967773438,"height":14.26678466796875,"text":"自己株式の取得"},{"top":414.98566,"left":153.02298,"width":51.367401123046875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":204.39038,"width":51.36669921875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":255.75708,"width":51.3656005859375,"height":14.26678466796875,"text":""},{"top":414.98566,"left":307.12268,"width":51.366668701171875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":358.48935,"width":51.366973876953125,"height":14.26678466796875,"text":""},{"top":414.98566,"left":409.85632,"width":51.365631103515625,"height":14.26678466796875,"text":""},{"top":414.98566,"left":461.22195,"width":51.367645263671875,"height":14.26678466796875,"text":"△0"}],[{"top":429.25244,"left":67.41156,"width":85.61141967773438,"height":22.8292236328125,"text":"持分法の適用範囲\rの変動"},{"top":429.25244,"left":153.02298,"width":51.367401123046875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":204.39038,"width":51.36669921875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":255.75708,"width":51.3656005859375,"height":22.8292236328125,"text":""},{"top":429.25244,"left":307.12268,"width":51.366668701171875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":358.48935,"width":51.366973876953125,"height":22.8292236328125,"text":""},{"top":429.25244,"left":409.85632,"width":51.365631103515625,"height":22.8292236328125,"text":""},{"top":429.25244,"left":461.22195,"width":51.367645263671875,"height":22.8292236328125,"text":"85"}],[{"top":452.08167,"left":67.41156,"width":85.61141967773438,"height":22.830596923828125,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":452.08167,"left":153.02298,"width":51.367401123046875,"height":22.830596923828125,"text":"556"},{"top":452.08167,"left":204.39038,"width":51.36669921875,"height":22.830596923828125,"text":"80"},{"top":452.08167,"left":255.75708,"width":51.3656005859375,
"height":22.830596923828125,"text":"5"},{"top":452.08167,"left":307.12268,"width":51.366668701171875,"height":22.830596923828125,"text":"―"},{"top":452.08167,"left":358.48935,"width":51.366973876953125,"height":22.830596923828125,"text":"642"},{"top":452.08167,"left":409.85632,"width":51.365631103515625,"height":22.830596923828125,"text":"△0"},{"top":452.08167,"left":461.22195,"width":51.367645263671875,"height":22.830596923828125,"text":"642"}],[{"top":474.91226,"left":67.41156,"width":85.61141967773438,"height":14.26776123046875,"text":"当期変動額合計"},{"top":474.91226,"left":153.02298,"width":51.367401123046875,"height":14.26776123046875,"text":"556"},{"top":474.91226,"left":204.39038,"width":51.36669921875,"height":14.26776123046875,"text":"80"},{"top":474.91226,"left":255.75708,"width":51.3656005859375,"height":14.26776123046875,"text":"5"},{"top":474.91226,"left":307.12268,"width":51.366668701171875,"height":14.26776123046875,"text":"―"},{"top":474.91226,"left":358.48935,"width":51.366973876953125,"height":14.26776123046875,"text":"642"},{"top":474.91226,"left":409.85632,"width":51.365631103515625,"height":14.26776123046875,"text":"△0"},{"top":474.91226,"left":461.22195,"width":51.367645263671875,"height":14.26776123046875,"text":"1,471"}],[{"top":489.18002,"left":67.41156,"width":85.61141967773438,"height":14.2696533203125,"text":"当期末残高"},{"top":489.18002,"left":153.02298,"width":51.367401123046875,"height":14.2696533203125,"text":"△113"},{"top":489.18002,"left":204.39038,"width":51.36669921875,"height":14.2696533203125,"text":"142"},{"top":489.18002,"left":255.75708,"width":51.3656005859375,"height":14.2696533203125,"text":"△104"},{"top":489.18002,"left":307.12268,"width":51.366668701171875,"height":14.2696533203125,"text":"―"},{"top":489.18002,"left":358.48935,"width":51.366973876953125,"height":14.2696533203125,"text":"△75"},{"top":489.18002,"left":409.85632,"width":51.365631103515625,"height":14.2696533203125,"text":"245"},{"top":489.18002,"left":461.22195,"wid
th":51.367645263671875,"height":14.2696533203125,"text":"18,683"}]]}] diff --git a/src/test/resources/technology/tabula/mednine.pdf b/src/test/resources/technology/tabula/mednine.pdf new file mode 100644 index 00000000..0a3f36c1 Binary files /dev/null and b/src/test/resources/technology/tabula/mednine.pdf differ diff --git a/src/test/resources/technology/tabula/npe_issue_206.pdf b/src/test/resources/technology/tabula/npe_issue_206.pdf new file mode 100644 index 00000000..352e77ba Binary files /dev/null and b/src/test/resources/technology/tabula/npe_issue_206.pdf differ diff --git a/src/test/resources/technology/tabula/us-020.pdf b/src/test/resources/technology/tabula/us-020.pdf new file mode 100644 index 00000000..39a8546c Binary files /dev/null and b/src/test/resources/technology/tabula/us-020.pdf differ