diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index a217b347..00000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,7 +0,0 @@ -version: 2 -updates: -- package-ecosystem: maven - directory: "/" - schedule: - interval: daily - open-pull-requests-limit: 10 diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml deleted file mode 100644 index 5cc1031a..00000000 --- a/.github/workflows/tests-windows.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: Java CI (Windows) - -on: [push] - -jobs: - build: - runs-on: windows-latest - - steps: - # https://github.com/actions/checkout/issues/135#issuecomment-602171132 - - name: Set git to use LF - run: | - git config --global core.autocrlf false - git config --global core.eol lf - - uses: actions/checkout@v3 - - name: Set up JDK 11 - uses: actions/setup-java@v3 - with: - java-version: '11' - distribution: 'adopt' - cache: maven - - name: Build with Maven - run: mvn --batch-mode test diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml deleted file mode 100644 index da2d019b..00000000 --- a/.github/workflows/tests.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: Java CI - -on: [push, pull_request] - -jobs: - build: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Set up JDK 11 - uses: actions/setup-java@v3 - with: - java-version: '11' - distribution: 'adopt' - cache: maven - - name: Build with Maven - run: mvn --batch-mode test diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 0247d35b..00000000 --- a/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -.settings/ -.idea/ -.project -.classpath -/bin/ -/src/test/**/*.jpg -/src/test/resources/technology/tabula/icdar2013-dataset/test-statistics.json -/target/ -*.iml diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 4beb04ee..00000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2014-2016 Manuel Aristarán - -Permission is 
hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/README.md b/README.md deleted file mode 100644 index db7b0023..00000000 --- a/README.md +++ /dev/null @@ -1,158 +0,0 @@ -tabula-java [![Build Status](https://travis-ci.org/tabulapdf/tabula-java.svg?branch=master)](https://travis-ci.org/tabulapdf/tabula-java) -=========== - -`tabula-java` is a library for extracting tables from PDF files — it is the table extraction engine that powers [Tabula](http://tabula.technology/) ([repo](http://github.com/tabulapdf/tabula)). You can use `tabula-java` as a command-line tool to programmatically extract tables from PDFs. - -© 2014-2020 Manuel Aristarán. Available under MIT License. See [`LICENSE`](LICENSE). - -## Download - -Download a version of the tabula-java's jar, with all dependencies included, that works on Mac, Windows and Linux from our [releases page](../../releases). 
- -## Commandline Usage Examples - -`tabula-java` provides a command line application: - -``` -$ java -jar target/tabula-1.0.5-jar-with-dependencies.jar --help -usage: tabula [-a ] [-b ] [-c ] [-f ] - [-g] [-h] [-i] [-l] [-n] [-o ] [-p ] [-r] [-s - ] [-t] [-u] [-v] - -Tabula helps you extract tables from PDFs - - -a,--area -a/--area = Portion of the page to analyze. - Example: --area 269.875,12.75,790.5,561. - Accepts top,left,bottom,right i.e. y1,x1,y2,x2 - where all values are in points relative to the - top left corner. If all values are between - 0-100 (inclusive) and preceded by '%', input - will be taken as % of actual height or width - of the page. Example: --area %0,0,100,50. To - specify multiple areas, -a option should be - repeated. Default is entire page - -b,--batch Convert all .pdfs in the provided directory. - -c,--columns X coordinates of column boundaries. Example - --columns 10.1,20.2,30.3. If all values are - between 0-100 (inclusive) and preceded by '%', - input will be taken as % of actual width of - the page. Example: --columns %25,50,80.6 - -f,--format Output format: (CSV,TSV,JSON). Default: CSV - -g,--guess Guess the portion of the page to analyze per - page. - -h,--help Print this help text. - -i,--silent Suppress all stderr output. - -l,--lattice Force PDF to be extracted using lattice-mode - extraction (if there are ruling lines - separating each cell, as in a PDF of an Excel - spreadsheet) - -n,--no-spreadsheet [Deprecated in favor of -t/--stream] Force PDF - not to be extracted using spreadsheet-style - extraction (if there are no ruling lines - separating each cell) - -o,--outfile Write output to instead of STDOUT. - Default: - - -p,--pages Comma separated list of ranges, or all. - Examples: --pages 1-3,5-7, --pages 3 or - --pages all. 
Default is --pages 1 - -r,--spreadsheet [Deprecated in favor of -l/--lattice] Force - PDF to be extracted using spreadsheet-style - extraction (if there are ruling lines - separating each cell, as in a PDF of an Excel - spreadsheet) - -s,--password Password to decrypt document. Default is empty - -t,--stream Force PDF to be extracted using stream-mode - extraction (if there are no ruling lines - separating each cell) - -u,--use-line-returns Use embedded line returns in cells. (Only in - spreadsheet mode.) - -v,--version Print version and exit. -``` - -It also includes a debugging tool, run `java -cp ./target/tabula-1.0.5-jar-with-dependencies.jar technology.tabula.debug.Debug -h` for the available options. - -You can also integrate `tabula-java` with any JVM language. For Java examples, see the [`tests`](src/test/java/technology/tabula/) folder. - -JVM start-up time is a lot of the cost of the `tabula` command, so if you're trying to extract many tables from PDFs, you have a few options for speeding it up: - - - the -b option, which allows you to convert all pdfs in a given directory - - the [drip](https://github.com/ninjudd/drip) utility - - the [Ruby](http://github.com/tabulapdf/tabula-extractor), [Python](https://github.com/chezou/tabula-py), [R](https://github.com/leeper/tabulizer), and [Node.js](https://github.com/ezodude/tabula-js) bindings - - writing your own program in any JVM language (Java, JRuby, Scala) that imports tabula-java. 
- - waiting for us to implement an API/server-style system (it's on the [roadmap](https://github.com/tabulapdf/tabula-api)) - -## API Usage Examples - -A simple Java code example which extracts all rows and cells from all tables of all pages of a PDF document: - -```java -InputStream in = this.getClass().getResourceAsStream("my.pdf"); -try (PDDocument document = PDDocument.load(in)) { - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - PageIterator pi = new ObjectExtractor(document).extract(); - while (pi.hasNext()) { - // iterate over the pages of the document - Page page = pi.next(); - List table = sea.extract(page); - // iterate over the tables of the page - for(Table tables: table) { - List> rows = tables.getRows(); - // iterate over the rows of the table - for (List cells : rows) { - // print all column-cells of the row plus linefeed - for (RectangularTextContainer content : cells) { - // Note: Cell.getText() uses \r to concat text chunks - String text = content.getText().replace("\r", " "); - System.out.print(text + "|"); - } - System.out.println(); - } - } - } -} -``` - - -For more detail information check the Javadoc. -The Javadoc API documentation can be generated (see also '_Building from Source_' section) via - -``` -mvn javadoc:javadoc -``` - -which generates the HTML files to directory ```target/site/apidocs/``` - -## Building from Source - -Clone this repo and run: - -``` -mvn clean compile assembly:single -``` - -## Contributing - -Interested in helping out? We'd love to have your help! - -You can help by: - -- [Reporting a bug](https://github.com/tabulapdf/tabula-java/issues). -- Adding or editing documentation. -- Contributing code via a Pull Request. -- Spreading the word about `tabula-java` to people who might be able to benefit from using it. - -### Backers - -You can also support our continued work on `tabula-java` with a one-time or monthly donation [on OpenCollective](https://opencollective.com/tabulapdf#support). 
Organizations who use `tabula-java` can also [sponsor the project](https://opencollective.com/tabulapdf#support) for acknowledgement on [our official site](http://tabula.technology/) and this README. - -Special thanks to the following users and organizations for generously supporting Tabula with donations and grants: - - - - - - - - -The John S. and James L. Knight Foundation -The Shuttleworth Foundation diff --git a/allclasses-frame.html b/allclasses-frame.html new file mode 100644 index 00000000..33d4f1ca --- /dev/null +++ b/allclasses-frame.html @@ -0,0 +1,90 @@ + + + + + + + +All Classes (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + +All Classes +
+ +
+ + + +
BasicExtractionAlgorithm +
+Cell +
+CohenSutherlandClipping +
+CommandLineApp +
+CSVWriter +
+Debug +
+DummyGraphics2D +
+ExtractionAlgorithm +
+HasText +
+JSONWriter +
+Line +
+ObjectExtractor +
+Page +
+PageIterator +
+ProjectionProfile +
+QuickSort +
+Rectangle +
+RectangularTextContainer +
+Ruling +
+RulingSerializer +
+SpreadsheetExtractionAlgorithm +
+Table +
+TableSerializer +
+TableWithRulingLines +
+TextChunk +
+TextChunkSerializer +
+TextElement +
+TSVWriter +
+Utils +
+Writer +
+
+ + + diff --git a/allclasses-noframe.html b/allclasses-noframe.html new file mode 100644 index 00000000..f74ca473 --- /dev/null +++ b/allclasses-noframe.html @@ -0,0 +1,90 @@ + + + + + + + +All Classes (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + +All Classes +
+ + + + + +
BasicExtractionAlgorithm +
+Cell +
+CohenSutherlandClipping +
+CommandLineApp +
+CSVWriter +
+Debug +
+DummyGraphics2D +
+ExtractionAlgorithm +
+HasText +
+JSONWriter +
+Line +
+ObjectExtractor +
+Page +
+PageIterator +
+ProjectionProfile +
+QuickSort +
+Rectangle +
+RectangularTextContainer +
+Ruling +
+RulingSerializer +
+SpreadsheetExtractionAlgorithm +
+Table +
+TableSerializer +
+TableWithRulingLines +
+TextChunk +
+TextChunkSerializer +
+TextElement +
+TSVWriter +
+Utils +
+Writer +
+
+ + + diff --git a/constant-values.html b/constant-values.html new file mode 100644 index 00000000..96e84d9c --- /dev/null +++ b/constant-values.html @@ -0,0 +1,175 @@ + + + + + + + +Constant Field Values (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Constant Field Values

+
+
+Contents + + + + + + +
+technology.tabula.*
+ +

+ + + + + + + + + + + + +
technology.tabula.ProjectionProfile
+public static final intDECIMAL_PLACES1
+ +

+ +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/deprecated-list.html b/deprecated-list.html new file mode 100644 index 00000000..f3e9577f --- /dev/null +++ b/deprecated-list.html @@ -0,0 +1,147 @@ + + + + + + + +Deprecated List (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Deprecated API

+
+
+Contents
    +
+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/help-doc.html b/help-doc.html new file mode 100644 index 00000000..c58b3eed --- /dev/null +++ b/help-doc.html @@ -0,0 +1,224 @@ + + + + + + + +API Help (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+How This API Document Is Organized

+
+This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.

+Overview

+
+ +

+The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

+

+Package

+
+ +

+Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

    +
  • Interfaces (italic)
  • Classes
  • Enums
  • Exceptions
  • Errors
  • Annotation Types
+
+

+Class/Interface

+
+ +

+Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
  • Class inheritance diagram
  • Direct Subclasses
  • All Known Subinterfaces
  • All Known Implementing Classes
  • Class/interface declaration
  • Class/interface description +

    +

  • Nested Class Summary
  • Field Summary
  • Constructor Summary
  • Method Summary +

    +

  • Field Detail
  • Constructor Detail
  • Method Detail
+Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.
+ +

+Annotation Type

+
+ +

+Each annotation type has its own separate page with the following sections:

    +
  • Annotation Type declaration
  • Annotation Type description
  • Required Element Summary
  • Optional Element Summary
  • Element Detail
+
+ +

+Enum

+
+ +

+Each enum has its own separate page with the following sections:

    +
  • Enum declaration
  • Enum description
  • Enum Constant Summary
  • Enum Constant Detail
+
+

+Use

+
+Each documented package, class and interface has its own Use page. This page describes what packages, classes, methods, constructors and fields use any part of the given class or package. Given a class or interface A, its Use page includes subclasses of A, fields declared as A, methods that return A, and methods and constructors with parameters of type A. You can access this page by first going to the package, class or interface, then clicking on the "Use" link in the navigation bar.
+

+Tree (Class Hierarchy)

+
+There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.
    +
  • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
  • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
+
+

+Deprecated API

+
+The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.
+

+Index

+
+The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.
+

+Prev/Next

+These links take you to the next or previous class, interface, package, or related page.

+Frames/No Frames

+These links show and hide the HTML frames. All pages are available with or without frames. +

+

+Serialized Form

+Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description. +

+

+Constant Field Values

+The Constant Field Values page lists the static final fields and their values. +

+ + +This help file applies to API documentation generated using the standard doclet. + +
+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/index-all.html b/index-all.html new file mode 100644 index 00000000..e5eec8f4 --- /dev/null +++ b/index-all.html @@ -0,0 +1,1172 @@ + + + + + + + +Index (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I J L M N O P Q R S T U V W
+

+A

+
+
add(RectangularTextContainer, int, int) - +Method in class technology.tabula.Table +
  +
add(TextElement) - +Method in class technology.tabula.TextChunk +
  +
add(List<TextElement>) - +Method in class technology.tabula.TextChunk +
  +
addRenderingHints(Map<?, ?>) - +Method in class technology.tabula.DummyGraphics2D +
  +
addRuling(Ruling) - +Method in class technology.tabula.Page +
  +
addTextChunk(int, TextChunk) - +Method in class technology.tabula.Line +
  +
addTextChunk(TextChunk) - +Method in class technology.tabula.Line +
  +
allSameChar(List<TextChunk>) - +Static method in class technology.tabula.TextChunk +
  +
+
+

+B

+
+
BasicExtractionAlgorithm - Class in technology.tabula.extractors
 
BasicExtractionAlgorithm() - +Constructor for class technology.tabula.extractors.BasicExtractionAlgorithm +
  +
BasicExtractionAlgorithm(List<Ruling>) - +Constructor for class technology.tabula.extractors.BasicExtractionAlgorithm +
  +
boundingBoxOf(List<? extends Rectangle>) - +Static method in class technology.tabula.Rectangle +
  +
bounds(Collection<? extends Shape>) - +Static method in class technology.tabula.Utils +
  +
+
+

+C

+
+
Cell - Class in technology.tabula
 
Cell(float, float, float, float) - +Constructor for class technology.tabula.Cell +
  +
Cell(Point2D, Point2D) - +Constructor for class technology.tabula.Cell +
  +
clearRect(int, int, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
clip(Line2D.Float) - +Method in class technology.tabula.CohenSutherlandClipping +
Clips a given line against the clip rectangle. +
clip(Shape) - +Method in class technology.tabula.DummyGraphics2D +
  +
clippingPaths - +Variable in class technology.tabula.ObjectExtractor +
  +
clipRect(int, int, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
close() - +Method in class technology.tabula.ObjectExtractor +
  +
CohenSutherlandClipping - Class in technology.tabula
Implements the well known Cohen Sutherland line + clipping algorithm (line against clip rectangle).
CohenSutherlandClipping() - +Constructor for class technology.tabula.CohenSutherlandClipping +
Creates a Cohen Sutherland clipper with clip rect (0, 0, 0, 0). +
CohenSutherlandClipping(Rectangle2D) - +Constructor for class technology.tabula.CohenSutherlandClipping +
Creates a Cohen Sutherland clipper with the given clip rectangle. +
colinear(Point2D) - +Method in class technology.tabula.Ruling +
  +
collapseOrientedRulings(List<Ruling>) - +Static method in class technology.tabula.Ruling +
  +
columnPositions(List<Line>) - +Static method in class technology.tabula.extractors.BasicExtractionAlgorithm +
  +
CommandLineApp - Class in technology.tabula
 
CommandLineApp() - +Constructor for class technology.tabula.CommandLineApp +
  +
compareTo(Rectangle) - +Method in class technology.tabula.Rectangle +
  +
copyArea(int, int, int, int, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
create() - +Method in class technology.tabula.DummyGraphics2D +
  +
cropRulingsToArea(List<Ruling>, Rectangle2D) - +Static method in class technology.tabula.Ruling +
  +
CSVWriter - Class in technology.tabula.writers
 
CSVWriter() - +Constructor for class technology.tabula.writers.CSVWriter +
  +
currentClippingPath() - +Method in class technology.tabula.ObjectExtractor +
  +
+
+

+D

+
+
Debug - Class in technology.tabula.debug
 
Debug() - +Constructor for class technology.tabula.debug.Debug +
  +
debugIntersections(Graphics2D, Page) - +Static method in class technology.tabula.debug.Debug +
  +
DECIMAL_PLACES - +Static variable in class technology.tabula.ProjectionProfile +
  +
dispose() - +Method in class technology.tabula.DummyGraphics2D +
  +
draw(Shape) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawArc(int, int, int, int, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawGlyphVector(GlyphVector, float, float) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawImage(Image, AffineTransform, ImageObserver) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawImage(BufferedImage, BufferedImageOp, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawImage(Image, int, int, ImageObserver) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawImage(Image, int, int, Color, ImageObserver) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawImage(Image, int, int, int, int, ImageObserver) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawImage(Image, int, int, int, int, Color, ImageObserver) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawImage(Image, int, int, int, int, int, int, int, int, ImageObserver) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawImage(Image, int, int, int, int, int, int, int, int, Color, ImageObserver) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawImage(Image, AffineTransform) - +Method in class technology.tabula.ObjectExtractor +
  +
drawLine(int, int, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawOval(int, int, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawPage(PDPage) - +Method in class technology.tabula.ObjectExtractor +
  +
drawPolygon(int[], int[], int) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawPolyline(int[], int[], int) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawRenderableImage(RenderableImage, AffineTransform) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawRenderedImage(RenderedImage, AffineTransform) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawRoundRect(int, int, int, int, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawString(String, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawString(String, float, float) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawString(AttributedCharacterIterator, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
drawString(AttributedCharacterIterator, float, float) - +Method in class technology.tabula.DummyGraphics2D +
  +
DummyGraphics2D - Class in technology.tabula
 
DummyGraphics2D() - +Constructor for class technology.tabula.DummyGraphics2D +
  +
+
+

+E

+
+
EMPTY - +Static variable in class technology.tabula.Table +
  +
EMPTY - +Static variable in class technology.tabula.TextChunk +
  +
equals(Object) - +Method in class technology.tabula.Ruling +
  +
expand(float) - +Method in class technology.tabula.Ruling +
  +
extract(Page, List<Float>) - +Method in class technology.tabula.extractors.BasicExtractionAlgorithm +
  +
extract(Page) - +Method in class technology.tabula.extractors.BasicExtractionAlgorithm +
  +
extract(Page) - +Method in interface technology.tabula.extractors.ExtractionAlgorithm +
  +
extract(Page) - +Method in class technology.tabula.extractors.SpreadsheetExtractionAlgorithm +
  +
extract(Page, List<Ruling>) - +Method in class technology.tabula.extractors.SpreadsheetExtractionAlgorithm +
Extract a list of Table from page using rulings as separators +
extract(Iterable<Integer>) - +Method in class technology.tabula.ObjectExtractor +
  +
extract() - +Method in class technology.tabula.ObjectExtractor +
  +
extract(int) - +Method in class technology.tabula.ObjectExtractor +
  +
ExtractionAlgorithm - Interface in technology.tabula.extractors
 
extractPage(Integer) - +Method in class technology.tabula.ObjectExtractor +
  +
+
+

+F

+
+
feq(double, double) - +Static method in class technology.tabula.Utils +
  +
fill(Shape) - +Method in class technology.tabula.DummyGraphics2D +
  +
fillArc(int, int, int, int, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
fillOval(int, int, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
fillPath(int) - +Method in class technology.tabula.ObjectExtractor +
  +
fillPolygon(int[], int[], int) - +Method in class technology.tabula.DummyGraphics2D +
  +
fillRect(int, int, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
fillRoundRect(int, int, int, int, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
filter(float[], float) - +Static method in class technology.tabula.ProjectionProfile +
Simple Low pass filter +
findCells(List<Ruling>, List<Ruling>) - +Static method in class technology.tabula.extractors.SpreadsheetExtractionAlgorithm +
  +
findHorizontalSeparators(float) - +Method in class technology.tabula.ProjectionProfile +
  +
findIntersections(List<Ruling>, List<Ruling>) - +Static method in class technology.tabula.Ruling +
  +
findSpreadsheetsFromCells(List<? extends Rectangle>) - +Method in class technology.tabula.extractors.SpreadsheetExtractionAlgorithm +
  +
findVerticalSeparators(float) - +Method in class technology.tabula.ProjectionProfile +
  +
+
+

+G

+
+
getAngle() - +Method in class technology.tabula.Ruling +
  +
getArea(Rectangle) - +Method in class technology.tabula.Page +
  +
getArea(float, float, float, float) - +Method in class technology.tabula.Page +
  +
getArea() - +Method in class technology.tabula.Rectangle +
  +
getAutocorrelation(float[]) - +Static method in class technology.tabula.ProjectionProfile +
  +
getBackground() - +Method in class technology.tabula.DummyGraphics2D +
  +
getBottom() - +Method in class technology.tabula.Rectangle +
  +
getBottom() - +Method in class technology.tabula.Ruling +
  +
getCell(int, int) - +Method in class technology.tabula.Table +
  +
getCells() - +Method in class technology.tabula.Table +
  +
getCharacters() - +Method in class technology.tabula.ObjectExtractor +
  +
getClip() - +Method in class technology.tabula.DummyGraphics2D +
  +
getClipBounds() - +Method in class technology.tabula.DummyGraphics2D +
  +
getColor() - +Method in class technology.tabula.DummyGraphics2D +
  +
getCols() - +Method in class technology.tabula.Table +
  +
getComposite() - +Method in class technology.tabula.DummyGraphics2D +
  +
getDeviceConfiguration() - +Method in class technology.tabula.DummyGraphics2D +
  +
getDirection() - +Method in class technology.tabula.TextElement +
  +
getEnd() - +Method in class technology.tabula.Ruling +
  +
getExtractionAlgorithm() - +Method in class technology.tabula.Table +
  +
getFirstDeriv(float[]) - +Static method in class technology.tabula.ProjectionProfile +
  +
getFont() - +Method in class technology.tabula.DummyGraphics2D +
  +
getFont() - +Method in class technology.tabula.TextElement +
  +
getFontMetrics(Font) - +Method in class technology.tabula.DummyGraphics2D +
  +
getFontRenderContext() - +Method in class technology.tabula.DummyGraphics2D +
  +
getFontSize() - +Method in class technology.tabula.TextElement +
  +
getHeight() - +Method in class technology.tabula.Ruling +
  +
getHorizontalProjection() - +Method in class technology.tabula.ProjectionProfile +
  +
getHorizontalRulings() - +Method in class technology.tabula.Page +
  +
getLeft() - +Method in class technology.tabula.Rectangle +
  +
getLeft() - +Method in class technology.tabula.Ruling +
  +
getMinCharHeight() - +Method in class technology.tabula.ObjectExtractor +
  +
getMinCharHeight() - +Method in class technology.tabula.Page +
  +
getMinCharWidth() - +Method in class technology.tabula.ObjectExtractor +
  +
getMinCharWidth() - +Method in class technology.tabula.Page +
  +
getPageCount() - +Method in class technology.tabula.ObjectExtractor +
  +
getPageNumber() - +Method in class technology.tabula.Page +
  +
getPageTransform() - +Method in class technology.tabula.ObjectExtractor +
  +
getPaint() - +Method in class technology.tabula.DummyGraphics2D +
  +
getPoints() - +Method in class technology.tabula.Rectangle +
  +
getPosition() - +Method in class technology.tabula.Ruling +
  +
getRenderingHint(RenderingHints.Key) - +Method in class technology.tabula.DummyGraphics2D +
  +
getRenderingHints() - +Method in class technology.tabula.DummyGraphics2D +
  +
getRight() - +Method in class technology.tabula.Rectangle +
  +
getRight() - +Method in class technology.tabula.Ruling +
  +
getRotation() - +Method in class technology.tabula.Page +
  +
getRows() - +Method in class technology.tabula.Table +
  +
getRulings() - +Method in class technology.tabula.ObjectExtractor +
  +
getRulings() - +Method in class technology.tabula.Page +
  +
getSpatialIndex() - +Method in class technology.tabula.Page +
  +
getStart() - +Method in class technology.tabula.Ruling +
  +
getStroke() - +Method in class technology.tabula.DummyGraphics2D +
  +
getText(boolean) - +Method in class technology.tabula.Cell +
  +
getText() - +Method in class technology.tabula.Cell +
  +
getText() - +Method in interface technology.tabula.HasText +
  +
getText() - +Method in class technology.tabula.Page +
  +
getText(Rectangle) - +Method in class technology.tabula.Page +
  +
getText(float, float, float, float) - +Method in class technology.tabula.Page +
  +
getText() - +Method in class technology.tabula.RectangularTextContainer +
  +
getText(boolean) - +Method in class technology.tabula.RectangularTextContainer +
  +
getText() - +Method in class technology.tabula.TextChunk +
  +
getText(boolean) - +Method in class technology.tabula.TextChunk +
  +
getText() - +Method in class technology.tabula.TextElement +
  +
getTextBounds() - +Method in class technology.tabula.Page +
Returns the minimum bounding box that contains all the TextElements on this Page +
getTextElements() - +Method in class technology.tabula.Cell +
  +
getTextElements() - +Method in class technology.tabula.Line +
  +
getTextElements() - +Method in class technology.tabula.RectangularTextContainer +
  +
getTextElements() - +Method in class technology.tabula.TextChunk +
  +
getTexts() - +Method in class technology.tabula.Page +
  +
getTop() - +Method in class technology.tabula.Rectangle +
  +
getTop() - +Method in class technology.tabula.Ruling +
  +
getTransform() - +Method in class technology.tabula.DummyGraphics2D +
  +
getUnprocessedRulings() - +Method in class technology.tabula.Page +
  +
getVerticalProjection() - +Method in class technology.tabula.ProjectionProfile +
  +
getVerticalRulings() - +Method in class technology.tabula.Page +
  +
getWidth() - +Method in class technology.tabula.Ruling +
  +
getWidthOfSpace() - +Method in class technology.tabula.TextElement +
  +
groupByLines(List<TextChunk>) - +Static method in class technology.tabula.TextChunk +
  +
+
+

+H

+
+
hashCode() - +Method in class technology.tabula.Ruling +
  +
hasNext() - +Method in class technology.tabula.PageIterator +
  +
HasText - Interface in technology.tabula
 
hasText() - +Method in class technology.tabula.Page +
  +
hit(Rectangle, Shape, boolean) - +Method in class technology.tabula.DummyGraphics2D +
  +
horizontal() - +Method in class technology.tabula.Ruling +
  +
horizontallyOverlaps(Rectangle) - +Method in class technology.tabula.Rectangle +
  +
horizontalOverlapRatio(Rectangle) - +Method in class technology.tabula.Rectangle +
  +
+
+

+I

+
+
intersect(Rectangle2D) - +Method in class technology.tabula.Ruling +
  +
intersectionPoint(Ruling) - +Method in class technology.tabula.Ruling +
  +
isDebugClippingPaths() - +Method in class technology.tabula.ObjectExtractor +
  +
isExtractRulingLines() - +Method in class technology.tabula.ObjectExtractor +
  +
isNumeric(CharSequence) - +Static method in class technology.tabula.Utils +
  +
isPlaceholder() - +Method in class technology.tabula.Cell +
  +
isSameChar(Character) - +Method in class technology.tabula.TextChunk +
  +
isSameChar(Character[]) - +Method in class technology.tabula.TextChunk +
  +
isSpanning() - +Method in class technology.tabula.Cell +
  +
isTabular(Page) - +Method in class technology.tabula.extractors.SpreadsheetExtractionAlgorithm +
  +
+
+

+J

+
+
join(String, String...) - +Static method in class technology.tabula.Utils +
  +
JSONWriter - Class in technology.tabula.writers
 
JSONWriter() - +Constructor for class technology.tabula.writers.JSONWriter +
  +
+
+

+L

+
+
length() - +Method in class technology.tabula.Ruling +
  +
Line - Class in technology.tabula
 
Line() - +Constructor for class technology.tabula.Line +
  +
+
+

+M

+
+
main(String[]) - +Static method in class technology.tabula.CommandLineApp +
  +
main(String[]) - +Static method in class technology.tabula.debug.Debug +
  +
merge(Rectangle) - +Method in class technology.tabula.Rectangle +
  +
merge(RectangularTextContainer<T>) - +Method in class technology.tabula.RectangularTextContainer +
  +
merge(TextChunk) - +Method in class technology.tabula.TextChunk +
  +
mergeWords(List<TextElement>) - +Static method in class technology.tabula.TextElement +
  +
mergeWords(List<TextElement>, List<Ruling>) - +Static method in class technology.tabula.TextElement +
heuristically merge a list of TextElement into a list of TextChunk + ported from PDFBox's PDFTextStripper.writePage, with modifications. +
+
+

+N

+
+
nearlyIntersects(Ruling) - +Method in class technology.tabula.Ruling +
  +
next() - +Method in class technology.tabula.PageIterator +
  +
+
+

+O

+
+
ObjectExtractor - Class in technology.tabula
 
ObjectExtractor(PDDocument) - +Constructor for class technology.tabula.ObjectExtractor +
  +
ObjectExtractor(PDDocument, String) - +Constructor for class technology.tabula.ObjectExtractor +
  +
oblique() - +Method in class technology.tabula.Ruling +
  +
overlap(double, double, double, double, double) - +Static method in class technology.tabula.Utils +
  +
overlap(double, double, double, double) - +Static method in class technology.tabula.Utils +
  +
overlapRatio(Rectangle) - +Method in class technology.tabula.Rectangle +
  +
+
+

+P

+
+
Page - Class in technology.tabula
 
Page(float, float, float, float, int, int) - +Constructor for class technology.tabula.Page +
  +
Page(float, float, float, float, int, int, List<TextElement>, List<Ruling>) - +Constructor for class technology.tabula.Page +
  +
Page(float, float, float, float, int, int, List<TextElement>, List<Ruling>, float, float, RectangleSpatialIndex<TextElement>) - +Constructor for class technology.tabula.Page +
  +
PageIterator - Class in technology.tabula
 
PageIterator(ObjectExtractor, Iterable<Integer>) - +Constructor for class technology.tabula.PageIterator +
  +
parseFloatList(String) - +Static method in class technology.tabula.CommandLineApp +
  +
parsePagesOption(String) - +Static method in class technology.tabula.Utils +
  +
pdf_document_pages - +Variable in class technology.tabula.ObjectExtractor +
  +
perpendicularTo(Ruling) - +Method in class technology.tabula.Ruling +
  +
processTextPosition(TextPosition) - +Method in class technology.tabula.ObjectExtractor +
  +
ProjectionProfile - Class in technology.tabula
 
ProjectionProfile(Page, List<? extends Rectangle>, float, float) - +Constructor for class technology.tabula.ProjectionProfile +
  +
+
+

+Q

+
+
QuickSort - Class in technology.tabula
see http://de.wikipedia.org/wiki/Quicksort.
+
+

+R

+
+
range(int, int) - +Static method in class technology.tabula.Utils +
  +
Rectangle - Class in technology.tabula
 
Rectangle() - +Constructor for class technology.tabula.Rectangle +
  +
Rectangle(float, float, float, float) - +Constructor for class technology.tabula.Rectangle +
  +
RectangularTextContainer<T extends HasText> - Class in technology.tabula
 
RectangularTextContainer(float, float, float, float) - +Constructor for class technology.tabula.RectangularTextContainer +
  +
remove() - +Method in class technology.tabula.PageIterator +
  +
renderPage(String, String, int, Rectangle, boolean, boolean, boolean, boolean, boolean, boolean, boolean, boolean, boolean, boolean, boolean) - +Static method in class technology.tabula.debug.Debug +
  +
rotate(double) - +Method in class technology.tabula.DummyGraphics2D +
  +
rotate(double, double, double) - +Method in class technology.tabula.DummyGraphics2D +
  +
round(double, int) - +Static method in class technology.tabula.Utils +
  +
Ruling - Class in technology.tabula
 
Ruling(float, float, float, float) - +Constructor for class technology.tabula.Ruling +
  +
Ruling(Point2D, Point2D) - +Constructor for class technology.tabula.Ruling +
  +
RulingSerializer - Class in technology.tabula.json
 
RulingSerializer() - +Constructor for class technology.tabula.json.RulingSerializer +
  +
+
+

+S

+
+
scale(double, double) - +Method in class technology.tabula.DummyGraphics2D +
  +
serialize(Ruling, Type, JsonSerializationContext) - +Method in class technology.tabula.json.RulingSerializer +
  +
serialize(Table, Type, JsonSerializationContext) - +Method in class technology.tabula.json.TableSerializer +
  +
serialize(RectangularTextContainer, Type, JsonSerializationContext) - +Method in class technology.tabula.json.TextChunkSerializer +
  +
setBackground(Color) - +Method in class technology.tabula.DummyGraphics2D +
  +
setBottom(float) - +Method in class technology.tabula.Rectangle +
  +
setBottom(float) - +Method in class technology.tabula.Ruling +
  +
setClip(Rectangle2D) - +Method in class technology.tabula.CohenSutherlandClipping +
Sets the clip rectangle. +
setClip(Shape) - +Method in class technology.tabula.DummyGraphics2D +
  +
setClip(int, int, int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
setColor(Color) - +Method in class technology.tabula.DummyGraphics2D +
  +
setComposite(Composite) - +Method in class technology.tabula.DummyGraphics2D +
  +
setDebugClippingPaths(boolean) - +Method in class technology.tabula.ObjectExtractor +
  +
setEnd(float) - +Method in class technology.tabula.Ruling +
  +
setExtractionAlgorithm(ExtractionAlgorithm) - +Method in class technology.tabula.Table +
  +
setExtractRulingLines(boolean) - +Method in class technology.tabula.ObjectExtractor +
  +
setFont(Font) - +Method in class technology.tabula.DummyGraphics2D +
  +
setLeft(float) - +Method in class technology.tabula.Rectangle +
  +
setLeft(float) - +Method in class technology.tabula.Ruling +
  +
setPaint(Paint) - +Method in class technology.tabula.DummyGraphics2D +
  +
setPaintMode() - +Method in class technology.tabula.DummyGraphics2D +
  +
setPlaceholder(boolean) - +Method in class technology.tabula.Cell +
  +
setPosition(float) - +Method in class technology.tabula.Ruling +
  +
setRenderingHint(RenderingHints.Key, Object) - +Method in class technology.tabula.DummyGraphics2D +
  +
setRenderingHints(Map<?, ?>) - +Method in class technology.tabula.DummyGraphics2D +
  +
setRight(float) - +Method in class technology.tabula.Rectangle +
  +
setRight(float) - +Method in class technology.tabula.Ruling +
  +
setSpanning(boolean) - +Method in class technology.tabula.Cell +
  +
setStart(float) - +Method in class technology.tabula.Ruling +
  +
setStroke(Stroke) - +Method in class technology.tabula.DummyGraphics2D +
  +
setTextElements(List<TextChunk>) - +Method in class technology.tabula.Cell +
  +
setTextElements(List<TextChunk>) - +Method in class technology.tabula.Line +
  +
setTop(float) - +Method in class technology.tabula.Rectangle +
  +
setTop(float) - +Method in class technology.tabula.Ruling +
  +
setTransform(AffineTransform) - +Method in class technology.tabula.DummyGraphics2D +
  +
setXORMode(Color) - +Method in class technology.tabula.DummyGraphics2D +
  +
shear(double, double) - +Method in class technology.tabula.DummyGraphics2D +
  +
smooth(float[], int) - +Static method in class technology.tabula.ProjectionProfile +
  +
snapPoints() - +Method in class technology.tabula.Page +
  +
sort(List<T>, Comparator<T>) - +Static method in class technology.tabula.QuickSort +
Sorts the given list using the given comparator. +
sort(List<T>) - +Static method in class technology.tabula.QuickSort +
Sorts the given list using compareTo as comparator. +
sort(List<T>) - +Static method in class technology.tabula.Utils +
Wrap Collections.sort so we can fall back to a non-stable quicksort + if we're running on JDK7+ +
splitAt(int) - +Method in class technology.tabula.TextChunk +
Splits a TextChunk in two, at the position of the i-th TextElement +
SpreadsheetExtractionAlgorithm - Class in technology.tabula.extractors
 
SpreadsheetExtractionAlgorithm() - +Constructor for class technology.tabula.extractors.SpreadsheetExtractionAlgorithm +
  +
squeeze(Character, int) - +Method in class technology.tabula.TextChunk +
Removes runs of identical TextElements in this TextChunk + For example, if the TextChunk contains this string of characters: "1234xxxxx56xx" + and c == 'x' and minRunLength == 4, this method will return a list of TextChunk + such that: ["1234", "56xx"] +
strokeOrFillPath(boolean) - +Method in class technology.tabula.ObjectExtractor +
  +
strokePath() - +Method in class technology.tabula.ObjectExtractor +
  +
+
+

+T

+
+
Table - Class in technology.tabula
 
Table() - +Constructor for class technology.tabula.Table +
  +
Table(Page, ExtractionAlgorithm) - +Constructor for class technology.tabula.Table +
  +
TableSerializer - Class in technology.tabula.json
 
TableSerializer() - +Constructor for class technology.tabula.json.TableSerializer +
  +
TableWithRulingLines - Class in technology.tabula
 
TableWithRulingLines() - +Constructor for class technology.tabula.TableWithRulingLines +
  +
TableWithRulingLines(Rectangle, Page, List<Cell>, List<Ruling>, List<Ruling>) - +Constructor for class technology.tabula.TableWithRulingLines +
  +
technology.tabula - package technology.tabula
 
technology.tabula.debug - package technology.tabula.debug
 
technology.tabula.extractors - package technology.tabula.extractors
 
technology.tabula.json - package technology.tabula.json
 
technology.tabula.writers - package technology.tabula.writers
 
TextChunk - Class in technology.tabula
 
TextChunk(float, float, float, float) - +Constructor for class technology.tabula.TextChunk +
  +
TextChunk(TextElement) - +Constructor for class technology.tabula.TextChunk +
  +
TextChunk(List<TextElement>) - +Constructor for class technology.tabula.TextChunk +
  +
TextChunkSerializer - Class in technology.tabula.json
 
TextChunkSerializer() - +Constructor for class technology.tabula.json.TextChunkSerializer +
  +
TextElement - Class in technology.tabula
 
TextElement(float, float, float, float, PDFont, float, String, float) - +Constructor for class technology.tabula.TextElement +
  +
TextElement(float, float, float, float, PDFont, float, String, float, float) - +Constructor for class technology.tabula.TextElement +
  +
toString() - +Method in class technology.tabula.extractors.BasicExtractionAlgorithm +
  +
toString() - +Method in interface technology.tabula.extractors.ExtractionAlgorithm +
  +
toString() - +Method in class technology.tabula.extractors.SpreadsheetExtractionAlgorithm +
  +
toString() - +Method in class technology.tabula.Line +
  +
toString() - +Method in class technology.tabula.Rectangle +
  +
toString() - +Method in class technology.tabula.RectangularTextContainer +
  +
toString() - +Method in class technology.tabula.Ruling +
  +
toString() - +Method in class technology.tabula.TextElement +
  +
transform(AffineTransform) - +Method in class technology.tabula.DummyGraphics2D +
  +
translate(int, int) - +Method in class technology.tabula.DummyGraphics2D +
  +
translate(double, double) - +Method in class technology.tabula.DummyGraphics2D +
  +
transpose(List<List<T>>) - +Static method in class technology.tabula.Utils +
  +
TSVWriter - Class in technology.tabula.writers
 
TSVWriter() - +Constructor for class technology.tabula.writers.TSVWriter +
  +
+
+

+U

+
+
useQuickSort - +Static variable in class technology.tabula.Utils +
  +
Utils - Class in technology.tabula
 
Utils() - +Constructor for class technology.tabula.Utils +
  +
+
+

+V

+
+
vertical() - +Method in class technology.tabula.Ruling +
  +
verticallyOverlaps(Rectangle) - +Method in class technology.tabula.Rectangle +
  +
verticalOverlap(Rectangle) - +Method in class technology.tabula.Rectangle +
  +
verticalOverlapRatio(Rectangle) - +Method in class technology.tabula.Rectangle +
  +
+
+

+W

+
+
WHITE_SPACE_CHARS - +Static variable in class technology.tabula.Line +
  +
within(double, double, double) - +Static method in class technology.tabula.Utils +
  +
write(Appendable, Table) - +Method in class technology.tabula.writers.CSVWriter +
  +
write(Appendable, Table) - +Method in class technology.tabula.writers.JSONWriter +
  +
write(Appendable, Table) - +Method in interface technology.tabula.writers.Writer +
  +
Writer - Interface in technology.tabula.writers
 
+
+A B C D E F G H I J L M N O P Q R S T U V W + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/index.html b/index.html new file mode 100644 index 00000000..c6748482 --- /dev/null +++ b/index.html @@ -0,0 +1,74 @@ + + + + + + + +tabula-extractor 0.7.4-SNAPSHOT API + + + + + + + + + + + +<H2> +Frame Alert</H2> + +<P> +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. +<BR> +Link to<A HREF="overview-summary.html">Non-frame version.</A> + + + diff --git a/jbang-catalog.json b/jbang-catalog.json deleted file mode 100644 index b7f71347..00000000 --- a/jbang-catalog.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "catalogs": {}, - "aliases": { - "tabula": { - "script-ref": "https://github.com/tabulapdf/tabula-java/releases/download/v1.0.4/tabula-1.0.4-jar-with-dependencies.jar" - } - } -} \ No newline at end of file diff --git a/overview-frame.html b/overview-frame.html new file mode 100644 index 00000000..01aff937 --- /dev/null +++ b/overview-frame.html @@ -0,0 +1,51 @@ + + + + + + + +Overview List (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + + + + +
+
+ + + + + +
All Classes +

+ +Packages +
+technology.tabula +
+technology.tabula.debug +
+technology.tabula.extractors +
+technology.tabula.json +
+technology.tabula.writers +
+

+ +

+  + + diff --git a/overview-summary.html b/overview-summary.html new file mode 100644 index 00000000..fb6a6bb4 --- /dev/null +++ b/overview-summary.html @@ -0,0 +1,173 @@ + + + + + + + +Overview (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+tabula-extractor 0.7.4-SNAPSHOT API +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
+Packages
technology.tabula 
technology.tabula.debug 
technology.tabula.extractors 
technology.tabula.json 
technology.tabula.writers 
+ +


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/overview-tree.html b/overview-tree.html new file mode 100644 index 00000000..f8508b0e --- /dev/null +++ b/overview-tree.html @@ -0,0 +1,199 @@ + + + + + + + +Class Hierarchy (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For All Packages

+
+
+
Package Hierarchies:
technology.tabula, technology.tabula.debug, technology.tabula.extractors, technology.tabula.json, technology.tabula.writers
+
+

+Class Hierarchy +

+ +

+Interface Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/package-list b/package-list new file mode 100644 index 00000000..7fd1bb8f --- /dev/null +++ b/package-list @@ -0,0 +1,5 @@ +technology.tabula +technology.tabula.debug +technology.tabula.extractors +technology.tabula.json +technology.tabula.writers diff --git a/pom.xml b/pom.xml index 211d0d4d..64a5ed4f 100644 --- a/pom.xml +++ b/pom.xml @@ -1,156 +1,59 @@ 4.0.0 - technology.tabula - tabula - 1.0.6-SNAPSHOT - Tabula + tabula-extractor + tabula-extractor + 0.7.4-SNAPSHOT + tabula-extractor Extract tables from PDF files - http://github.com/tabulapdf/tabula-java - - - - MIT License - http://www.opensource.org/licenses/mit-license.php - - - - - - Manuel Aristaran - Tabula - http://github.com/tabulapdf - - - Jeremy B. Merrill - Tabula - http://github.com/tabulapdf - - - Mike Tigas - Tabula - http://github.com/tabulapdf - - - - - - snapshots - https://repository.apache.org/content/repositories/snapshots/ - - false - - - true - - - - - - scm:git:git@github.com:tabulapdf/tabula-java.git - scm:git:git@github.com:tabulapdf/tabula-java.git - git@github.com:tabulapdf/tabula-java.git - v1.0.2 - UTF-8 UTF-8 - - - ossrh - https://oss.sonatype.org/content/repositories/snapshots - + + + + sonatype + Sonatype repository + https://oss.sonatype.org/content/repositories/snapshots/ + + - ossrh - https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + true + always + warn + + apachesnapshots + apache snapshots + http://repository.apache.org/snapshots/ + default - + + - - - org.apache.maven.plugins - maven-javadoc-plugin - 3.8.0 - - true - - - - + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.10.3 + + + + + - - org.sonatype.plugins - nexus-staging-maven-plugin - 1.7.0 - true - - ossrh - https://oss.sonatype.org/ - 40766864c3b853 - true - - - - org.apache.maven.plugins - maven-source-plugin - 3.3.1 - - - attach-sources - - jar-no-fork - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - 3.8.0 - - 8 - - - - 
attach-javadocs - - jar - - - - - - org.apache.maven.plugins - maven-gpg-plugin - 3.2.4 - - - sign-artifacts - verify - - sign - - - - --pinentry-mode - loopback - - - - - maven-compiler-plugin - 3.13.0 + 3.1 - 1.8 - 1.8 + 1.6 + 1.6 @@ -166,159 +69,91 @@ - - org.apache.maven.plugins - maven-surefire-plugin - 3.3.1 - - - -Xms1024m -Xmx2048m - - - - - org.apache.maven.plugins - maven-eclipse-plugin - 2.10 + + + - org.locationtech.jts - jts-core - 1.20.0 + net.sf.jsi + jsi + 1.1.0-SNAPSHOT org.slf4j slf4j-api - 2.0.13 + 1.6.4 org.slf4j slf4j-simple - 2.0.13 + 1.6.4 org.apache.pdfbox pdfbox - 3.0.4 + 1.8.9 org.bouncycastle - bcprov-jdk18on - 1.80 + bcprov-jdk15 + 1.44 - org.bouncycastle - bcmail-jdk18on - 1.80 + bcmail-jdk15 + 1.44 junit junit - 4.13.2 + 4.11 test - commons-cli commons-cli - 1.8.0 + 1.2 - org.apache.commons commons-csv - 1.11.0 + 1.0 - com.google.code.gson gson - 2.11.0 - - - - com.github.jai-imageio - jai-imageio-core - 1.4.0 - - - - com.github.jai-imageio - jai-imageio-jpeg2000 - 1.4.0 - - - - org.apache.pdfbox - jbig2-imageio - 3.0.4 + 2.2.4 diff --git a/resources/inherit.gif b/resources/inherit.gif new file mode 100644 index 00000000..c814867a Binary files /dev/null and b/resources/inherit.gif differ diff --git a/serialized-form.html b/serialized-form.html new file mode 100644 index 00000000..ab8076ab --- /dev/null +++ b/serialized-form.html @@ -0,0 +1,555 @@ + + + + + + + +Serialized Form (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Serialized Form

+
+
+ + + + + +
+Package technology.tabula
+ +

+ + + + + +
+Class technology.tabula.Cell extends RectangularTextContainer<TextChunk> implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+spanning

+
+boolean spanning
+
+
+
+
+
+

+placeholder

+
+boolean placeholder
+
+
+
+
+
+

+textElements

+
+List<E> textElements
+
+
+
+
+ +

+ + + + + +
+Class technology.tabula.Line extends Rectangle implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+textChunks

+
+List<E> textChunks
+
+
+
+
+ +

+ + + + + +
+Class technology.tabula.Page extends Rectangle implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+rotation

+
+Integer rotation
+
+
+
+
+
+

+pageNumber

+
+int pageNumber
+
+
+
+
+
+

+texts

+
+List<E> texts
+
+
+
+
+
+

+rulings

+
+List<E> rulings
+
+
+
+
+
+

+cleanRulings

+
+List<E> cleanRulings
+
+
+
+
+
+

+verticalRulingLines

+
+List<E> verticalRulingLines
+
+
+
+
+
+

+horizontalRulingLines

+
+List<E> horizontalRulingLines
+
+
+
+
+
+

+minCharWidth

+
+float minCharWidth
+
+
+
+
+
+

+minCharHeight

+
+float minCharHeight
+
+
+
+
+
+

+spatial_index

+
+technology.tabula.RectangleSpatialIndex<T extends Rectangle> spatial_index
+
+
+
+
+ +

+ + + + + +
+Class technology.tabula.Rectangle extends Rectangle2D.Float implements Serializable
+ +

+ +

+ + + + + +
+Class technology.tabula.RectangularTextContainer extends Rectangle implements Serializable
+ +

+ +

+ + + + + +
+Class technology.tabula.Ruling extends Line2D.Float implements Serializable
+ +

+ +

+ + + + + +
+Class technology.tabula.Table extends Rectangle implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+cellContainer

+
+technology.tabula.Table.CellContainer cellContainer
+
+
+
+
+
+

+page

+
+Page page
+
+
+
+
+
+

+extractionAlgorithm

+
+ExtractionAlgorithm extractionAlgorithm
+
+
+
+
+
+

+rows

+
+List<E> rows
+
+
+
+
+ +

+ + + + + +
+Class technology.tabula.TableWithRulingLines extends Table implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+verticalRulings

+
+List<E> verticalRulings
+
+
+
+
+
+

+horizontalRulings

+
+List<E> horizontalRulings
+
+
+
+
+
+

+si

+
+technology.tabula.RectangleSpatialIndex<T extends Rectangle> si
+
+
+
+
+ +

+ + + + + +
+Class technology.tabula.TextChunk extends RectangularTextContainer<TextElement> implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+textElements

+
+List<E> textElements
+
+
+
+
+ +

+ + + + + +
+Class technology.tabula.TextElement extends Rectangle implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+text

+
+String text
+
+
+
+
+
+

+font

+
+org.apache.pdfbox.pdmodel.font.PDFont font
+
+
+
+
+
+

+fontSize

+
+float fontSize
+
+
+
+
+
+

+widthOfSpace

+
+float widthOfSpace
+
+
+
+
+
+

+dir

+
+float dir
+
+
+
+
+ +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/src/main/java/technology/tabula/Cell.java b/src/main/java/technology/tabula/Cell.java deleted file mode 100644 index d02c8c50..00000000 --- a/src/main/java/technology/tabula/Cell.java +++ /dev/null @@ -1,62 +0,0 @@ -package technology.tabula; - -import java.awt.geom.Point2D; -import java.util.Collections; - -@SuppressWarnings("serial") -public class Cell extends RectangularTextContainer { - - public Cell(float top, float left, float width, float height) { - super(top, left, width, height); - this.setPlaceholder(false); - this.setSpanning(false); - } - - public Cell(Point2D topLeft, Point2D bottomRight) { - super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY())); - this.setPlaceholder(false); - this.setSpanning(false); - } - - private boolean spanning; - private boolean placeholder; - - @Override - public String getText(boolean useLineReturns) { - if (this.textElements.size() == 0) { - return ""; - } - StringBuilder sb = new StringBuilder(); - this.textElements.sort(Rectangle.ILL_DEFINED_ORDER); - double curTop = this.textElements.get(0).getTop(); - for (TextChunk tc : this.textElements) { - if (useLineReturns && tc.getTop() > curTop) { - sb.append('\r'); - } - sb.append(tc.getText()); - curTop = tc.getTop(); - } - return sb.toString().trim(); - } - - @Override - public String getText() { - return getText(true); - } - - public boolean isSpanning() { - return spanning; - } - - public void setSpanning(boolean spanning) { - this.spanning = spanning; - } - - public boolean isPlaceholder() { - return placeholder; - } - - public void setPlaceholder(boolean placeholder) { - this.placeholder = placeholder; - } -} diff --git a/src/main/java/technology/tabula/CohenSutherlandClipping.java b/src/main/java/technology/tabula/CohenSutherlandClipping.java deleted file mode 100644 index db9153e9..00000000 --- 
a/src/main/java/technology/tabula/CohenSutherlandClipping.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * CohenSutherland.java - * -------------------- - * (c) 2007 by Intevation GmbH - * - * @author Sascha L. Teichmann (teichmann@intevation.de) - * @author Ludwig Reiter (ludwig@intevation.de) - * - * This program is free software under the LGPL (>=v2.1) - * Read the file LICENSE.txt coming with the sources for details. - */ -package technology.tabula; - -import java.awt.geom.Rectangle2D; -import java.awt.geom.Line2D; - -/** - * Implements the well known Cohen Sutherland line - * clipping algorithm (line against clip rectangle). - */ -public final class CohenSutherlandClipping { - - private double xMin; - private double yMin; - private double xMax; - private double yMax; - - private static final int INSIDE = 0; - private static final int LEFT = 1; - private static final int RIGHT = 2; - private static final int BOTTOM = 4; - private static final int TOP = 8; - - private final static float MINIMUM_DELTA = 0.01f; - - /** - * Creates a Cohen Sutherland clipper with clip window (0, 0, 0, 0). - */ - public CohenSutherlandClipping() {} - - /** - * Creates a Cohen Sutherland clipper with the given clip window. - * @param clipWindow the clip window to use. - */ - public CohenSutherlandClipping(Rectangle2D clipWindow) { - setClip(clipWindow); - } - - /** - * Sets the clip rectangle. - * @param clipWindow the clip window. - */ - public void setClip(Rectangle2D clipWindow) { - xMin = clipWindow.getX(); - xMax = xMin + clipWindow.getWidth(); - yMin = clipWindow.getY(); - yMax = yMin + clipWindow.getHeight(); - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - /** - * Clips a given line against the clip window. - * The modification (if needed) is done in place. - * @param line the line to clip. - * @return true if line is clipped, false if line is - * totally outside the clip window. 
- */ - public boolean clip(Line2D.Float line) { - Point point1 = new Point(line.getX1(), line.getY1()); - Point point2 = new Point(line.getX2(), line.getY2()); - Point outsidePoint = new Point(0d, 0d); - - boolean lineIsVertical = (point1.x == point2.x); - double lineSlope = lineIsVertical ? 0d : (point2.y-point1.y)/(point2.x-point1.x); - - while (point1.region != INSIDE || point2.region != INSIDE) { - if ((point1.region & point2.region) != 0) return false; - - outsidePoint.region = (point1.region == INSIDE) ? point2.region : point1.region; - - if ((outsidePoint.region & LEFT) != 0) { - outsidePoint.x = xMin; - outsidePoint.y = delta(outsidePoint.x, point1.x)*lineSlope + point1.y; - } - else if ((outsidePoint.region & RIGHT) != 0) { - outsidePoint.x = xMax; - outsidePoint.y = delta(outsidePoint.x, point1.x)*lineSlope + point1.y; - } - else if ((outsidePoint.region & BOTTOM) != 0) { - outsidePoint.y = yMin; - outsidePoint.x = lineIsVertical - ? point1.x - : delta(outsidePoint.y, point1.y)/lineSlope + point1.x; - } - else if ((outsidePoint.region & TOP) != 0) { - outsidePoint.y = yMax; - outsidePoint.x = lineIsVertical - ? point1.x - : delta(outsidePoint.y, point1.y)/lineSlope + point1.x; - } - - if (outsidePoint.isInTheSameRegionAs(point1)) { - point1.setPositionAndRegion(outsidePoint.x, outsidePoint.y); - } - else { - point2.setPositionAndRegion(outsidePoint.x, outsidePoint.y); - } - } - line.setLine(point1.x, point1.y, point2.x, point2.y); - return true; - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - private static double delta(double value1, double value2) { - return (Math.abs(value1 - value2) < MINIMUM_DELTA) ? 
0 : (value1 - value2); - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - class Point { - double x, y; - int region; - - Point(double x, double y) { - setPositionAndRegion(x, y); - } - - void setPositionAndRegion(double x, double y) { - this.x = x; this.y = y; - region = (x < xMin) ? LEFT : (x > xMax) ? RIGHT : INSIDE; - if (y < yMin) - region |= BOTTOM; - else if (y > yMax) - region |= TOP; - } - - boolean isInTheSameRegionAs(Point otherPoint) { - return this.region == otherPoint.region; - } - } - -} diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java deleted file mode 100644 index 1b422303..00000000 --- a/src/main/java/technology/tabula/CommandLineApp.java +++ /dev/null @@ -1,528 +0,0 @@ -package technology.tabula; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FilenameFilter; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.apache.commons.cli.DefaultParser; -import org.apache.pdfbox.Loader; -import org.apache.pdfbox.pdmodel.PDDocument; - -import technology.tabula.detectors.DetectionAlgorithm; -import technology.tabula.detectors.NurminenDetectionAlgorithm; -import technology.tabula.extractors.BasicExtractionAlgorithm; -import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; -import technology.tabula.writers.CSVWriter; -import technology.tabula.writers.JSONWriter; -import technology.tabula.writers.TSVWriter; -import technology.tabula.writers.Writer; - - -public class CommandLineApp { - - private static String VERSION = "1.0.6-SNAPSHOT"; - private static String VERSION_STRING = 
String.format("tabula %s (c) 2012-2020 Manuel Aristarán", VERSION); - private static String BANNER = "\nTabula helps you extract tables from PDFs\n\n"; - - private static final int RELATIVE_AREA_CALCULATION_MODE = 0; - private static final int ABSOLUTE_AREA_CALCULATION_MODE = 1; - - - private Appendable defaultOutput; - - private List> pageAreas; - private List pages; - private OutputFormat outputFormat; - private String password; - private TableExtractor tableExtractor; - - public CommandLineApp(Appendable defaultOutput, CommandLine line) throws ParseException { - this.defaultOutput = defaultOutput; - this.pageAreas = CommandLineApp.whichAreas(line); - this.pages = CommandLineApp.whichPages(line); - this.outputFormat = CommandLineApp.whichOutputFormat(line); - this.tableExtractor = CommandLineApp.createExtractor(line); - - if (line.hasOption('s')) { - this.password = line.getOptionValue('s'); - } - } - - public static void main(String[] args) { - CommandLineParser parser = new DefaultParser(); - try { - // parse the command line arguments - CommandLine line = parser.parse(buildOptions(), args); - - if (line.hasOption('h')) { - printHelp(); - System.exit(0); - } - - if (line.hasOption('v')) { - System.out.println(VERSION_STRING); - System.exit(0); - } - - new CommandLineApp(System.out, line).extractTables(line); - } catch (ParseException exp) { - System.err.println("Error: " + exp.getMessage()); - System.exit(1); - } - System.exit(0); - } - - public void extractTables(CommandLine line) throws ParseException { - if (line.hasOption('b')) { - if (line.getArgs().length != 0) { - throw new ParseException("Filename specified with batch\nTry --help for help"); - } - - File pdfDirectory = new File(line.getOptionValue('b')); - if (!pdfDirectory.isDirectory()) { - throw new ParseException("Directory does not exist or is not a directory"); - } - extractDirectoryTables(line, pdfDirectory); - return; - } - - if (line.getArgs().length != 1) { - throw new ParseException("Need 
exactly one filename\nTry --help for help"); - } - - File pdfFile = new File(line.getArgs()[0]); - if (!pdfFile.exists()) { - throw new ParseException("File does not exist"); - } - extractFileTables(line, pdfFile); - } - - public void extractDirectoryTables(CommandLine line, File pdfDirectory) throws ParseException { - File[] pdfs = pdfDirectory.listFiles(new FilenameFilter() { - public boolean accept(File dir, String name) { - return name.endsWith(".pdf"); - } - }); - - for (File pdfFile : pdfs) { - File outputFile = new File(getOutputFilename(pdfFile)); - try { - extractFileInto(pdfFile, outputFile); - } catch (ParseException e) { - System.err.println("Caught exception while processing file: " + pdfFile.toString()); - throw e; - } - } - } - - public void extractFileTables(CommandLine line, File pdfFile) throws ParseException { - if (!line.hasOption('o')) { - extractFile(pdfFile, this.defaultOutput); - return; - } - - File outputFile = new File(line.getOptionValue('o')); - extractFileInto(pdfFile, outputFile); - } - - public void extractFileInto(File pdfFile, File outputFile) throws ParseException { - BufferedWriter bufferedWriter = null; - try { - FileWriter fileWriter = new FileWriter(outputFile.getAbsoluteFile()); - bufferedWriter = new BufferedWriter(fileWriter); - - outputFile.createNewFile(); - extractFile(pdfFile, bufferedWriter); - } catch (IOException e) { - throw new ParseException("Cannot create file " + outputFile); - } finally { - if (bufferedWriter != null) { - try { - bufferedWriter.close(); - } catch (IOException e) { - System.out.println("Error in closing the BufferedWriter" + e); - } - } - } - } - - private void extractFile(File pdfFile, Appendable outFile) throws ParseException { - PDDocument pdfDocument = null; - try { - pdfDocument = this.password == null ? 
Loader.loadPDF(pdfFile) : Loader.loadPDF(pdfFile,password); - PageIterator pageIterator = getPageIterator(pdfDocument); - List tables = new ArrayList<>(); - - while (pageIterator.hasNext()) { - Page page = pageIterator.next(); - - if (tableExtractor.verticalRulingPositions != null) { - for (Float verticalRulingPosition : tableExtractor.verticalRulingPositions) { - page.addRuling(new Ruling(0, verticalRulingPosition, 0.0f, (float) page.getHeight())); - } - } - - if (pageAreas != null) { - for (Pair areaPair : pageAreas) { - Rectangle area = areaPair.getRight(); - if (areaPair.getLeft() == RELATIVE_AREA_CALCULATION_MODE) { - area = new Rectangle((float) (area.getTop() / 100 * page.getHeight()), - (float) (area.getLeft() / 100 * page.getWidth()), (float) (area.getWidth() / 100 * page.getWidth()), - (float) (area.getHeight() / 100 * page.getHeight())); - } - tables.addAll(tableExtractor.extractTables(page.getArea(area))); - } - } else { - tables.addAll(tableExtractor.extractTables(page)); - } - } - writeTables(tables, outFile); - } catch (IOException e) { - throw new ParseException(e.getMessage()); - } finally { - try { - if (pdfDocument != null) { - pdfDocument.close(); - } - } catch (IOException e) { - System.out.println("Error in closing pdf document" + e); - } - } - } - - private PageIterator getPageIterator(PDDocument pdfDocument) throws IOException { - ObjectExtractor extractor = new ObjectExtractor(pdfDocument); - return (pages == null) ? - extractor.extract() : - extractor.extract(pages); - } - - // CommandLine parsing methods - - private static OutputFormat whichOutputFormat(CommandLine line) throws ParseException { - if (!line.hasOption('f')) { - return OutputFormat.CSV; - } - - try { - return OutputFormat.valueOf(line.getOptionValue('f')); - } catch (IllegalArgumentException e) { - throw new ParseException(String.format( - "format %s is illegal. 
Available formats: %s", - line.getOptionValue('f'), - Utils.join(",", OutputFormat.formatNames()))); - } - } - - private static List> whichAreas(CommandLine line) throws ParseException { - if (!line.hasOption('a')) { - return null; - } - - String[] optionValues = line.getOptionValues('a'); - - List> areaList = new ArrayList>(); - for (String optionValue : optionValues) { - int areaCalculationMode = ABSOLUTE_AREA_CALCULATION_MODE; - int startIndex = 0; - if (optionValue.startsWith("%")) { - startIndex = 1; - areaCalculationMode = RELATIVE_AREA_CALCULATION_MODE; - } - List f = parseFloatList(optionValue.substring(startIndex)); - if (f.size() != 4) { - throw new ParseException("area parameters must be top,left,bottom,right optionally preceded by %"); - } - areaList.add(new Pair(areaCalculationMode, new Rectangle(f.get(0), f.get(1), f.get(3) - f.get(1), f.get(2) - f.get(0)))); - } - return areaList; - } - - private static List whichPages(CommandLine line) throws ParseException { - String pagesOption = line.hasOption('p') ? 
line.getOptionValue('p') : "1"; - return Utils.parsePagesOption(pagesOption); - } - - private static ExtractionMethod whichExtractionMethod(CommandLine line) { - // -r/--spreadsheet [deprecated; use -l] or -l/--lattice - if (line.hasOption('r') || line.hasOption('l')) { - return ExtractionMethod.SPREADSHEET; - } - - // -n/--no-spreadsheet [deprecated; use -t] or -c/--columns or -g/--guess or -t/--stream - if (line.hasOption('n') || line.hasOption('c') || line.hasOption('t')) { - return ExtractionMethod.BASIC; - } - return ExtractionMethod.DECIDE; - } - - private static TableExtractor createExtractor(CommandLine line) throws ParseException { - TableExtractor extractor = new TableExtractor(); - extractor.setGuess(line.hasOption('g')); - extractor.setMethod(CommandLineApp.whichExtractionMethod(line)); - extractor.setUseLineReturns(line.hasOption('u')); - - if (line.hasOption('c')) { - String optionString = line.getOptionValue('c'); - if (optionString.startsWith("%")) { - extractor.setVerticalRulingPositionsRelative(true); - optionString = optionString.substring(1); - } - extractor.setVerticalRulingPositions(parseFloatList(optionString)); - } - - return extractor; - } - - // utilities, etc. 
- - public static List parseFloatList(String option) throws ParseException { - String[] f = option.split(","); - List rv = new ArrayList<>(); - try { - for (final String element : f) { - rv.add(Float.parseFloat(element)); - } - return rv; - } catch (NumberFormatException e) { - throw new ParseException("Wrong number syntax"); - } - } - - private static void printHelp() { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("tabula", BANNER, buildOptions(), "", true); - } - - public static Options buildOptions() { - Options o = new Options(); - - o.addOption("v", "version", false, "Print version and exit."); - o.addOption("h", "help", false, "Print this help text."); - o.addOption("g", "guess", false, "Guess the portion of the page to analyze per page."); - o.addOption("r", "spreadsheet", false, "[Deprecated in favor of -l/--lattice] Force PDF to be extracted using spreadsheet-style extraction (if there are ruling lines separating each cell, as in a PDF of an Excel spreadsheet)"); - o.addOption("n", "no-spreadsheet", false, "[Deprecated in favor of -t/--stream] Force PDF not to be extracted using spreadsheet-style extraction (if there are no ruling lines separating each cell)"); - o.addOption("l", "lattice", false, "Force PDF to be extracted using lattice-mode extraction (if there are ruling lines separating each cell, as in a PDF of an Excel spreadsheet)"); - o.addOption("t", "stream", false, "Force PDF to be extracted using stream-mode extraction (if there are no ruling lines separating each cell)"); - o.addOption("i", "silent", false, "Suppress all stderr output."); - o.addOption("u", "use-line-returns", false, "Use embedded line returns in cells. 
(Only in spreadsheet mode.)"); - // o.addOption("d", "debug", false, "Print detected table areas instead of processing."); - o.addOption(Option.builder("b") - .longOpt("batch") - .desc("Convert all .pdfs in the provided directory.") - .hasArg() - .argName("DIRECTORY") - .build()); - o.addOption(Option.builder("o") - .longOpt("outfile") - .desc("Write output to instead of STDOUT. Default: -") - .hasArg() - .argName("OUTFILE") - .build()); - o.addOption(Option.builder("f") - .longOpt("format") - .desc("Output format: (" + Utils.join(",", OutputFormat.formatNames()) + "). Default: CSV") - .hasArg() - .argName("FORMAT") - .build()); - o.addOption(Option.builder("s") - .longOpt("password") - .desc("Password to decrypt document. Default is empty") - .hasArg() - .argName("PASSWORD") - .build()); - o.addOption(Option.builder("c") - .longOpt("columns") - .desc("X coordinates of column boundaries. Example --columns 10.1,20.2,30.3. " - + "If all values are between 0-100 (inclusive) and preceded by '%', input will be taken as % of actual width of the page. " - + "Example: --columns %25,50,80.6") - .hasArg() - .argName("COLUMNS") - .build()); - o.addOption(Option.builder("a") - .longOpt("area") - .desc("-a/--area = Portion of the page to analyze. Example: --area 269.875,12.75,790.5,561. " - + "Accepts top,left,bottom,right i.e. y1,x1,y2,x2 where all values are in points relative to the top left corner. " - + "If all values are between 0-100 (inclusive) and preceded by '%', input will be taken as % of actual height or width of the page. " - + "Example: --area %0,0,100,50. To specify multiple areas, -a option should be repeated. Default is entire page") - .hasArg() - .argName("AREA") - .build()); - o.addOption(Option.builder("p") - .longOpt("pages") - .desc("Comma separated list of ranges, or all. Examples: --pages 1-3,5-7, --pages 3 or --pages all. 
Default is --pages 1") - .hasArg() - .argName("PAGES") - .build()); - - return o; - } - - private static class TableExtractor { - private boolean guess = false; - private boolean useLineReturns = false; - private BasicExtractionAlgorithm basicExtractor = new BasicExtractionAlgorithm(); - private SpreadsheetExtractionAlgorithm spreadsheetExtractor = new SpreadsheetExtractionAlgorithm(); - - private boolean verticalRulingPositionsRelative = false; - private List verticalRulingPositions = null; - - private ExtractionMethod method = ExtractionMethod.BASIC; - - public TableExtractor() { - } - - public void setVerticalRulingPositions(List positions) { - this.verticalRulingPositions = positions; - } - - public void setVerticalRulingPositionsRelative(boolean relative) { - this.verticalRulingPositionsRelative = relative; - } - - public void setGuess(boolean guess) { - this.guess = guess; - } - - public void setUseLineReturns(boolean useLineReturns) { - this.useLineReturns = useLineReturns; - } - - public void setMethod(ExtractionMethod method) { - this.method = method; - } - - public List
extractTables(Page page) { - ExtractionMethod effectiveMethod = this.method; - if (effectiveMethod == ExtractionMethod.DECIDE) { - effectiveMethod = spreadsheetExtractor.isTabular(page) ? - ExtractionMethod.SPREADSHEET : - ExtractionMethod.BASIC; - } - switch (effectiveMethod) { - case BASIC: - return extractTablesBasic(page); - case SPREADSHEET: - return extractTablesSpreadsheet(page); - default: - return new ArrayList<>(); - } - } - - public List
extractTablesBasic(Page page) { - if (guess) { - // guess the page areas to extract using a detection algorithm - // currently we only have a detector that uses spreadsheets to find table areas - DetectionAlgorithm detector = new NurminenDetectionAlgorithm(); - List guesses = detector.detect(page); - List
tables = new ArrayList<>(); - - for (Rectangle guessRect : guesses) { - Page guess = page.getArea(guessRect); - tables.addAll(basicExtractor.extract(guess)); - } - return tables; - } - - if (verticalRulingPositions != null) { - List absoluteRulingPositions; - - if (this.verticalRulingPositionsRelative) { - // convert relative to absolute - absoluteRulingPositions = new ArrayList<>(verticalRulingPositions.size()); - for (float relative : this.verticalRulingPositions) { - float absolute = (float) (relative / 100.0 * page.getWidth()); - absoluteRulingPositions.add(absolute); - } - } else { - absoluteRulingPositions = this.verticalRulingPositions; - } - return basicExtractor.extract(page, absoluteRulingPositions); - } - - return basicExtractor.extract(page); - } - - public List
extractTablesSpreadsheet(Page page) { - // TODO add useLineReturns - return spreadsheetExtractor.extract(page); - } - } - - private void writeTables(List
tables, Appendable out) throws IOException { - Writer writer = null; - switch (outputFormat) { - case CSV: - writer = new CSVWriter(); - break; - case JSON: - writer = new JSONWriter(); - break; - case TSV: - writer = new TSVWriter(); - break; - } - writer.write(out, tables); - } - - private String getOutputFilename(File pdfFile) { - String extension = ".csv"; - switch (outputFormat) { - case CSV: - extension = ".csv"; - break; - case JSON: - extension = ".json"; - break; - case TSV: - extension = ".tsv"; - break; - } - return pdfFile.getPath().replaceFirst("(\\.pdf|)$", extension); - } - - private enum OutputFormat { - CSV, - TSV, - JSON; - - static String[] formatNames() { - OutputFormat[] values = OutputFormat.values(); - String[] rv = new String[values.length]; - for (int i = 0; i < values.length; i++) { - rv[i] = values[i].name(); - } - return rv; - } - } - - private enum ExtractionMethod { - BASIC, - SPREADSHEET, - DECIDE - } - - private class DebugOutput { - private boolean debugEnabled; - - public DebugOutput(boolean debug) { - this.debugEnabled = debug; - } - - public void debug(String msg) { - if (this.debugEnabled) { - System.err.println(msg); - } - } - } -} diff --git a/src/main/java/technology/tabula/HasText.java b/src/main/java/technology/tabula/HasText.java deleted file mode 100644 index 1a9bda99..00000000 --- a/src/main/java/technology/tabula/HasText.java +++ /dev/null @@ -1,8 +0,0 @@ -package technology.tabula; - -public interface HasText { - - String getText(); - String getText(boolean useLineReturns); - -} diff --git a/src/main/java/technology/tabula/Line.java b/src/main/java/technology/tabula/Line.java deleted file mode 100644 index 31d10529..00000000 --- a/src/main/java/technology/tabula/Line.java +++ /dev/null @@ -1,76 +0,0 @@ -package technology.tabula; - -import java.util.ArrayList; -import java.util.List; - -// TODO this class seems superfluous - get rid of it - -@SuppressWarnings("serial") -public class Line extends Rectangle { - - List 
textChunks = new ArrayList<>(); - public static final Character[] WHITE_SPACE_CHARS = { ' ', '\t', '\r', '\n', '\f' }; - - - public List getTextElements() { - return textChunks; - } - - public void setTextElements(List textChunks) { - this.textChunks = textChunks; - } - - public void addTextChunk(int i, TextChunk textChunk) { - if (i < 0) { - throw new IllegalArgumentException("i can't be less than 0"); - } - - int s = this.textChunks.size(); - if (s < i + 1) { - for (; s <= i; s++) { - this.textChunks.add(null); - } - this.textChunks.set(i, textChunk); - } - else { - this.textChunks.set(i, this.textChunks.get(i).merge(textChunk)); - } - this.merge(textChunk); - } - - public void addTextChunk(TextChunk textChunk) { - if (this.textChunks.isEmpty()) { - this.setRect(textChunk); - } - else { - this.merge(textChunk); - } - this.textChunks.add(textChunk); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - String s = super.toString(); - sb.append(s, 0, s.length() - 1); - sb.append(",chunks="); - for (TextChunk te: this.textChunks) { - sb.append("'" + te.getText() + "', "); - } - sb.append(']'); - return sb.toString(); - } - - static Line removeRepeatedCharacters(Line line, Character c, int minRunLength) { - - Line rv = new Line(); - - for(TextChunk t: line.getTextElements()) { - for (TextChunk r: t.squeeze(c, minRunLength)) { - rv.addTextChunk(r); - } - } - - return rv; - } -} diff --git a/src/main/java/technology/tabula/ObjectExtractor.java b/src/main/java/technology/tabula/ObjectExtractor.java deleted file mode 100644 index 9f3f6a03..00000000 --- a/src/main/java/technology/tabula/ObjectExtractor.java +++ /dev/null @@ -1,73 +0,0 @@ -package technology.tabula; - -import java.io.IOException; - -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; - -public class ObjectExtractor implements java.io.Closeable { - - private final PDDocument pdfDocument; - - public ObjectExtractor(PDDocument pdfDocument) 
{ - this.pdfDocument = pdfDocument; - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - protected Page extractPage(Integer pageNumber) throws IOException { - if (pageNumber > pdfDocument.getNumberOfPages() || pageNumber < 1) { - throw new java.lang.IndexOutOfBoundsException("Page number does not exist."); - } - PDPage page = pdfDocument.getPage(pageNumber - 1); - - ObjectExtractorStreamEngine streamEngine = new ObjectExtractorStreamEngine(page); - streamEngine.processPage(page); - - TextStripper textStripper = new TextStripper(pdfDocument, pageNumber); - textStripper.process(); - - Utils.sort(textStripper.getTextElements(), Rectangle.ILL_DEFINED_ORDER); - - float width, height; - int rotation = page.getRotation(); - if (Math.abs(rotation) == 90 || Math.abs(rotation) == 270) { - width = page.getCropBox().getHeight(); - height = page.getCropBox().getWidth(); - } else { - width = page.getCropBox().getWidth(); - height = page.getCropBox().getHeight(); - } - - return Page.Builder.newInstance() - .withPageDims(PageDims.of(0, 0, width, height)) - .withRotation(rotation) - .withNumber(pageNumber) - .withPdPage(page) - .withPdDocument(pdfDocument) - .withRulings(streamEngine.rulings) - .withTextElements(textStripper.getTextElements()) - .withMinCharWidth(textStripper.getMinCharWidth()) - .withMinCharHeight(textStripper.getMinCharHeight()) - .withIndex(textStripper.getSpatialIndex()) - .build(); - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - public PageIterator extract(Iterable pages) { - return new PageIterator(this, pages); - } - - public PageIterator extract() { - return extract(Utils.range(1, pdfDocument.getNumberOfPages() + 1)); - } - - public Page extract(int pageNumber) { - return extract(Utils.range(pageNumber, pageNumber + 1)).next(); - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - public void close() throws IOException { - pdfDocument.close(); 
- } - -} diff --git a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java b/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java deleted file mode 100644 index 9907eca1..00000000 --- a/src/main/java/technology/tabula/ObjectExtractorStreamEngine.java +++ /dev/null @@ -1,271 +0,0 @@ -package technology.tabula; - -import java.awt.Shape; -import java.awt.geom.AffineTransform; -import java.awt.geom.GeneralPath; -import java.awt.geom.Line2D; -import java.awt.geom.PathIterator; -import java.awt.geom.Point2D; -import java.awt.geom.Rectangle2D; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; - -import org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.common.PDRectangle; -import org.apache.pdfbox.pdmodel.graphics.image.PDImage; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import static java.awt.geom.PathIterator.*; - -class ObjectExtractorStreamEngine extends PDFGraphicsStreamEngine { - - protected List rulings; - private AffineTransform pageTransform; - private boolean extractRulingLines = true; - private Logger logger; - private int clipWindingRule = -1; - private GeneralPath currentPath = new GeneralPath(); - - private static final float RULING_MINIMUM_LENGTH = 0.01f; - - protected ObjectExtractorStreamEngine(PDPage page) { - super(page); - logger = LoggerFactory.getLogger(ObjectExtractorStreamEngine.class); - rulings = new ArrayList<>(); - - // Calculate page transform: - pageTransform = new AffineTransform(); - PDRectangle pageCropBox = getPage().getCropBox(); - int rotationAngleInDegrees = getPage().getRotation(); - - if (Math.abs(rotationAngleInDegrees) == 90 || Math.abs(rotationAngleInDegrees) == 270) { - double rotationAngleInRadians = rotationAngleInDegrees * (Math.PI / 180.0); - pageTransform = AffineTransform.getRotateInstance(rotationAngleInRadians, 0, 0); - } else { 
- double deltaX = 0; - double deltaY = pageCropBox.getHeight(); - pageTransform.concatenate(AffineTransform.getTranslateInstance(deltaX, deltaY)); - } - - pageTransform.concatenate(AffineTransform.getScaleInstance(1, -1)); - pageTransform.translate(-pageCropBox.getLowerLeftX(), -pageCropBox.getLowerLeftY()); - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - @Override - public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) { - currentPath.moveTo((float) p0.getX(), (float) p0.getY()); - currentPath.lineTo((float) p1.getX(), (float) p1.getY()); - currentPath.lineTo((float) p2.getX(), (float) p2.getY()); - currentPath.lineTo((float) p3.getX(), (float) p3.getY()); - currentPath.closePath(); - } - - @Override - public void clip(int windingRule) { - // The clipping path will not be updated until the succeeding painting - // operator is called. - clipWindingRule = windingRule; - } - - @Override - public void closePath() { - currentPath.closePath(); - } - - @Override - public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) { - currentPath.curveTo(x1, y1, x2, y2, x3, y3); - } - - @Override - public void drawImage(PDImage arg0) {} - - @Override - public void endPath() { - if (clipWindingRule != -1) { - currentPath.setWindingRule(clipWindingRule); - getGraphicsState().intersectClippingPath(currentPath); - clipWindingRule = -1; - } - currentPath.reset(); - } - - @Override - public void fillAndStrokePath(int arg0) { - strokeOrFillPath(true); - } - - @Override - public void fillPath(int arg0) { - strokeOrFillPath(true); - } - - @Override - public Point2D getCurrentPoint() { - return currentPath.getCurrentPoint(); - } - - @Override - public void lineTo(float x, float y) { - currentPath.lineTo(x, y); - } - - @Override - public void moveTo(float x, float y) { - currentPath.moveTo(x, y); - } - - @Override - public void shadingFill(COSName arg0) {} - - // - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - // - @Override - public void strokePath() { - strokeOrFillPath(false); - } - - private void strokeOrFillPath(boolean isFill) { - if (!extractRulingLines) { - currentPath.reset(); - return; - } - - boolean didNotPassedTheFilter = filterPathBySegmentType(); - if (didNotPassedTheFilter) return; - - // TODO: how to implement color filter? - - // Skip the first path operation and save it as the starting point. - PathIterator pathIterator = currentPath.getPathIterator(getPageTransform()); - - float[] coordinates = new float[6]; - int currentSegment; - - Point2D.Float startPoint = getStartPoint(pathIterator); - Point2D.Float last_move = startPoint; - Point2D.Float endPoint = null; - Line2D.Float line; - PointComparator pointComparator = new PointComparator(); - - while (!pathIterator.isDone()) { - pathIterator.next(); - // This can be the last segment, when pathIterator.isDone, but we need to - // process it otherwise us-017.pdf fails the last value. - try { - currentSegment = pathIterator.currentSegment(coordinates); - } catch (IndexOutOfBoundsException ex) { - continue; - } - switch (currentSegment) { - case SEG_LINETO: - endPoint = new Point2D.Float(coordinates[0], coordinates[1]); - if (startPoint == null || endPoint == null) { - break; - } - line = getLineBetween(startPoint, endPoint, pointComparator); - verifyLineIntersectsClipping(line); - break; - case SEG_MOVETO: - last_move = new Point2D.Float(coordinates[0], coordinates[1]); - endPoint = last_move; - break; - case SEG_CLOSE: - // According to PathIterator docs: - // "The preceding sub-path should be closed by appending a line - // segment back to the point corresponding to the most recent - // SEG_MOVETO." 
- if (startPoint == null || endPoint == null) { - break; - } - line = getLineBetween(endPoint, last_move, pointComparator); - verifyLineIntersectsClipping(line); - break; - } - startPoint = endPoint; - } - currentPath.reset(); - } - - private boolean filterPathBySegmentType() { - PathIterator pathIterator = currentPath.getPathIterator(pageTransform); - float[] coordinates = new float[6]; - int currentSegmentType = pathIterator.currentSegment(coordinates); - if (currentSegmentType != SEG_MOVETO) { - currentPath.reset(); - return true; - } - pathIterator.next(); - while (!pathIterator.isDone()) { - currentSegmentType = pathIterator.currentSegment(coordinates); - if (currentSegmentType != SEG_LINETO && currentSegmentType != SEG_CLOSE && currentSegmentType != SEG_MOVETO) { - currentPath.reset(); - return true; - } - pathIterator.next(); - } - return false; - } - - private Point2D.Float getStartPoint(PathIterator pathIterator) { - float[] startPointCoordinates = new float[6]; - pathIterator.currentSegment(startPointCoordinates); - float x = Utils.round(startPointCoordinates[0], 2); - float y = Utils.round(startPointCoordinates[1], 2); - return new Point2D.Float(x, y); - } - - private Line2D.Float getLineBetween(Point2D.Float pointA, Point2D.Float pointB, PointComparator pointComparator) { - if (pointComparator.compare(pointA, pointB) == -1) { - return new Line2D.Float(pointA, pointB); - } - return new Line2D.Float(pointB, pointA); - } - - private void verifyLineIntersectsClipping(Line2D.Float line) { - Rectangle2D currentClippingPath = currentClippingPath(); - if (line.intersects(currentClippingPath)) { - Ruling ruling = new Ruling(line.getP1(), line.getP2()).intersect(currentClippingPath); - if (ruling.length() > RULING_MINIMUM_LENGTH) { - rulings.add(ruling); - } - } - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - public AffineTransform getPageTransform() { - return pageTransform; - } - - public Rectangle2D 
currentClippingPath() { - Shape currentClippingPath = getGraphicsState().getCurrentClippingPath(); - Shape transformedClippingPath = getPageTransform().createTransformedShape(currentClippingPath); - return transformedClippingPath.getBounds2D(); - } - - // TODO: repeated in SpreadsheetExtractionAlgorithm. - class PointComparator implements Comparator { - @Override - public int compare(Point2D p1, Point2D p2) { - float p1X = Utils.round(p1.getX(), 2); - float p1Y = Utils.round(p1.getY(), 2); - float p2X = Utils.round(p2.getX(), 2); - float p2Y = Utils.round(p2.getY(), 2); - - if (p1Y > p2Y) - return 1; - if (p1Y < p2Y) - return -1; - if (p1X > p2X) - return 1; - if (p1X < p2X) - return -1; - return 0; - } - } - -} diff --git a/src/main/java/technology/tabula/Page.java b/src/main/java/technology/tabula/Page.java deleted file mode 100644 index ed74d14a..00000000 --- a/src/main/java/technology/tabula/Page.java +++ /dev/null @@ -1,416 +0,0 @@ -package technology.tabula; - -import java.awt.geom.Point2D; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; - -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; - -import static java.lang.Float.compare; -import static java.util.Collections.min; - -@SuppressWarnings("serial") -// TODO: this class should probably be called "PageArea" or something like that -public class Page extends Rectangle { - - private int number; - private Integer rotation; - private float minCharWidth; - private float minCharHeight; - - private List textElements; - - // TODO: Create a class for 'List ' that encapsulates all of these lists and their behaviors? 
- private List rulings, - cleanRulings = null, - verticalRulingLines = null, - horizontalRulingLines = null; - - private PDPage pdPage; - private PDDocument pdDoc; - - private RectangleSpatialIndex spatialIndex; - - private static final float DEFAULT_MIN_CHAR_LENGTH = 7; - - private Page( - PageDims pageDims, - int rotation, - int number, - PDPage pdPage, - PDDocument doc, - List characters, - List rulings, - float minCharWidth, - float minCharHeight, - RectangleSpatialIndex index - ) { - super(pageDims.getTop(), pageDims.getLeft(), pageDims.getWidth(), pageDims.getHeight()); - this.rotation = rotation; - this.number = number; - this.pdPage = pdPage; - this.pdDoc = doc; - this.textElements = characters; - this.rulings = rulings; - this.minCharWidth = minCharWidth; - this.minCharHeight = minCharHeight; - this.spatialIndex = index; - } - - /** - * - * @deprecated use {@link Builder} instead - */ - @Deprecated - public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc) { - super(top, left, width, height); - this.rotation = rotation; - this.number = number; - this.pdPage = pdPage; - this.pdDoc = doc; - } - - /** - * - * @deprecated use {@link Builder} instead - */ - public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, - List characters, List rulings) { - this(top, left, width, height, rotation, number, pdPage, doc); - this.textElements = characters; - this.rulings = rulings; - } - - /** - * - * @deprecated use {@link Builder} instead - */ - public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, - ObjectExtractorStreamEngine streamEngine, TextStripper textStripper) { - this(top, left, width, height, rotation, number, pdPage, doc, textStripper.getTextElements(), streamEngine.rulings); - this.minCharWidth = textStripper.getMinCharWidth(); - this.minCharHeight = 
textStripper.getMinCharHeight(); - this.spatialIndex = textStripper.getSpatialIndex(); - } - - - - /** - * - * @deprecated use {@link Builder} instead - */ - public Page(float top, float left, float width, float height, int rotation, int number, PDPage pdPage, PDDocument doc, - List characters, List rulings, - float minCharWidth, float minCharHeight, RectangleSpatialIndex index) { - this(top, left, width, height, rotation, number, pdPage, doc, characters, rulings); - this.minCharHeight = minCharHeight; - this.minCharWidth = minCharWidth; - this.spatialIndex = index; - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - public Page getArea(Rectangle area) { - List areaTextElements = getText(area); - - float minimumCharWidth = getMinimumCharWidthFrom(areaTextElements); - float minimumCharHeight = getMinimumCharHeightFrom(areaTextElements); - - final Page page = Page.Builder.newInstance() - .withPageDims(PageDims.of(area.getTop(), area.getLeft(), (float) area.getWidth(), (float) area.getHeight())) - .withRotation(rotation) - .withNumber(number) - .withPdPage(pdPage) - .withPdDocument(pdDoc) - .withTextElements(areaTextElements) - .withRulings(Ruling.cropRulingsToArea(getRulings(), area)) - .withMinCharWidth(minimumCharWidth) - .withMinCharHeight(minimumCharHeight) - .withIndex(spatialIndex) - .build(); - - addBorderRulingsTo(page); - - return page; - } - - private float getMinimumCharWidthFrom(List areaTextElements) { - if (!areaTextElements.isEmpty()) { - return min(areaTextElements, (te1, te2) -> compare(te1.width, te2.width)).width; - } - return DEFAULT_MIN_CHAR_LENGTH; - } - - private float getMinimumCharHeightFrom(List areaTextElements) { - if (!areaTextElements.isEmpty()) { - return min(areaTextElements, (te1, te2) -> compare(te1.height, te2.height)).height; - } - return DEFAULT_MIN_CHAR_LENGTH; - } - - private void addBorderRulingsTo(Page page) { - Point2D.Double leftTop = new Point2D.Double(page.getLeft(), page.getTop()), - 
rightTop = new Point2D.Double(page.getRight(), page.getTop()), - rightBottom = new Point2D.Double(page.getRight(), page.getBottom()), - leftBottom = new Point2D.Double(page.getLeft(), page.getBottom()); - page.addRuling(new Ruling(leftTop, rightTop)); - page.addRuling(new Ruling(rightTop, rightBottom)); - page.addRuling(new Ruling(rightBottom, leftBottom)); - page.addRuling(new Ruling(leftBottom, leftTop)); - } - - public Page getArea(float top, float left, float bottom, float right) { - Rectangle area = new Rectangle(top, left, right - left, bottom - top); - return getArea(area); - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - public Integer getRotation() { - return rotation; - } - - public int getPageNumber() { - return number; - } - - /** - * @deprecated with no replacement - */ - @Deprecated - public float getMinCharWidth() { - return minCharWidth; - } - - /** - * @deprecated with no replacement - */ - @Deprecated - public float getMinCharHeight() { - return minCharHeight; - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - public List getText() { - return textElements; - } - - public List getText(Rectangle area) { - return spatialIndex.contains(area); - } - - /** - * @deprecated use {@linkplain #getText(Rectangle)} instead - */ - @Deprecated - public List getText(float top, float left, float bottom, float right) { - return getText(new Rectangle(top, left, right - left, bottom - top)); - } - - /** - * @deprecated use {@linkplain #getText()} instead - */ - @Deprecated - public List getTexts() { - return textElements; - } - - /** - * Returns the minimum bounding box that contains all the TextElements on this Page - */ - public Rectangle getTextBounds() { - List texts = this.getText(); - if (!texts.isEmpty()) { - return Utils.bounds(texts); - } else { - return new Rectangle(); - } - } - - /** - * @deprecated with no replacement - */ - @Deprecated - public boolean hasText() { - return 
textElements.size() > 0; - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - public List getRulings() { - if (cleanRulings != null) { - return cleanRulings; - } - - if (rulings == null || rulings.isEmpty()) { - verticalRulingLines = new ArrayList<>(); - horizontalRulingLines = new ArrayList<>(); - return new ArrayList<>(); - } - - // TODO: Move as a static method to the Ruling class? - Utils.snapPoints(rulings, minCharWidth, minCharHeight); - - verticalRulingLines = getCollapsedVerticalRulings(); - horizontalRulingLines = getCollapsedHorizontalRulings(); - - cleanRulings = new ArrayList<>(verticalRulingLines); - cleanRulings.addAll(horizontalRulingLines); - - return cleanRulings; - } - - // TODO: Create a class for 'List ' and encapsulate these behaviors within it? - private List getCollapsedVerticalRulings() { - List verticalRulings = new ArrayList<>(); - for (Ruling ruling : rulings) { - if (ruling.vertical()) { - verticalRulings.add(ruling); - } - } - return Ruling.collapseOrientedRulings(verticalRulings); - } - - private List getCollapsedHorizontalRulings() { - List horizontalRulings = new ArrayList<>(); - for (Ruling ruling : rulings) { - if (ruling.horizontal()) { - horizontalRulings.add(ruling); - } - } - return Ruling.collapseOrientedRulings(horizontalRulings); - } - - public List getVerticalRulings() { - if (verticalRulingLines != null) { - return verticalRulingLines; - } - getRulings(); - return verticalRulingLines; - } - - public List getHorizontalRulings() { - if (horizontalRulingLines != null) { - return horizontalRulingLines; - } - getRulings(); - return horizontalRulingLines; - } - - public void addRuling(Ruling ruling) { - if (ruling.oblique()) { - throw new UnsupportedOperationException("Can't add an oblique ruling."); - } - rulings.add(ruling); - // Clear caches: - verticalRulingLines = null; - horizontalRulingLines = null; - cleanRulings = null; - } - - public List getUnprocessedRulings() { - return rulings; 
- } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - public PDPage getPDPage() { - return pdPage; - } - - public PDDocument getPDDoc() { - return pdDoc; - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - - /** - * @deprecated with no replacement - */ - @Deprecated - public RectangleSpatialIndex getSpatialIndex() { - return spatialIndex; - } - - public static class Builder { - private PageDims pageDims; - private int rotation; - private int number; - private PDPage pdPage; - private PDDocument pdDocument; - private List textElements; - private List rulings; - private float minCharWidth; - private float minCharHeight; - private RectangleSpatialIndex index; - - private Builder() {} - - public static Builder newInstance() { - return new Builder(); - } - - public Builder withPageDims(PageDims pageDims) { - this.pageDims = pageDims; - - return this; - } - - public Builder withRotation(int rotation) { - this.rotation = rotation; - - return this; - } - - public Builder withNumber(int number) { - this.number = number; - - return this; - } - - public Builder withPdPage(PDPage pdPage) { - this.pdPage = pdPage; - - return this; - } - - public Builder withPdDocument(PDDocument pdDocument) { - this.pdDocument = pdDocument; - - return this; - } - - public Builder withTextElements(List textElements) { - this.textElements = textElements; - - return this; - } - - public Builder withRulings(List rulings) { - this.rulings = rulings; - - return this; - } - - public Builder withMinCharWidth(float minCharWidth) { - this.minCharWidth = minCharWidth; - - return this; - } - - public Builder withMinCharHeight(float minCharHeight) { - this.minCharHeight = minCharHeight; - - return this; - } - - public Builder withIndex(RectangleSpatialIndex index) { - this.index = index; - - return this; - } - - public Page build() { - return new Page(pageDims, rotation, number, pdPage, pdDocument, textElements, rulings, minCharWidth, 
minCharHeight, index); - } - } -} diff --git a/src/main/java/technology/tabula/PageDims.java b/src/main/java/technology/tabula/PageDims.java deleted file mode 100644 index 1598d125..00000000 --- a/src/main/java/technology/tabula/PageDims.java +++ /dev/null @@ -1,35 +0,0 @@ -package technology.tabula; - -public class PageDims { - private final float top; - private final float left; - private final float width; - private final float height; - - private PageDims(final float top, final float left, final float width, final float height) { - this.top = top; - this.left = left; - this.width = width; - this.height = height; - } - - public static PageDims of(final float top, final float left, final float width, final float height) { - return new PageDims(top, left, width, height); - } - - public float getTop() { - return top; - } - - public float getLeft() { - return left; - } - - public float getWidth() { - return width; - } - - public float getHeight() { - return height; - } -} diff --git a/src/main/java/technology/tabula/PageIterator.java b/src/main/java/technology/tabula/PageIterator.java deleted file mode 100644 index 052ed54a..00000000 --- a/src/main/java/technology/tabula/PageIterator.java +++ /dev/null @@ -1,43 +0,0 @@ -package technology.tabula; - -import java.io.IOException; -import java.util.Iterator; - -public class PageIterator implements Iterator { - - private ObjectExtractor objectExtractor; - private Iterator pageIndexIterator; - - public PageIterator(ObjectExtractor objectExtractor, Iterable pages) { - super(); - this.objectExtractor = objectExtractor; - this.pageIndexIterator = pages.iterator(); - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - @Override - public boolean hasNext() { - return pageIndexIterator.hasNext(); - } - - @Override - public Page next() { - Page nextPage = null; - if (!this.hasNext()) { - throw new IllegalStateException(); - } - try { - nextPage = 
objectExtractor.extractPage(pageIndexIterator.next()); - } catch (IOException e) { - e.printStackTrace(); - } - return nextPage; - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - -} diff --git a/src/main/java/technology/tabula/Pair.java b/src/main/java/technology/tabula/Pair.java deleted file mode 100644 index d54cbbe5..00000000 --- a/src/main/java/technology/tabula/Pair.java +++ /dev/null @@ -1,19 +0,0 @@ -package technology.tabula; - -public class Pair { - private final L left; - private final R right; - - public Pair(L left, R right) { - this.left = left; - this.right = right; - } - - public L getLeft() { - return this.left; - } - - public R getRight() { - return this.right; - } -} diff --git a/src/main/java/technology/tabula/ProjectionProfile.java b/src/main/java/technology/tabula/ProjectionProfile.java deleted file mode 100644 index 39ab9e41..00000000 --- a/src/main/java/technology/tabula/ProjectionProfile.java +++ /dev/null @@ -1,219 +0,0 @@ -package technology.tabula; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - - -// NOTE: this class is currently not used by the extraction algorithms -// keeping it for potential use. 
-public class ProjectionProfile { - - public static final int DECIMAL_PLACES = 1; // fixed <-> float conversion precision - private final Page area; - private final Rectangle textBounds; - private float[] verticalProjection; - private float[] horizontalProjection; - private final double areaWidth, areaHeight, areaTop, areaLeft; - private float minCharWidth = Float.MAX_VALUE, minCharHeight = Float.MAX_VALUE, horizontalKernelSize, verticalKernelSize; - private float maxHorizontalProjection = 0, maxVerticalProjection = 0; - - public ProjectionProfile(Page area, List elements, float horizontalKernelSize, float verticalKernelSize) { - this.area = area; - this.areaWidth = area.getWidth(); - this.areaHeight = area.getHeight(); - this.areaTop = area.getTop(); - this.areaLeft = area.getLeft(); - this.verticalProjection = new float[toFixed(areaHeight)]; - this.horizontalProjection = new float[toFixed(areaWidth)]; - this.horizontalKernelSize = horizontalKernelSize; - this.verticalKernelSize = verticalKernelSize; - this.textBounds = area.getTextBounds(); - - for (Rectangle element: elements) { - // exclude elements that take more than 80% of the width - // of the area. 
They won't contribute to determining columns - if (element.getWidth() / this.textBounds.getWidth() > 0.8) { - continue; - } - this.addRectangle(element); - } - - this.verticalProjection = smooth(this.verticalProjection, toFixed(verticalKernelSize)); - this.horizontalProjection = smooth(this.horizontalProjection, toFixed(horizontalKernelSize)); - } - - private void addRectangle(Rectangle element) { - // calculate horizontal and vertical projection profiles - if (!area.contains(element)) { - return; - } - - this.minCharHeight = (float) Math.min(this.minCharHeight, element.getHeight()); - this.minCharWidth = (float) Math.min(this.minCharWidth, element.getWidth()); - - for (int k = toFixed(element.getLeft()); k < toFixed(element.getRight()); k++) { - this.horizontalProjection[k - toFixed(areaLeft)] += element.getHeight(); - this.maxHorizontalProjection = Math.max(this.maxHorizontalProjection, this.horizontalProjection[k - toFixed(areaLeft)]); - } - for(int k = toFixed(element.getTop()); k < toFixed(element.getBottom()); k++) { - this.verticalProjection[k - toFixed(areaTop)] += element.getWidth(); - this.maxVerticalProjection = Math.max(this.maxVerticalProjection, this.verticalProjection[k - toFixed(areaTop)]); - } - } - - public float[] getVerticalProjection() { - return verticalProjection; - } - - public float[] getHorizontalProjection() { - return horizontalProjection; - } - - public float[] findVerticalSeparators(float minColumnWidth) { - boolean foundNarrower = false; - - List verticalSeparators = new ArrayList<>(); - for (Ruling r: area.getVerticalRulings()) { - if (r.length() / this.textBounds.getHeight() >= 0.95) { - verticalSeparators.add(toFixed(r.getPosition() - this.areaLeft)); - } - } - - List seps = findSeparatorsFromProjection(filter(getFirstDeriv(this.horizontalProjection), 0.1f)); - - for (Integer foundSep: seps) { - for (Integer explicitSep: verticalSeparators) { - if (Math.abs(toDouble(foundSep - explicitSep)) <= minColumnWidth) { - foundNarrower = 
true; - break; - } - } - if (!foundNarrower) { - verticalSeparators.add(foundSep); - } - foundNarrower = false; - } - Collections.sort(verticalSeparators); - float[] rv = new float[verticalSeparators.size()]; - for (int i = 0; i < rv.length; i++) { - rv[i] = (float) toDouble(verticalSeparators.get(i)); - } - return rv; - } - - public float[] findHorizontalSeparators(float minRowHeight) { - boolean foundShorter = false; - - List horizontalSeparators = new ArrayList<>(); - for (Ruling r: area.getHorizontalRulings()) { - System.out.println(r.length() / this.textBounds.getWidth()); - if (r.length() / this.textBounds.getWidth() >= 0.95) { - horizontalSeparators.add(toFixed(r.getPosition() - this.areaTop)); - } - } - - List seps = findSeparatorsFromProjection(filter(getFirstDeriv(this.verticalProjection), 0.1f)); - - for (Integer foundSep: seps) { - for (Integer explicitSep: horizontalSeparators) { - if (Math.abs(toDouble(foundSep - explicitSep)) <= minRowHeight) { - foundShorter = true; - break; - } - } - if (!foundShorter) { - horizontalSeparators.add(foundSep); - } - foundShorter = false; - } - Collections.sort(horizontalSeparators); - float[] rv = new float[horizontalSeparators.size()]; - for (int i = 0; i < rv.length; i++) { - rv[i] = (float) toDouble(horizontalSeparators.get(i)); - } - return rv; - } - - private static List findSeparatorsFromProjection(float[] derivative) { - List separators = new ArrayList<>(); - Integer lastNeg = null; - float s; - boolean positiveSlope = false; - - // find separators based on histogram - for (int i = 0; i < derivative.length; i++) { - s = derivative[i]; - if (s > 0 && !positiveSlope) { - positiveSlope = true; - separators.add(lastNeg != null ? 
lastNeg : i); - } - else if (s < 0) { - lastNeg = i; - positiveSlope = false; - } - } - return separators; - } - - public static float[] smooth(float[] data, int kernelSize) { - float[] rv = new float[data.length]; - float s; - - for (int pass = 0; pass < 1; pass++) { - for (int i = 0; i < data.length; i++) { - s = 0; - for (int j = Math.max(0, i - kernelSize / 2); j < Math.min(i - + kernelSize / 2, data.length); j++) { - s += data[j]; - } - rv[i] = (float) Math.floor(s / kernelSize); - } - } - return rv; - } - - - /** - * Simple Low pass filter - */ - public static float[] filter(float[] data, float alpha) { - - float[] rv = new float[data.length]; - rv[0] = data[0]; - for (int i = 1; i < data.length; i++) { - rv[i] = rv[i-1] + alpha * (data[i] - rv[i-1]); - } - - return rv; - } - - public static float[] getAutocorrelation(float[] projection) { - float[] rv = new float[projection.length-1]; - for (int i = 1; i < projection.length - 1; i++) { - rv[i] = (projection[i] * projection[i-1]) / 100f; - } - return rv; - - } - - public static float[] getFirstDeriv(float[] projection) { - float[] rv = new float[projection.length]; - rv[0] = projection[1] - projection[0]; - for (int i = 1; i < projection.length - 1; i++) { - rv[i] = projection[i+1] - projection[i-1]; - } - rv[projection.length - 1] = projection[projection.length - 1] - projection[projection.length - 2]; - return rv; - } - - // pretty lame fixed precision math here - private static int toFixed(double value) { - return (int) Math.round(value * (Math.pow(10, DECIMAL_PLACES))); - } - - private static double toDouble(int value) { - return value / Math.pow(10, DECIMAL_PLACES); - } - -} diff --git a/src/main/java/technology/tabula/QuickSort.java b/src/main/java/technology/tabula/QuickSort.java deleted file mode 100644 index 03388a15..00000000 --- a/src/main/java/technology/tabula/QuickSort.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor 
license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package technology.tabula; - -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import java.util.RandomAccess; -import java.util.Stack; - -/** - * An implementation of Quicksort. - * - * @see wikipedia - * - * @author UWe Pachler - */ -public final class QuickSort { - - private QuickSort() { - // utility - } - - /** - * Sorts the given list according to natural order. - */ - public static > void sort(List list) { - sort(list, QuickSort.naturalOrder()); // JAVA_8 replace with Comparator.naturalOrder() (and cleanup) - } - - /** - * Sorts the given list using the given comparator. 
- */ - public static void sort(List list, Comparator comparator) { - if (list instanceof RandomAccess) { - quicksort(list, comparator); - } else { - List copy = new ArrayList<>(list); - quicksort(copy, comparator); - list.clear(); - list.addAll(copy); - } - } - - private static void quicksort(List list, Comparator cmp) { - Stack stack = new Stack<>(); - stack.push(0); - stack.push(list.size()); - while (!stack.isEmpty()) { - int right = stack.pop(); - int left = stack.pop(); - - if (right - left < 2) continue; - int p = left + ((right - left) / 2); - p = partition(list, cmp, p, left, right); - - stack.push(p + 1); - stack.push(right); - - stack.push(left); - stack.push(p); - } - } - - private static int partition(List list, Comparator cmp, int p, int start, int end) { - int l = start; - int h = end - 2; - T piv = list.get(p); - swap(list, p, end - 1); - - while (l < h) { - if (cmp.compare(list.get(l), piv) <= 0) l++; - else if (cmp.compare(piv, list.get(h)) <= 0) h--; - else swap(list, l, h); - } - int idx = h; - if (cmp.compare(list.get(h), piv) < 0) idx++; - swap(list, end - 1, idx); - return idx; - } - - private static void swap(List list, int i, int j) { - T tmp = list.get(i); - list.set(i, list.get(j)); - list.set(j, tmp); - } - - @SuppressWarnings({ "rawtypes", "unchecked" }) - private static final Comparator NATURAL_ORDER = new Comparator() { - @Override public int compare(Object l, Object r) { return ((Comparable) l).compareTo(r); } - }; - - @SuppressWarnings("unchecked") - private static > Comparator naturalOrder() { - return NATURAL_ORDER; - } - -} diff --git a/src/main/java/technology/tabula/Rectangle.java b/src/main/java/technology/tabula/Rectangle.java deleted file mode 100644 index b96fcd77..00000000 --- a/src/main/java/technology/tabula/Rectangle.java +++ /dev/null @@ -1,178 +0,0 @@ -package technology.tabula; - -import java.awt.geom.Point2D; -import java.awt.geom.Rectangle2D; -import java.util.Comparator; -import java.util.List; -import 
java.util.Locale; - -@SuppressWarnings("serial") -public class Rectangle extends Rectangle2D.Float { - - /** - * Ill-defined comparator, from when Rectangle was Comparable. - * - * @see PR 116 - * @deprecated with no replacement - */ - @Deprecated - public static final Comparator ILL_DEFINED_ORDER = new Comparator() { - @Override public int compare(Rectangle o1, Rectangle o2) { - if (o1.equals(o2)) return 0; - if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) { - return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1 - ? - java.lang.Double.compare(o1.getX(), o2.getX()) - : java.lang.Double.compare(o1.getX(), o2.getX()); - } else { - return java.lang.Float.compare(o1.getBottom(), o2.getBottom()); - } - } - }; - - protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f; - - public Rectangle() { - super(); - } - - public Rectangle(float top, float left, float width, float height) { - super(); - this.setRect(left, top, width, height); - } - - public int compareTo(Rectangle other) { - return ILL_DEFINED_ORDER.compare(this, other); - } - - // I'm bad at Java and need this for fancy sorting in - // technology.tabula.TextChunk. 
- public int isLtrDominant() { - return 0; - } - - public float getArea() { - return this.width * this.height; - } - - public float verticalOverlap(Rectangle other) { - return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); - } - - public boolean verticallyOverlaps(Rectangle other) { - return verticalOverlap(other) > 0; - } - - public float horizontalOverlap(Rectangle other) { - return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); - } - - public boolean horizontallyOverlaps(Rectangle other) { - return horizontalOverlap(other) > 0; - } - - public float verticalOverlapRatio(Rectangle other) { - float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop()); - - if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom() - && other.getBottom() <= this.getBottom()) { - rv = (other.getBottom() - this.getTop()) / delta; - } else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom() - && this.getBottom() <= other.getBottom()) { - rv = (this.getBottom() - other.getTop()) / delta; - } else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom() - && other.getBottom() <= this.getBottom()) { - rv = (other.getBottom() - other.getTop()) / delta; - } else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom() - && this.getBottom() <= other.getBottom()) { - rv = (this.getBottom() - this.getTop()) / delta; - } - - return rv; - - } - - public float overlapRatio(Rectangle other) { - double intersectionWidth = Math.max(0, - Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft())); - double intersectionHeight = Math.max(0, - Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop())); - double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight); - double unionArea = this.getArea() + 
other.getArea() - intersectionArea; - - return (float) (intersectionArea / unionArea); - } - - public Rectangle merge(Rectangle other) { - this.setRect(this.createUnion(other)); - return this; - } - - public float getTop() { - return (float) this.getMinY(); - } - - public void setTop(float top) { - float deltaHeight = top - this.y; - this.setRect(this.x, top, this.width, this.height - deltaHeight); - } - - public float getRight() { - return (float) this.getMaxX(); - } - - public void setRight(float right) { - this.setRect(this.x, this.y, right - this.x, this.height); - } - - public float getLeft() { - return (float) this.getMinX(); - } - - public void setLeft(float left) { - float deltaWidth = left - this.x; - this.setRect(left, this.y, this.width - deltaWidth, this.height); - } - - public float getBottom() { - return (float) this.getMaxY(); - } - - public void setBottom(float bottom) { - this.setRect(this.x, this.y, this.width, bottom - this.y); - } - - public Point2D[] getPoints() { - return new Point2D[] { new Point2D.Float(this.getLeft(), this.getTop()), - new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()), - new Point2D.Float(this.getLeft(), this.getBottom()) }; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - String s = super.toString(); - sb.append(s.substring(0, s.length() - 1)); - sb.append(String.format(Locale.US, ",bottom=%f,right=%f]", this.getBottom(), this.getRight())); - return sb.toString(); - } - - /** - * @param rectangles - * @return minimum bounding box that contains all the rectangles - */ - public static Rectangle boundingBoxOf(List rectangles) { - float minx = java.lang.Float.MAX_VALUE; - float miny = java.lang.Float.MAX_VALUE; - float maxx = java.lang.Float.MIN_VALUE; - float maxy = java.lang.Float.MIN_VALUE; - - for (Rectangle r : rectangles) { - minx = (float) Math.min(r.getMinX(), minx); - miny = (float) Math.min(r.getMinY(), miny); - maxx = 
(float) Math.max(r.getMaxX(), maxx); - maxy = (float) Math.max(r.getMaxY(), maxy); - } - return new Rectangle(miny, minx, maxx - minx, maxy - miny); - } - -} diff --git a/src/main/java/technology/tabula/RectangleSpatialIndex.java b/src/main/java/technology/tabula/RectangleSpatialIndex.java deleted file mode 100644 index 0e942545..00000000 --- a/src/main/java/technology/tabula/RectangleSpatialIndex.java +++ /dev/null @@ -1,47 +0,0 @@ -package technology.tabula; - -import java.util.ArrayList; -import java.util.List; - -import org.locationtech.jts.geom.Envelope; -import org.locationtech.jts.index.strtree.STRtree; - -public class RectangleSpatialIndex { - - - private final STRtree si = new STRtree(); - private final List rectangles = new ArrayList<>(); - - public void add(T te) { - rectangles.add(te); - si.insert(new Envelope(te.getLeft(), te.getRight(), te.getBottom(), te.getTop()), te); - } - - public List contains(Rectangle r) { - List intersection = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); - List rv = new ArrayList(); - - for (T ir: intersection) { - if (r.contains(ir)) { - rv.add(ir); - } - } - - Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER); - return rv; - } - - public List intersects(Rectangle r) { - return si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); - } - - /** - * Minimum bounding box of all the Rectangles contained on this RectangleSpatialIndex - * - * @return a Rectangle - */ - public Rectangle getBounds() { - return Rectangle.boundingBoxOf(rectangles); - } - -} diff --git a/src/main/java/technology/tabula/RectangularTextContainer.java b/src/main/java/technology/tabula/RectangularTextContainer.java deleted file mode 100644 index 934b5f13..00000000 --- a/src/main/java/technology/tabula/RectangularTextContainer.java +++ /dev/null @@ -1,51 +0,0 @@ -package technology.tabula; - -import java.util.ArrayList; -import java.util.List; - -@SuppressWarnings("serial") -public class 
RectangularTextContainer extends Rectangle implements HasText { - - protected List textElements = new ArrayList<>(); - - protected RectangularTextContainer(float top, float left, float width, float height) { - super(top, left, width, height); - } - - public RectangularTextContainer merge(RectangularTextContainer other) { - if (compareTo(other) < 0) { - this.getTextElements().addAll(other.getTextElements()); - } else { - this.getTextElements().addAll(0, other.getTextElements()); - } - super.merge(other); - return this; - } - - public List getTextElements() { - return textElements; - } - - public void setTextElements(List textElements) { - this.textElements = textElements; - } - - @Override - public String getText() { - throw new UnsupportedOperationException(); - } - - @Override - public String getText(boolean useLineReturns) { - throw new UnsupportedOperationException(); - } - - @Override public String toString() { - StringBuilder sb = new StringBuilder(); - String s = super.toString(); - sb.append(s.substring(0, s.length() - 1)); - sb.append(String.format(",text=%s]", this.getText() == null ? 
"null" : "\"" + this.getText() + "\"")); - return sb.toString(); - } - -} diff --git a/src/main/java/technology/tabula/Ruling.java b/src/main/java/technology/tabula/Ruling.java deleted file mode 100644 index 213ce87f..00000000 --- a/src/main/java/technology/tabula/Ruling.java +++ /dev/null @@ -1,442 +0,0 @@ -package technology.tabula; - -import java.awt.geom.Line2D; -import java.awt.geom.Point2D; -import java.awt.geom.Rectangle2D; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.Formatter; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.TreeMap; - -@SuppressWarnings("serial") -public class Ruling extends Line2D.Float { - - private static int PERPENDICULAR_PIXEL_EXPAND_AMOUNT = 2; - private static int COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT = 1; - private enum SOType { VERTICAL, HRIGHT, HLEFT } - - public Ruling(float top, float left, float width, float height) { - this(new Point2D.Float(left, top), new Point2D.Float(left+width, top+height)); - } - - public Ruling(Point2D p1, Point2D p2) { - super(p1, p2); - this.normalize(); - } - - /** - * Normalize almost horizontal or almost vertical lines - */ - public void normalize() { - - double angle = this.getAngle(); - if (Utils.within(angle, 0, 1) || Utils.within(angle, 180, 1)) { // almost horizontal - this.setLine(this.x1, this.y1, this.x2, this.y1); - } - else if (Utils.within(angle, 90, 1) || Utils.within(angle, 270, 1)) { // almost vertical - this.setLine(this.x1, this.y1, this.x1, this.y2); - } - } - - public boolean vertical() { - return this.length() > 0 && Utils.feq(this.x1, this.x2); //diff < ORIENTATION_CHECK_THRESHOLD; - } - - public boolean horizontal() { - return this.length() > 0 && Utils.feq(this.y1, this.y2); //diff < ORIENTATION_CHECK_THRESHOLD; - } - - public boolean oblique() { - return !(this.vertical() || this.horizontal()); - } - - // attributes that make sense only for non-oblique lines - // 
these are used to have a single collapse method (in page, currently) - - public float getPosition() { - if (this.oblique()) { - throw new UnsupportedOperationException(); - } - return this.vertical() ? this.getLeft() : this.getTop(); - } - - public void setPosition(float v) { - if (this.oblique()) { - throw new UnsupportedOperationException(); - } - if (this.vertical()) { - this.setLeft(v); - this.setRight(v); - } - else { - this.setTop(v); - this.setBottom(v); - } - } - - public float getStart() { - if (this.oblique()) { - throw new UnsupportedOperationException(); - } - return this.vertical() ? this.getTop() : this.getLeft(); - } - - public void setStart(float v) { - if (this.oblique()) { - throw new UnsupportedOperationException(); - } - if (this.vertical()) { - this.setTop(v); - } - else { - this.setLeft(v); - } - } - - public float getEnd() { - if (this.oblique()) { - throw new UnsupportedOperationException(); - } - return this.vertical() ? this.getBottom() : this.getRight(); - } - - public void setEnd(float v) { - if (this.oblique()) { - throw new UnsupportedOperationException(); - } - if (this.vertical()) { - this.setBottom(v); - } - else { - this.setRight(v); - } - } - - private void setStartEnd(float start, float end) { - if (this.oblique()) { - throw new UnsupportedOperationException(); - } - if (this.vertical()) { - this.setTop(start); - this.setBottom(end); - } - else { - this.setLeft(start); - this.setRight(end); - } - } - - // ----- - - public boolean perpendicularTo(Ruling other) { - return this.vertical() == other.horizontal(); - } - - public boolean colinear(Point2D point) { - return point.getX() >= this.x1 - && point.getX() <= this.x2 - && point.getY() >= this.y1 - && point.getY() <= this.y2; - } - - // if the lines we're comparing are colinear or parallel, we expand them by a only 1 pixel, - // because the expansions are additive - // (e.g. 
two vertical lines, at x = 100, with one having y2 of 98 and the other having y1 of 102 would - // erroneously be said to nearlyIntersect if they were each expanded by 2 (since they'd both terminate at 100). - // By default the COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT is only 1 so the total expansion is 2. - // A total expansion amount of 2 is empirically verified to work sometimes. It's not a magic number from any - // source other than a little bit of experience.) - public boolean nearlyIntersects(Ruling another) { - return this.nearlyIntersects(another, COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT); - } - - public boolean nearlyIntersects(Ruling another, int colinearOrParallelExpandAmount) { - if (this.intersectsLine(another)) { - return true; - } - - boolean rv = false; - - if (this.perpendicularTo(another)) { - rv = this.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT).intersectsLine(another); - } - else { - rv = this.expand(colinearOrParallelExpandAmount) - .intersectsLine(another.expand(colinearOrParallelExpandAmount)); - } - - return rv; - } - - public double length() { - return Math.sqrt(Math.pow(this.x1 - this.x2, 2) + Math.pow(this.y1 - this.y2, 2)); - } - - public Ruling intersect(Rectangle2D clip) { - Line2D.Float clipee = (Line2D.Float) this.clone(); - boolean clipped = new CohenSutherlandClipping(clip).clip(clipee); - - if (clipped) { - return new Ruling(clipee.getP1(), clipee.getP2()); - } - else { - return this; - } - } - - public Ruling expand(float amount) { - Ruling r = (Ruling) this.clone(); - r.setStart(this.getStart() - amount); - r.setEnd(this.getEnd() + amount); - return r; - } - - public Point2D intersectionPoint(Ruling other) { - Ruling this_l = this.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT); - Ruling other_l = other.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT); - Ruling horizontal, vertical; - - if (!this_l.intersectsLine(other_l)) { - return null; - } - - if (this_l.horizontal() && other_l.vertical()) { - horizontal = this_l; vertical = other_l; - } 
- else if (this_l.vertical() && other_l.horizontal()) { - vertical = this_l; horizontal = other_l; - } - else { - throw new IllegalArgumentException("lines must be orthogonal, vertical and horizontal"); - } - return new Point2D.Float(vertical.getLeft(), horizontal.getTop()); - } - - @Override - public boolean equals(Object other) { - if (this == other) - return true; - - if (!(other instanceof Ruling)) - return false; - - Ruling o = (Ruling) other; - return this.getP1().equals(o.getP1()) && this.getP2().equals(o.getP2()); - } - - public float getTop() { - return this.y1; - } - - public void setTop(float v) { - setLine(this.getLeft(), v, this.getRight(), this.getBottom()); - } - - public float getLeft() { - return this.x1; - } - - public void setLeft(float v) { - setLine(v, this.getTop(), this.getRight(), this.getBottom()); - } - - public float getBottom() { - return this.y2; - } - - public void setBottom(float v) { - setLine(this.getLeft(), this.getTop(), this.getRight(), v); - } - - public float getRight() { - return this.x2; - } - - public void setRight(float v) { - setLine(this.getLeft(), this.getTop(), v, this.getBottom()); - } - - public float getWidth() { - return this.getRight() - this.getLeft(); - } - - public float getHeight() { - return this.getBottom() - this.getTop(); - } - - public double getAngle() { - double angle = Math.toDegrees(Math.atan2(this.getP2().getY() - this.getP1().getY(), - this.getP2().getX() - this.getP1().getX())); - - if (angle < 0) { - angle += 360; - } - return angle; - } - - - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - Formatter formatter = new Formatter(sb); - String rv = formatter.format(Locale.US, "%s[x1=%f y1=%f x2=%f y2=%f]", this.getClass().toString(), this.x1, this.y1, this.x2, this.y2).toString(); - formatter.close(); - return rv; - } - - public static List cropRulingsToArea(List rulings, Rectangle2D area) { - ArrayList rv = new ArrayList<>(); - for (Ruling r : rulings) { - if 
(r.intersects(area)) { - rv.add(r.intersect(area)); - } - } - return rv; - } - - // log(n) implementation of find_intersections - // based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf - public static Map findIntersections(List horizontals, List verticals) { - - class SortObject { - protected SOType type; - protected float position; - protected Ruling ruling; - - public SortObject(SOType type, float position, Ruling ruling) { - this.type = type; - this.position = position; - this.ruling = ruling; - } - } - - List sos = new ArrayList<>(); - - TreeMap tree = new TreeMap<>(new Comparator() { - @Override - public int compare(Ruling o1, Ruling o2) { - return java.lang.Double.compare(o1.getTop(), o2.getTop()); - }}); - - TreeMap rv = new TreeMap<>(new Comparator() { - @Override - public int compare(Point2D o1, Point2D o2) { - if (o1.getY() > o2.getY()) return 1; - if (o1.getY() < o2.getY()) return -1; - if (o1.getX() > o2.getX()) return 1; - if (o1.getX() < o2.getX()) return -1; - return 0; - } - }); - - for (Ruling h : horizontals) { - sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h)); - sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h)); - } - - for (Ruling v : verticals) { - sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v)); - } - - Collections.sort(sos, new Comparator() { - @Override - public int compare(SortObject a, SortObject b) { - int rv; - if (Utils.feq(a.position, b.position)) { - if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) { - rv = 1; - } - else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) { - rv = -1; - } - else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) { - rv = -1; - } - else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) { - rv = 1; - } - else { - rv = java.lang.Double.compare(a.position, b.position); - } - } - else { - return java.lang.Double.compare(a.position, b.position); - } - return rv; - } - 
}); - - for (SortObject so : sos) { - switch(so.type) { - case VERTICAL: - for (Map.Entry h : tree.entrySet()) { - Point2D i = h.getKey().intersectionPoint(so.ruling); - if (i == null) { - continue; - } - rv.put(i, - new Ruling[] { h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT), - so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT) }); - } - break; - case HRIGHT: - tree.remove(so.ruling); - break; - case HLEFT: - tree.put(so.ruling, true); - break; - } - } - - return rv; - - } - - public static List collapseOrientedRulings(List lines) { - return collapseOrientedRulings(lines, COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT); - } - - public static List collapseOrientedRulings(List lines, int expandAmount) { - ArrayList rv = new ArrayList<>(); - Collections.sort(lines, new Comparator() { - @Override - public int compare(Ruling a, Ruling b) { - final float diff = a.getPosition() - b.getPosition(); - return java.lang.Float.compare(diff == 0 ? a.getStart() - b.getStart() : diff, 0f); - } - }); - - for (Ruling next_line : lines) { - Ruling last = rv.isEmpty() ? null : rv.get(rv.size() - 1); - // if current line colinear with next, and are "close enough": expand current line - if (last != null && Utils.feq(next_line.getPosition(), last.getPosition()) && last.nearlyIntersects(next_line, expandAmount)) { - final float lastStart = last.getStart(); - final float lastEnd = last.getEnd(); - - final boolean lastFlipped = lastStart > lastEnd; - final boolean nextFlipped = next_line.getStart() > next_line.getEnd(); - - boolean differentDirections = nextFlipped != lastFlipped; - float nextS = differentDirections ? next_line.getEnd() : next_line.getStart(); - float nextE = differentDirections ? next_line.getStart() : next_line.getEnd(); - - final float newStart = lastFlipped ? Math.max(nextS, lastStart) : Math.min(nextS, lastStart); - final float newEnd = lastFlipped ? 
Math.min(nextE, lastEnd) : Math.max(nextE, lastEnd); - last.setStartEnd(newStart, newEnd); - assert !last.oblique(); - } - else if (next_line.length() == 0) { - continue; - } - else { - rv.add(next_line); - } - } - return rv; - } -} diff --git a/src/main/java/technology/tabula/Table.java b/src/main/java/technology/tabula/Table.java deleted file mode 100644 index 1e73bedf..00000000 --- a/src/main/java/technology/tabula/Table.java +++ /dev/null @@ -1,105 +0,0 @@ -package technology.tabula; - -import java.util.ArrayList; -import java.util.List; -import java.util.TreeMap; - -import technology.tabula.extractors.ExtractionAlgorithm; - -@SuppressWarnings("serial") -public class Table extends Rectangle { - - public static final Table empty() { return new Table(""); } - - private Table(String extractionMethod) { - this.extractionMethod = extractionMethod; - } - - public Table(ExtractionAlgorithm extractionAlgorithm) { - this(extractionAlgorithm.toString()); - } - - private final String extractionMethod; - - private int rowCount = 0; - private int colCount = 0; - private int pageNumber = 0; - - /* visible for testing */ final TreeMap cells = new TreeMap<>(); - - public int getRowCount() { return rowCount; } - public int getColCount() { return colCount; } - public int getPageNumber() { return pageNumber; } - public void setPageNumber(int pageNumber) { this.pageNumber = pageNumber; } - - public String getExtractionMethod() { return extractionMethod; } - - public void add(RectangularTextContainer chunk, int row, int col) { - this.merge(chunk); - - rowCount = Math.max(rowCount, row + 1); - colCount = Math.max(colCount, col + 1); - - CellPosition cp = new CellPosition(row, col); - - RectangularTextContainer old = cells.get(cp); - if (old != null) chunk.merge(old); - cells.put(cp, chunk); - - this.memoizedRows = null; - } - - private List> memoizedRows = null; - - public List> getRows() { - if (this.memoizedRows == null) this.memoizedRows = computeRows(); - return 
this.memoizedRows; - } - - private List> computeRows() { - List> rows = new ArrayList<>(); - for (int i = 0; i < rowCount; i++) { - List lastRow = new ArrayList<>(); - rows.add(lastRow); - for (int j = 0; j < colCount; j++) { - RectangularTextContainer cell = cells.get(new CellPosition(i,j)); // JAVA_8 use getOrDefault() - lastRow.add(cell != null ? cell : TextChunk.EMPTY); - } - } - return rows; - } - - public RectangularTextContainer getCell(int i, int j) { - RectangularTextContainer cell = cells.get(new CellPosition(i,j)); // JAVA_8 use getOrDefault() - return cell != null ? cell : TextChunk.EMPTY; - } - -} - -class CellPosition implements Comparable { - - CellPosition(int row, int col) { - this.row = row; - this.col = col; - } - - final int row, col; - - @Override public int hashCode() { - return row + 101 * col; - } - - @Override public boolean equals(Object obj) { - if (this == obj) return true; - if (obj == null) return false; - if (getClass() != obj.getClass()) return false; - CellPosition other = (CellPosition) obj; - return row == other.row && col == other.col; - } - - @Override public int compareTo(CellPosition other) { - int rowdiff = row - other.row; - return rowdiff != 0 ? 
rowdiff : col - other.col; - } - -} diff --git a/src/main/java/technology/tabula/TableWithRulingLines.java b/src/main/java/technology/tabula/TableWithRulingLines.java deleted file mode 100644 index cde0ce72..00000000 --- a/src/main/java/technology/tabula/TableWithRulingLines.java +++ /dev/null @@ -1,94 +0,0 @@ -package technology.tabula; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.Iterator; -import java.util.List; - -import technology.tabula.extractors.ExtractionAlgorithm; - -@SuppressWarnings("serial") -public class TableWithRulingLines extends Table { - - List verticalRulings, horizontalRulings; - RectangleSpatialIndex si = new RectangleSpatialIndex<>(); - - public TableWithRulingLines(Rectangle area, List cells, List horizontalRulings, List verticalRulings, ExtractionAlgorithm extractionAlgorithm, int pageNumber) { - super(extractionAlgorithm); - this.setRect(area); - this.verticalRulings = verticalRulings; - this.horizontalRulings = horizontalRulings; - this.addCells(cells); - this.setPageNumber(pageNumber); - } - - private void addCells(List cells) { - - if (cells.isEmpty()) { - return; - } - - for (Cell ce: cells) { - si.add(ce); - } - - List> rowsOfCells = rowsOfCells(cells); - for (int i = 0; i < rowsOfCells.size(); i++) { - List row = rowsOfCells.get(i); - Iterator rowCells = row.iterator(); - Cell cell = rowCells.next(); - List> others = rowsOfCells( - si.contains( - new Rectangle(cell.getBottom(), si.getBounds().getLeft(), cell.getLeft() - si.getBounds().getLeft(), - si.getBounds().getBottom() - cell.getBottom()) - )); - int startColumn = 0; - for (List r: others) { - startColumn = Math.max(startColumn, r.size()); - } - this.add(cell, i, startColumn++); - while (rowCells.hasNext()) { - this.add(rowCells.next(), i, startColumn++); - } - } - } - - private static List> rowsOfCells(List cells) { - Cell c; - float lastTop; - List> rv = new ArrayList<>(); - List lastRow; - - if (cells.isEmpty()) 
{ - return rv; - } - - Collections.sort(cells, new Comparator() { - @Override - public int compare(Cell arg0, Cell arg1) { - return java.lang.Double.compare(arg0.getTop(), arg1.getTop()); - } - }); - - - Iterator iter = cells.iterator(); - c = iter.next(); - lastTop = c.getTop(); - lastRow = new ArrayList<>(); - lastRow.add(c); - rv.add(lastRow); - - while (iter.hasNext()) { - c = iter.next(); - if (!Utils.feq(c.getTop(), lastTop)) { - lastRow = new ArrayList<>(); - rv.add(lastRow); - } - lastRow.add(c); - lastTop = c.getTop(); - } - return rv; - } - -} diff --git a/src/main/java/technology/tabula/TextChunk.java b/src/main/java/technology/tabula/TextChunk.java deleted file mode 100644 index 9f5adbd5..00000000 --- a/src/main/java/technology/tabula/TextChunk.java +++ /dev/null @@ -1,363 +0,0 @@ -package technology.tabula; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Collections; -import java.util.HashMap; -import java.text.Normalizer; - -@SuppressWarnings("serial") -public class TextChunk extends RectangularTextContainer { - public static final TextChunk EMPTY = new TextChunk(0, 0, 0, 0); -// List textElements = new ArrayList<>(); - - public TextChunk(float top, float left, float width, float height) { - super(top, left, width, height); - } - - public TextChunk(TextElement textElement) { - super(textElement.y, textElement.x, textElement.width, textElement.height); - this.add(textElement); - } - - public TextChunk(List textElements) { - this(textElements.get(0)); - for (int i = 1; i < textElements.size(); i++) { - this.add(textElements.get(i)); - } - } - - private enum DirectionalityOptions { - LTR, NONE, RTL - } - - // I hate Java so bad. - // we're making this HashMap static! 
which requires really funky initialization per http://stackoverflow.com/questions/6802483/how-to-directly-initialize-a-hashmap-in-a-literal-way/6802502#6802502 - private static HashMap directionalities; - - static { - directionalities = new HashMap<>(); - // BCT = bidirectional character type - directionalities.put(java.lang.Character.DIRECTIONALITY_ARABIC_NUMBER, DirectionalityOptions.LTR); // Weak BCT "AN" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_BOUNDARY_NEUTRAL, DirectionalityOptions.NONE); // Weak BCT "BN" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, DirectionalityOptions.LTR); // Weak BCT "CS" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_EUROPEAN_NUMBER, DirectionalityOptions.LTR); // Weak BCT "EN" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, DirectionalityOptions.LTR); // Weak BCT "ES" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, DirectionalityOptions.LTR); // Weak BCT "ET" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT, DirectionalityOptions.LTR); // Strong BCT "L" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, DirectionalityOptions.LTR); // Strong BCT "LRE" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, DirectionalityOptions.LTR); // Strong BCT "LRO" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_NONSPACING_MARK, DirectionalityOptions.NONE); // Weak BCT "NSM" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_OTHER_NEUTRALS, DirectionalityOptions.NONE); // Neutral BCT "ON" in the Unicode specification. 
- directionalities.put(java.lang.Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR, DirectionalityOptions.NONE); // Neutral BCT "B" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, DirectionalityOptions.NONE); // Weak BCT "PDF" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_RIGHT_TO_LEFT, DirectionalityOptions.RTL); // Strong BCT "R" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, DirectionalityOptions.RTL); // Strong BCT "AL" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, DirectionalityOptions.RTL); // Strong BCT "RLE" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, DirectionalityOptions.RTL); // Strong BCT "RLO" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_SEGMENT_SEPARATOR, DirectionalityOptions.RTL); // Neutral BCT "S" in the Unicode specification. - directionalities.put(java.lang.Character.DIRECTIONALITY_UNDEFINED, DirectionalityOptions.NONE); // Undefined BCT. - directionalities.put(java.lang.Character.DIRECTIONALITY_WHITESPACE, DirectionalityOptions.NONE); // Neutral BCT "WS" in the Unicode specification. - } - - /** - * Splits a TextChunk into N TextChunks, where each chunk is of a single directionality, and - * then reverse the RTL ones. - * what we're doing here is *reversing* the Unicode bidi algorithm - * in the language of that algorithm, each chunk is a (maximal) directional run. 
- * We attach whitespace to the beginning of non-RTL - **/ - public TextChunk groupByDirectionality(Boolean isLtrDominant) { - if (this.getTextElements().size() <= 0) { - throw new IllegalArgumentException(); - } - - ArrayList> chunks = new ArrayList<>(); - ArrayList buff = new ArrayList<>(); - DirectionalityOptions buffDirectionality = DirectionalityOptions.NONE; // the directionality of the characters in buff; - - for (TextElement te : this.getTextElements()) { - //TODO: we need to loop over the textelement characters - // because it is possible for a textelement to contain multiple characters? - - - // System.out.println(te.getText() + " is " + Character.getDirectionality(te.getText().charAt(0) ) + " " + directionalities.get(Character.getDirectionality(te.getText().charAt(0) ))); - if (buff.size() == 0) { - buff.add(te); - buffDirectionality = directionalities.get(Character.getDirectionality(te.getText().charAt(0))); - } else { - if (buffDirectionality == DirectionalityOptions.NONE) { - buffDirectionality = directionalities.get(Character.getDirectionality(te.getText().charAt(0))); - } - DirectionalityOptions teDirectionality = directionalities.get(Character.getDirectionality(te.getText().charAt(0))); - - if (teDirectionality == buffDirectionality || teDirectionality == DirectionalityOptions.NONE) { - if (Character.getDirectionality(te.getText().charAt(0)) == java.lang.Character.DIRECTIONALITY_WHITESPACE && (buffDirectionality == (isLtrDominant ? 
DirectionalityOptions.RTL : DirectionalityOptions.LTR))) { - buff.add(0, te); - } else { - buff.add(te); - } - } else { - // finish this chunk - if (buffDirectionality == DirectionalityOptions.RTL) { - Collections.reverse(buff); - } - chunks.add(buff); - - // and start a new one - buffDirectionality = directionalities.get(Character.getDirectionality(te.getText().charAt(0))); - buff = new ArrayList<>(); - buff.add(te); - } - } - } - if (buffDirectionality == DirectionalityOptions.RTL) { - Collections.reverse(buff); - } - chunks.add(buff); - ArrayList everything = new ArrayList<>(); - if (!isLtrDominant) { - Collections.reverse(chunks); - } - for (ArrayList group : chunks) { - everything.addAll(group); - } - return new TextChunk(everything); - } - - @Override public int isLtrDominant() { - int ltrCnt = 0; - int rtlCnt = 0; - for (int i = 0; i < this.getTextElements().size(); i++) { - String elementText = this.getTextElements().get(i).getText(); - for (int j = 0; j < elementText.length(); j++) { - byte dir = Character.getDirectionality(elementText.charAt(j)); - if ((dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT) || - (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING) || - (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE)) { - ltrCnt++; - } else if ((dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT) || - (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC) || - (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING) || - (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE)) { - rtlCnt++; - } - } - } - return java.lang.Integer.compare(ltrCnt, rtlCnt); // 1 is LTR, 0 is neutral, -1 is RTL - } - - - public TextChunk merge(TextChunk other) { - super.merge(other); - return this; - } - - public void add(TextElement textElement) { - this.textElements.add(textElement); - this.merge(textElement); - } - - public void add(List elements) { - for (TextElement te : elements) { - this.add(te); - } - } - - @Override - public String getText() { - if 
(this.textElements.size() == 0) { - return ""; - } - - StringBuilder sb = new StringBuilder(); - for (TextElement te : this.textElements) { - sb.append(te.getText()); - } - return Normalizer.normalize(sb.toString(), Normalizer.Form.NFKC).trim(); - } - - @Override - public String getText(boolean useLineReturns) { - return getText(); - } - - /** - * Returns true if text contained in this TextChunk is the same repeated character - */ - public boolean isSameChar(Character c) { - return isSameChar(new Character[]{c}); - } - - public boolean isSameChar(Character[] c) { - String s = this.getText(); - List chars = Arrays.asList(c); - for (int i = 0; i < s.length(); i++) { - if (!chars.contains(s.charAt(i))) { - return false; - } - } - return true; - } - - /** - * Splits a TextChunk in two, at the position of the i-th TextElement - */ - public TextChunk[] splitAt(int i) { - if (i < 1 || i >= this.getTextElements().size()) { - throw new IllegalArgumentException(); - } - - return new TextChunk[]{ - new TextChunk(this.getTextElements().subList(0, i)), - new TextChunk(this.getTextElements().subList(i, this.getTextElements().size())) - }; - } - - /** - * Removes runs of identical TextElements in this TextChunk - * For example, if the TextChunk contains this string of characters: "1234xxxxx56xx" - * and c == 'x' and minRunLength == 4, this method will return a list of TextChunk - * such that: ["1234", "56xx"] - */ - public List squeeze(Character c, int minRunLength) { - Character currentChar, lastChar = null; - int subSequenceLength = 0, subSequenceStart = 0; - TextChunk[] t; - List rv = new ArrayList<>(); - - for (int i = 0; i < this.getTextElements().size(); i++) { - TextElement textElement = this.getTextElements().get(i); - String text = textElement.getText(); - if (text.length() > 1) { - currentChar = text.trim().charAt(0); - } else { - currentChar = text.charAt(0); - } - - - if (lastChar != null && currentChar.equals(c) && lastChar.equals(currentChar)) { - 
subSequenceLength++; - } else { - if (((lastChar != null && !lastChar.equals(currentChar)) || i + 1 == this.getTextElements().size()) && subSequenceLength >= minRunLength) { - - if (subSequenceStart == 0 && subSequenceLength <= this.getTextElements().size() - 1) { - t = this.splitAt(subSequenceLength); - } else { - t = this.splitAt(subSequenceStart); - rv.add(t[0]); - } - rv.addAll(t[1].squeeze(c, minRunLength)); // Lo and behold, recursion. - break; - - } - subSequenceLength = 1; - subSequenceStart = i; - } - lastChar = currentChar; - } - - - if (rv.isEmpty()) { // no splits occurred, hence this.squeeze() == [this] - if (subSequenceLength >= minRunLength && subSequenceLength < this.textElements.size()) { - TextChunk[] chunks = this.splitAt(subSequenceStart); - rv.add(chunks[0]); - } else { - rv.add(this); - } - } - - return rv; - - } - - - @Override - public int hashCode() { - final int prime = 31; - int result = super.hashCode(); - result = prime * result - + ((textElements == null) ? 0 : textElements.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (!super.equals(obj)) - return false; - if (getClass() != obj.getClass()) - return false; - TextChunk other = (TextChunk) obj; - if (textElements == null) { - return other.textElements == null; - } else return textElements.equals(other.textElements); - } - - public static boolean allSameChar(List textChunks) { - /* the previous, far more elegant version of this method failed when there was an empty TextChunk in textChunks. - * so I rewrote it in an ugly way. but it works! - * it would be good for this to get rewritten eventually - * the purpose is basically just to return true iff there are 2+ TextChunks and they're identical. 
- * -Jeremy 5/13/2016 - */ - - if (textChunks.size() == 1) return false; - boolean hasHadAtLeastOneNonEmptyTextChunk = false; - char first = '\u0000'; - for (TextChunk tc : textChunks) { - if (tc.getText().length() == 0) { - continue; - } - if (first == '\u0000') { - first = tc.getText().charAt(0); - } else { - hasHadAtLeastOneNonEmptyTextChunk = true; - if (!tc.isSameChar(first)) return false; - } - } - return hasHadAtLeastOneNonEmptyTextChunk; - } - - public static List groupByLines(List textChunks) { - List lines = new ArrayList<>(); - - if (textChunks.size() == 0) { - return lines; - } - - float bbwidth = Rectangle.boundingBoxOf(textChunks).width; - - Line l = new Line(); - l.addTextChunk(textChunks.get(0)); - textChunks.remove(0); - lines.add(l); - - Line last = lines.get(lines.size() - 1); - for (TextChunk te : textChunks) { - if (last.verticalOverlapRatio(te) < 0.1) { - if (last.width / bbwidth > 0.9 && TextChunk.allSameChar(last.getTextElements())) { - lines.remove(lines.size() - 1); - } - lines.add(new Line()); - last = lines.get(lines.size() - 1); - } - last.addTextChunk(te); - } - - if (last.width / bbwidth > 0.9 && TextChunk.allSameChar(last.getTextElements())) { - lines.remove(lines.size() - 1); - } - - List rv = new ArrayList<>(lines.size()); - - for (Line line : lines) { - rv.add(Line.removeRepeatedCharacters(line, ' ', 3)); - } - - return rv; - } - -} diff --git a/src/main/java/technology/tabula/TextElement.java b/src/main/java/technology/tabula/TextElement.java deleted file mode 100644 index a0f24fa0..00000000 --- a/src/main/java/technology/tabula/TextElement.java +++ /dev/null @@ -1,277 +0,0 @@ -package technology.tabula; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.pdfbox.pdmodel.font.PDFont; - -@SuppressWarnings("serial") -public class TextElement extends Rectangle implements HasText { - - private final String text; - private final PDFont font; - private float fontSize; - private float widthOfSpace, dir; - private 
static final float AVERAGE_CHAR_TOLERANCE = 0.3f; - - public TextElement(float y, float x, float width, float height, - PDFont font, float fontSize, String c, float widthOfSpace) { - this(y, x, width, height, font, fontSize, c, widthOfSpace, 0f); - } - - public TextElement(float y, float x, float width, float height, - PDFont font, float fontSize, String c, float widthOfSpace, float dir) { - super(); - this.setRect(x, y, width, height); - this.text = c; - this.widthOfSpace = widthOfSpace; - this.fontSize = fontSize; - this.font = font; - this.dir = dir; - } - - @Override - public String getText() { - return text; - } - - @Override - public String getText(boolean useLineReturns) { - return text; - } - - public float getDirection() { - return dir; - } - - public float getWidthOfSpace() { - return widthOfSpace; - } - - public PDFont getFont() { - return font; - } - - public float getFontSize() { - return fontSize; - } - - @Override public String toString() { - StringBuilder sb = new StringBuilder(); - String s = super.toString(); - sb.append(s.substring(0, s.length() - 1)); - sb.append(String.format(",text=\"%s\"]", this.getText())); - return sb.toString(); - } - - @Override - public int hashCode() { - final int prime = 31; - int result = super.hashCode(); - result = prime * result + java.lang.Float.floatToIntBits(dir); - result = prime * result + ((font == null) ? 0 : font.hashCode()); - result = prime * result + java.lang.Float.floatToIntBits(fontSize); - result = prime * result + ((text == null) ? 
0 : text.hashCode()); - result = prime * result + java.lang.Float.floatToIntBits(widthOfSpace); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (!super.equals(obj)) - return false; - if (getClass() != obj.getClass()) - return false; - TextElement other = (TextElement) obj; - if (java.lang.Float.floatToIntBits(dir) != java.lang.Float - .floatToIntBits(other.dir)) - return false; - if (font == null) { - if (other.font != null) - return false; - } else if (!font.equals(other.font)) - return false; - if (java.lang.Float.floatToIntBits(fontSize) != java.lang.Float - .floatToIntBits(other.fontSize)) - return false; - if (text == null) { - if (other.text != null) - return false; - } else if (!text.equals(other.text)) - return false; - return java.lang.Float.floatToIntBits(widthOfSpace) == java.lang.Float - .floatToIntBits(other.widthOfSpace); - } - - public static List mergeWords(List textElements) { - return mergeWords(textElements, new ArrayList()); - } - - /** - * heuristically merge a list of TextElement into a list of TextChunk - * ported from from PDFBox's PDFTextStripper.writePage, with modifications. - * Here be dragons - */ - public static List mergeWords(List textElements, List verticalRulings) { - - List textChunks = new ArrayList<>(); - - if (textElements.isEmpty()) { - return textChunks; - } - - // it's a problem that this `remove` is side-effecty - // other things depend on `textElements` and it can sometimes lead to the first textElement in textElement - // not appearing in the final output because it's been removed here. 
- // https://github.com/tabulapdf/tabula-java/issues/78 - List copyOfTextElements = new ArrayList<>(textElements); - textChunks.add(new TextChunk(copyOfTextElements.remove(0))); - TextChunk firstTC = textChunks.get(0); - - float previousAveCharWidth = (float) firstTC.getWidth(); - float endOfLastTextX = firstTC.getRight(); - float maxYForLine = firstTC.getBottom(); - float maxHeightForLine = (float) firstTC.getHeight(); - float minYTopForLine = firstTC.getTop(); - float lastWordSpacing = -1; - float wordSpacing, deltaSpace, averageCharWidth, deltaCharWidth; - float expectedStartOfNextWordX, dist; - TextElement sp, prevChar; - TextChunk currentChunk; - boolean sameLine, acrossVerticalRuling; - - for (TextElement chr : copyOfTextElements) { - currentChunk = textChunks.get(textChunks.size() - 1); - prevChar = currentChunk.textElements.get(currentChunk.textElements.size() - 1); - - // if same char AND overlapped, skip - if ((chr.getText().equals(prevChar.getText())) && (prevChar.overlapRatio(chr) > 0.5)) { - continue; - } - - // if chr is a space that overlaps with prevChar, skip - if (chr.getText().equals(" ") && Utils.feq(prevChar.getLeft(), chr.getLeft()) && Utils.feq(prevChar.getTop(), chr.getTop())) { - continue; - } - - // Resets the average character width when we see a change in font - // or a change in the font size - if ((chr.getFont() != prevChar.getFont()) || !Utils.feq(chr.getFontSize(), prevChar.getFontSize())) { - previousAveCharWidth = -1; - } - - // is there any vertical ruling that goes across chr and prevChar? - acrossVerticalRuling = false; - for (Ruling r : verticalRulings) { - if ( - (verticallyOverlapsRuling(prevChar, r) && verticallyOverlapsRuling(chr, r)) && - (prevChar.x < r.getPosition() && chr.x > r.getPosition()) || (prevChar.x > r.getPosition() && chr.x < r.getPosition()) - ) { - acrossVerticalRuling = true; - break; - } - } - - // Estimate the expected width of the space based on the - // space character with some margin. 
- wordSpacing = chr.getWidthOfSpace(); - deltaSpace = 0; - if (java.lang.Float.isNaN(wordSpacing) || wordSpacing == 0) { - deltaSpace = java.lang.Float.MAX_VALUE; - } else if (lastWordSpacing < 0) { - deltaSpace = wordSpacing * 0.5f; // 0.5 == spacing tolerance - } else { - deltaSpace = ((wordSpacing + lastWordSpacing) / 2.0f) * 0.5f; - } - - // Estimate the expected width of the space based on the - // average character width with some margin. This calculation does not - // make a true average (average of averages) but we found that it gave the - // best results after numerous experiments. Based on experiments we also found that - // .3 worked well. - if (previousAveCharWidth < 0) { - averageCharWidth = (float) (chr.getWidth() / chr.getText().length()); - } else { - averageCharWidth = (float) ((previousAveCharWidth + (chr.getWidth() / chr.getText().length())) / 2.0f); - } - deltaCharWidth = averageCharWidth * AVERAGE_CHAR_TOLERANCE; - - // Compares the values obtained by the average method and the wordSpacing method and picks - // the smaller number. - expectedStartOfNextWordX = -java.lang.Float.MAX_VALUE; - - if (endOfLastTextX != -1) { - expectedStartOfNextWordX = endOfLastTextX + Math.min(deltaCharWidth, deltaSpace); - } - - // new line? - sameLine = true; - if (!Utils.overlap(chr.getBottom(), chr.height, maxYForLine, maxHeightForLine)) { - endOfLastTextX = -1; - expectedStartOfNextWordX = -java.lang.Float.MAX_VALUE; - maxYForLine = -java.lang.Float.MAX_VALUE; - maxHeightForLine = -1; - minYTopForLine = java.lang.Float.MAX_VALUE; - sameLine = false; - } - - endOfLastTextX = chr.getRight(); - - // should we add a space? 
- if (!acrossVerticalRuling && - sameLine && - expectedStartOfNextWordX < chr.getLeft() && - !prevChar.getText().endsWith(" ")) { - - sp = new TextElement(prevChar.getTop(), - prevChar.getLeft(), - expectedStartOfNextWordX - prevChar.getLeft(), - (float) prevChar.getHeight(), - prevChar.getFont(), - prevChar.getFontSize(), - " ", - prevChar.getWidthOfSpace()); - - currentChunk.add(sp); - } else { - sp = null; - } - - maxYForLine = Math.max(chr.getBottom(), maxYForLine); - maxHeightForLine = (float) Math.max(maxHeightForLine, chr.getHeight()); - minYTopForLine = Math.min(minYTopForLine, chr.getTop()); - - dist = chr.getLeft() - (sp != null ? sp.getRight() : prevChar.getRight()); - - if (!acrossVerticalRuling && - sameLine && - (dist < 0 ? currentChunk.verticallyOverlaps(chr) : dist < wordSpacing)) { - currentChunk.add(chr); - } else { // create a new chunk - textChunks.add(new TextChunk(chr)); - } - - lastWordSpacing = wordSpacing; - previousAveCharWidth = (float) (sp != null ? (averageCharWidth + sp.getWidth()) / 2.0f : averageCharWidth); - } - - - List textChunksSeparatedByDirectionality = new ArrayList<>(); - // count up characters by directionality - for (TextChunk chunk : textChunks) { - // choose the dominant direction - boolean isLtrDominant = chunk.isLtrDominant() != -1; // treat neutral as LTR - TextChunk dirChunk = chunk.groupByDirectionality(isLtrDominant); - textChunksSeparatedByDirectionality.add(dirChunk); - } - - return textChunksSeparatedByDirectionality; - } - - private static boolean verticallyOverlapsRuling(TextElement te, Ruling r) { - return Math.max(0, Math.min(te.getBottom(), r.getY2()) - Math.max(te.getTop(), r.getY1())) > 0; - } - -} diff --git a/src/main/java/technology/tabula/TextStripper.java b/src/main/java/technology/tabula/TextStripper.java deleted file mode 100644 index 557fa439..00000000 --- a/src/main/java/technology/tabula/TextStripper.java +++ /dev/null @@ -1,175 +0,0 @@ -package technology.tabula; - -import 
org.apache.fontbox.util.BoundingBox; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.font.PDFont; -import org.apache.pdfbox.pdmodel.font.PDFontDescriptor; -import org.apache.pdfbox.pdmodel.font.PDType3Font; -import org.apache.pdfbox.text.PDFTextStripper; -import org.apache.pdfbox.text.TextPosition; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -public class TextStripper extends PDFTextStripper { - - private static final String NBSP = "\u00A0"; - private static final float AVG_HEIGHT_MULT_THRESHOLD = 6.0f; - private static final float MAX_BLANK_FONT_SIZE = 40.0f; - private static final float MIN_BLANK_FONT_SIZE = 2.0f; - private final PDDocument document; - private final ArrayList textElements; - private final RectangleSpatialIndex spatialIndex; - private float minCharWidth = Float.MAX_VALUE; - private float minCharHeight = Float.MAX_VALUE; - private float totalHeight = 0.0f; - private int countHeight = 0; - - public TextStripper(PDDocument document, int pageNumber) throws IOException { - super(); - this.document = document; - this.setStartPage(pageNumber); - this.setEndPage(pageNumber); - this.textElements = new ArrayList<>(); - this.spatialIndex = new RectangleSpatialIndex<>(); - } - - public void process() throws IOException { - this.getText(this.document); - } - - @Override - protected void writeString(String string, List textPositions) throws IOException - { - for (TextPosition textPosition: textPositions) - { - if (textPosition == null) { - continue; - } - - String c = textPosition.getUnicode(); - - // if c not printable, return - if (!isPrintable(c)) { - continue; - } - - Float h = textPosition.getHeightDir(); - - if (c.equals(NBSP)) { // replace non-breaking space for space - c = " "; - } - - float wos = textPosition.getWidthOfSpace(); - - TextElement te = new TextElement(Utils.round(textPosition.getYDirAdj() - h, 2), - Utils.round(textPosition.getXDirAdj(), 2), 
Utils.round(textPosition.getWidthDirAdj(), 2), - Utils.round(textPosition.getHeightDir(), 2), textPosition.getFont(), textPosition.getFontSizeInPt(), c, - // workaround a possible bug in PDFBox: - // https://issues.apache.org/jira/browse/PDFBOX-1755 - wos, textPosition.getDir()); - - this.minCharWidth = (float) Math.min(this.minCharWidth, te.getWidth()); - this.minCharHeight = (float) Math.min(this.minCharHeight, te.getHeight()); - - countHeight++; - totalHeight += te.getHeight(); - float avgHeight = totalHeight / countHeight; - - //We have an issue where tall blank cells throw off the row height calculation - //Introspect a blank cell a bit here to see if it should be thrown away - if ((te.getText() == null || te.getText().trim().equals(""))) { - //if the cell height is more than AVG_HEIGHT_MULT_THRESHOLDxaverage, throw it away - if (avgHeight > 0 - && te.getHeight() >= (avgHeight * AVG_HEIGHT_MULT_THRESHOLD)) { - continue; - } - - //if the font size is outside of reasonable ranges, throw it away - if (textPosition.getFontSizeInPt() > MAX_BLANK_FONT_SIZE || textPosition.getFontSizeInPt() < MIN_BLANK_FONT_SIZE) { - continue; - } - } - - this.spatialIndex.add(te); - this.textElements.add(te); - } - } - - @Override - protected float computeFontHeight(PDFont font) throws IOException - { - BoundingBox bbox = font.getBoundingBox(); - if (bbox.getLowerLeftY() < Short.MIN_VALUE) - { - // PDFBOX-2158 and PDFBOX-3130 - // files by Salmat eSolutions / ClibPDF Library - bbox.setLowerLeftY(- (bbox.getLowerLeftY() + 65536)); - } - // 1/2 the bbox is used as the height todo: why? 
- float glyphHeight = bbox.getHeight() / 2; - - // sometimes the bbox has very high values, but CapHeight is OK - PDFontDescriptor fontDescriptor = font.getFontDescriptor(); - if (fontDescriptor != null) - { - float capHeight = fontDescriptor.getCapHeight(); - if (Float.compare(capHeight, 0) != 0 && - (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) - { - glyphHeight = capHeight; - } - // PDFBOX-3464, PDFBOX-448: - // sometimes even CapHeight has very high value, but Ascent and Descent are ok - float ascent = fontDescriptor.getAscent(); - float descent = fontDescriptor.getDescent(); - if (ascent > 0 && descent < 0 && - ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) - { - glyphHeight = (ascent - descent) / 2; - } - } - - // transformPoint from glyph space -> text space - float height; - if (font instanceof PDType3Font) - { - height = font.getFontMatrix().transformPoint(0, glyphHeight).y; - } - else - { - height = glyphHeight / 1000; - } - - return height; - } - - private boolean isPrintable(String s) { - Character c; - Character.UnicodeBlock block; - boolean printable = false; - for (int i = 0; i < s.length(); i++) { - c = s.charAt(i); - block = Character.UnicodeBlock.of(c); - printable |= !Character.isISOControl(c) && block != null && block != Character.UnicodeBlock.SPECIALS; - } - return printable; - } - - public List getTextElements() { - return this.textElements; - } - - public RectangleSpatialIndex getSpatialIndex() { - return spatialIndex; - } - - public float getMinCharWidth() { - return minCharWidth; - } - - public float getMinCharHeight() { - return minCharHeight; - } -} diff --git a/src/main/java/technology/tabula/Utils.java b/src/main/java/technology/tabula/Utils.java deleted file mode 100644 index 00814429..00000000 --- a/src/main/java/technology/tabula/Utils.java +++ /dev/null @@ -1,288 +0,0 @@ -package technology.tabula; - -import java.awt.Shape; -import java.awt.geom.Line2D; -import java.awt.geom.Point2D; 
-import java.awt.geom.Rectangle2D; -import java.awt.image.BufferedImage; -import java.io.IOException; -import java.math.BigDecimal; -import java.util.*; - -import org.apache.commons.cli.ParseException; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.rendering.ImageType; -import org.apache.pdfbox.rendering.PDFRenderer; - -/** - * @author manuel - */ -public class Utils { - public static boolean within(double first, double second, double variance) { - return second < first + variance && second > first - variance; - } - - public static boolean overlap(double y1, double height1, double y2, double height2, double variance) { - return within(y1, y2, variance) || (y2 <= y1 && y2 >= y1 - height1) || (y1 <= y2 && y1 >= y2 - height2); - } - - public static boolean overlap(double y1, double height1, double y2, double height2) { - return overlap(y1, height1, y2, height2, 0.1f); - } - - private final static float EPSILON = 0.01f; - protected static boolean useQuickSort = useCustomQuickSort(); - - public static boolean feq(double f1, double f2) { - return (Math.abs(f1 - f2) < EPSILON); - } - - public static float round(double d, int decimalPlace) { - BigDecimal bd = new BigDecimal(Double.toString(d)); - bd = bd.setScale(decimalPlace, BigDecimal.ROUND_HALF_UP); - return bd.floatValue(); - } - - public static Rectangle bounds(Collection shapes) { - if (shapes.isEmpty()) { - throw new IllegalArgumentException("shapes can't be empty"); - } - - Iterator iter = shapes.iterator(); - Rectangle rv = new Rectangle(); - rv.setRect(iter.next().getBounds2D()); - - while (iter.hasNext()) { - Rectangle2D.union(iter.next().getBounds2D(), rv, rv); - } - - return rv; - - } - - // range iterator - public static List range(final int begin, final int end) { - return new AbstractList() { - @Override - public Integer get(int index) { - return begin + index; - } - - @Override - public int size() { - return end - begin; - } - }; - } - - - 
/* from apache.commons-lang */ - public static boolean isNumeric(final CharSequence cs) { - if (cs == null || cs.length() == 0) { - return false; - } - final int sz = cs.length(); - for (int i = 0; i < sz; i++) { - if (!Character.isDigit(cs.charAt(i))) { - return false; - } - } - return true; - } - - public static String join(String glue, String... s) { - int k = s.length; - if (k == 0) { - return null; - } - StringBuilder out = new StringBuilder(); - out.append(s[0]); - for (int x = 1; x < k; ++x) { - out.append(glue).append(s[x]); - } - return out.toString(); - } - - public static List> transpose(List> table) { - List> ret = new ArrayList<>(); - final int N = table.get(0).size(); - for (int i = 0; i < N; i++) { - List col = new ArrayList<>(); - for (List row : table) { - col.add(row.get(i)); - } - ret.add(col); - } - return ret; - } - - /** - * Wrap Collections.sort so we can fallback to a non-stable quicksort if we're - * running on JDK7+ - */ - public static > void sort(List list) { - if (useQuickSort) QuickSort.sort(list); - else Collections.sort(list); - } - - public static void sort(List list, Comparator comparator) { - if (useQuickSort) QuickSort.sort(list, comparator); - else Collections.sort(list, comparator); - } - - private static boolean useCustomQuickSort() { - // taken from PDFBOX: - - // check if we need to use the custom quicksort algorithm as a - // workaround to the transitivity issue of TextPositionComparator: - // https://issues.apache.org/jira/browse/PDFBOX-1512 - - String numberybits = System.getProperty("java.version").split( - "-")[0]; // some Java version strings are 9-internal, which is dumb. 
- String[] versionComponents = numberybits.split( - "\\."); - int javaMajorVersion; - int javaMinorVersion; - if (versionComponents.length >= 2) { - javaMajorVersion = Integer.parseInt(versionComponents[0]); - javaMinorVersion = Integer.parseInt(versionComponents[1]); - } else { - javaMajorVersion = 1; - javaMinorVersion = Integer.parseInt(versionComponents[0]); - } - boolean is16orLess = javaMajorVersion == 1 && javaMinorVersion <= 6; - String useLegacySort = System.getProperty("java.util.Arrays.useLegacyMergeSort"); - return !is16orLess || (useLegacySort != null && useLegacySort.equals("true")); - } - - - public static List parsePagesOption(String pagesSpec) throws ParseException { - if (pagesSpec.equals("all")) { - return null; - } - - List rv = new ArrayList<>(); - - String[] ranges = pagesSpec.split(","); - for (int i = 0; i < ranges.length; i++) { - String[] r = ranges[i].split("-"); - if (r.length == 0 || !Utils.isNumeric(r[0]) || r.length > 1 && !Utils.isNumeric(r[1])) { - throw new ParseException("Syntax error in page range specification"); - } - - if (r.length < 2) { - rv.add(Integer.parseInt(r[0])); - } else { - int t = Integer.parseInt(r[0]); - int f = Integer.parseInt(r[1]); - if (t > f) { - throw new ParseException("Syntax error in page range specification"); - } - rv.addAll(Utils.range(t, f + 1)); - } - } - - Collections.sort(rv); - return rv; - } - - public static void snapPoints(List rulings, float xThreshold, float yThreshold) { - - // collect points and keep a Line -> p1,p2 map - Map linesToPoints = new HashMap<>(); - List points = new ArrayList<>(); - for (Line2D.Float r : rulings) { - Point2D p1 = r.getP1(); - Point2D p2 = r.getP2(); - linesToPoints.put(r, new Point2D[]{p1, p2}); - points.add(p1); - points.add(p2); - } - - // snap by X - Collections.sort(points, new Comparator() { - @Override - public int compare(Point2D arg0, Point2D arg1) { - return java.lang.Double.compare(arg0.getX(), arg1.getX()); - } - }); - - List> groupedPoints = new 
ArrayList<>(); - groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{points.get(0)}))); - - for (Point2D p : points.subList(1, points.size() - 1)) { - List last = groupedPoints.get(groupedPoints.size() - 1); - if (Math.abs(p.getX() - last.get(0).getX()) < xThreshold) { - groupedPoints.get(groupedPoints.size() - 1).add(p); - } else { - groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{p}))); - } - } - - for (List group : groupedPoints) { - float avgLoc = 0; - for (Point2D p : group) { - avgLoc += p.getX(); - } - avgLoc /= group.size(); - for (Point2D p : group) { - p.setLocation(avgLoc, p.getY()); - } - } - // --- - - // snap by Y - Collections.sort(points, new Comparator() { - @Override - public int compare(Point2D arg0, Point2D arg1) { - return java.lang.Double.compare(arg0.getY(), arg1.getY()); - } - }); - - groupedPoints = new ArrayList<>(); - groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{points.get(0)}))); - - for (Point2D p : points.subList(1, points.size() - 1)) { - List last = groupedPoints.get(groupedPoints.size() - 1); - if (Math.abs(p.getY() - last.get(0).getY()) < yThreshold) { - groupedPoints.get(groupedPoints.size() - 1).add(p); - } else { - groupedPoints.add(new ArrayList<>(Arrays.asList(new Point2D[]{p}))); - } - } - - for (List group : groupedPoints) { - float avgLoc = 0; - for (Point2D p : group) { - avgLoc += p.getY(); - } - avgLoc /= group.size(); - for (Point2D p : group) { - p.setLocation(p.getX(), avgLoc); - } - } - // --- - - // finally, modify lines - for (Map.Entry ltp : linesToPoints.entrySet()) { - Point2D[] p = ltp.getValue(); - ltp.getKey().setLine(p[0], p[1]); - } - } - - public static BufferedImage pageConvertToImage(PDPage page, int dpi, ImageType imageType) throws IOException { - try (PDDocument document = new PDDocument()) { - document.addPage(page); - PDFRenderer renderer = new PDFRenderer(document); - document.close(); - return renderer.renderImageWithDPI(0, dpi, imageType); - } - } - - 
public static BufferedImage pageConvertToImage(PDDocument doc, PDPage page, int dpi, ImageType imageType) throws IOException { - PDFRenderer renderer = new PDFRenderer(doc); - return renderer.renderImageWithDPI(doc.getPages().indexOf(page), dpi, imageType); - } - -} diff --git a/src/main/java/technology/tabula/debug/Debug.java b/src/main/java/technology/tabula/debug/Debug.java deleted file mode 100644 index d6d257ce..00000000 --- a/src/main/java/technology/tabula/debug/Debug.java +++ /dev/null @@ -1,404 +0,0 @@ -package technology.tabula.debug; - -import java.awt.BasicStroke; -import java.awt.Color; -import java.awt.Graphics2D; -import java.awt.Shape; -import java.awt.Stroke; -import java.awt.geom.Ellipse2D; -import java.awt.geom.Line2D; -import java.awt.geom.Point2D; -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - -import org.apache.commons.cli.*; -import org.apache.pdfbox.Loader; -import technology.tabula.Cell; -import technology.tabula.CommandLineApp; -import technology.tabula.Line; -import technology.tabula.ObjectExtractor; -import technology.tabula.Page; -import technology.tabula.ProjectionProfile; -import technology.tabula.Rectangle; -import technology.tabula.Ruling; -import technology.tabula.Table; -import technology.tabula.TextChunk; -import technology.tabula.TextElement; -import technology.tabula.Utils; -import technology.tabula.detectors.NurminenDetectionAlgorithm; -import technology.tabula.extractors.BasicExtractionAlgorithm; -import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.rendering.ImageType; - -import javax.imageio.ImageIO; - -public class Debug { - - private static final float CIRCLE_RADIUS = 5f; - - private static final Color[] COLORS = {new Color(27, 158, 119), new Color(217, 95, 2), new 
Color(117, 112, 179), - new Color(231, 41, 138), new Color(102, 166, 30)}; - - public static void debugIntersections(Graphics2D g, Page page) { - int i = 0; - for (Point2D p : Ruling.findIntersections(page.getHorizontalRulings(), page.getVerticalRulings()).keySet()) { - g.setColor(COLORS[(i++) % 5]); - g.fill(new Ellipse2D.Float((float) p.getX() - CIRCLE_RADIUS / 2f, (float) p.getY() - CIRCLE_RADIUS / 2f, 5f, - 5f)); - } - } - - private static void debugNonCleanRulings(Graphics2D g, Page page) { - drawShapes(g, page.getUnprocessedRulings()); - } - - private static void debugRulings(Graphics2D g, Page page) { - // draw detected lines - List rulings = new ArrayList<>(page.getHorizontalRulings()); - rulings.addAll(page.getVerticalRulings()); - drawShapes(g, rulings); - } - - private static void debugColumns(Graphics2D g, Page page) { - List textChunks = TextElement.mergeWords(page.getText()); - List lines = TextChunk.groupByLines(textChunks); - List columns = BasicExtractionAlgorithm.columnPositions(lines); - int i = 0; - for (float p : columns) { - Ruling r = new Ruling(new Point2D.Float(p, page.getTop()), - new Point2D.Float(p, page.getBottom())); - g.setColor(COLORS[(i++) % 5]); - drawShape(g, r); - } - } - - private static void debugCharacters(Graphics2D g, Page page) { - drawShapes(g, page.getText()); - } - - private static void debugTextChunks(Graphics2D g, Page page) { - List chunks = TextElement.mergeWords(page.getText(), page.getVerticalRulings()); - drawShapes(g, chunks); - } - - private static void debugSpreadsheets(Graphics2D g, Page page) { - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List tables = sea.extract(page); - drawShapes(g, tables); - } - - private static void debugCells(Graphics2D g, Rectangle area, Page page) { - List h = page.getHorizontalRulings(); - List v = page.getVerticalRulings(); - if (area != null) { - h = Ruling.cropRulingsToArea(h, area); - v = Ruling.cropRulingsToArea(v, area); - } - List cells = 
SpreadsheetExtractionAlgorithm.findCells(h, v); - drawShapes(g, cells); - } - - private static void debugDetectedTables(Graphics2D g, Page page) { - NurminenDetectionAlgorithm detectionAlgorithm = new NurminenDetectionAlgorithm(); - List tables = detectionAlgorithm.detect(page); - drawShapes(g, tables); - } - - private static void drawShapes(Graphics2D g, Collection shapes, Stroke stroke) { - int i = 0; - g.setStroke(stroke); - for (Shape s : shapes) { - g.setColor(COLORS[(i++) % 5]); - drawShape(g, s); - } - } - - private static void drawShapes(Graphics2D g, Collection shapes) { - drawShapes(g, shapes, new BasicStroke(2f)); - } - - private static void debugProjectionProfile(Graphics2D g, Page page) { - float horizSmoothKernel = 0, vertSmoothKernel = 0; - // for (Rectangle r: page.getText()) { - for (Rectangle r : page.getText()) { - horizSmoothKernel += r.getWidth(); - vertSmoothKernel += r.getHeight(); - } - horizSmoothKernel /= page.getText().size(); - vertSmoothKernel /= page.getText().size(); - System.out.println("hsk: " + horizSmoothKernel + " vsk: " + vertSmoothKernel); - // ProjectionProfile profile = new ProjectionProfile(page, - // page.getText(), horizSmoothKernel, vertSmoothKernel); - ProjectionProfile profile = new ProjectionProfile(page, - TextElement.mergeWords(page.getText(), page.getVerticalRulings()), horizSmoothKernel * 1.5f, - vertSmoothKernel); - float prec = (float) Math.pow(10, ProjectionProfile.DECIMAL_PLACES); - - float[] hproj = profile.getHorizontalProjection(); - float[] vproj = profile.getVerticalProjection(); - - g.setStroke(new BasicStroke(1f)); - g.setColor(Color.RED); - - // hproj - // Point2D last = new Point2D.Double(page.getLeft(), page.getBottom() - - // hproj[0] / prec), cur; - Point2D last = new Point2D.Double(page.getLeft(), page.getBottom()), cur; - for (int i = 0; i < hproj.length; i++) { - cur = new Point2D.Double(page.getLeft() + i / prec, page.getBottom() - hproj[i]); - g.draw(new Line2D.Double(last, cur)); - last = cur; 
- } - - // hproj first derivative - g.setColor(Color.BLUE); - float[] deriv = ProjectionProfile.filter(ProjectionProfile.getFirstDeriv(profile.getHorizontalProjection()), - 0.01f); - last = new Point2D.Double(page.getLeft(), page.getBottom()); - for (int i = 0; i < deriv.length; i++) { - cur = new Point2D.Double(page.getLeft() + i / prec, page.getBottom() - deriv[i]); - g.draw(new Line2D.Double(last, cur)); - last = cur; - } - - // columns - g.setColor(Color.MAGENTA); - g.setStroke(new BasicStroke(1f)); - float[] seps = profile.findVerticalSeparators(horizSmoothKernel * 2.5f); - for (int i = 0; i < seps.length; i++) { - float x = page.getLeft() + seps[i]; - g.draw(new Line2D.Double(x, page.getTop(), x, page.getBottom())); - } - - // vproj - g.setStroke(new BasicStroke(1f)); - g.setColor(Color.GREEN); - last = new Point2D.Double(page.getLeft(), page.getTop()); - for (int i = 0; i < vproj.length; i++) { - cur = new Point2D.Double(page.getLeft() + vproj[i] / prec, page.getTop() + i / prec); - g.draw(new Line2D.Double(last, cur)); - last = cur; - } - - // vproj first derivative - g.setColor(new Color(0, 0, 1, 0.5f)); - deriv = ProjectionProfile.filter(ProjectionProfile.getFirstDeriv(vproj), 0.1f); - last = new Point2D.Double(page.getRight(), page.getTop()); - for (int i = 0; i < deriv.length; i++) { - cur = new Point2D.Double(page.getRight() - deriv[i] * 10, page.getTop() + i / prec); - g.draw(new Line2D.Double(last, cur)); - last = cur; - } - - // rows - g.setStroke(new BasicStroke(1.5f)); - seps = profile.findHorizontalSeparators(vertSmoothKernel); - for (int i = 0; i < seps.length; i++) { - float y = page.getTop() + seps[i]; - g.draw(new Line2D.Double(page.getLeft(), y, page.getRight(), y)); - } - - } - - private static void drawShape(Graphics2D g, Shape shape) { - //g.setStroke(new BasicStroke(1)); - g.draw(shape); - } - - public static void renderPage(String pdfPath, String outPath, int pageNumber, Rectangle area, - boolean drawTextChunks, boolean 
drawSpreadsheets, boolean drawRulings, boolean drawIntersections, - boolean drawColumns, boolean drawCharacters, boolean drawArea, boolean drawCells, - boolean drawUnprocessedRulings, boolean drawProjectionProfile, boolean drawClippingPaths, - boolean drawDetectedTables) throws IOException { - PDDocument document = Loader.loadPDF(new File(pdfPath)); - - ObjectExtractor oe = new ObjectExtractor(document); - - Page page = oe.extract(pageNumber + 1); - - if (area != null) { - page = page.getArea(area); - } - - PDPage p = document.getPage(pageNumber); - - BufferedImage image = Utils.pageConvertToImage(document, p, 72, ImageType.RGB); - - Graphics2D g = (Graphics2D) image.getGraphics(); - - if (drawTextChunks) { - debugTextChunks(g, page); - } - if (drawSpreadsheets) { - debugSpreadsheets(g, page); - } - if (drawRulings) { - debugRulings(g, page); - } - if (drawIntersections) { - debugIntersections(g, page); - } - if (drawColumns) { - debugColumns(g, page); - } - if (drawCharacters) { - debugCharacters(g, page); - } - if (drawArea) { - g.setColor(Color.ORANGE); - drawShape(g, area); - } - if (drawCells) { - debugCells(g, area, page); - } - if (drawUnprocessedRulings) { - debugNonCleanRulings(g, page); - } - if (drawProjectionProfile) { - debugProjectionProfile(g, page); - } - if (drawClippingPaths) { - // TODO: Enable when oe.clippingPaths is done - //drawShapes(g, oe.clippingPaths, - // new BasicStroke(2f, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER, 10f, new float[] { 3f }, 0f)); - } - if (drawDetectedTables) { - debugDetectedTables(g, page); - } - - document.close(); - - ImageIO.write(image, "jpg", new File(outPath)); - } - - private static Options buildOptions() { - Options o = new Options(); - - o.addOption("h", "help", false, "Print this help text."); - o.addOption("r", "rulings", false, "Show detected rulings."); - o.addOption("i", "intersections", false, "Show intersections between rulings."); - o.addOption("s", "spreadsheets", false, "Show detected 
spreadsheets."); - o.addOption("t", "textchunks", false, "Show detected text chunks (merged characters)"); - o.addOption("c", "columns", false, "Show columns as detected by BasicExtractionAlgorithm"); - o.addOption("e", "characters", false, "Show detected characters"); - o.addOption("g", "region", false, "Show provided region (-a parameter)"); - o.addOption("l", "cells", false, "Show detected cells"); - o.addOption("u", "unprocessed-rulings", false, "Show non-cleaned rulings"); - o.addOption("f", "profile", false, "Show projection profile"); - o.addOption("n", "clipping-paths", false, "Show clipping paths"); - o.addOption("d", "detected-tables", false, "Show detected tables"); - - o.addOption(Option.builder("a").longOpt("area") - .desc("Portion of the page to analyze (top,left,bottom,right). Example: --area 269.875,12.75,790.5,561. Default is entire page") - .hasArg() - .argName("AREA") - .build()); - - o.addOption(Option.builder("p").longOpt("pages") - .desc("Comma separated list of ranges, or all. Examples: --pages 1-3,5-7, --pages 3 or --pages all. 
Default is --pages 1") - .hasArg() - .argName("PAGES") - .build()); - - return o; - } - - public static void main(String[] args) throws IOException { - CommandLineParser parser = new DefaultParser(); - try { - // parse the command line arguments - CommandLine line = parser.parse(buildOptions(), args); - List pages = new ArrayList<>(); - if (line.hasOption('p')) { - pages = Utils.parsePagesOption(line.getOptionValue('p')); - } else { - pages.add(1); - } - - if (line.hasOption('h')) { - printHelp(); - System.exit(0); - } - - if (line.getArgs().length != 1) { - throw new ParseException("Need one filename\nTry --help for help"); - } - - File pdfFile = new File(line.getArgs()[0]); - if (!pdfFile.exists()) { - throw new ParseException("File does not exist"); - } - - if (line.hasOption('g') && !line.hasOption('a')) { - throw new ParseException("-g argument needs an area (-a)"); - } - - Rectangle area = null; - if (line.hasOption('a')) { - List f = CommandLineApp.parseFloatList(line.getOptionValue('a')); - if (f.size() != 4) { - throw new ParseException("area parameters must be top,left,bottom,right"); - } - area = new Rectangle(f.get(0), f.get(1), f.get(3) - f.get(1), f.get(2) - f.get(0)); - } - - if (pages == null) { - // user specified all pages - PDDocument document = Loader.loadPDF(pdfFile); - - int numPages = document.getNumberOfPages(); - pages = new ArrayList<>(numPages); - - for (int i = 1; i <= numPages; i++) { - pages.add(i); - } - - document.close(); - } - - for (int i : pages) { - renderPage(pdfFile.getAbsolutePath(), - new File(pdfFile.getParent(), removeExtension(pdfFile.getName()) + "-" + (i) + ".jpg") - .getAbsolutePath(), - i - 1, area, line.hasOption('t'), line.hasOption('s'), line.hasOption('r'), line.hasOption('i'), - line.hasOption('c'), line.hasOption('e'), line.hasOption('g'), line.hasOption('l'), - line.hasOption('u'), line.hasOption('f'), line.hasOption('n'), line.hasOption('d')); - } - } catch (ParseException e) { - System.err.println("Error: " + 
e.getMessage()); - System.exit(1); - } - } - - private static void printHelp() { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("tabula-debug", "Generate debugging images", buildOptions(), "", true); - } - - private static String removeExtension(String s) { - - String separator = System.getProperty("file.separator"); - String filename; - - // Remove the path upto the filename. - int lastSeparatorIndex = s.lastIndexOf(separator); - if (lastSeparatorIndex == -1) { - filename = s; - } else { - filename = s.substring(lastSeparatorIndex + 1); - } - - // Remove the extension. - int extensionIndex = filename.lastIndexOf("."); - if (extensionIndex == -1) - return filename; - - return filename.substring(0, extensionIndex); - } -} diff --git a/src/main/java/technology/tabula/detectors/DetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/DetectionAlgorithm.java deleted file mode 100644 index c292c282..00000000 --- a/src/main/java/technology/tabula/detectors/DetectionAlgorithm.java +++ /dev/null @@ -1,13 +0,0 @@ -package technology.tabula.detectors; - -import technology.tabula.Page; -import technology.tabula.Rectangle; - -import java.util.List; - -/** - * Created by matt on 2015-12-14. 
- */ -public interface DetectionAlgorithm { - List detect(Page page); -} diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java deleted file mode 100644 index 86639f66..00000000 --- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java +++ /dev/null @@ -1,856 +0,0 @@ -package technology.tabula.detectors; - -import org.apache.pdfbox.contentstream.PDContentStream; -import org.apache.pdfbox.contentstream.operator.Operator; -import org.apache.pdfbox.contentstream.operator.OperatorName; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.pdfparser.PDFStreamParser; -import org.apache.pdfbox.pdfwriter.ContentStreamWriter; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.common.PDStream; -import org.apache.pdfbox.rendering.ImageType; -import technology.tabula.*; -import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; - -import java.awt.geom.Line2D; -import java.awt.geom.Point2D; -import java.awt.image.BufferedImage; -import java.awt.image.Raster; -import java.io.IOException; -import java.io.OutputStream; -import java.util.*; - -/** - * Created by matt on 2015-12-17. - *

- * Attempt at an implementation of the table finding algorithm described by - * Anssi Nurminen's master's thesis: - * http://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3 - */ -public class NurminenDetectionAlgorithm implements DetectionAlgorithm { - - private static final int GRAYSCALE_INTENSITY_THRESHOLD = 25; - private static final int HORIZONTAL_EDGE_WIDTH_MINIMUM = 50; - private static final int VERTICAL_EDGE_HEIGHT_MINIMUM = 10; - private static final int CELL_CORNER_DISTANCE_MAXIMUM = 10; - private static final float POINT_SNAP_DISTANCE_THRESHOLD = 8f; - private static final float TABLE_PADDING_AMOUNT = 1.0f; - private static final int REQUIRED_TEXT_LINES_FOR_EDGE = 4; - private static final int REQUIRED_CELLS_FOR_TABLE = 4; - private static final float IDENTICAL_TABLE_OVERLAP_RATIO = 0.9f; - - /** - * Helper class that encapsulates a text edge - */ - private static final class TextEdge extends Line2D.Float { - // types of text edges - public static final int LEFT = 0; - public static final int MID = 1; - public static final int RIGHT = 2; - public static final int NUM_TYPES = 3; - - public int intersectingTextRowCount; - - public TextEdge(float x1, float y1, float x2, float y2) { - super(x1, y1, x2, y2); - this.intersectingTextRowCount = 0; - } - } - - /** - * Helper container for all text edges on a page - */ - private static final class TextEdges extends ArrayList> { - public TextEdges(List leftEdges, List midEdges, List rightEdges) { - super(3); - this.add(leftEdges); - this.add(midEdges); - this.add(rightEdges); - } - } - - /** - * Helper container for relevant text edge info - */ - private static final class RelevantEdges { - public int edgeType; - public int edgeCount; - - public RelevantEdges(int edgeType, int edgeCount) { - this.edgeType = edgeType; - this.edgeCount = edgeCount; - } - } - - @Override - public List detect(Page page) { - - // get horizontal & vertical lines - // we get these from an image of the PDF 
and not the PDF itself because sometimes there are invisible PDF - // instructions that are interpreted incorrectly as visible elements - we really want to capture what a - // person sees when they look at the PDF - BufferedImage image; - PDPage pdfPage = page.getPDPage(); - try { - image = Utils.pageConvertToImage(page.getPDDoc(), pdfPage, 144, ImageType.GRAY); - } catch (IOException e) { - return new ArrayList<>(); - } - - List horizontalRulings = this.getHorizontalRulings(image); - - // now check the page for vertical lines, but remove the text first to make things less confusing - PDDocument removeTextDocument = null; - try { - removeTextDocument = this.removeText(pdfPage); - pdfPage = removeTextDocument.getPage(0); - image = Utils.pageConvertToImage(removeTextDocument, pdfPage, 144, ImageType.GRAY); - } catch (Exception e) { - return new ArrayList<>(); - } finally { - if (removeTextDocument != null) { - try { - removeTextDocument.close(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - } - - List verticalRulings = this.getVerticalRulings(image); - - List allEdges = new ArrayList<>(horizontalRulings); - allEdges.addAll(verticalRulings); - - List tableAreas = new ArrayList<>(); - - // if we found some edges, try to find some tables based on them - if (allEdges.size() > 0) { - // now we need to snap edge endpoints to a grid - Utils.snapPoints(allEdges, POINT_SNAP_DISTANCE_THRESHOLD, POINT_SNAP_DISTANCE_THRESHOLD); - - // normalize the rulings to make sure snapping didn't create any wacky non-horizontal/vertical rulings - for (List rulings : Arrays.asList(horizontalRulings, verticalRulings)) { - for (Iterator iterator = rulings.iterator(); iterator.hasNext(); ) { - Ruling ruling = iterator.next(); - - ruling.normalize(); - if (ruling.oblique()) { - iterator.remove(); - } - } - } - - // merge the edge lines into rulings - this makes finding edges between crossing points in the next step easier - // we use a larger 
pixel expansion than the normal spreadsheet extraction method to cover gaps in the - // edge detection/pixel snapping steps - horizontalRulings = Ruling.collapseOrientedRulings(horizontalRulings, 5); - verticalRulings = Ruling.collapseOrientedRulings(verticalRulings, 5); - - // use the rulings and points to find cells - List cells = SpreadsheetExtractionAlgorithm.findCells(horizontalRulings, verticalRulings); - - // then use those cells to make table areas - tableAreas = this.getTableAreasFromCells(cells); - } - - // next find any vertical rulings that intersect tables - sometimes these won't have completely been captured as - // cells if there are missing horizontal lines (which there often are) - // let's assume though that these lines should be part of the table - for (Line2D.Float verticalRuling : verticalRulings) { - for (Rectangle tableArea : tableAreas) { - if (verticalRuling.intersects(tableArea) && - !(tableArea.contains(verticalRuling.getP1()) && tableArea.contains(verticalRuling.getP2()))) { - - tableArea.setTop((float) Math.floor(Math.min(tableArea.getTop(), verticalRuling.getY1()))); - tableArea.setBottom((float) Math.ceil(Math.max(tableArea.getBottom(), verticalRuling.getY2()))); - break; - } - } - } - - // the tabula Page coordinate space is half the size of the PDFBox image coordinate space - // so halve the table area size before proceeding and add a bit of padding to make sure we capture everything - for (Rectangle area : tableAreas) { - area.x = (float) Math.floor(area.x / 2) - TABLE_PADDING_AMOUNT; - area.y = (float) Math.floor(area.y / 2) - TABLE_PADDING_AMOUNT; - area.width = (float) Math.ceil(area.width / 2) + TABLE_PADDING_AMOUNT; - area.height = (float) Math.ceil(area.height / 2) + TABLE_PADDING_AMOUNT; - } - - // we're going to want halved horizontal lines later too - for (Line2D.Float ruling : horizontalRulings) { - ruling.x1 = ruling.x1 / 2; - ruling.y1 = ruling.y1 / 2; - ruling.x2 = ruling.x2 / 2; - ruling.y2 = ruling.y2 / 2; - } - - // 
now look at text rows to help us find more tables and flesh out existing ones - List textChunks = TextElement.mergeWords(page.getText()); - List lines = TextChunk.groupByLines(textChunks); - - // first look for text rows that intersect an existing table - those lines should probably be part of the table - for (Line textRow : lines) { - for (Rectangle tableArea : tableAreas) { - if (!tableArea.contains(textRow) && textRow.intersects(tableArea)) { - tableArea.setLeft((float) Math.floor(Math.min(textRow.getLeft(), tableArea.getLeft()))); - tableArea.setRight((float) Math.ceil(Math.max(textRow.getRight(), tableArea.getRight()))); - } - } - } - - // get rid of tables that DO NOT intersect any text areas - these are likely graphs or some sort of graphic - for (Iterator iterator = tableAreas.iterator(); iterator.hasNext(); ) { - Rectangle table = iterator.next(); - - boolean intersectsText = false; - for (Line textRow : lines) { - if (table.intersects(textRow)) { - intersectsText = true; - break; - } - } - - if (!intersectsText) { - iterator.remove(); - } - } - - // lastly, there may be some tables that don't have any vertical rulings at all - // we'll use text edges we've found to try and guess which text rows are part of a table - - // in his thesis nurminen goes through every row to try to assign a probability that the line is in a table - // we're going to try a general heuristic instead, trying to find what type of edge (left/right/mid) intersects - // the most text rows, and then use that magic number of "relevant" edges to decide what text rows should be - // part of a table. 
- - boolean foundTable; - - do { - foundTable = false; - - // get rid of any text lines contained within existing tables, this allows us to find more tables - for (Iterator iterator = lines.iterator(); iterator.hasNext(); ) { - Line textRow = iterator.next(); - for (Rectangle table : tableAreas) { - if (table.contains(textRow)) { - iterator.remove(); - break; - } - } - } - - // get text edges from remaining lines in the document - TextEdges textEdges = this.getTextEdges(lines); - List leftTextEdges = textEdges.get(TextEdge.LEFT); - List midTextEdges = textEdges.get(TextEdge.MID); - List rightTextEdges = textEdges.get(TextEdge.RIGHT); - - // find the relevant text edges (the ones we think define where a table is) - RelevantEdges relevantEdgeInfo = this.getRelevantEdges(textEdges, lines); - - // we found something relevant so let's look for rows that fit our criteria - if (relevantEdgeInfo.edgeType != -1) { - List relevantEdges = null; - switch (relevantEdgeInfo.edgeType) { - case TextEdge.LEFT: - relevantEdges = leftTextEdges; - break; - case TextEdge.MID: - relevantEdges = midTextEdges; - break; - case TextEdge.RIGHT: - relevantEdges = rightTextEdges; - break; - } - - Rectangle table = this.getTableFromText(lines, relevantEdges, relevantEdgeInfo.edgeCount, horizontalRulings); - - if (table != null) { - foundTable = true; - tableAreas.add(table); - } - } - } while (foundTable); - - // create a set of our current tables that will eliminate duplicate tables - Set tableSet = new TreeSet<>(new Comparator() { - @Override - public int compare(Rectangle o1, Rectangle o2) { - if (o1.equals(o2)) { - return 0; - } - - // o1 is "equal" to o2 if o2 contains all of o1 - if (o2.contains(o1)) { - return 0; - } - - if (o1.contains(o2)) { - return 0; - } - - // otherwise see if these tables are "mostly" the same - float overlap = o1.overlapRatio(o2); - if (overlap >= IDENTICAL_TABLE_OVERLAP_RATIO) { - return 0; - } else { - return 1; - } - } - }); - - tableSet.addAll(tableAreas); - 
- return new ArrayList<>(tableSet); - } - - private Rectangle getTableFromText(List lines, - List relevantEdges, - int relevantEdgeCount, - List horizontalRulings) { - - Rectangle table = new Rectangle(); - - Line prevRow = null; - Line firstTableRow = null; - Line lastTableRow = null; - - int tableSpaceCount = 0; - float totalRowSpacing = 0; - - // go through the lines and find the ones that have the correct count of the relevant edges - for (Line textRow : lines) { - int numRelevantEdges = 0; - - if (firstTableRow != null && tableSpaceCount > 0) { - // check to make sure this text row is within a line or so of the other lines already added - // if it's not, we should stop the table here - float tableLineThreshold = (totalRowSpacing / tableSpaceCount) * 2.5f; - float lineDistance = textRow.getTop() - prevRow.getTop(); - - if (lineDistance > tableLineThreshold) { - lastTableRow = prevRow; - break; - } - } - - // for larger tables, be a little lenient on the number of relevant rows the text intersects - // for smaller tables, not so much - otherwise we'll end up treating paragraphs as tables too - int relativeEdgeDifferenceThreshold = 1; - if (relevantEdgeCount <= 3) { - relativeEdgeDifferenceThreshold = 0; - } - - for (TextEdge edge : relevantEdges) { - if (textRow.intersectsLine(edge)) { - numRelevantEdges++; - } - } - - // see if we have a candidate text row - if (numRelevantEdges >= (relevantEdgeCount - relativeEdgeDifferenceThreshold)) { - // keep track of table row spacing - if (prevRow != null && firstTableRow != null) { - tableSpaceCount++; - totalRowSpacing += (textRow.getTop() - prevRow.getTop()); - } - - // row is part of a table - if (table.getArea() == 0) { - firstTableRow = textRow; - table.setRect(textRow); - } else { - table.setLeft(Math.min(table.getLeft(), textRow.getLeft())); - table.setBottom(Math.max(table.getBottom(), textRow.getBottom())); - table.setRight(Math.max(table.getRight(), textRow.getRight())); - } - } else { - // no dice - // if 
we're at the end of the table, save the last row - if (firstTableRow != null && lastTableRow == null) { - lastTableRow = prevRow; - } - } - - prevRow = textRow; - } - - // if we don't have a table now, we won't after the next step either - if (table.getArea() == 0) { - return null; - } - - if (lastTableRow == null) { - // takes care of one-row tables or tables that end at the bottom of a page - lastTableRow = prevRow; - } - - // use the average row height and nearby horizontal lines to extend the table area - float avgRowHeight; - if (tableSpaceCount > 0) { - avgRowHeight = totalRowSpacing / tableSpaceCount; - } else { - avgRowHeight = lastTableRow.height; - } - - float rowHeightThreshold = avgRowHeight * 1.5f; - - // check lines after the bottom of the table - for (Line2D.Float ruling : horizontalRulings) { - - if (ruling.getY1() < table.getBottom()) { - continue; - } - - float distanceFromTable = (float) ruling.getY1() - table.getBottom(); - if (distanceFromTable <= rowHeightThreshold) { - // use this ruling to help define the table - table.setBottom((float) Math.max(table.getBottom(), ruling.getY1())); - table.setLeft((float) Math.min(table.getLeft(), ruling.getX1())); - table.setRight((float) Math.max(table.getRight(), ruling.getX2())); - } else { - // no use checking any further - break; - } - } - - // do the same for lines at the top, but make the threshold greater since table headings tend to be - // larger to fit up to three-ish rows of text (at least but we don't want to grab too much) - rowHeightThreshold = avgRowHeight * 3.8f; - - for (int i = horizontalRulings.size() - 1; i >= 0; i--) { - Line2D.Float ruling = horizontalRulings.get(i); - - if (ruling.getY1() > table.getTop()) { - continue; - } - - float distanceFromTable = table.getTop() - (float) ruling.getY1(); - if (distanceFromTable <= rowHeightThreshold) { - table.setTop((float) Math.min(table.getTop(), ruling.getY1())); - table.setLeft((float) Math.min(table.getLeft(), ruling.getX1())); - 
table.setRight((float) Math.max(table.getRight(), ruling.getX2())); - } else { - break; - } - } - - // add a bit of padding since the halved horizontal lines are a little fuzzy anyways - table.setTop((float) Math.floor(table.getTop()) - TABLE_PADDING_AMOUNT); - table.setBottom((float) Math.ceil(table.getBottom()) + TABLE_PADDING_AMOUNT); - table.setLeft((float) Math.floor(table.getLeft()) - TABLE_PADDING_AMOUNT); - table.setRight((float) Math.ceil(table.getRight()) + TABLE_PADDING_AMOUNT); - - return table; - } - - private RelevantEdges getRelevantEdges(TextEdges textEdges, List lines) { - List leftTextEdges = textEdges.get(TextEdge.LEFT); - List midTextEdges = textEdges.get(TextEdge.MID); - List rightTextEdges = textEdges.get(TextEdge.RIGHT); - - // first we'll find the number of lines each type of edge crosses - int[][] edgeCountsPerLine = new int[lines.size()][TextEdge.NUM_TYPES]; - - for (TextEdge edge : leftTextEdges) { - edgeCountsPerLine[edge.intersectingTextRowCount - 1][TextEdge.LEFT]++; - } - - for (TextEdge edge : midTextEdges) { - edgeCountsPerLine[edge.intersectingTextRowCount - 1][TextEdge.MID]++; - } - - for (TextEdge edge : rightTextEdges) { - edgeCountsPerLine[edge.intersectingTextRowCount - 1][TextEdge.RIGHT]++; - } - - // now let's find the relevant edge type and the number of those edges we should look for - // we'll only take a minimum of two edges to look for tables - int relevantEdgeType = -1; - int relevantEdgeCount = 0; - for (int i = edgeCountsPerLine.length - 1; i > 2; i--) { - if (edgeCountsPerLine[i][TextEdge.LEFT] > 2 && - edgeCountsPerLine[i][TextEdge.LEFT] >= edgeCountsPerLine[i][TextEdge.RIGHT] && - edgeCountsPerLine[i][TextEdge.LEFT] >= edgeCountsPerLine[i][TextEdge.MID]) { - relevantEdgeCount = edgeCountsPerLine[i][TextEdge.LEFT]; - relevantEdgeType = TextEdge.LEFT; - break; - } - - if (edgeCountsPerLine[i][TextEdge.RIGHT] > 1 && - edgeCountsPerLine[i][TextEdge.RIGHT] >= edgeCountsPerLine[i][TextEdge.LEFT] && - 
edgeCountsPerLine[i][TextEdge.RIGHT] >= edgeCountsPerLine[i][TextEdge.MID]) { - relevantEdgeCount = edgeCountsPerLine[i][TextEdge.RIGHT]; - relevantEdgeType = TextEdge.RIGHT; - break; - } - - if (edgeCountsPerLine[i][TextEdge.MID] > 1 && - edgeCountsPerLine[i][TextEdge.MID] >= edgeCountsPerLine[i][TextEdge.RIGHT] && - edgeCountsPerLine[i][TextEdge.MID] >= edgeCountsPerLine[i][TextEdge.LEFT]) { - relevantEdgeCount = edgeCountsPerLine[i][TextEdge.MID]; - relevantEdgeType = TextEdge.MID; - break; - } - } - - return new RelevantEdges(relevantEdgeType, relevantEdgeCount); - } - - private TextEdges getTextEdges(List lines) { - - // get all text edges (lines that align with the left, middle and right of chunks of text) that extend - // uninterrupted over at least REQUIRED_TEXT_LINES_FOR_EDGE lines of text - List leftTextEdges = new ArrayList<>(); - List midTextEdges = new ArrayList<>(); - List rightTextEdges = new ArrayList<>(); - - Map> currLeftEdges = new HashMap<>(); - Map> currMidEdges = new HashMap<>(); - Map> currRightEdges = new HashMap<>(); - - - int numOfLines = lines.size(); - for (Line textRow : lines) { - for (TextChunk text : textRow.getTextElements()) { - Integer left = (int) Math.floor(text.getLeft()); - Integer right = (int) Math.floor(text.getRight()); - Integer mid = left + ((right - left) / 2); - - // first put this chunk into any edge buckets it belongs to - List leftEdge = currLeftEdges.computeIfAbsent(left, k -> new ArrayList<>()); - leftEdge.add(text); - - List midEdge = currMidEdges.computeIfAbsent(mid, k -> new ArrayList<>()); - midEdge.add(text); - - List rightEdge = currRightEdges.computeIfAbsent(right, k -> new ArrayList<>()); - rightEdge.add(text); - - // now see if this text chunk blows up any other edges - leftTextEdges.addAll( - calculateExtendedEdges(numOfLines, currLeftEdges, left, right) - ); - - midTextEdges.addAll( - calculateExtendedEdges(numOfLines, currMidEdges, left, right, mid, 2) - ); - - rightTextEdges.addAll( - 
calculateExtendedEdges(numOfLines, currRightEdges, left, right) - ); - } - } - - // add the leftovers - leftTextEdges.addAll( - calculateLeftoverEdges(numOfLines, currLeftEdges) - ); - - midTextEdges.addAll( - calculateLeftoverEdges(numOfLines, currMidEdges) - ); - - rightTextEdges.addAll( - calculateLeftoverEdges(numOfLines, currRightEdges) - ); - - return new TextEdges(leftTextEdges, midTextEdges, rightTextEdges); - } - - private Set calculateLeftoverEdges(int numOfLines, Map> currDirectedEdges) { - Set leftoverEdges = new HashSet<>(); - for (Integer key : currDirectedEdges.keySet()) { - List edgeChunks = currDirectedEdges.get(key); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextEdge edge = getEdgeFromChunks(numOfLines, key, edgeChunks); - - leftoverEdges.add(edge); - } - } - return leftoverEdges; - } - - private TextEdge getEdgeFromChunks(int numOfLines, Integer key, List edgeChunks) { - TextChunk first = edgeChunks.get(0); - TextChunk last = edgeChunks.get(edgeChunks.size() - 1); - - TextEdge edge = new TextEdge(key, first.getTop(), key, last.getBottom()); - edge.intersectingTextRowCount = Math.min(edgeChunks.size(), numOfLines); - return edge; - } - - - private Collection calculateExtendedEdges(Integer numOfLines, Map> currDirectedEdges, Integer left, Integer right) { - return calculateExtendedEdges(numOfLines, currDirectedEdges, left, right, null, null); - } - - private Collection calculateExtendedEdges(Integer numOfLines, Map> currDirectedEdges, Integer left, Integer right, Integer mid, Integer minDistToMid) { - Set extendedEdges = new HashSet<>(); - Iterator>> edgeIterator = currDirectedEdges.entrySet().iterator(); - while (edgeIterator.hasNext()) { - Map.Entry> entry = edgeIterator.next(); - Integer key = entry.getKey(); - - // if mid and minDistToMid are set, we calculate if the distance to mid is actually above, - // otherwise we ignore it - boolean hasMinDistToMid = mid == null || minDistToMid == null || Math.abs(key - mid) > 
minDistToMid; - - if (key > left && key < right && hasMinDistToMid) { - edgeIterator.remove(); - List edgeChunks = entry.getValue(); - if (edgeChunks.size() >= REQUIRED_TEXT_LINES_FOR_EDGE) { - TextEdge edge = getEdgeFromChunks(numOfLines, key, edgeChunks); - extendedEdges.add(edge); - } - } - } - return extendedEdges; - } - - private List getTableAreasFromCells(List cells) { - List> cellGroups = new ArrayList<>(); - for (Rectangle cell : cells) { - boolean addedToGroup = false; - - cellCheck: - for (List cellGroup : cellGroups) { - for (Rectangle groupCell : cellGroup) { - Point2D[] groupCellCorners = groupCell.getPoints(); - Point2D[] candidateCorners = cell.getPoints(); - - for (Point2D candidateCorner : candidateCorners) { - for (Point2D groupCellCorner : groupCellCorners) { - if (candidateCorner.distance(groupCellCorner) < CELL_CORNER_DISTANCE_MAXIMUM) { - cellGroup.add(cell); - addedToGroup = true; - break cellCheck; - } - } - } - } - } - - if (!addedToGroup) { - ArrayList cellGroup = new ArrayList<>(); - cellGroup.add(cell); - cellGroups.add(cellGroup); - } - } - - // create table areas based on cell group - List tableAreas = new ArrayList<>(); - for (List cellGroup : cellGroups) { - // less than four cells should not make a table - if (cellGroup.size() < REQUIRED_CELLS_FOR_TABLE) { - continue; - } - - float top = Float.MAX_VALUE; - float left = Float.MAX_VALUE; - float bottom = Float.MIN_VALUE; - float right = Float.MIN_VALUE; - - for (Rectangle cell : cellGroup) { - if (cell.getTop() < top) top = cell.getTop(); - if (cell.getLeft() < left) left = cell.getLeft(); - if (cell.getBottom() > bottom) bottom = cell.getBottom(); - if (cell.getRight() > right) right = cell.getRight(); - } - - tableAreas.add(new Rectangle(top, left, right - left, bottom - top)); - } - - return tableAreas; - } - - private List getHorizontalRulings(BufferedImage image) { - - // get all horizontal edges, which we'll define as a change in grayscale colour - // along a straight line of a 
certain length - ArrayList horizontalRulings = new ArrayList<>(); - - Raster r = image.getRaster(); - int width = r.getWidth(); - int height = r.getHeight(); - - for (int x = 0; x < width; x++) { - - int[] lastPixel = r.getPixel(x, 0, (int[]) null); - - for (int y = 1; y < height - 1; y++) { - - int[] currPixel = r.getPixel(x, y, (int[]) null); - - int diff = Math.abs(currPixel[0] - lastPixel[0]); - if (diff > GRAYSCALE_INTENSITY_THRESHOLD) { - // we hit what could be a line - // don't bother scanning it if we've hit a pixel in the line before - boolean alreadyChecked = false; - for (Line2D.Float line : horizontalRulings) { - if (y == line.getY1() && x >= line.getX1() && x <= line.getX2()) { - alreadyChecked = true; - break; - } - } - - if (alreadyChecked) { - lastPixel = currPixel; - continue; - } - - int lineX = x + 1; - - while (lineX < width) { - int[] linePixel = r.getPixel(lineX, y, (int[]) null); - int[] abovePixel = r.getPixel(lineX, y - 1, (int[]) null); - - if (Math.abs(linePixel[0] - abovePixel[0]) <= GRAYSCALE_INTENSITY_THRESHOLD - || Math.abs(currPixel[0] - linePixel[0]) > GRAYSCALE_INTENSITY_THRESHOLD) { - break; - } - - lineX++; - } - - int endX = lineX - 1; - int lineWidth = endX - x; - if (lineWidth > HORIZONTAL_EDGE_WIDTH_MINIMUM) { - horizontalRulings.add(new Ruling(new Point2D.Float(x, y), new Point2D.Float(endX, y))); - } - } - - lastPixel = currPixel; - } - } - - return horizontalRulings; - } - - private List getVerticalRulings(BufferedImage image) { - - // get all vertical edges, which we'll define as a change in grayscale colour - // along a straight line of a certain length - ArrayList verticalRulings = new ArrayList<>(); - - Raster r = image.getRaster(); - int width = r.getWidth(); - int height = r.getHeight(); - - for (int y = 0; y < height; y++) { - - int[] lastPixel = r.getPixel(0, y, (int[]) null); - - for (int x = 1; x < width - 1; x++) { - - int[] currPixel = r.getPixel(x, y, (int[]) null); - - int diff = Math.abs(currPixel[0] - 
lastPixel[0]); - if (diff > GRAYSCALE_INTENSITY_THRESHOLD) { - // we hit what could be a line - // don't bother scanning it if we've hit a pixel in the line before - boolean alreadyChecked = false; - for (Line2D.Float line : verticalRulings) { - if (x == line.getX1() && y >= line.getY1() && y <= line.getY2()) { - alreadyChecked = true; - break; - } - } - - if (alreadyChecked) { - lastPixel = currPixel; - continue; - } - - int lineY = y + 1; - - while (lineY < height) { - int[] linePixel = r.getPixel(x, lineY, (int[]) null); - int[] leftPixel = r.getPixel(x - 1, lineY, (int[]) null); - - if (Math.abs(linePixel[0] - leftPixel[0]) <= GRAYSCALE_INTENSITY_THRESHOLD - || Math.abs(currPixel[0] - linePixel[0]) > GRAYSCALE_INTENSITY_THRESHOLD) { - break; - } - - lineY++; - } - - int endY = lineY - 1; - int lineLength = endY - y; - if (lineLength > VERTICAL_EDGE_HEIGHT_MINIMUM) { - verticalRulings.add(new Ruling(new Point2D.Float(x, y), new Point2D.Float(x, endY))); - } - } - - lastPixel = currPixel; - } - } - - return verticalRulings; - } - - private PDDocument removeText(PDPage page) throws IOException { - - PDFStreamParser parser = new PDFStreamParser(page); - parser.parse(); - - PDDocument document = new PDDocument(); - PDPage newPage = document.importPage(page); - newPage.setResources(page.getResources()); - - PDStream newContents = new PDStream(document); - OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE); - ContentStreamWriter writer = new ContentStreamWriter(out); - List tokensWithoutText = createTokensWithoutText(page); - writer.writeTokens(tokensWithoutText); - out.close(); - newPage.setContents(newContents); - return document; - } - - - /** - * @param contentStream contentStream - * @return newTokens - * @throws IOException When parseNextToken on Error - * @see ... 
- */ - private static List createTokensWithoutText(PDContentStream contentStream) throws IOException { - PDFStreamParser parser = new PDFStreamParser(contentStream); - Object token = parser.parseNextToken(); - List newTokens = new ArrayList<>(); - while (token != null) { - if (token instanceof Operator) { - Operator op = (Operator) token; - String opName = op.getName(); - if (OperatorName.SHOW_TEXT_ADJUSTED.equals(opName) - || OperatorName.SHOW_TEXT.equals(opName) - || OperatorName.SHOW_TEXT_LINE.equals(opName)) { - // remove the argument to this operator - newTokens.remove(newTokens.size() - 1); - - token = parser.parseNextToken(); - continue; - } else if (OperatorName.SHOW_TEXT_LINE_AND_SPACE.equals(opName)) { - // remove the 3 arguments to this operator - newTokens.remove(newTokens.size() - 1); - newTokens.remove(newTokens.size() - 1); - newTokens.remove(newTokens.size() - 1); - - token = parser.parseNextToken(); - continue; - } - } - newTokens.add(token); - token = parser.parseNextToken(); - } - return newTokens; - } - - -} diff --git a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java deleted file mode 100644 index 43136ba5..00000000 --- a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java +++ /dev/null @@ -1,30 +0,0 @@ -package technology.tabula.detectors; - -import technology.tabula.Cell; -import technology.tabula.Page; -import technology.tabula.Rectangle; -import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; - -import java.util.Collections; -import java.util.List; - -/** - * Created by matt on 2015-12-14. - * - * This is the basic spreadsheet table detection algorithm currently implemented in tabula (web). - * - * It uses intersecting ruling lines to find tables. 
- */ -public class SpreadsheetDetectionAlgorithm implements DetectionAlgorithm { - @Override - public List detect(Page page) { - List cells = SpreadsheetExtractionAlgorithm.findCells(page.getHorizontalRulings(), page.getVerticalRulings()); - - List tables = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells); - - // we want tables to be returned from top to bottom on the page - Collections.sort(tables, Rectangle.ILL_DEFINED_ORDER); - - return tables; - } -} diff --git a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java deleted file mode 100644 index ed2e78e3..00000000 --- a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java +++ /dev/null @@ -1,166 +0,0 @@ -package technology.tabula.extractors; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.Arrays; - -import technology.tabula.Line; -import technology.tabula.Page; -import technology.tabula.Rectangle; -import technology.tabula.Ruling; -import technology.tabula.Table; -import technology.tabula.TextChunk; -import technology.tabula.TextElement; - -public class BasicExtractionAlgorithm implements ExtractionAlgorithm { - - private List verticalRulings = null; - - public BasicExtractionAlgorithm() { - } - - public BasicExtractionAlgorithm(List verticalRulings) { - this.verticalRulings = verticalRulings; - } - - public List

extract(Page page, List verticalRulingPositions) { - List verticalRulings = new ArrayList<>(verticalRulingPositions.size()); - for (Float p: verticalRulingPositions) { - verticalRulings.add(new Ruling(page.getTop(), p, 0.0f, (float) page.getHeight())); - } - this.verticalRulings = verticalRulings; - return this.extract(page); - } - - @Override - public List
extract(Page page) { - - List textElements = page.getText(); - - if (textElements.size() == 0) { - return Arrays.asList(new Table[] { Table.empty() }); - } - - List textChunks = this.verticalRulings == null ? TextElement.mergeWords(page.getText()) : TextElement.mergeWords(page.getText(), this.verticalRulings); - List lines = TextChunk.groupByLines(textChunks); - List columns = null; - - if (this.verticalRulings != null) { - Collections.sort(this.verticalRulings, new Comparator() { - @Override - public int compare(Ruling arg0, Ruling arg1) { - return Double.compare(arg0.getLeft(), arg1.getLeft()); - } - }); - columns = new ArrayList<>(this.verticalRulings.size()); - for (Ruling vr: this.verticalRulings) { - columns.add(vr.getLeft()); - } - } - else { - columns = columnPositions(lines); - } - - Table table = new Table(this); - table.setRect(page.getLeft(), page.getTop(), page.getWidth(), page.getHeight()); - table.setPageNumber(page.getPageNumber()); - - for (int i = 0; i < lines.size(); i++) { - Line line = lines.get(i); - List elements = line.getTextElements(); - - Collections.sort(elements, new Comparator() { - - @Override - public int compare(TextChunk o1, TextChunk o2) { - return Float.compare(o1.getLeft(), o2.getLeft()); - } - }); - - for (TextChunk tc: elements) { - if (tc.isSameChar(Line.WHITE_SPACE_CHARS)) { - continue; - } - - int j = 0; - boolean found = false; - for(; j < columns.size(); j++) { - if (tc.getLeft() <= columns.get(j)) { - found = true; - break; - } - } - table.add(tc, i, found ? 
j : columns.size()); - } - } - - return Arrays.asList(new Table[] { table } ); - } - - @Override - public String toString() { - return "stream"; - } - - - /** - * @param lines must be an array of lines sorted by their +top+ attribute - * @return a list of column boundaries (x axis) - */ - public static List columnPositions(List lines) { - - List regions = new ArrayList<>(); - for (TextChunk tc: lines.get(0).getTextElements()) { - if (tc.isSameChar(Line.WHITE_SPACE_CHARS)) { - continue; - } - Rectangle r = new Rectangle(); - r.setRect(tc); - regions.add(r); - } - - for (Line l: lines.subList(1, lines.size())) { - List lineTextElements = new ArrayList<>(); - for (TextChunk tc: l.getTextElements()) { - if (!tc.isSameChar(Line.WHITE_SPACE_CHARS)) { - lineTextElements.add(tc); - } - } - - for (Rectangle cr: regions) { - - List overlaps = new ArrayList<>(); - for (TextChunk te: lineTextElements) { - if (cr.horizontallyOverlaps(te)) { - overlaps.add(te); - } - } - - for (TextChunk te: overlaps) { - cr.merge(te); - } - - lineTextElements.removeAll(overlaps); - } - - for (TextChunk te: lineTextElements) { - Rectangle r = new Rectangle(); - r.setRect(te); - regions.add(r); - } - } - - List rv = new ArrayList<>(); - for (Rectangle r: regions) { - rv.add(r.getRight()); - } - - Collections.sort(rv); - - return rv; - - } - -} diff --git a/src/main/java/technology/tabula/extractors/ExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/ExtractionAlgorithm.java deleted file mode 100644 index 9ad692c4..00000000 --- a/src/main/java/technology/tabula/extractors/ExtractionAlgorithm.java +++ /dev/null @@ -1,13 +0,0 @@ -package technology.tabula.extractors; - -import java.util.List; - -import technology.tabula.Page; -import technology.tabula.Table; - -public interface ExtractionAlgorithm { - - List extract(Page page); - String toString(); - -} diff --git a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java 
b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java deleted file mode 100644 index 5b4af3d5..00000000 --- a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java +++ /dev/null @@ -1,326 +0,0 @@ -package technology.tabula.extractors; - -import technology.tabula.*; - -import java.awt.geom.Point2D; -import java.util.*; - -/** - * @author manuel - * - */ -public class SpreadsheetExtractionAlgorithm implements ExtractionAlgorithm { - - private static final float MAGIC_HEURISTIC_NUMBER = 0.65f; - - private static final Comparator Y_FIRST_POINT_COMPARATOR = (point1, point2) -> { - int compareY = compareRounded(point1.getY(), point2.getY()); - if (compareY == 0) { - return compareRounded(point1.getX(), point2.getX()); - } - return compareY; - }; - - private static final Comparator X_FIRST_POINT_COMPARATOR = (point1, point2) -> { - int compareX = compareRounded(point1.getX(), point2.getX()); - if (compareX == 0) { - return compareRounded(point1.getY(), point2.getY()); - } - return compareX; - }; - - private static int compareRounded(double d1, double d2) { - float d1Rounded = Utils.round(d1, 2); - float d2Rounded = Utils.round(d2, 2); - - return Float.compare(d1Rounded, d2Rounded); - } - - @Override - public List
extract(Page page) { - return extract(page, page.getRulings()); - } - - /** - * Extract a list of Table from page using rulings as separators - */ - public List
extract(Page page, List rulings) { - // split rulings into horizontal and vertical - List horizontalR = new ArrayList<>(); - List verticalR = new ArrayList<>(); - - for (Ruling r: rulings) { - if (r.horizontal()) { - horizontalR.add(r); - } - else if (r.vertical()) { - verticalR.add(r); - } - } - horizontalR = Ruling.collapseOrientedRulings(horizontalR); - verticalR = Ruling.collapseOrientedRulings(verticalR); - - List cells = findCells(horizontalR, verticalR); - List spreadsheetAreas = findSpreadsheetsFromCells(cells); - - List
spreadsheets = new ArrayList<>(); - for (Rectangle area: spreadsheetAreas) { - - List overlappingCells = new ArrayList<>(); - for (Cell c: cells) { - if (c.intersects(area)) { - - c.setTextElements(TextElement.mergeWords(page.getText(c))); - overlappingCells.add(c); - } - } - - List horizontalOverlappingRulings = new ArrayList<>(); - for (Ruling hr: horizontalR) { - if (area.intersectsLine(hr)) { - horizontalOverlappingRulings.add(hr); - } - } - List verticalOverlappingRulings = new ArrayList<>(); - for (Ruling vr: verticalR) { - if (area.intersectsLine(vr)) { - verticalOverlappingRulings.add(vr); - } - } - - TableWithRulingLines t = new TableWithRulingLines(area, overlappingCells, horizontalOverlappingRulings, verticalOverlappingRulings, this, page.getPageNumber()); - spreadsheets.add(t); - } - Utils.sort(spreadsheets, Rectangle.ILL_DEFINED_ORDER); - return spreadsheets; - } - - public boolean isTabular(Page page) { - - // if there's no text at all on the page, it's not a table - // (we won't be able to do anything with it though) - if (page.getText().isEmpty()){ - return false; - } - - // get minimal region of page that contains every character (in effect, - // removes white "margins") - Page minimalRegion = page.getArea(Utils.bounds(page.getText())); - - List tables = new SpreadsheetExtractionAlgorithm().extract(minimalRegion); - if (tables.isEmpty()) { - return false; - } - Table table = tables.get(0); - int rowsDefinedByLines = table.getRowCount(); - int colsDefinedByLines = table.getColCount(); - - tables = new BasicExtractionAlgorithm().extract(minimalRegion); - if (tables.isEmpty()) { - return false; - } - table = tables.get(0); - int rowsDefinedWithoutLines = table.getRowCount(); - int colsDefinedWithoutLines = table.getColCount(); - - float ratio = (((float) colsDefinedByLines / colsDefinedWithoutLines) + - ((float) rowsDefinedByLines / rowsDefinedWithoutLines)) / 2.0f; - - return ratio > MAGIC_HEURISTIC_NUMBER && ratio < (1 / MAGIC_HEURISTIC_NUMBER); - } 
- - public static List findCells(List horizontalRulingLines, List verticalRulingLines) { - List cellsFound = new ArrayList<>(); - Map intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines); - List intersectionPointsList = new ArrayList<>(intersectionPoints.keySet()); - intersectionPointsList.sort(Y_FIRST_POINT_COMPARATOR); - - for (int i = 0; i < intersectionPointsList.size(); i++) { - Point2D topLeft = intersectionPointsList.get(i); - Ruling[] hv = intersectionPoints.get(topLeft); - - List xPoints = new ArrayList<>(); - List yPoints = new ArrayList<>(); - - for (Point2D p: intersectionPointsList.subList(i, intersectionPointsList.size())) { - if (p.getX() == topLeft.getX() && p.getY() > topLeft.getY()) { - xPoints.add(p); - } - if (p.getY() == topLeft.getY() && p.getX() > topLeft.getX()) { - yPoints.add(p); - } - } - outer: - for (Point2D xPoint: xPoints) { - - // is there a vertical edge b/w topLeft and xPoint? - if (!hv[1].equals(intersectionPoints.get(xPoint)[1])) { - continue; - } - for (Point2D yPoint: yPoints) { - // is there an horizontal edge b/w topLeft and yPoint ? - if (!hv[0].equals(intersectionPoints.get(yPoint)[0])) { - continue; - } - Point2D btmRight = new Point2D.Float((float) yPoint.getX(), (float) xPoint.getY()); - if (intersectionPoints.containsKey(btmRight) - && intersectionPoints.get(btmRight)[0].equals(intersectionPoints.get(xPoint)[0]) - && intersectionPoints.get(btmRight)[1].equals(intersectionPoints.get(yPoint)[1])) { - cellsFound.add(new Cell(topLeft, btmRight)); - break outer; - } - } - } - } - - // TODO create cells for vertical ruling lines with aligned endpoints at the top/bottom of a grid - // that aren't connected with an horizontal ruler? 
- // see: https://github.com/jazzido/tabula-extractor/issues/78#issuecomment-41481207 - - return cellsFound; - } - - public static List findSpreadsheetsFromCells(List cells) { - // via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon - List rectangles = new ArrayList<>(); - Set pointSet = new HashSet<>(); - Map edgesH = new HashMap<>(); - Map edgesV = new HashMap<>(); - int i = 0; - - cells = new ArrayList<>(new HashSet<>(cells)); - - Utils.sort(cells, Rectangle.ILL_DEFINED_ORDER); - - for (Rectangle cell: cells) { - for(Point2D pt: cell.getPoints()) { - if (pointSet.contains(pt)) { // shared vertex, remove it - pointSet.remove(pt); - } - else { - pointSet.add(pt); - } - } - } - - // X first sort - List pointsSortX = new ArrayList<>(pointSet); - pointsSortX.sort(X_FIRST_POINT_COMPARATOR); - // Y first sort - List pointsSortY = new ArrayList<>(pointSet); - pointsSortY.sort(Y_FIRST_POINT_COMPARATOR); - - while (i < pointSet.size()) { - float currY = (float) pointsSortY.get(i).getY(); - while (i < pointSet.size() && Utils.feq(pointsSortY.get(i).getY(), currY)) { - edgesH.put(pointsSortY.get(i), pointsSortY.get(i+1)); - edgesH.put(pointsSortY.get(i+1), pointsSortY.get(i)); - i += 2; - } - } - - i = 0; - while (i < pointSet.size()) { - float currX = (float) pointsSortX.get(i).getX(); - while (i < pointSet.size() && Utils.feq(pointsSortX.get(i).getX(), currX)) { - edgesV.put(pointsSortX.get(i), pointsSortX.get(i+1)); - edgesV.put(pointsSortX.get(i+1), pointsSortX.get(i)); - i += 2; - } - } - - // Get all the polygons - List> polygons = new ArrayList<>(); - Point2D nextVertex; - while (!edgesH.isEmpty()) { - ArrayList polygon = new ArrayList<>(); - Point2D first = edgesH.keySet().iterator().next(); - polygon.add(new PolygonVertex(first, Direction.HORIZONTAL)); - edgesH.remove(first); - - while (true) { - PolygonVertex curr = polygon.get(polygon.size() - 1); - PolygonVertex lastAddedVertex; - if (curr.direction == 
Direction.HORIZONTAL) { - nextVertex = edgesV.get(curr.point); - edgesV.remove(curr.point); - lastAddedVertex = new PolygonVertex(nextVertex, Direction.VERTICAL); - } - else { - nextVertex = edgesH.get(curr.point); - edgesH.remove(curr.point); - lastAddedVertex = new PolygonVertex(nextVertex, Direction.HORIZONTAL); - } - polygon.add(lastAddedVertex); - - if (lastAddedVertex.equals(polygon.get(0))) { - // closed polygon - polygon.remove(polygon.size() - 1); - break; - } - } - - for (PolygonVertex vertex: polygon) { - edgesH.remove(vertex.point); - edgesV.remove(vertex.point); - } - polygons.add(polygon); - } - - // calculate grid-aligned minimum area rectangles for each found polygon - for(List poly: polygons) { - float top = java.lang.Float.MAX_VALUE; - float left = java.lang.Float.MAX_VALUE; - float bottom = java.lang.Float.MIN_VALUE; - float right = java.lang.Float.MIN_VALUE; - for (PolygonVertex pt: poly) { - top = (float) Math.min(top, pt.point.getY()); - left = (float) Math.min(left, pt.point.getX()); - bottom = (float) Math.max(bottom, pt.point.getY()); - right = (float) Math.max(right, pt.point.getX()); - } - rectangles.add(new Rectangle(top, left, right - left, bottom - top)); - } - - return rectangles; - } - - @Override - public String toString() { - return "lattice"; - } - - private enum Direction { - HORIZONTAL, - VERTICAL - } - - static class PolygonVertex { - Point2D point; - Direction direction; - - public PolygonVertex(Point2D point, Direction direction) { - this.direction = direction; - this.point = point; - } - - @Override - public boolean equals(Object other) { - if (this == other) - return true; - if (!(other instanceof PolygonVertex)) - return false; - return this.point.equals(((PolygonVertex) other).point); - } - - @Override - public int hashCode() { - return this.point.hashCode(); - } - - @Override - public String toString() { - return String.format("%s[point=%s,direction=%s]", this.getClass().getName(), this.point.toString(), 
this.direction.toString()); - } - } -} diff --git a/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java b/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java deleted file mode 100644 index 4a61aa31..00000000 --- a/src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java +++ /dev/null @@ -1,29 +0,0 @@ -package technology.tabula.json; - -import java.lang.reflect.Type; - -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import com.google.gson.JsonSerializationContext; -import com.google.gson.JsonSerializer; - -import technology.tabula.RectangularTextContainer; - -public final class RectangularTextContainerSerializer implements JsonSerializer> { - - public static final RectangularTextContainerSerializer INSTANCE = new RectangularTextContainerSerializer(); - - private RectangularTextContainerSerializer() {} - - @Override - public JsonElement serialize(RectangularTextContainer textContainer, Type type, JsonSerializationContext context) { - JsonObject json = new JsonObject(); - json.addProperty("top", textContainer.getTop()); - json.addProperty("left", textContainer.getLeft()); - json.addProperty("width", textContainer.getWidth()); - json.addProperty("height", textContainer.getHeight()); - json.addProperty("text", textContainer.getText()); - return json; - } - -} \ No newline at end of file diff --git a/src/main/java/technology/tabula/json/TableSerializer.java b/src/main/java/technology/tabula/json/TableSerializer.java deleted file mode 100644 index 0caaf0e5..00000000 --- a/src/main/java/technology/tabula/json/TableSerializer.java +++ /dev/null @@ -1,46 +0,0 @@ -package technology.tabula.json; - -import java.lang.reflect.Type; -import java.util.List; - -import technology.tabula.RectangularTextContainer; -import technology.tabula.Table; - -import com.google.gson.JsonArray; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import 
com.google.gson.JsonSerializationContext; -import com.google.gson.JsonSerializer; - -public final class TableSerializer implements JsonSerializer
{ - - public static final TableSerializer INSTANCE = new TableSerializer(); - - private TableSerializer() {} - - @Override - public JsonElement serialize(Table table, Type type, JsonSerializationContext context) { - JsonObject json = new JsonObject(); - JsonArray data = new JsonArray(); - - json.addProperty("extraction_method", table.getExtractionMethod()); - json.addProperty("page_number", table.getPageNumber()); - json.addProperty("top", table.getTop()); - json.addProperty("left", table.getLeft()); - json.addProperty("width", table.getWidth()); - json.addProperty("height", table.getHeight()); - json.addProperty("right", table.getRight()); - json.addProperty("bottom", table.getBottom()); - json.add("data", data); - - for (List tableRow : table.getRows()) { - JsonArray jsonRow = new JsonArray(); - for (RectangularTextContainer textChunk : tableRow) - jsonRow.add(context.serialize(textChunk)); - data.add(jsonRow); - } - - return json; - } - -} diff --git a/src/main/java/technology/tabula/writers/CSVWriter.java b/src/main/java/technology/tabula/writers/CSVWriter.java deleted file mode 100644 index 682397b8..00000000 --- a/src/main/java/technology/tabula/writers/CSVWriter.java +++ /dev/null @@ -1,47 +0,0 @@ -package technology.tabula.writers; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -import org.apache.commons.csv.CSVPrinter; -import org.apache.commons.csv.CSVFormat; - -import technology.tabula.RectangularTextContainer; -import technology.tabula.Table; - -public class CSVWriter implements Writer { - - private final CSVFormat format; - - public CSVWriter() { - this(CSVFormat.EXCEL); - } - - protected CSVWriter(CSVFormat format) { - this.format = format; - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - @Override - public void write(Appendable out, Table table) throws IOException { - write(out, Collections.singletonList(table)); - } - - @Override - public 
void write(Appendable out, List
tables) throws IOException { - try (CSVPrinter printer = new CSVPrinter(out, format)) { - for (Table table : tables) { - for (List row : table.getRows()) { - List cells = new ArrayList<>(row.size()); - for (RectangularTextContainer cell : row) - cells.add(cell.getText()); - printer.printRecord(cells); - } - } - printer.flush(); - } - } - -} diff --git a/src/main/java/technology/tabula/writers/JSONWriter.java b/src/main/java/technology/tabula/writers/JSONWriter.java deleted file mode 100644 index bb566f2d..00000000 --- a/src/main/java/technology/tabula/writers/JSONWriter.java +++ /dev/null @@ -1,61 +0,0 @@ -package technology.tabula.writers; - -import com.google.gson.ExclusionStrategy; -import com.google.gson.FieldAttributes; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import com.google.gson.JsonArray; - -import technology.tabula.Cell; -import technology.tabula.RectangularTextContainer; -import technology.tabula.Table; -import technology.tabula.TextChunk; -import technology.tabula.json.RectangularTextContainerSerializer; -import technology.tabula.json.TableSerializer; - -import java.io.IOException; -import java.util.List; - -import static java.lang.reflect.Modifier.PUBLIC; - -public class JSONWriter implements Writer { - - private static final ExclusionStrategy ALL_CLASSES_SKIPPING_NON_PUBLIC_FIELDS = new ExclusionStrategy() { - @Override - public boolean shouldSkipClass(Class c) { - return false; - } - - @Override - public boolean shouldSkipField(FieldAttributes fieldAttributes) { - return !fieldAttributes.hasModifier(PUBLIC); - } - }; - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - @Override - public void write(Appendable out, Table table) throws IOException { - out.append(gson().toJson(table, Table.class)); - } - - @Override - public void write(Appendable out, List
tables) throws IOException { - Gson gson = gson(); - JsonArray jsonElements = new JsonArray(); - for (Table table : tables) - jsonElements.add(gson.toJsonTree(table, Table.class)); - out.append(gson.toJson(jsonElements)); - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - private static Gson gson() { - return new GsonBuilder() - .addSerializationExclusionStrategy(ALL_CLASSES_SKIPPING_NON_PUBLIC_FIELDS) - .registerTypeAdapter(Table.class, TableSerializer.INSTANCE) - .registerTypeAdapter(RectangularTextContainer.class, RectangularTextContainerSerializer.INSTANCE) - .registerTypeAdapter(Cell.class, RectangularTextContainerSerializer.INSTANCE) - .registerTypeAdapter(TextChunk.class, RectangularTextContainerSerializer.INSTANCE) - .create(); - } - -} diff --git a/src/main/java/technology/tabula/writers/TSVWriter.java b/src/main/java/technology/tabula/writers/TSVWriter.java deleted file mode 100644 index 115d0347..00000000 --- a/src/main/java/technology/tabula/writers/TSVWriter.java +++ /dev/null @@ -1,11 +0,0 @@ -package technology.tabula.writers; - -import org.apache.commons.csv.CSVFormat; - -public class TSVWriter extends CSVWriter { - - public TSVWriter() { - super(CSVFormat.TDF); - } - -} diff --git a/src/main/java/technology/tabula/writers/Writer.java b/src/main/java/technology/tabula/writers/Writer.java deleted file mode 100644 index 99b708c6..00000000 --- a/src/main/java/technology/tabula/writers/Writer.java +++ /dev/null @@ -1,14 +0,0 @@ -package technology.tabula.writers; - -import java.io.IOException; -import java.util.List; - -import technology.tabula.Table; - -public interface Writer { - - void write(Appendable out, Table table) throws IOException; - - void write(Appendable out, List
tables) throws IOException; - -} diff --git a/src/test/java/technology/tabula/TableTest.java b/src/test/java/technology/tabula/TableTest.java deleted file mode 100644 index c574a553..00000000 --- a/src/test/java/technology/tabula/TableTest.java +++ /dev/null @@ -1,45 +0,0 @@ -package technology.tabula; - -import static org.junit.Assert.*; - -import org.junit.Test; - -public class TableTest { - - @Test public void testEmpty() { - Table empty = Table.empty(); - - assertEquals(TextChunk.EMPTY, empty.getCell(0, 0)); - assertEquals(TextChunk.EMPTY, empty.getCell(1, 1)); - - assertEquals(0, empty.getRowCount()); - assertEquals(0, empty.getColCount()); - - assertEquals("", empty.getExtractionMethod()); - - assertEquals(0, empty.getTop(), 0); - assertEquals(0, empty.getRight(), 0); - assertEquals(0, empty.getBottom(), 0); - assertEquals(0, empty.getLeft(), 0); - - assertEquals(0, empty.getArea(), 0); - } - - @Test public void testRowColCounts() { - Table table = Table.empty(); - - assertEquals(0, table.getRowCount()); - assertEquals(0, table.getColCount()); - - table.add(TextChunk.EMPTY, 0, 0); - - assertEquals(1, table.getRowCount()); - assertEquals(1, table.getColCount()); - - table.add(TextChunk.EMPTY, 9, 9); - - assertEquals(10, table.getRowCount()); - assertEquals(10, table.getColCount()); - } - -} diff --git a/src/test/java/technology/tabula/TestBasicExtractor.java b/src/test/java/technology/tabula/TestBasicExtractor.java deleted file mode 100644 index b56fd6ea..00000000 --- a/src/test/java/technology/tabula/TestBasicExtractor.java +++ /dev/null @@ -1,352 +0,0 @@ -package technology.tabula; - -import static org.junit.Assert.*; - -import java.io.File; -import java.io.IOException; -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVParser; -import org.apache.commons.csv.CSVRecord; -import org.junit.Test; - -import 
technology.tabula.extractors.BasicExtractionAlgorithm; -import technology.tabula.writers.CSVWriter; - -public class TestBasicExtractor { - - private static final String EU_002_PDF = "src/test/resources/technology/tabula/eu-002.pdf"; - private static final String[][] EU_002_EXPECTED = { - {"", "", "Involvement of pupils in", ""}, - {"", "Preperation and", "Production of", "Presentation an"}, - {"", "planing", "materials", "evaluation"}, - {"Knowledge and awareness of different cultures", "0,2885", "0,3974", "0,3904"}, - {"Foreign language competence", "0,3057", "0,4184", "0,3899"}, - {"Social skills and abilities", "0,3416", "0,3369", "0,4303"}, - {"Acquaintance of special knowledge", "0,2569", "0,2909", "0,3557"}, - {"Self competence", "0,3791", "0,3320", "0,4617"} - }; - - private static final String ARGENTINA_DIPUTADOS_VOTING_RECORD_PDF = "src/test/resources/technology/tabula/argentina_diputados_voting_record.pdf"; - private static final String[][] ARGENTINA_DIPUTADOS_VOTING_RECORD_EXPECTED = { - {"ABDALA de MATARAZZO, Norma Amanda", "Frente Cívico por Santiago", "Santiago del Estero", "AFIRMATIVO"}, - {"ALBRIEU, Oscar Edmundo Nicolas", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, - {"ALONSO, María Luz", "Frente para la Victoria - PJ", "La Pampa", "AFIRMATIVO"}, - {"ARENA, Celia Isabel", "Frente para la Victoria - PJ", "Santa Fe", "AFIRMATIVO"}, - {"ARREGUI, Andrés Roberto", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, - {"AVOSCAN, Herman Horacio", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, - {"BALCEDO, María Ester", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, - {"BARRANDEGUY, Raúl Enrique", "Frente para la Victoria - PJ", "Entre Ríos", "AFIRMATIVO"}, - {"BASTERRA, Luis Eugenio", "Frente para la Victoria - PJ", "Formosa", "AFIRMATIVO"}, - {"BEDANO, Nora Esther", "Frente para la Victoria - PJ", "Córdoba", "AFIRMATIVO"}, - {"BERNAL, María Eugenia", "Frente para la Victoria - PJ", "Jujuy", 
"AFIRMATIVO"}, - {"BERTONE, Rosana Andrea", "Frente para la Victoria - PJ", "Tierra del Fuego", "AFIRMATIVO"}, - {"BIANCHI, María del Carmen", "Frente para la Victoria - PJ", "Cdad. Aut. Bs. As.", "AFIRMATIVO"}, - {"BIDEGAIN, Gloria Mercedes", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, - {"BRAWER, Mara", "Frente para la Victoria - PJ", "Cdad. Aut. Bs. As.", "AFIRMATIVO"}, - {"BRILLO, José Ricardo", "Movimiento Popular Neuquino", "Neuquén", "AFIRMATIVO"}, - {"BROMBERG, Isaac Benjamín", "Frente para la Victoria - PJ", "Tucumán", "AFIRMATIVO"}, - {"BRUE, Daniel Agustín", "Frente Cívico por Santiago", "Santiago del Estero", "AFIRMATIVO"}, - {"CALCAGNO, Eric", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, - {"CARLOTTO, Remo Gerardo", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, - {"CARMONA, Guillermo Ramón", "Frente para la Victoria - PJ", "Mendoza", "AFIRMATIVO"}, - {"CATALAN MAGNI, Julio César", "Frente para la Victoria - PJ", "Tierra del Fuego", "AFIRMATIVO"}, - {"CEJAS, Jorge Alberto", "Frente para la Victoria - PJ", "Rio Negro", "AFIRMATIVO"}, - {"CHIENO, María Elena", "Frente para la Victoria - PJ", "Corrientes", "AFIRMATIVO"}, - {"CIAMPINI, José Alberto", "Frente para la Victoria - PJ", "Neuquén", "AFIRMATIVO"}, - {"CIGOGNA, Luis Francisco Jorge", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, - {"CLERI, Marcos", "Frente para la Victoria - PJ", "Santa Fe", "AFIRMATIVO"}, - {"COMELLI, Alicia Marcela", "Movimiento Popular Neuquino", "Neuquén", "AFIRMATIVO"}, - {"CONTI, Diana Beatriz", "Frente para la Victoria - PJ", "Buenos Aires", "AFIRMATIVO"}, - {"CORDOBA, Stella Maris", "Frente para la Victoria - PJ", "Tucumán", "AFIRMATIVO"}, - {"CURRILEN, Oscar Rubén", "Frente para la Victoria - PJ", "Chubut", "AFIRMATIVO"} - }; - - private static final String EU_017_PDF = "src/test/resources/technology/tabula/eu-017.pdf"; - private static final String[][] EU_017_EXPECTED = { - {"", "Austria", "77", "1", 
"78"}, - {"", "Belgium", "159", "2", "161"}, - {"", "Bulgaria", "52", "0", "52"}, - {"", "Croatia", "144", "0", "144"}, - {"", "Cyprus", "43", "2", "45"}, - {"", "Czech Republic", "78", "0", "78"}, - {"", "Denmark", "151", "2", "153"}, - {"", "Estonia", "46", "0", "46"}, - {"", "Finland", "201", "1", "202"}, - {"", "France", "428", "7", "435"}, - {"", "Germany", "646", "21", "667"}, - {"", "Greece", "113", "2", "115"}, - {"", "Hungary", "187", "0", "187"}, - {"", "Iceland", "18", "0", "18"}, - {"", "Ireland", "213", "4", "217"}, - {"", "Israel", "25", "0", "25"}, - {"", "Italy", "627", "12", "639"}, - {"", "Latvia", "7", "0", "7"}, - {"", "Lithuania", "94", "1", "95"}, - {"", "Luxembourg", "22", "0", "22"}, - {"", "Malta", "18", "0", "18"}, - {"", "Netherlands", "104", "1", "105"}, - {"", "Norway", "195", "0", "195"}, - {"", "Poland", "120", "1", "121"}, - {"", "Portugal", "532", "3", "535"}, - {"", "Romania", "110", "0", "110"}, - {"", "Slovakia", "176", "0", "176"}, - {"", "Slovenia", "56", "0", "56"}, - {"", "Spain", "614", "3", "617"}, - {"", "Sweden", "122", "3", "125"}, - {"", "Switzerland", "64", "0", "64"}, - {"", "Turkey", "96", "0", "96"}, - {"", "United Kingdom", "572", "14", "586"} - }; - - private static final String FRX_2012_DISCLOSURE_PDF = "src/test/resources/technology/tabula/frx_2012_disclosure.pdf"; - private static final String[][] FRX_2012_DISCLOSURE_EXPECTED = { - {"AANONSEN, DEBORAH, A", "", "STATEN ISLAND, NY", "MEALS", "$85.00"}, - {"TOTAL", "", "", "", "$85.00"}, - {"AARON, CAREN, T", "", "RICHMOND, VA", "EDUCATIONAL ITEMS", "$78.80"}, - {"AARON, CAREN, T", "", "RICHMOND, VA", "MEALS", "$392.45"}, - {"TOTAL", "", "", "", "$471.25"}, - {"AARON, JOHN", "", "CLARKSVILLE, TN", "MEALS", "$20.39"}, - {"TOTAL", "", "", "", "$20.39"}, - {"AARON, JOSHUA, N", "", "WEST GROVE, PA", "MEALS", "$310.33"}, - {"", "REGIONAL PULMONARY & SLEEP", "", "", ""}, - {"AARON, JOSHUA, N", "", "WEST GROVE, PA", "SPEAKING FEES", "$4,700.00"}, - {"", "MEDICINE", "", 
"", ""}, - {"TOTAL", "", "", "", "$5,010.33"}, - {"AARON, MAUREEN, M", "", "MARTINSVILLE, VA", "MEALS", "$193.67"}, - {"TOTAL", "", "", "", "$193.67"}, - {"AARON, MICHAEL, L", "", "WEST ISLIP, NY", "MEALS", "$19.50"}, - {"TOTAL", "", "", "", "$19.50"}, - {"AARON, MICHAEL, R", "", "BROOKLYN, NY", "MEALS", "$65.92"} - }; - - private static final String[][] EXPECTED_EMPTY_TABLE = { /* actually empty! */ }; - - - @Test - public void testRemoveSequentialSpaces() throws IOException { - Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/m27.pdf", 79.2f, 28.28f, 103.04f, 732.6f); - BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); - Table table = bea.extract(page).get(0); - List firstRow = table.getRows().get(0); - - assertTrue(firstRow.get(1).getText().equals("ALLEGIANT AIR")); - assertTrue(firstRow.get(2).getText().equals("ALLEGIANT AIR LLC")); - page.getPDDoc().close(); - } - - @Test - public void testColumnRecognition() throws IOException { - Page page = UtilsForTesting.getAreaFromFirstPage(ARGENTINA_DIPUTADOS_VOTING_RECORD_PDF, 269.875f, 12.75f, 790.5f, 561f); - BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); - Table table = bea.extract(page).get(0); - assertArrayEquals(ARGENTINA_DIPUTADOS_VOTING_RECORD_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); - page.getPDDoc().close(); - } - - @Test - public void testVerticalRulingsPreventMergingOfColumns() throws IOException { - List rulings = new ArrayList<>(); - Float[] rulingsVerticalPositions = {147f, 256f, 310f, 375f, 431f, 504f}; - for (int i = 0; i < 6; i++) { - rulings.add(new Ruling(255.57f, rulingsVerticalPositions[i], 0, 398.76f - 255.57f)); - } - - Page page = UtilsForTesting.getAreaFromFirstPage( - "src/test/resources/technology/tabula/campaign_donors.pdf", - 255.57f, 40.43f, 398.76f, 557.35f); - BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(rulings); - Table table = bea.extract(page).get(0); - List sixthRow = 
table.getRows().get(5); - - assertTrue(sixthRow.get(0).getText().equals("VALSANGIACOMO BLANC")); - assertTrue(sixthRow.get(1).getText().equals("OFERNANDO JORGE")); - page.getPDDoc().close(); - } - - @Test - public void testExtractColumnsCorrectly() throws IOException { - Page page = UtilsForTesting.getAreaFromPage(EU_002_PDF, 1, 115.0f, 70.0f, 233.0f, 510.0f); - BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); - Table table = bea.extract(page).get(0); - assertArrayEquals(EU_002_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); - page.getPDDoc().close(); - } - - @Test - public void testExtractColumnsCorrectly2() throws IOException { - Page page = UtilsForTesting.getPage(EU_017_PDF, 3); - BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(page.getVerticalRulings()); - Table table = bea.extract(page.getArea(299.625f, 148.44f, 711.875f, 452.32f)).get(0); - assertArrayEquals(EU_017_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); - page.getPDDoc().close(); - } - - @Test - public void testExtractColumnsCorrectly3() throws IOException { - Page page = UtilsForTesting.getAreaFromFirstPage(FRX_2012_DISCLOSURE_PDF, 106.01f, 48.09f, 227.31f, 551.89f); - BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); - Table table = bea.extract(page).get(0); - assertArrayEquals(FRX_2012_DISCLOSURE_EXPECTED, UtilsForTesting.tableToArrayOfRows(table)); - page.getPDDoc().close(); - } - - @Test - public void testCheckSqueezeDoesntBreak() throws IOException { - Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/12s0324.pdf", - 99.0f, 17.25f, 316.5f, 410.25f); - BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); - Table table = bea.extract(page).get(0); - List> rows = table.getRows(); - List firstRow = rows.get(0); - List lastRow = rows.get(rows.size() - 1); - assertTrue(firstRow.get(0).getText().equals("Violent crime. . . . . . . . . . . . . . . . . 
.")); - assertTrue(lastRow.get(lastRow.size() - 1).getText().equals("(X)")); - page.getPDDoc().close(); - } - - @Test - public void testNaturalOrderOfRectangles() throws IOException { - Page page = UtilsForTesting.getPage( - "src/test/resources/technology/tabula/us-017.pdf", 2) - .getArea(446.0f, 97.0f, 685.0f, 520.0f); - BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm( - page.getVerticalRulings()); - Table table = bea.extract(page).get(0); - - List cells = new ArrayList<>(table.cells.values()); - for (RectangularTextContainer rectangularTextContainer : cells) { - System.out.println(rectangularTextContainer.getText()); - } - //Column headers - assertEquals("Project", cells.get(0).getText()); - assertEquals("Agency", cells.get(1).getText()); - assertEquals("Institution", cells.get(2).getText()); - - //First row - assertEquals("Nanotechnology and its publics", cells.get(3).getText()); - assertEquals("NSF", cells.get(4).getText()); - assertEquals("Pennsylvania State Universit", cells.get(5).getText()); - - //Second row - assertEquals("Public information and deliberation in nanoscience and", cells.get(6).getText()); - assertEquals("North Carolina State", cells.get(7).getText()); - assertEquals("Interagency", cells.get(8).getText()); - assertEquals("nanotechnology policy (SGER)", cells.get(9).getText()); - assertEquals("University", cells.get(10).getText()); - - //Third row - assertEquals("Social and ethical research and education in agrifood", cells.get(11).getText()); - assertEquals("NSF", cells.get(12).getText()); - assertEquals("Michigan State University", cells.get(13).getText()); - assertEquals("nanotechnology (NIRT)", cells.get(14).getText()); - - //Fourth row - assertEquals("From laboratory to society: developing an informed", cells.get(15).getText()); - assertEquals("NSF", cells.get(16).getText()); - assertEquals("University of South Carolina", cells.get(17).getText()); - assertEquals("approach to nanoscale science and engineering (NIRT)", 
cells.get(18).getText()); - - //Fifth row - assertEquals("Database and innovation timeline for nanotechnology", cells.get(19).getText()); - assertEquals("NSF", cells.get(20).getText()); - assertEquals("UCLA", cells.get(21).getText()); - - //Sixth row - assertEquals("Social and ethical dimensions of nanotechnology", cells.get(22).getText()); - assertEquals("NSF", cells.get(23).getText()); - assertEquals("University of Virginia", cells.get(24).getText()); - - //Seventh row - assertEquals("Undergraduate exploration of nanoscience,", cells.get(25).getText()); - assertEquals("Michigan Technological", cells.get(26).getText()); - assertEquals("NSF", cells.get(27).getText()); - assertEquals("applications and societal implications (NUE)", cells.get(28).getText()); - assertEquals("University", cells.get(29).getText()); - - //Eighth row - assertEquals("Ethics and belief inside the development of", cells.get(30).getText()); - assertEquals("NSF", cells.get(31).getText()); - assertEquals("University of Virginia", cells.get(32).getText()); - assertEquals("nanotechnology (CAREER)", cells.get(33).getText()); - - //Ninth row - assertEquals("All centers, NNIN and NCN have a societal", cells.get(34).getText()); - assertEquals("NSF, DOE,", cells.get(35).getText()); - assertEquals("All nanotechnology centers", cells.get(36).getText()); - assertEquals("implications components", cells.get(37).getText()); - assertEquals("DOD, and NIH", cells.get(38).getText()); - assertEquals("and networks", cells.get(39).getText()); - - page.getPDDoc().close(); - - } - - @Test - public void testNaturalOrderOfRectanglesOneMoreTime() throws IOException { - CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File("src/test/resources/technology/tabula/csv/TestBasicExtractor-RECTANGLE_TEST_NATURAL_ORDER.csv"), - Charset.forName("utf-8"), - CSVFormat.DEFAULT); - - List rectangles = new ArrayList<>(); - - for (CSVRecord record : parse) { - rectangles.add(new Rectangle(Float.parseFloat(record.get(0)), - 
Float.parseFloat(record.get(1)), - Float.parseFloat(record.get(2)), - Float.parseFloat(record.get(3)))); - } - - - //List rectangles = Arrays.asList(RECTANGLES_TEST_NATURAL_ORDER); - Utils.sort(rectangles, Rectangle.ILL_DEFINED_ORDER); - - for (int i = 0; i < (rectangles.size() - 1); i++) { - Rectangle rectangle = rectangles.get(i); - Rectangle nextRectangle = rectangles.get(i + 1); - - assertTrue(rectangle.compareTo(nextRectangle) < 0); - } - } - - @Test - public void testRealLifeRTL2() throws IOException { - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/indictb1h_14.csv"); - Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/indictb1h_14.pdf", 1, - 205.0f, 120.0f, 622.82f, 459.9f); - BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); - Table table = bea.extract(page).get(0); - - StringBuilder sb = new StringBuilder(); - (new CSVWriter()).write(sb, table); - assertEquals(expectedCsv, sb.toString()); - page.getPDDoc().close(); - } - - - @Test - public void testEmptyRegion() throws IOException { - Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/indictb1h_14.pdf", 1, 0, 0, 80.82f, 100.9f); // an empty area - BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); - Table table = bea.extract(page).get(0); - assertArrayEquals(EXPECTED_EMPTY_TABLE, UtilsForTesting.tableToArrayOfRows(table)); - page.getPDDoc().close(); - } - - - @Test - public void testTableWithMultilineHeader() throws IOException { - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/us-020.csv"); - Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/us-020.pdf", 2, 103.0f, 35.0f, 641.0f, 560.0f); - BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); - Table table = bea.extract(page).get(0); - - StringBuilder sb = new StringBuilder(); - (new CSVWriter()).write(sb, table); - assertEquals(expectedCsv, 
sb.toString()); - } - -} diff --git a/src/test/java/technology/tabula/TestCell.java b/src/test/java/technology/tabula/TestCell.java deleted file mode 100644 index 2795565c..00000000 --- a/src/test/java/technology/tabula/TestCell.java +++ /dev/null @@ -1,46 +0,0 @@ -package technology.tabula; - -import static org.junit.Assert.*; - -import java.util.List; -import java.util.ArrayList; - -import org.apache.pdfbox.pdmodel.font.PDType1Font; -import org.apache.pdfbox.pdmodel.font.Standard14Fonts; -import org.junit.Test; - -public class TestCell { - - @Test - public void testIsSpanning() { - Cell cell = new Cell(0, 0, 0, 0); - assertFalse(cell.isSpanning()); - cell.setSpanning(true); - assertTrue(cell.isSpanning()); - } - - @Test - public void testIsPlaceholder() { - Cell cell = new Cell(0, 0, 0, 0); - assertFalse(cell.isPlaceholder()); - cell.setPlaceholder(true); - assertTrue(cell.isPlaceholder()); - } - - @Test - public void testGetTextElements() { - Cell cell = new Cell(0, 0, 0, 0); - assertTrue(cell.getTextElements().isEmpty()); - - TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); - TextChunk tChunk = new TextChunk(tElement); - List tList = new ArrayList<>(); - tList.add(tChunk); - cell.setTextElements(tList); - - assertEquals("test", cell.getTextElements().get(0).getText()); - - - } - -} diff --git a/src/test/java/technology/tabula/TestCohenSutherland.java b/src/test/java/technology/tabula/TestCohenSutherland.java deleted file mode 100644 index 2d747608..00000000 --- a/src/test/java/technology/tabula/TestCohenSutherland.java +++ /dev/null @@ -1,102 +0,0 @@ -package technology.tabula; - -import org.junit.Before; -import org.junit.Test; - -import java.awt.geom.Line2D; -import java.awt.geom.Rectangle2D; - -import static org.junit.Assert.*; - -public class TestCohenSutherland { - - private Rectangle2D clipWindow; - private CohenSutherlandClipping algorithm; - private static final double DELTA = 
0.001; - - @Before - public void set() { - clipWindow = new Rectangle(10, 10, 50, 50); - algorithm = new CohenSutherlandClipping(clipWindow); - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - // TODO: How to parameterize the tests? - @Test - public void theLineIsCompletelyInside() { - Line2D.Float line = new Line2D.Float(20, 20, 30, 30); - assertTrue(algorithm.clip(line)); - assertEquals(20, line.x1, DELTA); - assertEquals(20, line.y1, DELTA); - assertEquals(30, line.x2, DELTA); - assertEquals(30, line.y2, DELTA); - } - - @Test - public void theLineIsCompletelyOnTheLeft() { - float x1 = 3, y1 = 13, x2 = 6, y2 = 16; - Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); - assertFalse(algorithm.clip(line)); - assertEquals(x1, line.x1, DELTA); - assertEquals(y1, line.y1, DELTA); - assertEquals(x2, line.x2, DELTA); - assertEquals(y2, line.y2, DELTA); - } - - @Test - public void theLineIsCompletelyOnTheUp() { - float x1 = 15, y1 = 5, x2 = 25, y2 = 2; - Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); - assertFalse(algorithm.clip(line)); - assertEquals(x1, line.x1, DELTA); - assertEquals(y1, line.y1, DELTA); - assertEquals(x2, line.x2, DELTA); - assertEquals(y2, line.y2, DELTA); - } - - @Test - public void theLineIsCompletelyOnTheRight() { - float x1 = 65, y1 = 15, x2 = 70, y2 = 20; - Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); - assertFalse(algorithm.clip(line)); - assertEquals(x1, line.x1, DELTA); - assertEquals(y1, line.y1, DELTA); - assertEquals(x2, line.x2, DELTA); - assertEquals(y2, line.y2, DELTA); - } - - @Test - public void theLineIsCompletelyOnTheBottom() { - float x1 = 15, y1 = 65, x2 = 25, y2 = 70; - Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); - assertFalse(algorithm.clip(line)); - assertEquals(x1, line.x1, DELTA); - assertEquals(y1, line.y1, DELTA); - assertEquals(x2, line.x2, DELTA); - assertEquals(y2, line.y2, DELTA); - } - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - // - @Test - public void lineCrossesTopLeftCorner() { - float x1 = 5, y1 = 25, x2 = 25, y2 = 5; - Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); - assertTrue(algorithm.clip(line)); - assertEquals(10, line.x1, DELTA); - assertEquals(20, line.y1, DELTA); - assertEquals(20, line.x2, DELTA); - assertEquals(10, line.y2, DELTA); - } - - @Test - public void lineCrossesPartiallyTopLeftCorner() { - float x1 = 15, y1 = 15, x2 = 25, y2 = 5; - Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); - assertTrue(algorithm.clip(line)); - assertEquals(x1, line.x1, DELTA); - assertEquals(y1, line.y1, DELTA); - assertEquals(20, line.x2, DELTA); - assertEquals(10, line.y2, DELTA); - } - -} diff --git a/src/test/java/technology/tabula/TestCommandLineApp.java b/src/test/java/technology/tabula/TestCommandLineApp.java deleted file mode 100644 index 5a4e3af5..00000000 --- a/src/test/java/technology/tabula/TestCommandLineApp.java +++ /dev/null @@ -1,219 +0,0 @@ -package technology.tabula; - -import static org.junit.Assert.*; - -import java.io.File; -import java.io.IOException; -import java.nio.file.*; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.DefaultParser; -import org.apache.commons.cli.ParseException; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -public class TestCommandLineApp { - - @Rule - public TemporaryFolder folder = new TemporaryFolder(); - - private String csvFromCommandLineArgs(String[] args) throws ParseException { - CommandLineParser parser = new DefaultParser(); - CommandLine cmd = parser.parse(CommandLineApp.buildOptions(), args); - - StringBuilder stringBuilder = new StringBuilder(); - new CommandLineApp(stringBuilder, cmd).extractTables(cmd); - - return stringBuilder.toString(); - } - - @Test - public void testExtractSpreadsheetWithArea() throws ParseException, IOException { - - String expectedCsv = 
UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv"); - - assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ - "src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf", - "-p", "1", "-a", - "150.56,58.9,654.7,536.12", "-f", - "CSV" - })); - } - - @Test - public void testExtractBatchSpreadsheetWithArea() throws ParseException, IOException { - FileSystem fs = FileSystems.getDefault(); - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv"); - Path tmpFolder = Files.createTempDirectory("tabula-java-batch-test"); - tmpFolder.toFile().deleteOnExit(); - - Path copiedPDF = tmpFolder.resolve(fs.getPath("spreadsheet.pdf")); - Path sourcePDF = fs.getPath("src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf"); - Files.copy(sourcePDF, copiedPDF); - copiedPDF.toFile().deleteOnExit(); - - this.csvFromCommandLineArgs(new String[]{ - "-b", tmpFolder.toString(), - "-p", "1", "-a", - "150.56,58.9,654.7,536.12", "-f", - "CSV" - }); - - Path csvPath = tmpFolder.resolve(fs.getPath("spreadsheet.csv")); - assertTrue(csvPath.toFile().exists()); - assertArrayEquals(expectedCsv.getBytes(), Files.readAllBytes(csvPath)); - } - - @Test - public void testExtractSpreadsheetWithAreaAndNewFile() throws ParseException, IOException { - - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv"); - - File newFile = folder.newFile(); - this.csvFromCommandLineArgs(new String[]{ - "src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf", - "-p", "1", "-a", - "150.56,58.9,654.7,536.12", "-f", - "CSV", "-o", newFile.getAbsolutePath() - }); - - assertArrayEquals(expectedCsv.getBytes(), Files.readAllBytes(Paths.get(newFile.getAbsolutePath()))); - } - - - @Test - public void testExtractJSONWithArea() throws ParseException, IOException { - - String expectedJson = 
UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/spanning_cells_basic.json"); - - assertEquals(expectedJson, this.csvFromCommandLineArgs(new String[]{ - "src/test/resources/technology/tabula/spanning_cells.pdf", - "-p", "1", "-a", - "150.56,58.9,654.7,536.12", "-f", - "JSON" - })); - } - - @Test - public void testExtractCSVWithArea() throws ParseException, IOException { - - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spanning_cells.csv"); - - assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ - "src/test/resources/technology/tabula/spanning_cells.pdf", - "-p", "1", "-a", - "150.56,58.9,654.7,536.12", "-f", - "CSV" - })); - } - - @Test - public void testGuessOption() throws ParseException, IOException { - String expectedCsvNoGuessing = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_no_guessing.csv"); - assertEquals(expectedCsvNoGuessing, this.csvFromCommandLineArgs(new String[]{ - "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.pdf", - "-p", "1", - "-f", "CSV" - })); - - String expectedCsvWithGuessing = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/TestCommandLineApp_testGuessOption_with_guessing.csv"); - assertEquals(expectedCsvWithGuessing, this.csvFromCommandLineArgs(new String[]{ - "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.pdf", - "-p", "1", - "-f", "CSV", - "-g" - })); - } - - @Test - public void testEncryptedPasswordSupplied() throws ParseException { - String s = this.csvFromCommandLineArgs(new String[]{ - "src/test/resources/technology/tabula/encrypted.pdf", - "-s", "userpassword", - "-p", "1", - "-f", "CSV" - }); - assertEquals("FLA Audit Profile,,,,,,,,,", s.split("\\r?\\n")[0]); - } - - @Test(expected=org.apache.commons.cli.ParseException.class) - public void testEncryptedWrongPassword() throws ParseException { - String s = 
this.csvFromCommandLineArgs(new String[]{ - "src/test/resources/technology/tabula/encrypted.pdf", - "-s", "wrongpassword", - "-p", "1", - "-f", "CSV" - }); - } - - @Test - public void testExtractWithMultiplePercentArea() throws ParseException, IOException { - - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/MultiColumn.csv"); - - assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ - "src/test/resources/technology/tabula/MultiColumn.pdf", - "-p", "1", "-a", - "%0,0,100,50", "-a", - "%0,50,100,100", "-f", - "CSV" - })); - } - - @Test - public void testExtractWithMultipleAbsoluteArea() throws ParseException, IOException { - - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/MultiColumn.csv"); - - assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ - "src/test/resources/technology/tabula/MultiColumn.pdf", - "-p", "1", "-a", - "0,0,451,212", "-a", - "0,212,451,425", "-f", - "CSV" - })); - } - - @Test - public void testExtractWithPercentAndAbsoluteArea() throws ParseException, IOException { - - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/MultiColumn.csv"); - - assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ - "src/test/resources/technology/tabula/MultiColumn.pdf", - "-p", "1", "-a", - "%0,0,100,50", "-a", - "0,212,451,425", "-f", - "CSV" - })); - } - - @Test - public void testLatticeModeWithColumnOption() throws ParseException, IOException { - - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/AnimalSounds.csv"); - - assertEquals(expectedCsv, this.csvFromCommandLineArgs(new String[]{ - "src/test/resources/technology/tabula/AnimalSounds.pdf", - "-p", "1", "-c", - "59,218,331,551", - "-r", "-f", "CSV" - })); - } - - @Test - public void testLatticeModeWithColumnAndMultipleAreasOption() throws ParseException, IOException { - - String expectedJson = 
UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/AnimalSounds1.json"); - String resultJson = this.csvFromCommandLineArgs(new String[]{ - "src/test/resources/technology/tabula/AnimalSounds1.pdf", - "-p", "1", "-c", "57,136,197,296,314,391,457,553", - "-a", "%0,0,100,50", "-a", "%0,50,100,100", - "-r", "-f", "JSON" - }); - assertEquals(expectedJson, resultJson); - } - -} diff --git a/src/test/java/technology/tabula/TestDebug.java b/src/test/java/technology/tabula/TestDebug.java deleted file mode 100644 index 2e8de98c..00000000 --- a/src/test/java/technology/tabula/TestDebug.java +++ /dev/null @@ -1,15 +0,0 @@ -package technology.tabula; - -public class TestDebug { - - private final static String PATH = "src/test/resources/technology/tabula/spanning_cells.pdf"; - -// @Test -// public void test() throws IOException { -// File outFile = new File(new File(System.getProperty("java.io.tmpdir")), "/rendered_page.jpg"); -// Debug.renderPage(PATH, outFile.getAbsolutePath(), 0, null, true, false, false, false, false, false, false, false, false, false); -// assertTrue(outFile.exists()); -// System.out.println(outFile.getAbsolutePath()); -// } - -} diff --git a/src/test/java/technology/tabula/TestLine.java b/src/test/java/technology/tabula/TestLine.java deleted file mode 100644 index f7a6a88d..00000000 --- a/src/test/java/technology/tabula/TestLine.java +++ /dev/null @@ -1,72 +0,0 @@ -package technology.tabula; - -import static org.junit.Assert.*; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.pdfbox.pdmodel.font.PDType1Font; -import org.apache.pdfbox.pdmodel.font.Standard14Fonts; -import org.junit.Test; - -public class TestLine { - - @Test - public void testSetTextElements() { - Line line = new Line(); - - TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); - TextChunk tChunk = new TextChunk(tElement); - List tList = new ArrayList<>(); - tList.add(tChunk); - 
line.setTextElements(tList); - - assertEquals("test", line.getTextElements().get(0).getText()); - - } - - @Test - public void testAddTextChunkIntTextChunk() { - Line line = new Line(); - - TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); - TextChunk tChunk = new TextChunk(tElement); - line.addTextChunk(3, tChunk); - - assertEquals("test", line.getTextElements().get(3).getText()); - } - - @Test - public void testLessThanAddTextChunkIntTextChunk() { - Line line = new Line(); - - TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); - TextChunk tChunk = new TextChunk(tElement); - line.addTextChunk(0, tChunk); - line.addTextChunk(0, tChunk); - - assertEquals("testtest", line.getTextElements().get(0).getText()); - } - - @Test(expected = IllegalArgumentException.class) - public void testErrorAddTextChunkIntTextChunk() { - Line line = new Line(); - - TextElement tElement = new TextElement(0, 0, 0, 0,new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); - TextChunk tChunk = new TextChunk(tElement); - line.addTextChunk(-1, tChunk); - } - - @Test - public void testToString() { - Line line = new Line(); - - TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); - TextChunk tChunk = new TextChunk(tElement); - line.addTextChunk(0, tChunk); - line.addTextChunk(0, tChunk); - - assertEquals("technology.tabula.Line[x=0.0,y=0.0,w=0.0,h=0.0,bottom=0.000000,right=0.000000,chunks='testtest', ]", line.toString()); - } - -} diff --git a/src/test/java/technology/tabula/TestObjectExtractor.java b/src/test/java/technology/tabula/TestObjectExtractor.java deleted file mode 100644 index 69864c61..00000000 --- a/src/test/java/technology/tabula/TestObjectExtractor.java +++ /dev/null @@ -1,150 +0,0 @@ -package technology.tabula; - -import static 
org.junit.Assert.*; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.pdfbox.Loader; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.junit.Test; - -public class TestObjectExtractor { - - /*@Test(expected=IOException.class) - public void testWrongPasswordRaisesException() throws IOException { - PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf")); - ObjectExtractor oe = new ObjectExtractor(pdf_document, "wrongpass"); - oe.extract().next(); - }*/ - - @Test(expected = IOException.class) - public void testEmptyOnEncryptedFileRaisesException() throws IOException { - PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf")); - try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { - oe.extract().next(); - } - } - - @Test - public void testCanReadPDFWithOwnerEncryption() throws IOException { - PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); - try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { - PageIterator pi = oe.extract(); - int i = 0; - while (pi.hasNext()) { - i++; - pi.next(); - } - assertEquals(2, i); - } - } - - - @Test - public void testGoodPassword() throws IOException { - PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword"); - try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { - List pages = new ArrayList<>(); - PageIterator pi = oe.extract(); - while (pi.hasNext()) { - pages.add(pi.next()); - } - assertEquals(1, pages.size()); - } - } - - - @Test - public void testTextExtractionDoesNotRaise() throws IOException { - PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/rotated_page.pdf")); - try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { - PageIterator pi = 
oe.extract(); - - assertTrue(pi.hasNext()); - assertNotNull(pi.next()); - assertFalse(pi.hasNext()); - } - } - - @Test - public void testShouldDetectRulings() throws IOException { - PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf")); - try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { - PageIterator pi = oe.extract(); - - Page page = pi.next(); - List rulings = page.getRulings(); - - for (Ruling r: rulings) { - assertTrue(page.contains(r.getBounds())); - } - } - } - - @Test - public void testDontThrowNPEInShfill() throws IOException { - PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/labor.pdf")); - - try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { - PageIterator pi = oe.extract(); - assertTrue(pi.hasNext()); - try { - Page p = pi.next(); - assertNotNull(p); - } catch (NullPointerException e) { - fail("NPE in ObjectExtractor " + e.toString()); - } - } - } - - @Test - public void testExtractOnePage() throws IOException { - PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); - assertEquals(2, pdf_document.getNumberOfPages()); - - try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { - Page page = oe.extract(2); - - assertNotNull(page); - } - - } - - @Test(expected = IndexOutOfBoundsException.class) - public void testExtractWrongPageNumber() throws IOException { - PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); - assertEquals(2, pdf_document.getNumberOfPages()); - - try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { - oe.extract(3); - } - } - - @Test - public void testTextElementsContainedInPage() throws IOException { - PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf")); - - try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) 
{ - Page page = oe.extractPage(1); - - for (TextElement te: page.getText()) { - assertTrue(page.contains(te)); - } - } - - } - - @Test public void testDoNotNPEInPointComparator() throws IOException { - PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/npe_issue_206.pdf")); - - try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { - Page p = oe.extractPage(1); - assertNotNull(p); - } catch (NullPointerException e) { - fail("NPE in ObjectExtractor " + e.toString()); - } - } -} diff --git a/src/test/java/technology/tabula/TestProjectionProfile.java b/src/test/java/technology/tabula/TestProjectionProfile.java deleted file mode 100644 index e6d93b39..00000000 --- a/src/test/java/technology/tabula/TestProjectionProfile.java +++ /dev/null @@ -1,110 +0,0 @@ -package technology.tabula; - -import static org.junit.Assert.*; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.font.PDType1Font; -import org.apache.pdfbox.pdmodel.font.Standard14Fonts; -import org.junit.Before; -import org.junit.Test; - -public class TestProjectionProfile { - - ProjectionProfile pProfile; - Page page; - - @Before - public void setUpProjectionProfile() { - PDPage pdPage = new PDPage(); - PDDocument pdDocument = new PDDocument(); - - PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); - TextElement textElement = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f); - TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f); - List textList = new ArrayList<>(); - textList.add(textElement); - textList.add(textElement2); - - Ruling ruling = new Ruling(0, 0, 10, 10); - List rulingList = new ArrayList<>(); - rulingList.add(ruling); - - page = Page.Builder.newInstance() - .withPageDims(PageDims.of(0, 0, 1, 1)) - .withRotation(0) - .withNumber(1) - .withPdPage(pdPage) - 
.withPdDocument(pdDocument) - .withTextElements(textList) - .withRulings(rulingList) - .build(); - - List rectangles = new ArrayList<>(); - rectangles.add(new Rectangle(0f, 0f, 500f, 5f)); - - pProfile = new ProjectionProfile(page, rectangles, 5, 5); - } - - @Test - public void testGetVerticalProjection() { - float[] projection = pProfile.getVerticalProjection(); - assertTrue(projection.length == 10); - } - - @Test - public void testGetHorizontalProjection() { - float[] projection = pProfile.getHorizontalProjection(); - assertTrue(projection.length == 10); - } - - @Test - public void testFindVerticalSeparators() { - float[] seperators = pProfile.findVerticalSeparators(page.getText().size() * 2.5f); - assertTrue(seperators.length == 0); - } - - @Test - public void testFindHorizontalSeparators() { - float[] seperators = pProfile.findHorizontalSeparators(page.getText().size() * 2.5f); - assertTrue(seperators.length == 0); - } - - @Test - public void testSmooth() { - float[] data = {0, 1, 2}; - float[] rv = ProjectionProfile.smooth(data, 3); - - assertEquals(1f, rv[2], 1e-5); - } - - @Test - public void testFilter() { - float[] data = {0, 1, 2}; - float[] rv = ProjectionProfile.filter(data, 3); - - assertEquals(3f, rv[1], 1e-5); - } - - @Test - public void testGetAutocorrelation() { - float[] projection = {0, 1, 2}; - float[] rv = ProjectionProfile.getAutocorrelation(projection); - - assertEquals(0f, rv[0], 1e-5); - assertTrue(rv.length == 2); - - } - - @Test - public void testGetFirstDeriv() { -// float[] -// float[] projection = pProfile.getFirstDeriv(new float[]{0.0, 0.0) -// System.out.println(Arrays.toString(projection)); -// assertEquals(10, projection[0], 1e-15); - } - -} diff --git a/src/test/java/technology/tabula/TestRectangle.java b/src/test/java/technology/tabula/TestRectangle.java deleted file mode 100644 index 7fa66f7a..00000000 --- a/src/test/java/technology/tabula/TestRectangle.java +++ /dev/null @@ -1,291 +0,0 @@ -package technology.tabula; - -import 
static org.junit.Assert.*; - -import java.awt.geom.Point2D; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -import org.junit.Assert; -import org.junit.Ignore; -import org.junit.Test; - -public class TestRectangle { - - - @Test - public void testCompareEqualsRectangles() { - Rectangle first = new Rectangle(); - Rectangle second = new Rectangle(); - - assertTrue(first.equals(second)); - assertTrue(second.equals(first)); - } - - @Test - public void testCompareAlignedHorizontalRectangle() { - Rectangle lower = new Rectangle(0f, 10f, 10f, 10f); - Rectangle upper = new Rectangle(0f,20f, 10f, 10f); - - assertTrue(lower.compareTo(upper) < 0); - } - - @Test - public void testCompareAlignedVerticalRectangle() { - Rectangle lower = new Rectangle(10f, 0f, 10f, 10f); - Rectangle upper = new Rectangle(20f,0f, 10f, 10f); - - assertTrue(lower.compareTo(upper) < 0); - } - - @Test - public void testCompareVerticalOverlapRectangle() { - Rectangle lower = new Rectangle(5f, 0f, 10f, 10f); - Rectangle upper = new Rectangle(0f, 10f, 10f, 10f); - - assertTrue(lower.compareTo(upper) < 0); - } - - @Test - public void testCompareVerticalOverlapLessThresholdRectangle() { - Rectangle lower = new Rectangle(0f, 10f, 10f, 10f); - Rectangle upper = new Rectangle(9.8f, 0f, 10f, 10f); - - assertTrue(lower.compareTo(upper) < 0); - } - - - - @Test - public void testQuickSortOneUpperThanOther() { - - Rectangle lower = new Rectangle(175.72f, 72.72f, 1.67f, 1.52f); //, (Comma after AARON) - Rectangle upper = new Rectangle(169.21f, 161.16f, 4.33f, 4.31f); // R (REGIONAL PULMONARY) - - assertTrue(lower.compareTo(upper) > 0); - - } - - - @Test - public void testQuickSortRectangleList() { - - //Testing wrong sorting - // Expected: AARON, JOSHUA, N - // but was: AARON JOSHUA N , , - Rectangle first = new Rectangle(172.92999267578125f, 51.47999954223633f, 4.0f, 4.309999942779541f); //A - Rectangle second = new Rectangle(175.72000122070312f, 
72.72000122070312f, 1.6699999570846558f, 1.5199999809265137f); //, - Rectangle third = new Rectangle(172.92999267578125f, 96.36000061035156f, 4.0f, 4.309999942779541f); //A - Rectangle fourth = new Rectangle(175.72000122070312f, 100.31999969482422f, 1.6699999570846558f, 1.5199999809265137f); //, - Rectangle fifth = new Rectangle(172.92999267578125f, 103.68000030517578f, 4.329999923706055f, 4.309999942779541f); //N - Rectangle sixth = new Rectangle(169.2100067138672f, 161.16000366210938f, 4.329999923706055f, 4.309999942779541f); //R - - List expectedList = new ArrayList<>(); - expectedList.add(first); - expectedList.add(sixth); - expectedList.add(second); - expectedList.add(third); - expectedList.add(fourth); - expectedList.add(fifth); - List toSortList = new ArrayList<>(); - toSortList.add(sixth); - toSortList.add(second); - toSortList.add(third); - toSortList.add(fifth); - toSortList.add(first); - toSortList.add(fourth); - - Collections.sort(toSortList, Rectangle.ILL_DEFINED_ORDER); - - assertEquals(expectedList, toSortList); - } - - @Test - public void testGetVerticalOverlapShouldReturnZero() { - - Rectangle lower = new Rectangle(10f, 0f, 10f, 10f); - Rectangle upper = new Rectangle(20f,0f, 10f, 10f); - - float overlap = lower.verticalOverlap(upper); - - assertEquals(0f, overlap, 0); - assertTrue(!lower.verticallyOverlaps(upper)); - assertEquals(0f, lower.verticalOverlapRatio(upper), 0); - assertEquals(0f, lower.overlapRatio(upper), 0); - - } - - @Test - public void testGetVerticalOverlapShouldReturnMoreThanZero() { - - Rectangle lower = new Rectangle(15f, 10f, 10f, 10f); - Rectangle upper = new Rectangle(20f, 0f, 10f, 10f); - - float overlap = lower.verticalOverlap(upper); - - assertEquals(5f, overlap, 0); - assertTrue(lower.verticallyOverlaps(upper)); - assertEquals(0.5f, lower.verticalOverlapRatio(upper), 0); - assertEquals(0f, lower.overlapRatio(upper), 0); - - } - - @Test - public void testGetHorizontalOverlapShouldReturnZero() { - - Rectangle one = new 
Rectangle(0f, 0f, 10f, 10f); - Rectangle two = new Rectangle(10f, 10f, 10f, 10f); - - assertTrue(!one.horizontallyOverlaps(two)); - assertEquals(0f, one.overlapRatio(two), 0); - - } - - @Test - public void testGetHorizontalOverlapShouldReturnMoreThanZero() { - - Rectangle one = new Rectangle(0f, 0f, 10f, 10f); - Rectangle two = new Rectangle(10f, 5f, 10f, 10f); - - assertTrue(one.horizontallyOverlaps(two)); - assertEquals(5f, one.horizontalOverlap(two), 0); - assertEquals(0f, one.overlapRatio(two), 0); - - } - - @Test - public void testGetOverlapShouldReturnMoreThanZero() { - - Rectangle one = new Rectangle(0f, 0f, 10f, 10f); - Rectangle two = new Rectangle(5f, 5f, 10f, 10f); - - assertTrue(one.horizontallyOverlaps(two)); - assertTrue(one.verticallyOverlaps(two)); - assertEquals(5f, one.horizontalOverlap(two), 0); - assertEquals(5f, one.verticalOverlap(two), 0); - assertEquals((25f/175), one.overlapRatio(two), 0); - - } - - @Test - public void testMergeNoOverlappingRectangles() { - - Rectangle one = new Rectangle(0f, 0f, 10f, 10f); - Rectangle two = new Rectangle(0f, 10f, 10f, 10f); - - one.merge(two); - - assertEquals(20f, one.getWidth(), 0); - assertEquals(10f, one.getHeight(), 0); - assertEquals(0f, one.getLeft(), 0); - assertEquals(0f, one.getTop(), 0); - assertEquals(10f, one.getBottom(), 0); - assertEquals(20f * 10f, one.getArea(), 0); - - } - - @Test - public void testMergeOverlappingRectangles() { - - Rectangle one = new Rectangle(0f, 0f, 10f, 10f); - Rectangle two = new Rectangle(5f, 5f, 10f, 10f); - - one.merge(two); - - assertEquals(15f, one.getWidth(), 0); - assertEquals(15f, one.getHeight(), 0); - assertEquals(0f, one.getLeft(), 0); - assertEquals(0f, one.getTop(), 0); - - } - - @Test - public void testRectangleGetPoints() { - - Rectangle one = new Rectangle(10f, 20f, 30f, 40f); - - Point2D[] points = one.getPoints(); - - Point2D[] expectedPoints = new Point2D[]{ - new Point2D.Float(20f, 10f), - new Point2D.Float(50f, 10f), - new Point2D.Float(50f, 
50f), - new Point2D.Float(20f, 50f) - - }; - - Assert.assertArrayEquals(expectedPoints, points); - - } - - @Test - public void testGetBoundingBox() { - - List rectangles = new ArrayList<>(); - rectangles.add(new Rectangle(0f, 0f, 10f, 10f)); - rectangles.add(new Rectangle(20f, 30f, 10f, 10f)); - - Rectangle boundingBoxOf = Rectangle.boundingBoxOf(rectangles); - - assertEquals(new Rectangle(0f, 0f, 40f, 30f), boundingBoxOf); - - - - - } - - @Test - public void testTransitiveComparison1() { - // +-------+ - // | | - // | A | +-------+ - // | | | | - // +-------+ | B | +-------+ - // | | | | - // +-------+ | C | - // | | - // +-------+ - Rectangle a = new Rectangle(0,0,2,2); - Rectangle b = new Rectangle(1,1,2,2); - Rectangle c = new Rectangle(2,2,2,2); - assertTrue(a.compareTo(b) < 0); - assertTrue(b.compareTo(c) < 0); - assertTrue(a.compareTo(c) < 0); - } - - @Test @Ignore - public void testTransitiveComparison2() { - // +-------+ - // | | - // +-------+ | C | - // | | | | - // +-------+ | B | +-------+ - // | | | | - // | A | +-------+ - // | | - // +-------+ - Rectangle a = new Rectangle(2,0,2,2); - Rectangle b = new Rectangle(1,1,2,2); - Rectangle c = new Rectangle(0,2,2,2); - assertTrue(a.compareTo(b) < 0); - assertTrue(b.compareTo(c) < 0); - assertTrue(a.compareTo(c) < 0); - } - - @Test @Ignore - public void testWellDefinedComparison1() { - Rectangle a = new Rectangle(2,0,2,2); - Rectangle b = new Rectangle(1,1,2,2); - Rectangle c = new Rectangle(0,2,2,2); - List l1 = new ArrayList<>(Arrays.asList(b, a, c)); - List l2 = new ArrayList<>(Arrays.asList(c, b, a)); - QuickSort.sort(l1, Rectangle.ILL_DEFINED_ORDER); - QuickSort.sort(l2, Rectangle.ILL_DEFINED_ORDER); - assertEquals(l1.get(0), l2.get(0)); - assertEquals(l1.get(1), l2.get(1)); - assertEquals(l1.get(2), l2.get(2)); - } - -} diff --git a/src/test/java/technology/tabula/TestRectangleSpatialIndex.java b/src/test/java/technology/tabula/TestRectangleSpatialIndex.java deleted file mode 100644 index 
46eb1ea3..00000000 --- a/src/test/java/technology/tabula/TestRectangleSpatialIndex.java +++ /dev/null @@ -1,21 +0,0 @@ -package technology.tabula; - -import static org.junit.Assert.*; - -import org.junit.Test; - -public class TestRectangleSpatialIndex { - - @Test - public void testIntersects() { - - Rectangle r = new Rectangle(0, 0, 0, 0); - - RectangleSpatialIndex rSpatialIndex = new RectangleSpatialIndex<>(); - rSpatialIndex.add(r); - - assertTrue(rSpatialIndex.intersects(r).size() > 0); - - } - -} diff --git a/src/test/java/technology/tabula/TestRuling.java b/src/test/java/technology/tabula/TestRuling.java deleted file mode 100644 index e21e3a27..00000000 --- a/src/test/java/technology/tabula/TestRuling.java +++ /dev/null @@ -1,107 +0,0 @@ -package technology.tabula; - -import static org.junit.Assert.*; - -import org.junit.Before; -import org.junit.Test; - -public class TestRuling { - - Ruling ruling; - - @Before - public void setUpRuling() { - ruling = new Ruling(0, 0, 10, 10); - } - - @Test - public void testGetWidth() { - assertEquals(10f, ruling.getWidth(), 1e-5); - } - - @Test - public void testGetHeight() { - assertEquals(10f, ruling.getHeight(), 1e-5); - } - - @Test - public void testToString() { - assertEquals("class technology.tabula.Ruling[x1=0.000000 y1=0.000000 x2=10.000000 y2=10.000000]",ruling.toString()); - } - - @Test - public void testEqualsOther() { - Ruling other = new Ruling(0, 0, 11, 10); - assertTrue(ruling.equals(ruling)); - } - - @Test - public void testEqualsDifferentInstance() { - assertFalse(ruling.equals("test")); - } - - @Test - public void testNearlyIntersects(){ - Ruling another = new Ruling(0, 0, 11, 10); - - assertTrue(ruling.nearlyIntersects(another)); - } - - @Test(expected = UnsupportedOperationException.class) - public void testGetPositionError(){ - Ruling other = new Ruling(0, 0, 1, 1); - other.getPosition(); - fail(); - } - - @Test(expected = UnsupportedOperationException.class) - public void testSetPositionError(){ - 
Ruling other = new Ruling(0, 0, 1, 1); - other.setPosition(5f); - fail(); - } - - @Test(expected = UnsupportedOperationException.class) - public void testsetPosition(){ - ruling.setPosition(0); - } - - @Test(expected = UnsupportedOperationException.class) - public void testGetStartError(){ - Ruling other = new Ruling(0, 0, 1, 1); - other.getStart(); - fail(); - } - - @Test(expected = UnsupportedOperationException.class) - public void testGetEndError(){ - Ruling other = new Ruling(0, 0, 1, 1); - other.getEnd(); - fail(); - } - - @Test(expected = UnsupportedOperationException.class) - public void testSetEndError(){ - Ruling other = new Ruling(0, 0, 1, 1); - other.setEnd(5f); - fail(); - } - - - @Test - public void testColinear(){ -// Ruling another = new Ruling(0, 0, 500, 5); - java.awt.geom.Point2D.Float float1 = new java.awt.geom.Point2D.Float(20, 20); - java.awt.geom.Point2D.Float float2 = new java.awt.geom.Point2D.Float(0, 0); - java.awt.geom.Point2D.Float float3 = new java.awt.geom.Point2D.Float(20, 0); - java.awt.geom.Point2D.Float float4 = new java.awt.geom.Point2D.Float(0, 20); - - assertFalse(ruling.colinear(float1)); - assertTrue(ruling.colinear(float2)); - assertFalse(ruling.colinear(float3)); - assertFalse(ruling.colinear(float4)); - - - } - -} diff --git a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java b/src/test/java/technology/tabula/TestSpreadsheetExtractor.java deleted file mode 100644 index f8bd4074..00000000 --- a/src/test/java/technology/tabula/TestSpreadsheetExtractor.java +++ /dev/null @@ -1,547 +0,0 @@ -package technology.tabula; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.awt.geom.Point2D; -import java.io.File; -import java.io.IOException; -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import 
org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVParser; -import org.apache.commons.csv.CSVRecord; -import org.junit.Test; - -import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; -import technology.tabula.writers.CSVWriter; -import technology.tabula.writers.JSONWriter; - -public class TestSpreadsheetExtractor { - - - public static final Rectangle[] EXPECTED_RECTANGLES = { - new Rectangle(40.0f, 18.0f, 208.0f, 40.0f), - new Rectangle(84.0f, 18.0f, 962.0f, 464.0f) - }; - - private static final Ruling[] VERTICAL_RULING_LINES = { - new Ruling(40.0f, 18.0f, 0.0f, 40.0f), - new Ruling(44.0f, 70.0f, 0.0f, 36.0f), - new Ruling(40.0f, 226.0f, 0.0f, 40.0f) - }; - - private static final Ruling[] HORIZONTAL_RULING_LINES = { - new Ruling(40.0f, 18.0f, 208.0f, 0.0f), - new Ruling(44.0f, 18.0f, 208.0f, 0.0f), - new Ruling(50.0f, 18.0f, 208.0f, 0.0f), - new Ruling(54.0f, 18.0f, 208.0f, 0.0f), - new Ruling(60.0f, 18.0f, 208.0f, 0.0f), - new Ruling(64.0f, 18.0f, 208.0f, 0.0f), - new Ruling(70.0f, 18.0f, 208.0f, 0.0f), - new Ruling(74.0f, 18.0f, 208.0f, 0.0f), - new Ruling(80.0f, 18.0f, 208.0f, 0.0f) - }; - - private static final Cell[] EXPECTED_CELLS = { - new Cell(40.0f, 18.0f, 208.0f, 4.0f), - new Cell(44.0f, 18.0f, 52.0f, 6.0f), - new Cell(50.0f, 18.0f, 52.0f, 4.0f), - new Cell(54.0f, 18.0f, 52.0f, 6.0f), - new Cell(60.0f, 18.0f, 52.0f, 4.0f), - new Cell(64.0f, 18.0f, 52.0f, 6.0f), - new Cell(70.0f, 18.0f, 52.0f, 4.0f), - new Cell(74.0f, 18.0f, 52.0f, 6.0f), - new Cell(44.0f, 70.0f, 156.0f, 6.0f), - new Cell(50.0f, 70.0f, 156.0f, 4.0f), - new Cell(54.0f, 70.0f, 156.0f, 6.0f), - new Cell(60.0f, 70.0f, 156.0f, 4.0f), - new Cell(64.0f, 70.0f, 156.0f, 6.0f), - new Cell(70.0f, 70.0f, 156.0f, 4.0f), - new Cell(74.0f, 70.0f, 156.0f, 6.0f)}; - - private static final Ruling[][] SINGLE_CELL_RULINGS = { - { - new Ruling(new Point2D.Float(151.653545f, 185.66929f), new Point2D.Float(380.73438f, 185.66929f)), - new Ruling(new Point2D.Float(151.653545f, 
314.64567f), new Point2D.Float(380.73438f, 314.64567f)) - }, - { - new Ruling(new Point2D.Float(151.653545f, 185.66929f), new Point2D.Float(151.653545f, 314.64567f)), - new Ruling(new Point2D.Float(380.73438f, 185.66929f), new Point2D.Float(380.73438f, 314.64567f)) - } - }; - - private static final Ruling[][] TWO_SINGLE_CELL_RULINGS = { - { - new Ruling(new Point2D.Float(151.653545f, 185.66929f), new Point2D.Float(287.4074f, 185.66929f)), - new Ruling(new Point2D.Float(151.653545f, 262.101f), new Point2D.Float(287.4074f, 262.101f)), - new Ruling(new Point2D.Float(232.44095f, 280.62992f), new Point2D.Float(368.1948f, 280.62992f)), - new Ruling(new Point2D.Float(232.44095f, 357.06164f), new Point2D.Float(368.1948f, 357.06164f)) - }, - { - new Ruling(new Point2D.Float(151.653545f, 185.66929f), new Point2D.Float(151.653545f, 262.101f)), - new Ruling(new Point2D.Float(287.4074f, 185.66929f), new Point2D.Float(287.4074f, 262.101f)), - new Ruling(new Point2D.Float(232.44095f, 280.62992f), new Point2D.Float(232.44095f, 357.06164f)), - new Ruling(new Point2D.Float(368.1948f, 280.62992f), new Point2D.Float(368.1948f, 357.06164f)) - } - }; - - private static final Ruling[] EXTERNALLY_DEFINED_RULINGS = { - new Ruling(new Point2D.Float(320.0f, 285.0f), new Point2D.Float(564.4409f, 285.0f)), - new Ruling(new Point2D.Float(320.0f, 457.0f), new Point2D.Float(564.4409f, 457.0f)), - new Ruling(new Point2D.Float(320.0f, 331.0f), new Point2D.Float(564.4409f, 331.0f)), - new Ruling(new Point2D.Float(320.0f, 315.0f), new Point2D.Float(564.4409f, 315.0f)), - new Ruling(new Point2D.Float(320.0f, 347.0f), new Point2D.Float(564.4409f, 347.0f)), - new Ruling(new Point2D.Float(320.0f, 363.0f), new Point2D.Float(564.44088f, 363.0f)), - new Ruling(new Point2D.Float(320.0f, 379.0f), new Point2D.Float(564.44087f, 379.0f)), - new Ruling(new Point2D.Float(320.0f, 395.5f), new Point2D.Float(564.44086f, 395.5f)), - new Ruling(new Point2D.Float(320.00006f, 415.0f), new Point2D.Float(564.4409f, 
415.0f)), - new Ruling(new Point2D.Float(320.00007f, 431.0f), new Point2D.Float(564.4409f, 431.0f)), - - new Ruling(new Point2D.Float(320.0f, 285.0f), new Point2D.Float(320.0f, 457.0f)), - new Ruling(new Point2D.Float(565.0f, 285.0f), new Point2D.Float(564.4409f, 457.0f)), - new Ruling(new Point2D.Float(470.5542f, 285.0f), new Point2D.Float(470.36865f, 457.0f)) - }; - - private static final Ruling[] EXTERNALLY_DEFINED_RULINGS2 = { - new Ruling(new Point2D.Float(51.796964f, 180.0f), new Point2D.Float(560.20312f, 180.0f)), - new Ruling(new Point2D.Float(51.797017f, 219.0f), new Point2D.Float(560.2031f, 219.0f)), - new Ruling(new Point2D.Float(51.797f, 239.0f), new Point2D.Float(560.2031f, 239.0f)), - new Ruling(new Point2D.Float(51.797f, 262.0f), new Point2D.Float(560.20312f, 262.0f)), - new Ruling(new Point2D.Float(51.797f, 283.50247f), new Point2D.Float(560.05024f, 283.50247f)), - new Ruling(new Point2D.Float(51.796964f, 309.0f), new Point2D.Float(560.20312f, 309.0f)), - new Ruling(new Point2D.Float(51.796982f, 333.0f), new Point2D.Float(560.20312f, 333.0f)), - new Ruling(new Point2D.Float(51.797f, 366.0f), new Point2D.Float(560.20312f, 366.0f)), - - - new Ruling(new Point2D.Float(52.0f, 181.0f), new Point2D.Float(51.797f, 366.0f)), - new Ruling(new Point2D.Float(208.62891f, 181.0f), new Point2D.Float(208.62891f, 366.0f)), - new Ruling(new Point2D.Float(357.11328f, 180.0f), new Point2D.Float(357.0f, 366.0f)), - new Ruling(new Point2D.Float(560.11328f, 180.0f), new Point2D.Float(560.0f, 366.0f)) - }; - - @Test - public void testLinesToCells() { - List cells = SpreadsheetExtractionAlgorithm.findCells(Arrays.asList(HORIZONTAL_RULING_LINES), Arrays.asList(VERTICAL_RULING_LINES)); - Collections.sort(cells, Rectangle.ILL_DEFINED_ORDER); - List expected = Arrays.asList(EXPECTED_CELLS); - Collections.sort(expected, Rectangle.ILL_DEFINED_ORDER); - assertTrue(cells.equals(expected)); - } - - @Test - public void testDetectSingleCell() { - List cells = 
SpreadsheetExtractionAlgorithm.findCells(Arrays.asList(SINGLE_CELL_RULINGS[0]), - Arrays.asList(SINGLE_CELL_RULINGS[1])); - assertEquals(1, cells.size()); - Cell cell = cells.get(0); - assertTrue(Utils.feq(151.65355, cell.getLeft())); - assertTrue(Utils.feq(185.6693, cell.getTop())); - assertTrue(Utils.feq(229.08083, cell.getWidth())); - assertTrue(Utils.feq(128.97636, cell.getHeight())); - } - - @Test - public void testDetectTwoSingleCells() { - List cells = SpreadsheetExtractionAlgorithm.findCells(Arrays.asList(TWO_SINGLE_CELL_RULINGS[0]), - Arrays.asList(TWO_SINGLE_CELL_RULINGS[1])); - assertEquals(2, cells.size()); - // should not overlap - assertFalse(cells.get(0).intersects(cells.get(1))); - } - - @Test - public void testFindSpreadsheetsFromCells() throws IOException { - - CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File("src/test/resources/technology/tabula/csv/TestSpreadsheetExtractor-CELLS.csv"), - Charset.forName("utf-8"), - CSVFormat.DEFAULT); - - List cells = new ArrayList<>(); - - for (CSVRecord record : parse) { - cells.add(new Cell(Float.parseFloat(record.get(0)), - Float.parseFloat(record.get(1)), - Float.parseFloat(record.get(2)), - Float.parseFloat(record.get(3)))); - } - - - List expected = Arrays.asList(EXPECTED_RECTANGLES); - Collections.sort(expected, Rectangle.ILL_DEFINED_ORDER); - List foundRectangles = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells); - Collections.sort(foundRectangles, Rectangle.ILL_DEFINED_ORDER); - assertTrue(foundRectangles.equals(expected)); - } - - // TODO Add assertions - @Test - public void testSpreadsheetExtraction() throws IOException { - Page page = UtilsForTesting - .getAreaFromFirstPage( - "src/test/resources/technology/tabula/argentina_diputados_voting_record.pdf", - 269.875f, 12.75f, 790.5f, 561f); - - SpreadsheetExtractionAlgorithm.findCells(page.getHorizontalRulings(), page.getVerticalRulings()); - page.getPDDoc().close(); - } - - @Test - public void testSpanningCells() 
throws IOException { - Page page = UtilsForTesting - .getPage("src/test/resources/technology/tabula/spanning_cells.pdf", 1); - String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/spanning_cells.json"); - SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); - List
tables = se.extract(page); - assertEquals(2, tables.size()); - - - StringBuilder sb = new StringBuilder(); - (new JSONWriter()).write(sb, tables); - assertEquals(expectedJson, sb.toString()); - page.getPDDoc().close(); - } - - @Test - public void testSpanningCellsToCsv() throws IOException { - Page page = UtilsForTesting - .getPage("src/test/resources/technology/tabula/spanning_cells.pdf", 1); - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spanning_cells.csv"); - SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); - List
tables = se.extract(page); - assertEquals(2, tables.size()); - - - StringBuilder sb = new StringBuilder(); - (new CSVWriter()).write(sb, tables); - assertEquals(expectedCsv, sb.toString()); - page.getPDDoc().close(); - } - - @Test - public void testIncompleteGrid() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/china.pdf", 1); - SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); - List tables = se.extract(page); - assertEquals(2, tables.size()); - page.getPDDoc().close(); - } - - @Test - public void testNaturalOrderOfRectanglesDoesNotBreakContract() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/us-017.pdf", 2); - SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); - List tables = se.extract(page); - - StringBuilder sb = new StringBuilder(); - (new CSVWriter()).write(sb, tables.get(0)); - - String result = sb.toString(); - String expected = "Project,Agency,Institution\r\nNanotechnology and its publics,NSF,Pennsylvania State University\r\n\"Public information and deliberation in nanoscience and\rnanotechnology policy (SGER)\",Interagency,\"North Carolina State\rUniversity\"\r\n\"Social and ethical research and education in agrifood\rnanotechnology (NIRT)\",NSF,Michigan State University\r\n\"From laboratory to society: developing an informed\rapproach to nanoscale science and engineering (NIRT)\",NSF,University of South Carolina\r\nDatabase and innovation timeline for nanotechnology,NSF,UCLA\r\nSocial and ethical dimensions of nanotechnology,NSF,University of Virginia\r\n\"Undergraduate exploration of nanoscience,\rapplications and societal implications (NUE)\",NSF,\"Michigan Technological\rUniversity\"\r\n\"Ethics and belief inside the development of\rnanotechnology (CAREER)\",NSF,University of Virginia\r\n\"All centers, NNIN and NCN have a societal\rimplications components\",\"NSF, DOE,\rDOD, and NIH\",\"All nanotechnology 
centers\rand networks\"\r\n"; - - assertEquals(expected, result); - page.getPDDoc().close(); - } - - @Test - public void testMergeLinesCloseToEachOther() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/20.pdf", 1); - List rulings = page.getVerticalRulings(); - float[] expectedRulings = new float[]{105.549774f, 107.52332f, 160.58167f, 377.1792f, 434.95804f, 488.21783f}; - for (int i = 0; i < rulings.size(); i++) { - assertEquals(expectedRulings[i], rulings.get(i).getLeft(), 0.1); - } - assertEquals(6, rulings.size()); - page.getPDDoc().close(); - } - - @Test - public void testSpreadsheetWithNoBoundingFrameShouldBeSpreadsheet() throws IOException { - Page page = UtilsForTesting.getAreaFromPage("src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf", 1, - 150.56f, 58.9f, 654.7f, 536.12f); - - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/spreadsheet_no_bounding_frame.csv"); - - SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); - boolean isTabular = se.isTabular(page); - assertTrue(isTabular); - List tables = se.extract(page); - StringBuilder sb = new StringBuilder(); - (new CSVWriter()).write(sb, tables.get(0)); - - assertEquals(expectedCsv, sb.toString()); - page.getPDDoc().close(); - - } - - @Test - public void testExtractSpreadsheetWithinAnArea() throws IOException { - Page page = UtilsForTesting.getAreaFromPage( - "src/test/resources/technology/tabula/puertos1.pdf", - 1, - 273.9035714285714f, 30.32142857142857f, 554.8821428571429f, 546.7964285714286f); - SpreadsheetExtractionAlgorithm se = new SpreadsheetExtractionAlgorithm(); - List tables = se.extract(page); - Table table = tables.get(0); - assertEquals(15, table.getRows().size()); - - String expected = "\"\",TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM,M.U$S,TM\n" + - "Peces vivos,1,25,1,23,2,38,1,37,2,67,2,89,1\n" + - "\"Pescado fresco\n" + - "o refrigerado.\n" + - "exc. 
filetes\",7.704,7.175,8.931,6.892,12.635,10.255,16.742,13.688,14.357,11.674,13.035,13.429,9.727\n" + - "\"Pescado congelado\n" + - "exc. filetes\",90.560,105.950,112.645,108.416,132.895,115.874,152.767,133.765,148.882,134.847,156.619,165.134,137.179\n" + - "\"Filetes y demás car-\n" + - "nes de pescado\",105.434,200.563,151.142,218.389,152.174,227.780,178.123,291.863,169.422,313.735,176.427,381.640,144.814\n" + - "\"Pescado sec./sal./\n" + - "en salm. har./pol./\n" + - "pell. aptos\n" + - "p/c humano\",6.837,14.493,6.660,9.167,14.630,17.579,18.150,21.302,18.197,25.739,13.460,23.549,11.709\n" + - "Crustáceos,61.691,375.798,52.488,251.043,47.635,387.783,27.815,217.443,7.123,86.019,39.488,373.583,45.191\n" + - "Moluscos,162.027,174.507,109.436,111.443,90.834,104.741,57.695,109.141,98.182,206.304,187.023,251.352,157.531\n" + - "\"Prod. no exp. en\n" + - "otros capítulos.\n" + - "No apto p/c humano\",203,328,7,35,521,343,\"1,710\",\"1,568\",125,246,124,263,131\n" + - "\"Grasas y aceites de\n" + - "pescado y mamíferos\n" + - "marinos\",913,297,\"1,250\",476,\"1,031\",521,\"1,019\",642,690,483,489,710,959\n" + - "\"Extractos y jugos de\n" + - "pescado y mariscos\",5,25,1,3,4,4,31,93,39,117,77,230,80\n" + - "\"Preparaciones y con-\n" + - "servas de pescado\",846,\"3,737\",\"1,688\",\"4,411\",\"1,556\",\"3,681\",\"2,292\",\"5,474\",\"2,167\",\"7,494\",\"2,591\",\"8,833\",\"2,795\"\n" + - "\"Preparaciones y con-\n" + - "servas de mariscos\",348,\"3,667\",345,\"1,771\",738,\"3,627\",561,\"2,620\",607,\"3,928\",314,\"2,819\",250\n" + - "\"Harina, polvo y pe-\n" + - "llets de pescado.No\n" + - "aptos p/c humano\",\"16,947\",\"8,547\",\"11,867\",\"6,315\",\"32,528\",\"13,985\",\"37,313\",\"18,989\",\"35,787\",\"19,914\",\"37,821\",\"27,174\",\"30,000\"\n" + - "TOTAL,\"453,515\",\"895,111\",\"456,431\",\"718,382\",\"487,183\",\"886,211\",\"494,220\",\"816,623\",\"495,580\",\"810,565\",\"627,469\",\"1,248,804\",\"540,367\"\n"; - - - // TODO add better assertions - StringBuilder sb 
= new StringBuilder(); - (new CSVWriter()).write(sb, tables.get(0)); - String result = sb.toString(); - - List parsedExpected = org.apache.commons.csv.CSVParser.parse(expected, CSVFormat.EXCEL).getRecords(); - List parsedResult = org.apache.commons.csv.CSVParser.parse(result, CSVFormat.EXCEL).getRecords(); - - assertEquals(parsedResult.size(), parsedExpected.size()); - for (int i = 0; i < parsedResult.size(); i++) { - assertEquals(parsedResult.get(i).size(), parsedExpected.get(i).size()); - } - page.getPDDoc().close(); - } - - @Test - public void testAlmostIntersectingRulingsShouldIntersect() { - Ruling v = new Ruling(new Point2D.Float(555.960876f, 271.569641f), new Point2D.Float(555.960876f, 786.899902f)); - Ruling h = new Ruling(new Point2D.Float(25.620499f, 786.899902f), new Point2D.Float(555.960754f, 786.899902f)); - Map m = Ruling.findIntersections(Arrays.asList(new Ruling[]{h}), Arrays.asList(new Ruling[]{v})); - assertEquals(m.values().size(), 1); - } - - // TODO add assertions - @Test - public void testDontRaiseSortException() throws IOException { - Page page = UtilsForTesting.getAreaFromPage( - "src/test/resources/technology/tabula/us-017.pdf", - 2, - 446.0f, 97.0f, 685.0f, 520.0f); - page.getText(); - SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - bea.extract(page).get(0); - page.getPDDoc().close(); - } - - @Test - public void testShouldDetectASingleSpreadsheet() throws IOException { - Page page = UtilsForTesting.getAreaFromPage( - "src/test/resources/technology/tabula/offense.pdf", - 1, - 68.08f, 16.44f, 680.85f, 597.84f); - SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - List
tables = bea.extract(page); - assertEquals(1, tables.size()); - page.getPDDoc().close(); - } - - @Test - public void testExtractTableWithExternallyDefinedRulings() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/us-007.pdf", - 1); - SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - List
tables = bea.extract(page, - Arrays.asList(EXTERNALLY_DEFINED_RULINGS)); - assertEquals(1, tables.size()); - Table table = tables.get(0); - - assertEquals("Payroll Period", table.getRows().get(0).get(0).getText()); - assertEquals("One Withholding\rAllowance", table.getRows().get(0).get(1).getText()); - assertEquals("Weekly", table.getRows().get(1).get(0).getText()); - assertEquals("$71.15", table.getRows().get(1).get(1).getText()); - assertEquals("Biweekly", table.getRows().get(2).get(0).getText()); - assertEquals("142.31", table.getRows().get(2).get(1).getText()); - assertEquals("Semimonthly", table.getRows().get(3).get(0).getText()); - assertEquals("154.17", table.getRows().get(3).get(1).getText()); - assertEquals("Monthly", table.getRows().get(4).get(0).getText()); - assertEquals("308.33", table.getRows().get(4).get(1).getText()); - assertEquals("Quarterly", table.getRows().get(5).get(0).getText()); - assertEquals("925.00", table.getRows().get(5).get(1).getText()); - assertEquals("Semiannually", table.getRows().get(6).get(0).getText()); - assertEquals("1,850.00", table.getRows().get(6).get(1).getText()); - assertEquals("Annually", table.getRows().get(7).get(0).getText()); - assertEquals("3,700.00", table.getRows().get(7).get(1).getText()); - assertEquals("Daily or Miscellaneous\r(each day of the payroll period)", table.getRows().get(8).get(0).getText()); - assertEquals("14.23", table.getRows().get(8).get(1).getText()); - page.getPDDoc().close(); - - } - - @Test - public void testAnotherExtractTableWithExternallyDefinedRulings() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/us-024.pdf", - 1); - SpreadsheetExtractionAlgorithm bea = new SpreadsheetExtractionAlgorithm(); - List
tables = bea.extract(page, - Arrays.asList(EXTERNALLY_DEFINED_RULINGS2)); - assertEquals(1, tables.size()); - Table table = tables.get(0); - - assertEquals("Total Supply", table.getRows().get(4).get(0).getText()); - assertEquals("6.6", table.getRows().get(6).get(2).getText()); - page.getPDDoc().close(); - } - - @Test - public void testSpreadsheetsSortedByTopAndRight() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/sydney_disclosure_contract.pdf", - 1); - - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = sea.extract(page); - for (int i = 1; i < tables.size(); i++) { - assert (tables.get(i - 1).getTop() <= tables.get(i).getTop()); - } - page.getPDDoc().close(); - } - - @Test - public void testDontStackOverflowQuicksort() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/failing_sort.pdf", - 1); - - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = sea.extract(page); - for (int i = 1; i < tables.size(); i++) { - assert (tables.get(i - 1).getTop() <= tables.get(i).getTop()); - } - page.getPDDoc().close(); - } - - @Test - public void testRTL() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/arabic.pdf", - 1); - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = sea.extract(page); - // assertEquals(1, tables.size()); - Table table = tables.get(0); - - - assertEquals("اسمي سلطان", table.getRows().get(1).get(1).getText()); - assertEquals("من اين انت؟", table.getRows().get(2).get(1).getText()); - assertEquals("1234", table.getRows().get(3).get(0).getText()); - assertEquals("هل انت شباك؟", table.getRows().get(4).get(0).getText()); - assertEquals("انا من ولاية كارولينا الشمال", table.getRows().get(2).get(0).getText()); // conjoined lam-alif gets missed - assertEquals("اسمي Jeremy في الانجليزية", table.getRows().get(4).get(1).getText()); // conjoined lam-alif gets missed - assertEquals("عندي 47 قطط", table.getRows().get(3).get(1).getText()); // the real right answer is 47. - assertEquals("Jeremy is جرمي in Arabic", table.getRows().get(5).get(0).getText()); // the real right answer is 47. - assertEquals("مرحباً", table.getRows().get(1).get(0).getText()); // really ought to be ً, but this is forgiveable for now - - // there is one remaining problems that are not yet addressed - // - diacritics (e.g. Arabic's tanwinً and probably Hebrew nekudot) are put in the wrong place. - // this should get fixed, but this is a good first stab at the problem. - - // these (commented-out) tests reflect the theoretical correct answer, - // which is not currently possible because of the two problems listed above - // assertEquals("مرحباً", table.getRows().get(0).get(0).getText()); // really ought to be ً, but this is forgiveable for now - - page.getPDDoc().close(); - } - - - @Test - public void testRealLifeRTL() throws IOException { - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/mednine.pdf", - 1); - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = sea.extract(page); - // assertEquals(1, tables.size()); - Table table = tables.get(0); - - assertEquals("الانتخابات التشريعية 2014", table.getRows().get(0).get(0).getText()); // the doubled spaces might be a bug in my implementation. - assertEquals("ورقة كشف نتائج دائرة مدنين", table.getRows().get(1).get(0).getText()); - assertEquals("426", table.getRows().get(4).get(0).getText()); - assertEquals("63", table.getRows().get(4).get(1).getText()); - assertEquals("43", table.getRows().get(4).get(2).getText()); - assertEquals("56", table.getRows().get(4).get(3).getText()); - assertEquals("58", table.getRows().get(4).get(4).getText()); - assertEquals("49", table.getRows().get(4).get(5).getText()); - assertEquals("55", table.getRows().get(4).get(6).getText()); - assertEquals("33", table.getRows().get(4).get(7).getText()); - assertEquals("32", table.getRows().get(4).get(8).getText()); - assertEquals("37", table.getRows().get(4).get(9).getText()); - assertEquals("قائمة من أجل تحقيق سلطة الشعب", table.getRows().get(4).get(10).getText()); - - // there is one remaining problems that are not yet addressed - // - diacritics (e.g. Arabic's tanwinً and probably Hebrew nekudot) are put in the wrong place. - // this should get fixed, but this is a good first stab at the problem. 
- - // these (commented-out) tests reflect the theoretical correct answer, - // which is not currently possible because of the two problems listed above - // assertEquals("مرحباً", table.getRows().get(0).get(0).getText()); // really ought to be ً, but this is forgiveable for now - page.getPDDoc().close(); - - } - - @Test - public void testExtractColumnsCorrectly3() throws IOException { - - Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/frx_2012_disclosure.pdf", - 106.01f, 48.09f, 227.31f, 551.89f); - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - Table table = sea.extract(page).get(0); - - assertEquals("REGIONAL PULMONARY & SLEEP\rMEDICINE", table.getRows().get(8).get(1).getText()); - page.getPDDoc().close(); - - } - - @Test - public void testSpreadsheetExtractionIssue656() throws IOException { - Page page = UtilsForTesting - .getAreaFromFirstPage( - "src/test/resources/technology/tabula/Publication_of_award_of_Bids_for_Transport_Sector__August_2016.pdf", - 56.925f,24.255f,549.945f,786.555f); - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/Publication_of_award_of_Bids_for_Transport_Sector__August_2016.csv"); - - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - List
tables = sea.extract(page); - assertEquals(1, tables.size()); - Table table = tables.get(0); - - StringBuilder sb = new StringBuilder(); - (new CSVWriter()).write(sb, table); - String result = sb.toString(); - assertEquals(expectedCsv, result); - page.getPDDoc().close(); - } - -} diff --git a/src/test/java/technology/tabula/TestTableDetection.java b/src/test/java/technology/tabula/TestTableDetection.java deleted file mode 100644 index c13ff201..00000000 --- a/src/test/java/technology/tabula/TestTableDetection.java +++ /dev/null @@ -1,336 +0,0 @@ -package technology.tabula; - -import com.google.gson.Gson; -import org.apache.pdfbox.Loader; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; -import technology.tabula.detectors.NurminenDetectionAlgorithm; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.*; -import java.util.logging.Level; -import java.util.logging.Logger; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -/** - * Created by matt on 2015-12-14. 
- */ -@RunWith(Parameterized.class) -public class TestTableDetection { - - private static int numTests = 0; - private static int numPassingTests = 0; - private static int totalExpectedTables = 0; - private static int totalCorrectlyDetectedTables = 0; - private static int totalErroneouslyDetectedTables = 0; - - private static Level defaultLogLevel; - - private static final class TestStatus { - public int numExpectedTables; - public int numCorrectlyDetectedTables; - public int numErroneouslyDetectedTables; - public boolean expectedFailure; - - private transient boolean firstRun; - private transient String pdfFilename; - - public TestStatus(String pdfFilename) { - this.numExpectedTables = 0; - this.numCorrectlyDetectedTables = 0; - this.expectedFailure = false; - this.pdfFilename = pdfFilename; - } - - public static TestStatus load(String pdfFilename) { - TestStatus status; - - try { - String json = UtilsForTesting.loadJson(jsonFilename(pdfFilename)); - status = new Gson().fromJson(json, TestStatus.class); - status.pdfFilename = pdfFilename; - } catch (IOException ioe) { - status = new TestStatus(pdfFilename); - status.firstRun = true; - } - - return status; - } - - public void save() { - try (FileWriter w = new FileWriter(jsonFilename(this.pdfFilename))) { - Gson gson = new Gson(); - w.write(gson.toJson(this)); - w.close(); - } catch (Exception e) { - throw new Error(e); - } - } - - public boolean isFirstRun() { - return this.firstRun; - } - - private static String jsonFilename(String pdfFilename) { - return pdfFilename.replace(".pdf", ".json"); - } - } - - @BeforeClass - public static void disableLogging() { - Logger pdfboxLogger = Logger.getLogger("org.apache.pdfbox"); - defaultLogLevel = pdfboxLogger.getLevel(); - pdfboxLogger.setLevel(Level.OFF); - } - - @AfterClass - public static void enableLogging() { - Logger.getLogger("org.apache.pdfbox").setLevel(defaultLogLevel); - } - - @Parameterized.Parameters - public static Collection data() { - String[] regionCodes = 
{"eu", "us"}; - - ArrayList data = new ArrayList<>(); - - for (String regionCode : regionCodes) { - String directoryName = "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-" + regionCode + "/"; - File dir = new File(directoryName); - - File[] pdfs = dir.listFiles((dir1, name) -> name.toLowerCase().endsWith(".pdf")); - - for (File pdf : pdfs) { - data.add(new Object[]{pdf}); - } - } - - return data; - } - - private File pdf; - private DocumentBuilder builder; - private TestStatus status; - - private int numCorrectlyDetectedTables = 0; - private int numErroneouslyDetectedTables = 0; - - public TestTableDetection(File pdf) { - this.pdf = pdf; - this.status = TestStatus.load(pdf.getAbsolutePath()); - - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - try { - this.builder = factory.newDocumentBuilder(); - } catch (Exception e) { - // ignored - } - } - - private void printTables(Map> tables) { - for (Integer page : tables.keySet()) { - System.out.println("Page " + page.toString()); - for (Rectangle table : tables.get(page)) { - System.out.println(table); - } - } - } - - @Test - public void testDetectionOfTables() throws Exception { - numTests++; - - // xml parsing stuff for ground truth - Document regionDocument = this.builder.parse(this.pdf.getAbsolutePath().replace(".pdf", "-reg.xml")); - NodeList tables = regionDocument.getElementsByTagName("table"); - - // tabula extractors - - - PDDocument pdfDocument = Loader.loadPDF(this.pdf); - ObjectExtractor extractor = new ObjectExtractor(pdfDocument); - - // parse expected tables from the ground truth dataset - Map> expectedTables = new HashMap<>(); - - int numExpectedTables = 0; - - for (int i = 0; i < tables.getLength(); i++) { - - Element table = (Element) tables.item(i); - Element region = (Element) table.getElementsByTagName("region").item(0); - Element boundingBox = (Element) region.getElementsByTagName("bounding-box").item(0); - - // we want to know where tables appear 
in the document - save the page and areas where tables appear - Integer page = Integer.decode(region.getAttribute("page")); - float x1 = Float.parseFloat(boundingBox.getAttribute("x1")); - float y1 = Float.parseFloat(boundingBox.getAttribute("y1")); - float x2 = Float.parseFloat(boundingBox.getAttribute("x2")); - float y2 = Float.parseFloat(boundingBox.getAttribute("y2")); - - List pageTables = expectedTables.get(page); - if (pageTables == null) { - pageTables = new ArrayList<>(); - expectedTables.put(page, pageTables); - } - - // have to invert y co-ordinates - // unfortunately the ground truth doesn't contain page dimensions - // do some extra work to extract the page with tabula and get the dimensions from there - Page extractedPage = extractor.extractPage(page); - - float top = (float) extractedPage.getHeight() - y2; - float left = x1; - float width = x2 - x1; - float height = y2 - y1; - - pageTables.add(new Rectangle(top, left, width, height)); - numExpectedTables++; - } - - // now find tables detected by tabula-java - Map> detectedTables = new HashMap<>(); - - // the algorithm we're going to be testing - NurminenDetectionAlgorithm detectionAlgorithm = new NurminenDetectionAlgorithm(); - - PageIterator pages = extractor.extract(); - while (pages.hasNext()) { - Page page = pages.next(); - List tablesOnPage = detectionAlgorithm.detect(page); - if (!tablesOnPage.isEmpty()) { - detectedTables.put(page.getPageNumber(), tablesOnPage); - } - } - - // now compare - System.out.println("Testing " + this.pdf.getName()); - - List errors = new ArrayList<>(); - this.status.numExpectedTables = numExpectedTables; - totalExpectedTables += numExpectedTables; - - for (Integer page : expectedTables.keySet()) { - List expectedPageTables = expectedTables.get(page); - List detectedPageTables = detectedTables.get(page); - - if (detectedPageTables == null) { - errors.add("Page " + page.toString() + ": " + expectedPageTables.size() + " expected tables not found"); - continue; - } - - 
errors.addAll(this.comparePages(page, detectedPageTables, expectedPageTables)); - - detectedTables.remove(page); - } - - // leftover pages means we detected extra tables - for (Integer page : detectedTables.keySet()) { - List detectedPageTables = detectedTables.get(page); - errors.add("Page " + page.toString() + ": " + detectedPageTables.size() + " tables detected where there are none"); - - this.numErroneouslyDetectedTables += detectedPageTables.size(); - totalErroneouslyDetectedTables += detectedPageTables.size(); - } - - boolean failed = errors.size() > 0; - - if (failed) { - System.out.println("==== CURRENT TEST ERRORS ===="); - for (String error : errors) { - System.out.println(error); - } - } else { - numPassingTests++; - } - - System.out.println("==== CUMULATIVE TEST STATISTICS ===="); - - System.out.println(numPassingTests + " out of " + numTests + " currently passing"); - System.out.println(totalCorrectlyDetectedTables + " out of " + totalExpectedTables + " expected tables detected"); - System.out.println(totalErroneouslyDetectedTables + " tables incorrectly detected"); - - - if (this.status.isFirstRun()) { - // make the baseline - this.status.expectedFailure = failed; - this.status.numCorrectlyDetectedTables = this.numCorrectlyDetectedTables; - this.status.numErroneouslyDetectedTables = this.numErroneouslyDetectedTables; - this.status.save(); - } else { - // compare to baseline - if (this.status.expectedFailure) { - // make sure the failure didn't get worse - assertTrue("This test is an expected failure, but it now detects even fewer tables.", this.numCorrectlyDetectedTables >= this.status.numCorrectlyDetectedTables); - assertTrue("This test is an expected failure, but it now detects more bad tables.", this.numErroneouslyDetectedTables <= this.status.numErroneouslyDetectedTables); - assertTrue("This test used to fail but now it passes! Hooray! Please update the test's JSON file accordingly.", failed); - } else { - assertFalse("Table detection failed. 
Please see the error messages for more information.", failed); - } - } - } - - private List comparePages(Integer page, List detected, List expected) { - ArrayList errors = new ArrayList<>(); - - // go through the detected tables and try to match them with expected tables - // from http://www.orsigiorgio.net/wp-content/papercite-data/pdf/gho*12.pdf (comparing regions): - // for other (e.g.“black-box”) algorithms, bounding boxes and content are used. A region is correct if it - // contains the minimal bounding box of the ground truth without intersecting additional content. - for (Iterator detectedIterator = detected.iterator(); detectedIterator.hasNext(); ) { - Rectangle detectedTable = detectedIterator.next(); - - for (int i = 0; i < expected.size(); i++) { - if (detectedTable.contains(expected.get(i))) { - // we have a candidate for the detected table, make sure it doesn't intersect any others - boolean intersectsOthers = false; - for (int j = 0; j < expected.size(); j++) { - if (i == j) continue; - if (detectedTable.intersects(expected.get(j))) { - intersectsOthers = true; - break; - } - } - - if (!intersectsOthers) { - // success - detectedIterator.remove(); - expected.remove(i); - - this.numCorrectlyDetectedTables++; - totalCorrectlyDetectedTables++; - - break; - } - } - } - } - - // any expected tables left over weren't detected - for (Rectangle expectedTable : expected) { - errors.add("Page " + page.toString() + ": " + expectedTable.toString() + " not detected"); - } - - // any detected tables left over were detected erroneously - for (Rectangle detectedTable : detected) { - errors.add("Page " + page.toString() + ": " + detectedTable.toString() + " detected where there is no table"); - this.numErroneouslyDetectedTables++; - totalErroneouslyDetectedTables++; - } - - return errors; - } -} diff --git a/src/test/java/technology/tabula/TestTextElement.java b/src/test/java/technology/tabula/TestTextElement.java deleted file mode 100644 index ee0fbf3d..00000000 --- 
a/src/test/java/technology/tabula/TestTextElement.java +++ /dev/null @@ -1,215 +0,0 @@ -package technology.tabula; - -import org.apache.pdfbox.pdmodel.font.PDFont; -import org.apache.pdfbox.pdmodel.font.PDType1Font; -import org.apache.pdfbox.pdmodel.font.Standard14Fonts; -import org.junit.Assert; -import org.junit.Test; - -import java.util.ArrayList; -import java.util.List; - -public class TestTextElement { - - - @Test - public void createTextElement() { - - TextElement textElement = new TextElement(5f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f); - - Assert.assertNotNull(textElement); - Assert.assertEquals("A", textElement.getText()); - Assert.assertEquals(1f, textElement.getFontSize(), 0); - Assert.assertEquals(15f, textElement.getLeft(), 0); - Assert.assertEquals(5f, textElement.getTop(), 0); - Assert.assertEquals(10f, textElement.getWidth(), 0); - Assert.assertEquals(20f, textElement.getHeight(), 0); - Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName()); - Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0); - Assert.assertEquals(0f, textElement.getDirection(), 0); - - - } - - @Test - public void createTextElementWithDirection() { - - TextElement textElement = new TextElement(5f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f); - - Assert.assertNotNull(textElement); - Assert.assertEquals("A", textElement.getText()); - Assert.assertEquals(1f, textElement.getFontSize(), 0); - Assert.assertEquals(15f, textElement.getLeft(), 0); - Assert.assertEquals(5f, textElement.getTop(), 0); - Assert.assertEquals(10f, textElement.getWidth(), 0); - Assert.assertEquals(20f, textElement.getHeight(), 0); - Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName()); - Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0); - Assert.assertEquals(6f, textElement.getDirection(), 0); - - - } - - @Test - public void 
mergeFourElementsIntoFourWords() { - - List elements = new ArrayList<>(); - PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); - elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); - elements.add(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f)); - elements.add(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f)); - elements.add(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList<>(); - expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f))); - expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f))); - expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f))); - expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f))); - - Assert.assertEquals(expectedWords, words); - - } - - @Test - public void mergeFourElementsIntoOneWord() { - - List elements = new ArrayList<>(); - PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); - elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); - elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); - elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList<>(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); - textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); - textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); - textChunk.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f)); - expectedWords.add(textChunk); - - Assert.assertEquals(expectedWords, words); - - } - - @Test - public void 
mergeElementsShouldBeIdempotent() { - /* - * a bug in TextElement.merge_words would delete the first TextElement in the array - * it was called with. Discussion here: https://github.com/tabulapdf/tabula-java/issues/78 - */ - - List elements = new ArrayList<>(); - PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); - elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); - elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); - elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - List words2 = TextElement.mergeWords(elements); - Assert.assertEquals(words, words2); - } - - @Test - public void mergeElementsWithSkippingRules() { - - List elements = new ArrayList<>(); - PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); - elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 17f, 10f, 20f, font, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); - elements.add(new TextElement(0.001f, 25f, 10f, 20f, font, 1f, " ", 1f, 6f)); - elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); - PDFont TIMES_ROMAN = new PDType1Font(Standard14Fonts.FontName.TIMES_ROMAN); - elements.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList<>(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)); - textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f)); - textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f)); - textChunk.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f)); - expectedWords.add(textChunk); - - Assert.assertEquals(expectedWords, words); 
- - } - - @Test - public void mergeTenElementsIntoTwoWords() { - - List elements = new ArrayList<>(); - PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); - elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f)); - elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f)); - elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f)); - elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f)); - elements.add(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f)); - elements.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f)); - elements.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f)); - elements.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f)); - elements.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList<>(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f)); - textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f)); - textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f)); - textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f)); - textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, font, 1f, " ", 1f)); //Check why width=10.5? 
- expectedWords.add(textChunk); - TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f)); - textChunk2.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f)); - textChunk2.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f)); - textChunk2.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f)); - textChunk2.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f)); - expectedWords.add(textChunk2); - - Assert.assertEquals(2, words.size()); - Assert.assertEquals(expectedWords, words); - - } - - @Test - public void mergeTenElementsIntoTwoLines() { - - List elements = new ArrayList<>(); - PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); - elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f)); - elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f)); - elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f)); - elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f)); - elements.add(new TextElement(20f, 0f, 10f, 20f, font, 1f, "M", 1f, 6f)); - elements.add(new TextElement(20f, 10f, 10f, 20f, font, 1f, "U", 1f, 6f)); - elements.add(new TextElement(20f, 20f, 10f, 20f, font, 1f, "N", 1f, 6f)); - elements.add(new TextElement(20f, 30f, 10f, 20f, font, 1f, "D", 1f, 6f)); - elements.add(new TextElement(20f, 40f, 10f, 20f, font, 1f, "O", 1f, 6f)); - - List words = TextElement.mergeWords(elements); - - List expectedWords = new ArrayList<>(); - TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f)); - textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f)); - textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f)); - textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f)); - expectedWords.add(textChunk); - TextChunk textChunk2 = new TextChunk(new TextElement(20f, 0f, 10f, 20f, font, 1f, "M", 1f, 6f)); - textChunk2.add(new 
TextElement(20f, 10f, 10f, 20f, font, 1f, "U", 1f, 6f)); - textChunk2.add(new TextElement(20f, 20f, 10f, 20f, font, 1f, "N", 1f, 6f)); - textChunk2.add(new TextElement(20f, 30f, 10f, 20f, font, 1f, "D", 1f, 6f)); - textChunk2.add(new TextElement(20f, 40f, 10f, 20f, font, 1f, "O", 1f, 6f)); - expectedWords.add(textChunk2); - - Assert.assertEquals(2, words.size()); - Assert.assertEquals(expectedWords, words); - - } - - -} diff --git a/src/test/java/technology/tabula/TestUtils.java b/src/test/java/technology/tabula/TestUtils.java deleted file mode 100644 index cb85cb7b..00000000 --- a/src/test/java/technology/tabula/TestUtils.java +++ /dev/null @@ -1,131 +0,0 @@ -package technology.tabula; - -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; - -import java.awt.geom.Point2D; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -import org.apache.pdfbox.Loader; -import org.apache.pdfbox.rendering.ImageType; -import org.apache.commons.cli.ParseException; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.junit.Test; - -public class TestUtils { - - public static final Ruling[] RULINGS = { - new Ruling(new Point2D.Float(0, 0), new Point2D.Float(1,1)), - new Ruling(new Point2D.Float(2, 2), new Point2D.Float(3,3)) - }; - - public static final Rectangle[] RECTANGLES = { - new Rectangle(), - new Rectangle(0, 0, 2, 4) - }; - - - @Test - public void testBoundsOfTwoRulings() { - Rectangle r = Utils.bounds(Arrays.asList(RULINGS)); - assertEquals(0, r.getMinX(), 0); - assertEquals(0, r.getMinY(), 0); - assertEquals(3, r.getWidth(), 0); - assertEquals(3, r.getHeight(), 0); - } - - @Test - public void testBoundsOfOneEmptyRectangleAndAnotherNonEmpty() { - Rectangle r = Utils.bounds(Arrays.asList(RECTANGLES)); - assertEquals(r, 
RECTANGLES[1]); - } - - @Test - public void testBoundsOfOneRectangle() { - ArrayList shapes = new ArrayList<>(); - shapes.add(new Rectangle(0, 0, 20, 40)); - Rectangle r = Utils.bounds(shapes); - assertEquals(r, shapes.get(0)); - } - - @Test - public void testParsePagesOption() throws ParseException { - - List rv = Utils.parsePagesOption("1"); - assertArrayEquals(new Integer[] { 1 }, rv.toArray()); - - rv = Utils.parsePagesOption("1-4"); - assertArrayEquals(new Integer[] { 1,2,3,4 }, rv.toArray()); - - rv = Utils.parsePagesOption("1-4,20-24"); - assertArrayEquals(new Integer[] { 1,2,3,4,20,21,22,23,24 }, rv.toArray()); - - rv = Utils.parsePagesOption("all"); - assertNull(rv); - } - - @Test(expected=ParseException.class) - public void testExceptionInParsePages() throws ParseException { - Utils.parsePagesOption("1-4,24-22"); - } - - @Test(expected=ParseException.class) - public void testAnotherExceptionInParsePages() throws ParseException { - Utils.parsePagesOption("quuxor"); - } - - @Test - public void testQuickSortEmptyList() { - List numbers = new ArrayList<>(); - QuickSort.sort(numbers); - - assertEquals(Collections.emptyList(), numbers); - } - - @Test - public void testQuickSortOneElementList() { - List numbers = Arrays.asList(5); - QuickSort.sort(numbers); - - assertEquals(Arrays.asList(5), numbers); - } - - @Test - public void testQuickSortShortList() { - List numbers = Arrays.asList(4, 5, 6, 8, 7, 1, 2, 3); - QuickSort.sort(numbers); - - assertEquals(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), numbers); - } - - @Test - public void testQuickSortLongList() { - - List numbers = new ArrayList<>(); - List expectedNumbers = new ArrayList<>(); - - for(int i = 0; i <= 12000; i++){ - numbers.add(12000 - i); - expectedNumbers.add(i); - } - - QuickSort.sort(numbers); - - assertEquals(expectedNumbers, numbers); - } - - @Test - public void testJPEG2000DoesNotRaise() throws IOException { - PDDocument pdf_document = Loader.loadPDF(new 
File("src/test/resources/technology/tabula/jpeg2000.pdf")); - PDPage page = pdf_document.getPage(0); - Utils.pageConvertToImage(pdf_document, page, 360, ImageType.RGB); - } - -} diff --git a/src/test/java/technology/tabula/TestWriters.java b/src/test/java/technology/tabula/TestWriters.java deleted file mode 100644 index 961d57af..00000000 --- a/src/test/java/technology/tabula/TestWriters.java +++ /dev/null @@ -1,136 +0,0 @@ -package technology.tabula; - -import static org.junit.Assert.assertEquals; - -import java.io.IOException; -import java.util.List; - -import org.junit.Test; - -import com.google.gson.Gson; -import com.google.gson.JsonArray; - -import technology.tabula.extractors.BasicExtractionAlgorithm; -import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; -import technology.tabula.writers.CSVWriter; -import technology.tabula.writers.JSONWriter; -import technology.tabula.writers.TSVWriter; - -public class TestWriters { - - private static final String EXPECTED_CSV_WRITER_OUTPUT = "\"ABDALA de MATARAZZO, Norma Amanda\",Frente Cívico por Santiago,Santiago del Estero,AFIRMATIVO"; - - private Table getTable() throws IOException { - Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/argentina_diputados_voting_record.pdf", 269.875f, 12.75f, 790.5f, 561f); - BasicExtractionAlgorithm bea = new BasicExtractionAlgorithm(); - Table table = bea.extract(page).get(0); - return table; - } - - private List
getTables() throws IOException { - - Page page = UtilsForTesting.getPage("src/test/resources/technology/tabula/twotables.pdf", 1); - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - return sea.extract(page); - } - - @Test - public void testCSVWriter() throws IOException { - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/argentina_diputados_voting_record.csv"); - Table table = this.getTable(); - StringBuilder sb = new StringBuilder(); - (new CSVWriter()).write(sb, table); - String s = sb.toString(); - String[] lines = s.split("\\r?\\n"); - assertEquals(lines[0], EXPECTED_CSV_WRITER_OUTPUT); - assertEquals(expectedCsv, s); - } - - // TODO Add assertions - @Test - public void testTSVWriter() throws IOException { - Table table = this.getTable(); - StringBuilder sb = new StringBuilder(); - (new TSVWriter()).write(sb, table); - String s = sb.toString(); - //System.out.println(s); - //String[] lines = s.split("\\r?\\n"); - //assertEquals(lines[0], EXPECTED_CSV_WRITER_OUTPUT); - } - - @Test - public void testJSONWriter() throws IOException { - String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json"); - Table table = this.getTable(); - StringBuilder sb = new StringBuilder(); - (new JSONWriter()).write(sb, table); - String s = sb.toString(); - assertEquals(expectedJson, s); - } - - @Test - public void testJSONSerializeInfinity() throws IOException { - String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/schools.json"); - Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/schools.pdf", 53.74f, 16.97f, 548.74f, 762.3f); - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - Table table = sea.extract(page).get(0); - - StringBuilder sb = new StringBuilder(); - (new JSONWriter()).write(sb, table); - String s = sb.toString(); - 
assertEquals(expectedJson, s); - } - - @Test - public void testCSVSerializeInfinity() throws IOException { - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/schools.csv"); - Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/schools.pdf", 53.74f, 16.97f, 548.74f, 762.3f); - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - Table table = sea.extract(page).get(0); - - StringBuilder sb = new StringBuilder(); - (new CSVWriter()).write(sb, table); - String s = sb.toString(); - assertEquals(expectedCsv, s); - } - - @Test - public void testJSONSerializeTwoTables() throws IOException { - String expectedJson = UtilsForTesting.loadJson("src/test/resources/technology/tabula/json/twotables.json"); - List
tables = this.getTables(); - StringBuilder sb = new StringBuilder(); - (new JSONWriter()).write(sb, tables); - - String s = sb.toString(); - assertEquals(expectedJson, s); - - Gson gson = new Gson(); - JsonArray json = gson.fromJson(s, JsonArray.class); - assertEquals(2, json.size()); - } - - @Test - public void testCSVSerializeTwoTables() throws IOException { - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/twotables.csv"); - List
tables = this.getTables(); - StringBuilder sb = new StringBuilder(); - (new CSVWriter()).write(sb, tables); - - String s = sb.toString(); - assertEquals(expectedCsv, s); - } - - @Test - public void testCSVMultilineRow() throws IOException { - String expectedCsv = UtilsForTesting.loadCsv("src/test/resources/technology/tabula/csv/frx_2012_disclosure.csv"); - Page page = UtilsForTesting.getAreaFromFirstPage("src/test/resources/technology/tabula/frx_2012_disclosure.pdf", 53.0f, 49.0f, 735.0f, 550.0f); - SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm(); - Table table = sea.extract(page).get(0); - - StringBuilder sb = new StringBuilder(); - (new CSVWriter()).write(sb, table); - String s = sb.toString(); - assertEquals(expectedCsv, s); - } - -} diff --git a/src/test/java/technology/tabula/UtilsForTesting.java b/src/test/java/technology/tabula/UtilsForTesting.java deleted file mode 100644 index 8d3c91cf..00000000 --- a/src/test/java/technology/tabula/UtilsForTesting.java +++ /dev/null @@ -1,91 +0,0 @@ -package technology.tabula; - -import java.io.*; -import java.nio.charset.Charset; -import java.util.List; - -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVParser; -import org.apache.commons.csv.CSVPrinter; -import org.apache.pdfbox.Loader; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.junit.Assert; - -public class UtilsForTesting { - - public static Page getAreaFromFirstPage(String path, float top, float left, float bottom, float right) throws IOException { - return getAreaFromPage(path, 1, top, left, bottom, right); - } - - public static Page getAreaFromPage(String path, int page, float top, float left, float bottom, float right) throws IOException { - return getPage(path, page).getArea(top, left, bottom, right); - } - - public static Page getPage(String path, int pageNumber) throws IOException { - ObjectExtractor oe = null; - try { - PDDocument document = Loader.loadPDF(new File(path)); - oe = new 
ObjectExtractor(document); - return oe.extract(pageNumber); - } finally { - if (oe != null) - oe.close(); - } - } - - public static String[][] tableToArrayOfRows(Table table) { - List> tableRows = table.getRows(); - - int maxColCount = 0; - - for (int i = 0; i < tableRows.size(); i++) { - List row = tableRows.get(i); - if (maxColCount < row.size()) { - maxColCount = row.size(); - } - } - - Assert.assertEquals(maxColCount, table.getColCount()); - - String[][] rv = new String[tableRows.size()][maxColCount]; - - for (int i = 0; i < tableRows.size(); i++) { - List row = tableRows.get(i); - for (int j = 0; j < row.size(); j++) { - rv[i][j] = table.getCell(i, j).getText(); - } - } - - return rv; - } - - public static String loadJson(String path) throws IOException { - - StringBuilder stringBuilder = new StringBuilder(); - try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"))) { - String line = null; - while ((line = reader.readLine()) != null) { - stringBuilder.append(line); - } - } - - return stringBuilder.toString(); - - } - - public static String loadCsv(String path) throws IOException { - - StringBuilder out = new StringBuilder(); - CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File(path), Charset.forName("utf-8"), CSVFormat.EXCEL); - - CSVPrinter printer = new CSVPrinter(out, CSVFormat.EXCEL); - printer.printRecords(parse); - printer.close(); - - String csv = out.toString().replaceAll("(? - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001-str.xml deleted file mode 100644 index 3739bf09..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001-str.xml +++ /dev/null @@ -1,2929 +0,0 @@ - - - - - - - THRESHOLD FOR RELEASES - - - - - - - - - - - - - - - - to air -kg/year - - - - - - - to water -kg/year - - - - - - - - - - - to land -kg/year - - - - - - - - - - Carbon dioxide (CO2) - - - - - - - - - - - - - - - 100 million - - - - - - - - - - - - - - - - - - - - - - Hydro-fluorocarbons (HFCs) - - - - - - - - - - - - - - 100 - - - - - - - - - - - - - - - - - Methane (CH4) - - - - - - - - - - 100 000 - - - - - - - - - - - - - - - - - - Nitrous oxide (N2O) - - - - - - - - - - - - - - - - 10 000 - - - - - - - - - - - - - - - - - Perfluorocarbons (PFCs) - - - - - - - - - - - - - - 100 - - - - - - - - - - - - - - - - - Sulphur hexafluoride (SF6) - - - - - - - - - - - - - - - 50 - - - - - - - - - - - - - - - - -
- - - - - THRESHOLD FOR RELEASES - - - - - - - - - - - - - - - - to air -kg/year - - - - - - - to water -kg/year - - - - - - - - - - - to land -kg/year - - - - - - - - - Ammonia (NH3) - - - - - - - - - 10 000 - - - - - - - - - - - - - - - - - Carbon monoxide (CO) - - - - - - - - - - - - 500 000 - - - - - - - - - - - - - - - - - - Chlorine and inorganic compounds -(as HCl) - - - - - - - - - - - - 10 000 - - - - - - - - - - - - - - - - - Chlorofluorocarbons (CFCs) - - - - - - - - - - - - - - - - 1 - - - - - - - - - - - - - - - - - Flourine and inorganic compounds -(as HF) - - - - - - - - - - - - - 5 000 - - - - - - - - - - - - - - - - - Halons - - - - - - - 1 - - - - - - - - - - - - - - - - - Hydrochlorofluorocarbons (HCFCs) - - - - - - - - - - - - - - - - - - - - 1 - - - - - - - - - - - - - - - - - Hydrogen Cyanide (HCN) - - - - - - - - - - - - - 200 - - - - - - - - - - - - - - - - - Nitrogen oxides (NOx/NO2) - - - - - - - - - - - - - - - 100 000 - - - - - - - - - - - - - - - - - - Non-methane volatile organic -compounds (NMVOC) - - - - - - - - - - - - - - - 100 000 - - - - - - - - - - - - - - - - - - Sulphur oxides (SOx/SO2) - - - - - - - - - - - - - - - - - - 150 000 - - - - - - - - - - - - - - - - -
- - - - - THRESHOLD FOR RELEASES - - - - - - - - - - - - - - - - to air -kg/year - - - - - - - to water -kg/year - - - - - - - - - - - to land -kg/year - - - - - - - - - - Arsenic and compounds (as As) - - - - - - - - - - - - - 20 - - - - - 5 - - - - - 5 - - - - - Cadmium and compounds (as Cd) - - - - - - - - - - - - - 10 - - - - - 5 - - - - - 5 - - - - - Chromium and compounds (as Cr) - - - - - - - - - - - - 100 - - - - - 50 - - - - - 50 - - - - - Copper and compounds (as Cu) - - - - - - - - - - - - - 100 - - - - - 50 - - - - - 50 - - - - - Lead and compounds (as Pb) - - - - - - - - - - - 200 - - - - - 20 - - - - - 20 - - - - - Mercury and compounds (as Hg) - - - - - - - - 10 - - - - - 1 - - - - - 1 - - - - - Nickel and compounds (as Ni) - - - - - - - - - - - - - - - - - - - - - - - - 50 - - - - - 20 - - - - - 20 - - - - - Zinc and compounds (as Zn) - - - - - - - - - - - - - 200 - - - - - 100 - - - - - 100 - - - -
- - - - - THRESHOLD FOR RELEASES - - - - - - - - - - - - - - - - to air -kg/year - - - - - - - to water -kg/year - - - - - - - - - - - to land -kg/year - - - - - - - - - 1,2,3,4,5,6- hexachlorocyclohexane -(HCH) - - - - - - - - - - - - - - - - - - - - 10 - - - - - 1 - - - - - 1 - - - - - Alachlor - - - - - - - - - - - - - - 1 - - - - - 1 - - - - - Aldrin - - - - - - - 1 - - - - - 1 - - - - - 1 - - - - - Atrazine - - - - - - - - - - - - - 1 - - - - - 1 - - - - - Chlordane - - - - - - - - 1 - - - - - 1 - - - - - 1 - - - - - Chlordecone - - - - - - - - - 1 - - - - - 1 - - - - - 1 - - - - - Chlorfenvinphos - - - - - - - - - - - - - 1 - - - - - 1 - - - - - Chlorpyrifos - - - - - - - - - - - - - - - - 1 - - - - - 1 - - - - - DDT - - - - - 1 - - - - - 1 - - - - - 1 - - - - - Diuron - - - - - - - - - - - - - 1 - - - - - 1 - - - - - Endosulphan - - - - - - - - - - - 1 - - - - - 1 - - - - - Endrin - - - - - 1 - - - - - 1 - - - - - 1 - - - - - Heptachlor - - - - - - - 1 - - - - - 1 - - - - - 1 - - - - - Isodrin - - - - - - - - - - - - - - 1 - - - - - - - - - - - Isoproturon - - - - - - - - - - - - - - - 1 - - - - - 1 - - - - - Lindane - - - - - - 1 - - - - - 1 - - - - - 1 - - - - - Mirex - - - - - - - 1 - - - - - 1 - - - - - 1 - - - - - Simazine - - - - - - - - - - - - - - - - 1 - - - - - 1 - - - - - Toxaphene - - - - - - 1 - - - - - 1 - - - - - 1 - - - - - Tributylin and compounds - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - - 1 - - - - - Trifluralin - - - - - - - - - - - - - - - - - 1 - - - - - 1 - - - - - Triphenyltin and compounds - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - - 1 - - - -
- - - - - THRESHOLD FOR RELEASES - - - - - - - - - - - - - - - - to air -kg/year - - - - - - - to water -kg/year - - - - - - - - - - - to land -kg/year - - - - - - - - - - 1,1,1-trichloroethane - - - - - - - - - - - 100 - - - - - - - - - - - - - - - - - 1,1,2,2-tetrachloroethane - - - - - - - - - - 50 - - - - - - - - - - - - - - - - - - 1,2-dichloroethane (EDC) - - - - - - - - - - 1 000 - - - - - 10 - - - - - 10 - - - - - Brominated diphenylethers (PBDE) - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - - 1 - - - - - Chloro-alkanes, C10-C13 - - - - - - - - - - - - - - - - - - - - - - 1 - - - - - 1 - - - - - Dichloromethane (DCM) - - - - - - - - - - - - - - 1 000 - - - - - 10 - - - - - 10 - - - - - Dieldrin - - - - - 1 - - - - - 1 - - - - - 1 - - - - - Halogenated Organic Compounds (AOX) - - - - - - - - - - - - - - - - - - 1 000 - - - - - 1 000 - - - - - Hexabromobifenyl - - - - - - - - - 0,1 - - - - - - 0,1 - - - - - - 0,1 - - - - - - Hexachlorobenzene (HCB) - - - - - - - - - - - - - - - - - - 10 - - - - - 1 - - - - - 1 - - - - - Hexachlorobutadiene (HCBD) - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - - 1 - - - - - PCDD+PCFD (Dioxins+furans) (as Teq) - - - - - - - - - - - - - - - - - 0,0001 - - - - - - 0,0001 - - - - - - 0,0001 - - - - - - Pentachlorobenzene - - - - - - - - - - 1 - - - - - 1 - - - - - 1 - - - - - Pentachlorophenol (PCP) - - - - - - - - - - - - 10 - - - - - 1 - - - - - 1 - - - - - Polychlorinated biphenyls (PCB) - - - - - - - - - - - - - - 0,1 - - - - - - 0,1 - - - - - - 0,1 - - - - - - Tetrachloroethylene (PER) - - - - - - - - - - - - - - - - - - 2 000 - - - - - 10 - - - - - - - - - - - Tetrachloromethane (TCM) - - - - - - 100 - - - - - 1 - - - - - - - - - - - Trichlorobenzenes (TCBs) (all isomers) - - - - - - - - - - - - - - - - - - - - - - - - 10 - - - - - 1 - - - - - - - - - - - Trichloroethylene - - - - - - - - - - - - - - 2 000 - - - - - 10 - - - - - - - - - - - Trichloromethane - - - - - - - - - - 500 - - - - - 
10 - - - - - - - - - - - Vynil chloride - - - - - - - - - - - - - - 1 000 - - - - - 10 - - - - - 10 - - - -
- - - - - THRESHOLD FOR RELEASES - - - - - - - - - - - - - - - - to air -kg/year - - - - - - - to water -kg/year - - - - - - - - - - - to land -kg/year - - - - - - - - - Anthracene - - - - - - - 50 - - - - - 1 - - - - - 1 - - - - - Benzene - - - - - - - - 1 000 - - - - - 200 (as -BTEX) - - - - - - - - - 200 (as -BTEX) - - - - - - - - - Benzo(g,h,i)perylene - - - - - - - - - - - - - - - - - 1 - - - - - - - - - - - Di-(2-ethyl hexyl) phthalate (DEHP) - - - - - - - - - - - - - - - - - - - - - - - - - - - 10 - - - - - 1 - - - - - 1 - - - - - Ethyl benzene - - - - - - - - - - - - - - - - 200 (as -BTEX) - - - - - - - - - 200 (as -BTEX) - - - - - - - - - Ethylene oxide - - - - - - - - - 1 000 - - - - - 10 - - - - - 10 - - - - - Fluoranthene - - - - - - - - - - - - - 1 - - - - - - - - - - - Naphthalene - - - - - - - - 100 - - - - - 10 - - - - - 10 - - - - - Nonylphenol and Nonylphenol ethoxylates -(NP/NPEs) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - - 1 - - - - - Octylphenols and octylphenol ethoxylates - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - - - - - - - - Organotin compounds (as total Sn) - - - - - - - - - - - - - - - - - - 50 - - - - - 50 - - - - - Phenols (as total C) - - - - - - - - - - - - - - - - - - - - - 20 - - - - - 20 - - - - - Polycyclic Aromatic hydrocarbons (PAHs) - - - - - - - - - - - - - - - - - - - - - - - - - - - - 50 - - - - - 5 - - - - - 5 - - - - - Toluene - - - - - - - - - - - - 200 (as -BTEX) - - - - - - - - - 200 (as -BTEX) - - - - - - - - - Total Organic Carbon (TOC) (as total C or -COD/3) - - - - - - - - - - - - - - - - - - - - - - - - - - - 50 000 - - - - - - - - - - - - Xylenes - - - - - - - - - - - - - - 200 (as -BTEX) - - - - - - - - - 200 (as -BTEX) - - - - - - - -
- - - - - THRESHOLD FOR RELEASES - - - - - - - - - - - - - - - - to air -kg/year - - - - - - - to water -kg/year - - - - - - - - - - - to land -kg/year - - - - - - - - - - Asbestos - - - - - - 1 - - - - - 1 - - - - - 1 - - - - - Chlorides (as total Cl) - - - - - - - - - - - - - - - - - - - - - - - - - 2 million - - - - - - - - 2 million - - - - - - - - Cyanides (as total CN) - - - - - - - - - - - - - - - - - - - 50 - - - - - 50 - - - - - Fluorides (as total F) - - - - - - - - - - - - - - 2 000 - - - - - 2 000 - - - - - Particulate matter (PM10) - - - - - - - - - - 50 000 - - - - - - - - - - - - - - - - - Total Nitrogen - - - - - - - - - - - - - - - 50 000 - - - - - 50 000 - - - - - Total Phosphorus - - - - - - - - - - - - - - - 5 000 - - - - - 5 000 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.json deleted file mode 100644 index a801a86e..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":7,"numCorrectlyDetectedTables":7,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.pdf deleted file mode 100644 index 20680bda..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002-reg.xml deleted file mode 100644 index 4846e94f..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002-reg.xml +++ /dev/null @@ -1,101 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002-str.xml deleted file mode 100644 index 0c1fb641..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002-str.xml +++ /dev/null @@ -1,232 +0,0 @@ - - - - - - - Q1 - - - - - Q2 - - - - - Q3 - - - - - Q4 - - - - - Total - - - - - - - - - 2004 - - - - - 34.7 - - - - - - - 36.2 - - - - - - - 44.5 - - - - - - - 51.3 - - - - - - - 166.7 - - - - - - - - 2005 - - - - - 58.1 - - - - - - - 63.4 - - - - - - - 61.6 - - - - - - - 55.2 - - - - - - - 238.4 - - - - - - - - 2006 - - - - - 74.7 - - - - - - - 84.1 - - - - - - - 96.5 - - - - - - - 111.8 - - - - - - - - 367.1 - - - - - - - - 2007 - - - - - 148.8 - - - - - - - - 142.3 - - - - - - - - 156.7 - - - - - - - - 186.1 - - - - - - - - 633.9 - - - - - - - - 2008 - - - - - 120.9 - - - - - - - - 106 - - - - - 226.8 - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002.pdf deleted file mode 100644 index 1606cd28..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003-reg.xml deleted file mode 100644 index f68b8383..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003-reg.xml +++ /dev/null @@ -1,253 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003-str.xml deleted file mode 100644 index ea1a77be..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003-str.xml +++ /dev/null @@ -1,565 +0,0 @@ - - - - - - - All companies analysed - - - - - FTSE Eurotop 100 companies -analysed - - - - - - - - - - - - - - - Number of member states in -the analysis - - - - - - - - - - - - - - - 21 - - - - - 8 - - - - - Number of member states -where one or more of the -financial companies applied the -amendment - - - - - - - - - - - - - - - - - - - - - - 11 - - - - - 3 - - - -
- - - - - Number of -financial -companies - - - - - - - - - - - Pct of all -companies -analysed - - - - - - - - - - - Number of -financial -companies on -FTSE Eurotop -100 - - - - - - - - - - - - - - - - Pct of FTSE -Eurotop 100 -companies - - - - - - - - - - - - - - 0 reclassifications - - - - - - - 52 - - - - - 52% - - - - - 14 - - - - - 64% - - - - - 1 reclassification - - - - - - - 28 - - - - - 28% - - - - - 4 - - - - - 18% - - - - - 2 reclassifications - - - - - - - 11 - - - - - 11% - - - - - 2 - - - - - 9% - - - - - 3 reclassifications - - - - - - - 8 - - - - - 8% - - - - - 2 - - - - - 9% - - - - - 4 reclassifications - - - - - - - 1 - - - - - 1% - - - - - 0 - - - - - 0% - - - - - Total - - - - - 100 - - - - - 22 - - - -
- - - - - Reclassification -from Fair value -through profit -and loss to -loans and -receivables - - - - - - - - - - - - - - - - - - - - Reclassification -from Available -for Sale to -loans and -receivables - - - - - - - - - - - - - - - - - Reclassification -from Fair value -through profit -and loss to -Available for -sale - - - - - - - - - - - - - - - - - - - - - - - Reclassification -from Fair value -through profit -and loss to -Held to -Maturity - - - - - - - - - - - - - - - - - - - Total - - - - - - - - Number of -financial -companies -who applied -the option for -this category - - - - - - - - - - - - - - - 27 - - - - - 16 - - - - - 23 - - - - - 15 - - - - - 81 - - - - - Percentage of -all financial -companies -analysed who -applied the -option for this -category - - - - - - - - - - - - - - - - - - - - - - - - 33% - - - - - 20% - - - - - 28% - - - - - 19% - - - - - 100% - - - - - Number of -financial -companies -where the -disclosure -requirements -were stricter - - - - - - - - - - - - - - - - - - 8 - - - - - 3 - - - - - 6 - - - - - 2 - - - - - 19 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003.json deleted file mode 100644 index 71ae6a77..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":3,"numCorrectlyDetectedTables":3,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003.pdf deleted file mode 100644 index dc3fe8b5..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004-reg.xml deleted file mode 100644 index 94ca5f9d..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004-reg.xml +++ /dev/null @@ -1,2336 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004-str.xml deleted file mode 100644 index 192aa10b..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004-str.xml +++ /dev/null @@ -1,5781 +0,0 @@ - - - - - - - per capita -GNP ($000) -1995 - - - - - - - - - population -1995 (mn) - - - - - - - - - - - - Number of -retail outlets -(000) - - - - - - - - - inhabitants -per outlet - - - - - - - - - - Retail -sales -(Ecu bn) - - - - - - - - - - Retail sales -per outlet -(Ecu 000) - - - - - - - - - - - Germany - - - - - - - - - 27.5 - - - - - 81.9 - - - - - 415.3 - - - - - 196 - - - - - 373 - - - - - 898 - - - - - France - - - - - - - - 25.0 - - - - - 58.1 - - - - - 343.4 - - - - - 169 - - - - - 292 - - - - - 850 - - - - - UK - - - - - 18.7 - - - - - 58.6 - - - - - 289.9 - - - - - 202 - - - - - 233 - - - - - 803 - - - - - Italy - - - - - - - - - 19.0 - - - - - 57.3 - - - - - 627.2 - - - - - 91 - - - - - 311 - - - - - 496 - - - - - Spain - - - - - - - 13.6 - - - - - 39.3 - - - - - 440.2 - - - - - 89 - - - - - 89 - - - - - 202 - - - - - Netherlands - - - - - - - - - - - 24.0 - - - - - 15.4 - - - - - 123.3 - - - - - 125 - - - - - 61 - - - - - 494 - - - - - Belgium/Lux - - - - - - - - - - - - 25.7 - - - - - 10.6 - - - - - 110 - - - - - 92 - - - - - 33 - - - - - 300 - - - - - Greece - - - - - 8.2 - - - - - 10.4 - - - - - 170.7 - - - - - 61 - - - - - - 25 - - - - - - 147 - - - - - Portugal - - - - - - - - - - - 9.7 - - - - - - 9.9 - - - - - 130.4 - - - - - 76 - - - - - - 26 - - - - - - 200 - - - - - Sweden - - - - - - - - - 23.8 - - - - - 8.8 - - - - - - 52.8 - - - - - 167 - - - - - 29 - - - - - 547 - - - - - Austria - - - - - - - 26.9 - - - - - - - - 8.1 - - - - - - - 37.7 - - - - - - - - 214 - - - - - - - 31 - - - - - - 822 - - - - - - - Denmark - - - - - - - - - 29.9 - - - - - 5.2 - - - - - - 
40.9 - - - - - 128 - - - - - 28 - - - - - 684 - - - - - Finland - - - - - - - - 20.6 - - - - - 5.1 - - - - - 31.7 - - - - - 162 - - - - - 22 - - - - - 694 - - - - - Ireland - - - - - - - - - - 14.7 - - - - - 3.6 - - - - - 35.9 - - - - - 101 - - - - - 12 - - - - - 334 - - - - - EU15 Total - - - - - - - - 372.3 - - - - - 3236.5 - - - - - 115 - - - - - 1565 - - - - - 549 - - - -
- - - - - population -1995 (mn) - - - - - - - - - - - - number of -food outlets -1996/7 (000)* - - - - - - - - - - - - - - - - - - inhabitants -per outlet -1996/7 - - - - - - - - - - - - - - - - number of food -outlets 1992/3 -(000)** - - - - - - - - - - - - - - - - - - - - inhabitants -per outlet -1992/3 - - - - - - - - - - - - - - - - Germany - - - - - - - - - 81.9 - - - - - 73.6 - - - - - 1111 - - - - - 44 - - - - - 1883 - - - - - France - - - - - - - - 58.1 - - - - - 34.8 - - - - - 1667 - - - - - 87 - - - - - 670 - - - - - UK - - - - - 58.6 - - - - - 33.9 - - - - - 1667 - - - - - 60 - - - - - 975 - - - - - Italy - - - - - - - - - 57.3 - - - - - 114.6 - - - - - 500 - - - - - 296 - - - - - 193 - - - - - Spain - - - - - - - 39.3 - - - - - 79 - - - - - 476 - - - - - 177 - - - - - 223 - - - - - Netherlands - - - - - - - - - - - 15.4 - - - - - 6 - - - - - 2500 - - - - - 21 - - - - - 748 - - - - - Belgium/Lux - - - - - - - - - - - - 10.6 - - - - - 13 - - - - - 769 - - - - - 37 - - - - - 289 - - - - - Greece - - - - - - - 10.4 - - - - - - - - 17.2 - - - - - - - - 588 - - - - - - - 54 - - - - - - 194 - - - - - - - Portugal - - - - - - - - - - - 9.9 - - - - - - 27.3 - - - - - 344 - - - - - 53 - - - - - 188 - - - - - Sweden - - - - - - - - - 8.8 - - - - - 6.2 - - - - - - 1428 - - - - - 14 - - - - - 609 - - - - - Austria - - - - - 8.1 - - - - - 7.2 - - - - - - 1111 - - - - - 7 - - - - - 1157 - - - - - Denmark - - - - - - - - - 5.2 - - - - - - 3.2 - - - - - 1667 - - - - - 12 - - - - - 446 - - - - - Finland - - - - - - - - - 5.1 - - - - - - 4.1 - - - - - 1250 - - - - - 7 - - - - - 743 - - - - - Ireland - - - - - - - - - - - 3.6 - - - - - - 9.5 - - - - - 370 - - - - - 9 - - - - - 383 - - - - - EU15 Total - - - - - - - - 372.3 - - - - - 429.4 - - - - - 867 - - - - - 876 - - - - - 425 - - - -
- - - - - population -(mn) - - - - - - - - - - - - number of -enterprises -(000) - - - - - - - - - - - - - population -per -enterprise - - - - - - - - - - - - - - turnover -(mn ecu) - - - - - - - - - - - - - - turnover per -enterprise -(000 ecu) - - - - - - - - - - - - - - - - - - EU15 - - - - - 372.3 - - - - - 2553 - - - - - 146 - - - - - 1261 - - - - - 494 - - - - - USA - - - - - 263.1 - - - - - 1530 - - - - - 171 - - - - - 1350 - - - - - 883 - - - - - Japan - - - - - - - 125.2 - - - - - 1519 - - - - - 82 - - - - - 682 - - - - - 449 - - - -
- - - - - % growth in total -retail sales volume, -1990-94 - - - - - - - - - - - - - - - - - - - - - - - - value of food sales -1996 (1990=100) - - - - - - - - - - - - - - - Austria - - - - - n.a. - - - - - 113.4 - - - - - Belgium/Luxembourg - - - - - - - - - - - - - - - - - - - 6.9 - - - - - 130.7 - - - - - Denmark - - - - - - - - - 6.4 - - - - - - 123.8 - - - - - Finland - - - - - - - - - n.a. - - - - - - - 95.2 - - - - - France - - - - - - - - - 5.8 - - - - - 113.4 - - - - - Germany - - - - - - - - - - 6.5 - - - - - - 111.4** - - - - - - Greece - - - - - -10.8 - - - - - - 147.2*** - - - - - - - Ireland - - - - - - - - - - - 12.0 - - - - - 130.1 - - - - - Italy - - - - - - - - - 1.0* - - - - - - 139.9 - - - - - Netherlands - - - - - - - - - - - - 7.9 - - - - - 117.4 - - - - - Portugal - - - - - - - - - - - n.a. - - - - - - - 180.3 - - - - - Spain - - - - - - - n.a. - - - - - - - 132.5 - - - - - Sweden - - - - - - - - - n.a. - - - - - - 109.1 - - - - - UK - - - - - 8.6 - - - - - 140.0 - - - -
- - - - - Current -Level* - - - - - - - - - - - - Change in recent -years** - - - - - - - - - - - - - - - - Source - - - - - - - % points - - - - - - - - period - - - - - - - Austria - - - - - - - 79 - - - - - +14 - - - - - 1990-96 - - - - - - “Regal” 1997 - - - - - - - - - - Belgium/Luxembourg - - - - - - - - - - - - - - 57 - - - - - +1 - - - - - 1988-92 - - - - - - AIM - - - - - - Denmark - - - - - - - - - (78) - - - - - - “Food Business” - - - - - - - - - - - Finland - - - - - - - - 96 - - - - - +3 - - - - - 1990-96 - - - - - - Nielson, Finland, 1997 - - - - - - - - - - - - - France - - - - - - - - 67.2 - - - - - +7 - - - - - - 1988-92 - - - - - - AIM - - - - - - Germany - - - - - - - - - - 75.2 - - - - - +10 - - - - - - 1988-92 - - - - - - AIM - - - - - - Greece - - - - - - - (59) - - - - - - The Retail Pocket Book 1998 - - - - - Ireland - - - - - - - - 50 - - - - - Italy - - - - - - - - 30 - - - - - Netherlands - - - - - - - - - - 79 - - - - - 0 - - - - - 1988-92 - - - - - - Portugal - - - - - - - - 52 - - - - - Spain - - - - - - - 38 - - - - - +11 - - - - - 1988-92 - - - - - - AIM - - - - - - Sweden - - - - - - - - 87 - - - - - +24 - - - - - 1985-96 - - - - - - Supermarket Svenska -Detaljhandel, 1997, for Food -and Daily Goods - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - UK - - - - - 67 - - - - - +7 - - - - - 1988-92 - - - - - - AIM - - - - -
- - - - - hypermarkets - - - - - - - - - - supermarkets - - - - - - - - - others* - - - - - - - - 1996 - - - - - change -since 1980 - - - - - - - - - - - 1996 - - - - - change -since 1990 - - - - - - - - - - - 1996 - - - - - Austria - - - - - - - 12 - - - - - +3 - - - - - 52 - - - - - +11 - - - - - - 36 - - - - - Belgium/Luxembourg - - - - - - - - - - - - - - 16 - - - - - - - - - - - 70 - - - - - +5 - - - - - 14 - - - - - Denmark - - - - - - - - - 17 - - - - - n.a. - - - - - 59 - - - - - +8 - - - - - 24 - - - - - Finland - - - - - - - - 22 - - - - - n.a. - - - - - 51 - - - - - -1 - - - - - 27 - - - - - France - - - - - - - - 51 - - - - - +16 - - - - - - 44 - - - - - - - - - - - 5 - - - - - Germany - - - - - - - - - - 24 - - - - - +8 - - - - - 52 - - - - - +7 - - - - - 24 - - - - - Greece - - - - - - - 5 - - - - - +5 - - - - - 51 - - - - - n.a. - - - - - 44 - - - - - Ireland - - - - - - - - 12 - - - - - n.a. - - - - - 41 - - - - - n.a. - - - - - 47 - - - - - Italy - - - - - - - - 13 - - - - - +13 - - - - - - 39 - - - - - n.a. - - - - - 48 - - - - - Netherlands - - - - - - - - - - 5 - - - - - +3 - - - - - 82 - - - - - +7 - - - - - 13 - - - - - Portugal - - - - - - - - 42 - - - - - +42 - - - - - - 28 - - - - - +10 - - - - - - 30 - - - - - Spain - - - - - - - 34 - - - - - +22 - - - - - - 25 - - - - - +5 - - - - - 31 - - - - - Sweden - - - - - - - - 13 - - - - - n.a. - - - - - 64 - - - - - +4 - - - - - 23 - - - - - UK - - - - - 45 - - - - - +29 - - - - - - 42 - - - - - +2 - - - - - 13 - - - -
- - - - - 1996 - - - - - 1991 - - - - - growth in share - - - - - - - - - - - - - - % of -national -turnover - - - - - - - - - - - - - - - no. of -stores - - - - - - - % of -national -turnover - - - - - - - - - - - - - - - - - - no. of -stores - - - - - - - (% points) - - - - - - - - - Austria - - - - - - - 17 - - - - - - 568 - - - - - - - 14 - - - - - - 530 - - - - - - - 3 - - - - - Belgium/Luxembourg - - - - - - - - - - - - - - - - 25 - - - - - 762 - - - - - 18 - - - - - 587 - - - - - 7 - - - - - Denmark - - - - - - - - - 20 - - - - - 739 - - - - - 15 - - - - - 544 - - - - - 5 - - - - - Finland - - - - - - - - 12 - - - - - 820 - - - - - 10 - - - - - 760 - - - - - 2 - - - - - France - - - - - - - - 7 - - - - - 1940 - - - - - 1 - - - - - 436 - - - - - 6 - - - - - Germany - - - - - - - - - 30 - - - - - 12130 - - - - - 24 - - - - - 8290 - - - - - 6 - - - - - Greece - - - - - - - n.a. - - - - - - - n.a. - - - - - - - n.a. - - - - - - - n.a. - - - - - - - n.a. - - - - - - - Ireland - - - - - - - - - - - n.a. - - - - - - - n.a. - - - - - - - n.a. - - - - - - - n.a. - - - - - - - n.a. - - - - - - - Italy - - - - - - - - - 10 - - - - - 2360 - - - - - .. - - - - - 60 - - - - - 10 - - - - - Netherlands - - - - - - - - - - - 13 - - - - - 607 - - - - - 10 - - - - - 482 - - - - - 3 - - - - - Portugal - - - - - - - - 9 - - - - - 314 - - - - - - - 2 - - - - - 30 - - - - - - 7 - - - - - Spain - - - - - - - 9 - - - - - 2315 - - - - - 5 - - - - - 1180 - - - - - 4 - - - - - Sweden - - - - - - - - 11 - - - - - 305 - - - - - 6 - - - - - 166 - - - - - 5 - - - - - UK - - - - - 11 - - - - - 1440 - - - - - 6 - - - - - 1129 - - - - - 5 - - - -
- - - - - number of franchisors - - - - - - - - - - - - - - - - number of franchisees - - - - - - - - - - - - - - - 1993 - - - - - 1994 - - - - - 1993 - - - - - 1994 - - - - - Austria - - - - - 80 - - - - - 170 - - - - - 2500 - - - - - 2700 - - - - - Belgium/Luxembourg - - - - - - - - - - - - - - - - 90 - - - - - 135 - - - - - 3200 - - - - - 2495 - - - - - Denmark - - - - - - - - - 42 - - - - - 42 - - - - - 500 - - - - - 500 - - - - - Finland - - - - - - - - - - - .. - - - - - - .. - - - - - - .. - - - - - - .. - - - - - - France - - - - - - - - 500 - - - - - 500 - - - - - 30000 - - - - - 30000 - - - - - Germany - - - - - - - - - 370 - - - - - 420 - - - - - 15500 - - - - - 18000 - - - - - Greece - - - - - - - .. - - - - - - .. - - - - - - .. - - - - - - .. - - - - - - Ireland - - - - - - - - 20 - - - - - - .. - - - - - - .. - - - - - - .. - - - - - - Italy - - - - - - - - - 318 - - - - - 361 - - - - - 16100 - - - - - 17500 - - - - - Netherlands - - - - - - - - - - - 331 - - - - - 340 - - - - - 12640 - - - - - 12120 - - - - - Portugal - - - - - - - - 55 - - - - - - 70 - - - - - - .. - - - - - - .. - - - - - - Spain - - - - - - - 117 - - - - - 250 - - - - - 14500 - - - - - 20000 - - - - - Sweden - - - - - - - - 200 - - - - - 200 - - - - - 900 - - - - - 900 - - - - - UK - - - - - 373 - - - - - 396 - - - - - 18100 - - - - - 24900 - - - - - EU Total - - - - - - - - 2496 - - - - - 2884 - - - - - 113940 - - - - - 129115 - - - -
- - - - - 1994 - - - - - 1991 - - - - - 1987 - - - - - 1981 - - - - - Austria - - - - - - - 47 - - - - - - Belgium/Luxembourg - - - - - - - - - - - - - - - - - - - 35 - - - - - 0.1 - - - - - Denmark - - - - - - - - - 27 - - - - - - 0.1 - - - - - - Finland - - - - - - - - 32 - - - - - France - - - - - - - - 200 - - - - - 66 - - - - - 16 - - - - - Germany - - - - - - - - - - 149 - - - - - 73 - - - - - 10 - - - - - 0.2 - - - - - Greece - - - - - - - Ireland - - - - - - - - 3 - - - - - 1 - - - - - 0.1 - - - - - - - Italy - - - - - - - - 68 - - - - - - 37 - - - - - - 5 - - - - - 0.1 - - - - - - - Netherlands - - - - - - - - - - - 30 - - - - - 11 - - - - - 4 - - - - - Portugal - - - - - - - - 44 - - - - - - 3 - - - - - Spain - - - - - - - 115 - - - - - 50 - - - - - 2 - - - - - Sweden - - - - - - - - 60 - - - - - - UK - - - - - 180 - - - - - 60 - - - - - 8 - - - - - 0.1 - - - - - - EU Total - - - - - - - - 990 - - - -
- - - - - 1997 - - - - - 1995* - - - - - 1992* - - - - - 1990** - - - - - 1980** - - - - - By leading retailers (1993/4) - - - - - UK - - - - - - 42.3 - - - - - - - - 29 - - - - - - 25 - - - - - - 31 - - - - - - 22 - - - - - - Sainsbury 55; Tesco 46; Safeway -38; Asda 32. - - - - - - - - - - - - - - - - - - - - - - - Belgium/Lux - - - - - - - - - - - 24.9 - - - - - - - - 22 - - - - - - 16 - - - - - - Netherlands - - - - - - - - - - - - 19.1 - - - - - 16 - - - - - - 16 - - - - - - France - - - - - - - - - 18.2 - - - - - 16 - - - - - - 16 - - - - - - 20 - - - - - - 11 - - - - - - Monoprix 28; Casino 25; -Intermarche 23; Carrefour 22; -Auchan 19; Leclerc 10 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Denmark - - - - - - - - - 13 - - - - - - Germany - - - - - - - - - - 12.6 - - - - - 11 - - - - - - 6 - - - - - 24 - - - - - 15 - - - - - Aldi 90; Metro 33; Tengelmann 18. - - - - - - - - - - - - - - - - - - - Spain - - - - - - - 10 - - - - - 8 - - - - - 9 - - - - - 2 - - - - - Eroski 24; Pryca 20; Alcampo 15 - - - - - - - - - - - - - - - - - - - - - Portugal - - - - - - - - 9 - - - - - Austria - - - - - - - 9 - - - - - Finland - - - - - - - - 8 - - - - - 8 - - - - - Sweden - - - - - - - - 8 - - - - - 8 - - - - - Italy - - - - - - - - 6 - - - - - 4 - - - - - Greece - - - - - - - 3 - - - -
- - - - - Country of origin - - - - - - - - - - - - - - - Sales (bn ecus) - - - - - - - - - - Nestle - - - - - - Switzerland - - - - - - - - 38.2 - - - - - - - - Food Ingredients Specialities - - - - - - - - - - - - - - Switzerland - - - - - - - 38.1 - - - - - - - - Rewe - - - - - - - Germany - - - - - - - - - - 14.6 - - - - - Sandoz Nutrition - - - - - - - - - - - Switzerland - - - - - - - 10.7 - - - - - - - - Casino Guichard Perrachon - - - - - - - - - - - - - - France - - - - - - - - 9.5 - - - - - - - Coop Valais - - - - - - - - Switzerland - - - - - - - - 7.3 - - - - - - - Spar Handels - - - - - - - - - - Germany - - - - - - - - - - 6.9 - - - - - - Edeka - - - - - - - Germany - - - - - - - 6.3 - - - - - - - Faellesforenigen for Danmarks - - - - - - - - - - - Denmark - - - - - - - - 3.0 - - - - - - - Booker Belmont - - - - - - - - - UK - - - - - - 2.9 - - - - - - - SEITA - - - - - - France - - - - - - - - - 2.4 - - - - - Nurdin & Peacock - - - - - - - - - - UK - - - - - - 2.0 - - - - - - - Merkur - - - - - - Switzerland - - - - - - - - 2.0 - - - - - - - Ramsvita - - - - - - - - - Switzerland - - - - - - - - 2.0 - - - - - - - Tengelmann - - - - - - - - - - - Germany - - - - - - - - - - 1.8 - - - - - - Hofer & Curti - - - - - - - - - - Switzerland - - - - - - - - 1.7 - - - - - - - Systeme U Centre Regional Ouest - - - - - - - - - - - - - - France - - - - - - - - 1.6 - - - - - - - Schuitema - - - - - - - - - - Netherlands - - - - - - - - - - 1.4 - - - - - - - Skandinavisk Holding - - - - - - - - - - - - - - - - - - Denmark - - - - - - - - - - - 1.3 - - - - - Fyffes - - - - - - - Ireland - - - - - - - 1.1 - - - - - -
- - - - - World food -sales -($US bn) - - - - - - - - - - - - - Total sales in EU food -manufacturing -(bn ecus) - - - - - - - - - - - - - - - - - - - - - - - - - - - Markets in which firm is one -of the 5 leaders -(NACE 3 digit) - - - - - - - - - - - - - - - - - - - - - - - - - - - - Nestle - - - - - - - - - 38.8 - - - - - 13.1 - - - - - 412,413,414,417,421,423,428 - - - - - Philip Morris - - - - - - 33.4 - - - - - 11.3 - - - - - 413,417,421,423,429 - - - - - Unilever - - - - - - - - - - 26.7 - - - - - 14.6 - - - - - 411,412,414,415,421 - - - - - ConAgra - - - - - - - - 24.8 - - - - - Pepsico - - - - - - - - - 19.1 - - - - - Cargill - - - - - - - - - - 18.7 - - - - - 2.4 - - - - - - 411 - - - - - Coca-Cola - - - - - - - - - - - 18.0 - - - - - 1.8 - - - - - - 428 - - - - - Danone - - - - - - - 14.2 - - - - - 8.9 - - - - - - 427,428,423,413,417 419 - - - - - Archer Daniels - - - - - - - - - - - 13.3 - - - - - - - - Mars - - - - - - - 13 - - - - - 3.1 - - - - - 421,422 - - - - - Grand Metropolitan - - - - - - - 12.7 - - - - - 2.4 - - - - - - 413,424 - - - - - IBP - - - - - - - 12.7 - - - - - Kinn - - - - - - 11.6 - - - - - CPC International - - - - - - - - - - - - - - - - 9.8 - - - - - - 1.6 - - - - - 418,423 - - - - - Anheuser-Busch - - - - - - - - - - - - - 9.6 - - - - - Sara Lee - - - - - - - - - - 9.4 - - - - - - 1.3 - - - - - 423 - - - - - ABF - - - - - 9.2 - - - - - 416,418,419,420 - - - - - Heinz - - - - - - - - 9.1 - - - - - - 1.5 - - - - - 423 - - - - - Asahi Breweries - - - - - - - - - 9.1 - - - - - - - Eridania Beghin-Say - - - - - - - - - - - - - 9.1 - - - - - - - Nabisco - - - - - - - - - 8.3 - - - - - Novartis - - - - - - 8.1 - - - - - - - Cadbury-Schweppes - - - - - - - - - - - - - 7.7 - - - - - - 3.0 - - - - - 428 - - - - - Campbell Soup - - - - - - - - - - - 7.7 - - - - - Guinness - - - - - - - - 7.6 - - - - - 2.2 - - - - - - 424,427 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004.json deleted file mode 100644 index 755a6e46..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":12,"numCorrectlyDetectedTables":12,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004.pdf deleted file mode 100644 index 2acd5297..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005-reg.xml deleted file mode 100644 index 7c1a638a..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005-reg.xml +++ /dev/null @@ -1,350 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005-str.xml deleted file mode 100644 index dd08990f..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005-str.xml +++ /dev/null @@ -1,926 +0,0 @@ - - - - - - - 1996 - - - - - 1993 - - - - - Austria - - - - - - - 59 - - - - - - 54 - - - - - - Belgium/Lux - - - - - - - - - - - 62 - - - - - - 60 - - - - - - Denmark - - - - - - - - - 59 - - - - - - 54 - - - - - - Finland - - - - - - - - 89 - - - - - 94 - - - - - France - - - - - - - - 51 - - - - - - 48 - - - - - - Germany - - - - - - - - - - 45 - - - - - - 45 - - - - - - Greece - - - - - - - 28 - - - - - - 11 - - - - - - Ireland - - - - - - - - 64 - - - - - - 62 - - - - - - Italy - - - - - - - - 12 - - - - - - 11 - - - - - - Netherlands - - - - - - - - - - - 50 - - - - - 52 - - - - - Portugal - - - - - - - - 56 - - - - - - 36 - - - - - - Spain - - - - - - - 32 - - - - - 22 - - - - - Sweden - - - - - - - - 78 - - - - - - 79 - - - - - - UK - - - - - 56 - - - - - 50 - - - -
- - - - - Our estimates -1996 - - - - - - - - - - - LDA -1997 - - - - - - PBUK -1996 - - - - - - EH -1996 - - - - - - AIM -1992 - - - - - - - HBS - - - - - OXIRM - - - - - - - - Average of -other estimates - - - - - - - - - - - - - - - Austria - - - - - - - 58.6 - - - - - - - - 79 - - - - - - 67.9 - - - - - - - - 72.9 - - - - - - - - 73.3 - - - - - - - - Bel/Lux - - - - - - - - - 61.6 - - - - - 57 - - - - - - 56.9 - - - - - 77.4 - - - - - 53 - - - - - - 60 - - - - - - 60.9 - - - - - Denmark - - - - - - - - - 59.5 - - - - - 48 - - - - - - 78 - - - - - - 63.0 - - - - - Finland - - - - - - - - - 89.1 - - - - - 96 - - - - - - 95.4 - - - - - 97.5 - - - - - 96.3 - - - - - France - - - - - - - - - 50.6 - - - - - 67.2 - - - - - 60.1 - - - - - 49 - - - - - - 65 - - - - - - 60.3 - - - - - Germany - - - - - - - - - - 45.4 - - - - - 75.2 - - - - - 41.5 - - - - - 73.5 - - - - - 37 - - - - - - 56.8 - - - - - Greece - - - - - - - 28.0 - - - - - - - - 58.7 - - - - - - - - 58.7 - - - - - - - - Ireland - - - - - - - - - - - 64.2 - - - - - 50 - - - - - - 50.4 - - - - - 50.2 - - - - - Italy - - - - - - - - - 11.8 - - - - - 30 - - - - - - 35 - - - - - - 21 - - - - - - 58.5 - - - - - 36.1 - - - - - Netherlands - - - - - - - - - - - - 50.4 - - - - - 79 - - - - - - 76.7 - - - - - 71.7 - - - - - 59 - - - - - - 71.6 - - - - - Portugal - - - - - - - - - - - 55.7 - - - - - 52 - - - - - - 52.9 - - - - - 55 - - - - - - 53.3 - - - - - Spain - - - - - - - 32.1 - - - - - 38 - - - - - - 34.6 - - - - - 23 - - - - - - 47.7 - - - - - 35.8 - - - - - Sweden - - - - - - - - - 77.9 - - - - - 87 - - - - - - 70.5 - - - - - 93.5 - - - - - 83.7 - - - - - UK - - - - - 56.2 - - - - - 67 - - - - - - 65.2 - - - - - 60 - - - - - - 63 - - - - - - 63.8 - - - - - Average - - - - - - - - - - 52.9 - - - - - 61.7 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005.json deleted file mode 100644 index 1bf5fd29..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005.pdf deleted file mode 100644 index 5ea9bd53..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006-reg.xml deleted file mode 100644 index 46b5f5a3..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006-reg.xml +++ /dev/null @@ -1,321 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006-str.xml deleted file mode 100644 index 0a7207b9..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006-str.xml +++ /dev/null @@ -1,725 +0,0 @@ - - - - - - - Names - - - - - - - - Own brands market share - - - - - - - - - - - - - - - - Number of items - - - - - - - - - - - - Franprix - - - - - - - - - 28.0 - - - - - n.a. - - - - - - Casino - - - - - - - - 24.8 - - - - - 1800 - - - - - Intermarché - - - - - - - - - - - - - 24.7 - - - - - 2500 - - - - - Géant - - - - - - - 20.0 - - - - - 1800 - - - - - Carrefour - - - - - - - - - - - 18.9 - - - - - 1642 - - - - - Monoprix - - - - - - - - - 18.7 - - - - - 1800 - - - - - Système U - - - - - - - 18.5 - - - - - - - - 985 - - - - - - - Continent - - - - - - - - - - - 17.8 - - - - - 1440 - - - - - Stoc - - - - - - - 16.2 - - - - - 650 - - - - - Auchan - - - - - - - 15.7 - - - - - 1500 - - - - - Match - - - - - - - - 15.4 - - - - - 1100 - - - - - Champion - - - - - - - - - 15.1 - - - - - 1240 - - - - - Leclerc - - - - - - - - - - - 14.8 - - - - - 500 - - - - - Cora - - - - - - - 12.2 - - - - - 1224 - - - - - Prisunic - - - - - - - - - 11.7 - - - - - 550 - - - -
- - - - - 1991 - - - - - 1994 - - - - - 1995 - - - - - 1996 - - - - - National Brands - - - - - - - - - - - - - 80.6 - - - - - 75.0 - - - - - 75.3 - - - - - 76.0 - - - - - Own Brands - - - - - - - - 14.7 - - - - - 17.1 - - - - - 17.4 - - - - - 17.1 - - - - - Low price items - - - - - - - 4.7 - - - - - - - 7.9 - - - - - - - 7.3 - - - - - - - 6.9 - - - - - -
- - - - - Retailer - - - - - - - - - - - Own Brands Market Shares - - - - - - - - - - - - - - - Monoprix - - - - - - - - - 28% - - - - - Casino - - - - - - - - 25% - - - - - Intermarché - - - - - - - - - 23% - - - - - - - Carrefour - - - - - - - 22% - - - - - - - Auchan - - - - - - - - 19% - - - - - - - Leclerc - - - - - 10% - - - - - -
- - - - - Groups - - - - - - Foreign turnover (FFr bn.) - - - - - - - - - - - - - - % of Total Turnover - - - - - - - - - - - - Carrefour - - - - - - - - - - - 62.7 - - - - - 40.5% - - - - - Promodès - - - - - - - - - - - 37.0 - - - - - 35.7% - - - - - Auchan - - - - - - - - 23.5 - - - - - 19.5% - - - - - Cora - - - - - - - 11.0 - - - - - 24.0% - - - - - Casino - - - - - - - - 8.5 - - - - - 11.5% - - - - - Comptoirs Modernes - - - - - - - - - - - - 2.0 - - - - - - - 7.0% - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.json deleted file mode 100644 index 11be9878..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":4,"numCorrectlyDetectedTables":4,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.pdf deleted file mode 100644 index 431ca0a9..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007-reg.xml deleted file mode 100644 index 39fb826d..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007-reg.xml +++ /dev/null @@ -1,606 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007-str.xml deleted file mode 100644 index c9d5b40c..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007-str.xml +++ /dev/null @@ -1,1059 +0,0 @@ - - - - - - - Up-market - - - - - - - - - Medium - - - - - - - - Down-market - - - - - - - - - - - - Procter & Gamble - - - - - - - - - - - Ariel - - - - - - Vizir - - - - - - Bonux - - - - - - - - - Lever - - - - - Skip - - - - - - Omo - - - - - - Persil - - - - - - - Henkel - - - - - - - - Le Chat - - - - - - - - Super Croix - - - - - - - - - - Colgate-Palmolive - - - - - - - - - - Dash - - - - - - Axion - - - - - - - Gama - - - - - -
- - - - - Years - - - - - - - 1990 - - - - - 1992 - - - - - 1993 - - - - - 1994 - - - - - 1995 - - - - - 1996 - - - - - Budget Coefficient for Coffee, -Tea, Herb Tea - - - - - - - - - - - - - - - - - - - - - - 0.323 - - - - - 0.272 - - - - - 0.265 - - - - - 0.290 - - - - - 0.340 - - - - - 0.346 - - - -
- - - - - Product - - - - - - - - - Roast coffee - - - - - - Instant Coffee - - - - - - - - - - - Turnover in FFr. millions - - - - - - - - - - - - - - - 7,581 - - - - - - - - - 2,517 - - - - - - - -
- - - - - Brands - - - - - - - - - - Market shares in -volume (1996) - - - - - - - - - - - - - Market shares in -volume (1997) - - - - - - - - - - - - - Maison du Café -(Douwe Egberts) - - - - - - - - - - - - - - - - 14.9% - - - - - 16.5% - - - - - Carte Noire -(K-J-S) - - - - - - - - - - - - - - 17.0% - - - - - 16.5% - - - - - Jacques Vabre -(K-J-S) - - - - - - - - - - - - - - - - 14.2% - - - - - 13.8% - - - - - Grand Mère -(K-J-S) - - - - - - - - - - - - - - 13.2% - - - - - 13.2% - - - - - Lavazza - - - - - 7.0% - - - - - - - - 6.2% - - - - - - - - Segafredo - - - - - - - - - - - 5.8% - - - - - 5.4% - - - - - Legal - - - - - - - - - 5.6% - - - - - - 4.5% - - - - - - Malongo - - - - - - - - - 3.0% - - - - - - 3.1% - - - - - - Own Brands and First Price Products - - - - - - - - - - - - - - - - - - - - - - 15.9% - - - - - 17.3% - - - - - Other Brands - - - - - - - - - - 3.3% - - - - - 3.4% - - - -
- - - - - Products - - - - - - - - - Butter - - - - - - - Margarine - - - - - - - Low fat products - - - - - - - - - - Turnover Ffr. m. - - - - - - - - - - - - - - 6028 - - - - - 1385 - - - - - 836 - - - -
- - - - - Groups - - - - - - Butter - - - - - - - - Margarine - - - - - - - - - Low fat products - - - - - - - - - - - - - Astra Calvé - - - - - - - - - - - - Total: 0% - - - - - - - - - Total: 47% -(Fruit d’or, Plantafin, -Equilibre, Effi, Astra, ...) - - - - - - - - - - - - - - - - - - - - Total: 39% -Fruit d’or (8.4%) -Effi (10.5%) -Plantafin (20.1%) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Besnier - - - - - - - Total: 18.1% -Bfpridel (4.4%) -Président (13.5%) - - - - - - - - - - - - - - - - - - - - - - - - - - - - Total: 0% - - - - - - - - - Total: 9.5% -Bridélight (5.6%) -Bridélice (3.2%) -Président (0.7%) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Cema - - - - - - - Primevère (1.3%) - - - - - - - - - - CLE - - - - - - - Elles § Vire (6.7%) - - - - - - - - - - - Laïta - - - - - - - - - Paysan Breton (6.3%) - - - - - - - - - - - - - - - - - Vedial - - - - - - - - Total: 0% - - - - - - - - - Total: 19% -(Prima, St Hubert 41, Le -Fleurier,Mr Tournesol, -Tournolive...) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Total: 34.8% -Prima (3.3%) -St Hubert 41 (18.7%) -Le Fleurier (12.8%) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Own brands - - - - - - - - 26.9% - - - - - 19.4% - - - - - 7% - - - - - Others - - - - - - - - 42.2% - - - - - 13.3% - - - - - 9.7% - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007.json deleted file mode 100644 index 4bed3d03..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":6,"numCorrectlyDetectedTables":6,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007.pdf deleted file mode 100644 index 46cf9b6e..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008-reg.xml deleted file mode 100644 index 6a32444c..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008-reg.xml +++ /dev/null @@ -1,94 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008-str.xml deleted file mode 100644 index 0b50733f..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008-str.xml +++ /dev/null @@ -1,325 +0,0 @@ - - - - - - - Country/Heading - - - - - - - - - - - - - Cohesion Fund EURbn - - - - - - - - ERDF Convergence EURbn - - - - - - - - - - - - Total EURbn - - - - - - - Bulgaria - - - - - 2.3 - - - - - 3.2 - - - - - 5.5 - - - - - Cyprus - - - - - - - - 0.21 - - - - - 0 - - - - - 0.21 - - - - - Czech Republic - - - - - 8.8 - - - - - 13.4 - - - - - 22.2 - - - - - Estonia - - - - - 1.1 - - - - - 1.9 - - - - - 3.0 - - - - - Hungary - - - - - - 8.6 - - - - - 11.2 - - - - - 19.8 - - - - - Latvia - - - - - 1.5 - - - - - 2.4 - - - - - 3.9 - - - - - Lithuania - - - - - 2.3 - - - - - 3.4 - - - - - 5.7 - - - - - Malta - - - - - 0.28 - - - - - 0.44 - - - - - 0.72 - - - - - Poland - - - - - 22 - - - - - 33 - - - - - 55 - - - - - Romania - - - - - 6.5 - - - - - 9 - - - - - 15.5 - - - - - Slovakia - - - - - 4 - - - - - 6 - - - - - 10 - - - - - Slovenia - - - - - 1.4 - - - - - 1.9 - - - - - 3.3 - - - - - Technical Assistance - - - - - - 0.87 - - - - - TOTAL - - - - - - - 58.99 - - - - - 86.70 - - - - - 145.69 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008.pdf deleted file mode 100644 index 533a16c5..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a-reg.xml deleted file mode 100644 index fc408904..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a-reg.xml +++ /dev/null @@ -1,110 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a-str.xml deleted file mode 100644 index 6bb1f8c0..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a-str.xml +++ /dev/null @@ -1,224 +0,0 @@ - - - - - - - Assignment Categories - - - - - - - JASPERS Categories - - - - - EV Categories - - - - - Category - - - - - - Description - - - - - - Category - - - - - - Description - - - - - - 1 - - - - - Involvement “at the -beginning of project -preparation” - - - - - - - - - - - - - 1a - - - - - Influence on project -concept - - - - - - - - - - - - - - 1b - - - - - No influence on project -concept (presentation -only) - - - - - - - - - - - - - 2 - - - - - Involvement “during -the feasibility study -preparation” - - - - - - - - - - 2a - - - - - Influence on project -concept - - - - - - - - - - - - - - 2b - - - - - No influence on project -concept (presentation -only) - - - - - - - - - - - - - 3 - - - - - Involvement “after -draft application is -prepared” - - - - - - - - - - - - 3a - - - - - - Influence on project -concept - - - - - - - - - - - - - - 3b - - - - - Other presentation -issues - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a.pdf deleted file mode 100644 index 049449c3..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009b-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009b-reg.xml deleted file mode 100644 index ff791244..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009b-reg.xml +++ /dev/null @@ -1,107 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009b-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009b-str.xml deleted file mode 100644 index 83dc7a29..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009b-str.xml +++ /dev/null @@ -1,217 +0,0 @@ - - - - - - - JASPERS Categories - - - - - EV Categories - - - - - Category - - - - - - Description - - - - - - Category - - - - - - Description - - - - - - 1 - - - - - Involvement “at the -beginning of project -preparation” - - - - - - - - - - - - - 1a - - - - - Influence on project -concept - - - - - - - - - - - - - - 1b - - - - - No influence on project -concept (presentation -only) - - - - - - - - - - - - - 2 - - - - - Involvement “during -the feasibility study -preparation” - - - - - - - - - - 2a - - - - - Influence on project -concept - - - - - - - - - - - - - - 2b - - - - - No influence on project -concept (presentation -only) - - - - - - - - - - - - - 3 - - - - - Involvement “after -draft application is -prepared” - - - - - - - - - - - - 3a - - - - - - Influence on project -concept - - - - - - - - - - - - - - 3b - - - - - Other presentation -issues - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010-reg.xml deleted file mode 100644 index 852d99d7..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010-reg.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010-str.xml deleted file mode 100644 index a5b5f317..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010-str.xml +++ /dev/null @@ -1,130 +0,0 @@ - - - - - - - FEMIP Country - - - - - - - Signed TA -(EURm) - - - - - - - - - Algeria - - - - - 6.19 - - - - - Egypt - - - - - 6.60 - - - - - Gaza & West Bank - - - - - - - 2.60 - - - - - Jordan - - - - - 4.20 - - - - - Lebanon - - - - - - 2.57 - - - - - Morocco - - - - - 21.09 - - - - - Regional - - - - - - 7.29 - - - - - Syria - - - - - 33.42 - - - - - Tunisia - - - - - - 14.50 - - - - - Total - - - - - 98.46 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010.pdf deleted file mode 100644 index 76f113e1..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011-reg.xml deleted file mode 100644 index c878bc25..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011-reg.xml +++ /dev/null @@ -1,60 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011-str.xml deleted file mode 100644 index 744f2f95..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011-str.xml +++ /dev/null @@ -1,179 +0,0 @@ - - - - - - - Differences with respect to Germany - - - - - - - - - - - - - - - - Portugal - - - - - - Greece - - - - - Spain - - - - - Italy - - - - - France - - - - - Q2 2006 – Q1 2010** - - - - - - - - - 0.33 - - - - - 0.51 - - - - - 0.31 - - - - - 0.18 - - - - - 0.05 - - - - - Q1 2009 – Q4 2009 - - - - - - - - -0.01 - - - - - 0.45 - - - - - 0.21 - - - - - 0.18 - - - - - 0.05 - - - - - Q4 2009 - - - - - - 0.17 - - - - - 0.70 - - - - - 0.26 - - - - - 0.09 - - - - - -0.01 - - - - - Q1 2010 - - - - - - 0.64 - - - - - 0.72 - - - - - 0.56 - - - - - 0.43 - - - - - 0.25 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.json deleted file mode 100644 index 492239e1..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":3,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.pdf deleted file mode 100644 index c18929d2..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012-reg.xml deleted file mode 100644 index 6204f9e1..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012-reg.xml +++ /dev/null @@ -1,265 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012-str.xml deleted file mode 100644 index 1eca2428..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012-str.xml +++ /dev/null @@ -1,1113 +0,0 @@ - - - - - - - Finland - - - - - - - EU-15 - - - - - EU-25 - - - - - 1996 - - - - - 3.7 - - - - - 1.6 - - - - - 1.7 - - - - - 2000 - - - - - 5.0 - - - - - 3.9 - - - - - 3.9 - - - - - 2005 - - - - - 1.5 - - - - - 1.5 - - - - - 1.6 - - - - - 2006 (*) - - - - - 3.5 - - - - - 2.0 - - - - - 2.1 - - - -
- - - - - Agriculture - - - - - - - Industry - - - - - - - Services - - - - - 1991 - - - - - Men - - - - - 11.0 - - - - - 40.7 - - - - - 48.4 - - - - - Women - - - - - - - 6.6 - - - - - 16.0 - - - - - 77.4 - - - - - Total - - - - - 8.9 - - - - - 28.7 - - - - - 62.4 - - - - - Men - - - - - 9.5 - - - - - 39.4 - - - - - 51.0 - - - - - 1996 - - - - - Women - - - - - - - 5.3 - - - - - 13.9 - - - - - 80.9 - - - - - Total - - - - - 7.5 - - - - - 27.3 - - - - - 65.2 - - - - - Men - - - - - 7.5 - - - - - 39.4 - - - - - 53.1 - - - - - 2001 - - - - - Women - - - - - - - 3.7 - - - - - 13.8 - - - - - 82.5 - - - - - Total - - - - - 5.7 - - - - - 27.2 - - - - - 67.1 - - - - - Men - - - - - 6.7 - - - - - 38.1 - - - - - 55.2 - - - - - 2003 - - - - - Women - - - - - - - 3.0 - - - - - 12.4 - - - - - 84.6 - - - - - Total - - - - - 4.9 - - - - - 25.7 - - - - - 69.4 - - - -
- - - - - Finland - - - - - - - EU-15 - - - - - EU-25 - - - - - Female - - - - - - - Male - - - - - - - Total - - - - - - - 15-24 - - - - - - Female - - - - - - - Male - - - - - - - Total - - - - - - - 15-24 - - - - - - Female - - - - - - - - - Male - - - - - - - Total - - - - - - - 15-24 - - - - - - 1995 - - - - - 15.1 - - - - - 15.7 - - - - - 15.4 - - - - - 29.7 - - - - - 12.0 - - - - - 8.7 - - - - - 10.1 - - - - - 21.1 - - - - - : - - - - - : - - - - - : - - - - - : - - - - - 2000 - - - - - 10.6 - - - - - 9.1 - - - - - 9.8 - - - - - 21.4 - - - - - 9.3 - - - - - 6.4 - - - - - 7.7 - - - - - 15.3 - - - - - 10.2 - - - - - 7.4 - - - - - 8.6 - - - - - 17.4 - - - - - 2005 - - - - - 8.6 - - - - - 8.2 - - - - - 8.4 - - - - - 20.1 - - - - - 8.9 - - - - - 7.0 - - - - - 7.9 - - - - - 16.7 - - - - - 9.8 - - - - - 7.9 - - - - - 8.7 - - - - - 18.5 - - - -
- - - - - Finland - - - - - - - EU-15 - - - - - - - EU-25 - - - - - - - Female - - - - - - - Male - - - - - - - Total - - - - - - - Female - - - - - - - Male - - - - - - - Total - - - - - - - Female - - - - - - - Male - - - - - - - Total - - - - - - - 1995 - - - - - 59.0 - - - - - 64.2 - - - - - 61.6 - - - - - 49.7 - - - - - 70.5 - - - - - 60.1 - - - - - : - - - - - : - - - - - : - - - - - 2000 - - - - - 64.2 - - - - - 70.1 - - - - - 67.2 - - - - - 54.1 - - - - - 72.8 - - - - - 63.4 - - - - - 53.6 - - - - - 71.2 - - - - - 62.4 - - - - - 2005 - - - - - 66.5 - - - - - 70.3 - - - - - 68.4 - - - - - 57.4 - - - - - 72.9 - - - - - 65.1 - - - - - 56.3 - - - - - 71.3 - - - - - 63.8 - - - -
- - - - - Country - - - - - - - ISCED 0-2 - - - - - - - - - ISCED 3-4 - - - - - - - - ISCED 5-6 - - - - - - - - Denmark - - - - - - 17 - - - - - 49 - - - - - 34 - - - - - Finland - - - - - 21 - - - - - 44 - - - - - 34 - - - - - Estonia - - - - - 11 - - - - - 56 - - - - - 33 - - - - - Sweden - - - - - 16 - - - - - 54 - - - - - 29 - - - - - Spain - - - - - 51 - - - - - 21 - - - - - 29 - - - - - Germany - - - - - - 17 - - - - - 58 - - - - - 25 - - - - - Italy - - - - - - - 49 - - - - - 39 - - - - - 13 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012.json deleted file mode 100644 index b1fa6930..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":5,"numCorrectlyDetectedTables":5,"numErroneouslyDetectedTables":1,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012.pdf deleted file mode 100644 index f614de46..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013-reg.xml deleted file mode 100644 index 6afe9c9b..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013-reg.xml +++ /dev/null @@ -1,157 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013-str.xml deleted file mode 100644 index 976d2faf..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013-str.xml +++ /dev/null @@ -1,441 +0,0 @@ - - - - - - - 2000 - - - - - 2002 - - - - - 2004 - - - - - General upper secondary education - - - - - - 15 455 - - - - - 13 951 - - - - - 12 068 - - - - - Competence-based qualifications - - - - - - - - 35 190 - - - - - 44 307 - - - - - 60 152 - - - - - Polytechnics - - - - - - 20 527 - - - - - 20 922 - - - - - 22 083 - - - -
- - - - - Number of -participants (*) - - - - - - - - - - - - - Females -(%) - - - - - - - - - - Vocational further education and training, not apprenticeship training - - - - - - - - - - 79 935 - - - - - 55 - - - - - Vocational further education and training, apprenticeship training - - - - - - - - - - 2 961 - - - - - 73 - - - - - Labour market training - - - - - - - - 51 955 - - - - - 52 - - - - - Courses ordered by employers - - - - - - - - 298 839 - - - - - 43 - - - - - Courses organised as liberal education - - - - - - - - 1 649 692 - - - - - 72 - - - - - Open polytechnics education - - - - - - - 13 560 - - - - - 66 - - - - - Open university education - - - - - - - - 57 986 - - - - - 80 - - - - - Other education - - - - - 91 568 - - - - - 55 - - - - - Total - - - - - 2 246 496 - - - - - - 66 - - - -
- - - - - 2001 - - - - - 2002 - - - - - 2003 - - - - - 2004 - - - - - 2005 - - - - - On labour market training - - - - - - - - 29 300 - - - - - 25 800 - - - - - 33 300 - - - - - 33 500 - - - - - 33 000 - - - -
- - - - - Roles and tasks - - - - - - - Teachers - - - - - - - Trainers - - - - - - - Curriculum development - - - - - - - - - x - - - - - Supporting students in preparing their individual study plans - - - - - - - - - - - x - - - - - x - - - - - Evaluation of education - - - - - - x - - - - - Student assessment - - - - - - x - - - - - x - - - - - Cooperation between school and enterprise - - - - - x - - - - - x - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.json deleted file mode 100644 index 074b6f59..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":4,"numCorrectlyDetectedTables":4,"numErroneouslyDetectedTables":1,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.pdf deleted file mode 100644 index 0fb7d4a3..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014-reg.xml deleted file mode 100644 index 2bd199fd..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014-reg.xml +++ /dev/null @@ -1,46 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014-str.xml deleted file mode 100644 index 326a17cf..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014-str.xml +++ /dev/null @@ -1,114 +0,0 @@ - - - - - - - Indicators - - - - - Weight of indicator in 2006 - - - - - - Effectiveness - - - - - - - - Employment - - - - - - - - - 40 - - - - - Further studies (in higher education) - - - - - - - - 15 - - - - - Processes - - - - - - - Dropping out - - - - - - 15 - - - - - % ratio of qualification certificate holders to entrants - - - - - - 13 - - - - - Staff - - - - - - Formal teaching qualifications - - - - - - - 11 - - - - - Staff development - - - - - - - - 6 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014.json deleted file mode 100644 index e0d0b7bd..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":0,"numErroneouslyDetectedTables":1,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014.pdf deleted file mode 100644 index d310a8bd..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015-reg.xml deleted file mode 100644 index 0e683701..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015-reg.xml +++ /dev/null @@ -1,272 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015-str.xml deleted file mode 100644 index 0634ca2d..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015-str.xml +++ /dev/null @@ -1,1213 +0,0 @@ - - - - - - - Topic - - - - - Enquiries - - - - - EU Institutions - - - - - 3.597 - - - - - EU general and Member States - - - - - 1.847 - - - - - Employment, social affairs and equal opportunities - - - - - 1.783 - - - - - Air passengers rights - - - - - 1.726 - - - - - Justice Freedom and Security - - - - - 1.451 - - - - - Consumer / Food safety / Public health - - - - - 1.241 - - - - - Enterprise and industry - - - - - 1.215 - - - - - External relations and development - - - - - 732 - - - - - Education / Training / Youth - - - - - 714 - - - - - Customs and taxation - - - - - 556 - - - - - Total - - - - - 14.862 - - - -
- - - - - Topic - - - - - Enquiries - - - - - Other specific policies including Competition, External -trade, Enlargement, Agriculture and rural development, -Regional policy, Information Society and media, Culture, -Economic and monetary affairs, Research and innovation, -Fisheries and maritime affairs, Internal Market and services -and Environment - - - - - - - - - - 4.330 - - - - - EIT - - - - - 119 - - - - - Research enquiry service - - - - - 2.003 - - - - - Export Helpdesk - - - - - 169 - - - - - Practicalities (Including complaints, OPOCE, mission, history, -issues not related to the EU, bilateral agreements, national -authorities, request for contact details and request for -clarification) - - - - - - - - 2.417 - - - - - Grand Total - - - - - 23.900 - - - -
- - - - - Airpassengersrights - - - - - Enquiries - - - - - Spain - - - - - 268 - - - - - Germany - - - - - 233 - - - - - UnitedKingdom - - - - - 139 - - - - - France - - - - - 136 - - - - - Italy - - - - - 92 - - - - - Netherlands - - - - - 79 - - - - - Belgium - - - - - 66 - - - - - Portugal - - - - - 44 - - - - - Austria - - - - - 28 - - - - - Finland - - - - - 25 - - - - - Greece - - - - - 22 - - - - - Ireland - - - - - 14 - - - - - Denmark - - - - - 12 - - - - - Sweden - - - - - 11 - - - - - Luxembourg - - - - - 9 - - - - - TotalEU-15 - - - - - 1.178 - - - - - Hungary - - - - - 17 - - - - - Romania - - - - - 16 - - - - - Cyprus - - - - - 12 - - - - - Bulgaria - - - - - 9 - - - - - CzechRepublic - - - - - 7 - - - - - Estonia - - - - - 7 - - - - - Poland - - - - - 7 - - - - - Malta - - - - - 5 - - - - - Latvia - - - - - 3 - - - - - Slovakia - - - - - 3 - - - - - Slovenia - - - - - 2 - - - - - TotalEU-12 - - - - - 88 - - - - - non-EU - - - - - 97 - - - - - Unspecified - - - - - 363 - - - - - GrandTotal - - - - - 1.726 - - - -
- - - - - Freemovementof -persons/workers - - - - - - Enquiries - - - - - Spain - - - - - 153 - - - - - Germany - - - - - 119 - - - - - UnitedKingdom - - - - - 114 - - - - - France - - - - - 99 - - - - - Italy - - - - - 82 - - - - - Belgium - - - - - 49 - - - - - Netherlands - - - - - 45 - - - - - Greece - - - - - 40 - - - - - Portugal - - - - - 26 - - - - - Ireland - - - - - 23 - - - - - Austria - - - - - 22 - - - - - Sweden - - - - - 20 - - - - - Finland - - - - - 10 - - - - - Denmark - - - - - 9 - - - - - Luxembourg - - - - - 2 - - - - - TotalEU-15 - - - - - 813 - - - - - Bulgaria - - - - - 32 - - - - - Romania - - - - - 30 - - - - - Cyprus - - - - - 22 - - - - - Poland - - - - - 20 - - - - - CzechRepublic - - - - - 12 - - - - - Hungary - - - - - 11 - - - - - Latvia - - - - - 11 - - - - - Slovakia - - - - - 8 - - - - - Lithuania - - - - - 7 - - - - - Estonia - - - - - 4 - - - - - Slovenia - - - - - 2 - - - - - Malta - - - - - 1 - - - - - TotalEU-12 - - - - - 159 - - - - - non-EU - - - - - 102 - - - - - Unspecified - - - - - 182 - - - - - GrandTotal - - - - - 1.256 - - - -
- - - - - Treatyreform/IGC/ -LisbonTreaty - - - - - - Enquiries - - - - - Germany - - - - - 91 - - - - - Spain - - - - - 85 - - - - - UnitedKingdom - - - - - 74 - - - - - Belgium - - - - - 56 - - - - - France - - - - - 46 - - - - - Italy - - - - - 37 - - - - - Netherlands - - - - - 37 - - - - - Ireland - - - - - 29 - - - - - Austria - - - - - 17 - - - - - Sweden - - - - - 16 - - - - - Portugal - - - - - 11 - - - - - Finland - - - - - 9 - - - - - Denmark - - - - - 5 - - - - - Greece - - - - - 5 - - - - - Luxembourg - - - - - 4 - - - - - TotalEU-15 - - - - - 522 - - - - - Poland - - - - - 29 - - - - - CzechRepublic - - - - - 13 - - - - - Romania - - - - - 12 - - - - - Cyprus - - - - - 9 - - - - - Hungary - - - - - 9 - - - - - Bulgaria - - - - - 6 - - - - - Lithuania - - - - - 6 - - - - - Latvia - - - - - 4 - - - - - Estonia - - - - - 3 - - - - - Slovakia - - - - - 3 - - - - - Slovenia - - - - - 2 - - - - - Malta - - - - - 1 - - - - - TotalEU-12 - - - - - 97 - - - - - non-EU - - - - - 40 - - - - - Unspecified - - - - - 196 - - - - - GrandTotal - - - - - 855 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.json deleted file mode 100644 index fc890104..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":5,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":2,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.pdf deleted file mode 100644 index 53469d00..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016-reg.xml deleted file mode 100644 index 849675f6..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016-reg.xml +++ /dev/null @@ -1,230 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016-str.xml deleted file mode 100644 index 17286467..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016-str.xml +++ /dev/null @@ -1,849 +0,0 @@ - - - - - - - Country - - - - - - - - domestic(%) - - - - - - Imported(%) - - - - - - - - - Unknown(%) - - - - - - - Total(n) - - - - - - Austria - - - - - - - 86.2 - - - - - 13.8 - - - - - 0 - - - - - 3,375 - - - - - Belgium - - - - - - - 0 - - - - - 0 - - - - - 100.0 - - - - - 3,973 - - - - - Bulgaria - - - - - - 0 - - - - - 0 - - - - - 100.0 - - - - - 1,136 - - - - - Cyprus - - - - - - 81.0 - - - - - 3.8 - - - - - 15.2 - - - - - 158 - - - - - CzechRepublic - - - - - - - - 98.6 - - - - - 1.4 - - - - - 0 - - - - - 17,655 - - - - - Denmark - - - - - - - 3.1 - - - - - 10.2 - - - - - 86.7 - - - - - 1,662 - - - - - Estonia - - - - - - 94.2 - - - - - 5.8 - - - - - 0 - - - - - 430 - - - - - Finland - - - - - - 13.6 - - - - - 83.0 - - - - - 3.4 - - - - - 2,737 - - - - - France - - - - - - - - 0 - - - - - 0 - - - - - 100.0 - - - - - 5,510 - - - - - Germany - - - - - - - - 90.5 - - - - - 4.4 - - - - - 5.1 - - - - - 55,400 - - - - - hungary - - - - - - - 99.8 - - - - - 0.2 - - - - - 0 - - - - - 6,575 - - - - - ireland - - - - - - - 33.9 - - - - - 31.6 - - - - - 34.5 - - - - - 440 - - - - - italy - - - - - - 0 - - - - - 0 - - - - - 100.0 - - - - - 4,499 - - - - - Latvia - - - - - - - 98.1 - - - - - 1.9 - - - - - 0 - - - - - 619 - - - - - Lithuania - - - - - 99.0 - - - - - 1.0 - - - - - 0 - - - - - 2,270 - - - - - Luxembourg - - - - - - - - 93.9 - - - - - 6.1 - - - - - 0 - - - - - 163 - - - - - malta - - - - - - 96.5 - - - - - 3.5 - - - - - 0 - - - - - 85 - - - - - netherlands - - - - - - - 87.1 - - - - - 12.9 - - - - - 0 - - - - - 1,245 - - - - - poland - - - - - - 0 - - - - - 0 - - - - - 100.0 - - - 
- - 11,155 - - - - - portugal - - - - - - - 0 - - - - - 1.0 - - - - - 99.0 - - - - - 482 - - - - - Romania - - - - - - 0 - - - - - 0 - - - - - 100.0 - - - - - 620 - - - - - Slovakia - - - - - - - - 99.4 - - - - - 0.6 - - - - - 0 - - - - - 8,367 - - - - - Slovenia - - - - - - - 0 - - - - - 0 - - - - - 100.0 - - - - - 1,346 - - - - - Spain - - - - - 100.0 - - - - - 0 - - - - - 0 - - - - - 3,658 - - - - - Sweden - - - - - - - 23.9 - - - - - 73.7 - - - - - 2.4 - - - - - 3,930 - - - - - UnitedKingdom - - - - - - - 24.0 - - - - - 21.4 - - - - - 54.6 - - - - - 13,802 - - - - - EUTotal - - - - - - - 65.1 - - - - - 7.9 - - - - - 27.0 - - - - - 151,292 - - - - - iceland - - - - - - - 19.4 - - - - - 66.7 - - - - - 14.0 - - - - - 93 - - - - - Liechtenstein - - - - - - - - 0 - - - - - 0 - - - - - 100.0 - - - - - 1 - - - - - norway - - - - - - - - - 23.7 - - - - - 72.2 - - - - - 4.1 - - - - - 1,649 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.pdf deleted file mode 100644 index 40138322..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017-reg.xml deleted file mode 100644 index f49d234b..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017-reg.xml +++ /dev/null @@ -1,210 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017-str.xml deleted file mode 100644 index febc0c60..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017-str.xml +++ /dev/null @@ -1,769 +0,0 @@ - - - - - - - Country - - - - - - - - domestic(%) - - - - - - Imported(%) - - - - - - - - - Unknown(%) - - - - - - - Total(n) - - - - - - Austria - - - - - - - 92.9 - - - - - 7.1 - - - - - 0 - - - - - 5,821 - - - - - Belgium - - - - - - - 0 - - - - - 0 - - - - - 100.0 - - - - - 5,906 - - - - - Bulgaria - - - - - - 0 - - - - - 0 - - - - - 100.0 - - - - - 38 - - - - - Cyprus - - - - - - 100.0 - - - - - 0 - - - - - 0 - - - - - 17 - - - - - CzechRepublic - - - - - - - - 99.2 - - - - - 0.8 - - - - - 0 - - - - - 24,137 - - - - - Denmark - - - - - - - 10.0 - - - - - 12.1 - - - - - 77.9 - - - - - 3,868 - - - - - Estonia - - - - - - 86.8 - - - - - 13.2 - - - - - 0 - - - - - 114 - - - - - Finland - - - - - - 19.2 - - - - - 56.6 - - - - - 24.2 - - - - - 4,107 - - - - - France - - - - - - - - 23.9 - - - - - 4.5 - - - - - 71.6 - - - - - 3,058 - - - - - Germany - - - - - - - - 88.0 - - - - - 6.6 - - - - - 5.3 - - - - - 66,107 - - - - - hungary - - - - - - - 99.9 - - - - - 0.1 - - - - - 0 - - - - - 5,809 - - - - - ireland - - - - - - - 12.1 - - - - - 1.5 - - - - - 86.4 - - - - - 1,885 - - - - - italy - - - - - - 7.4 - - - - - 1.2 - - - - - 91.4 - - - - - 676 - - - - - Lithuania - - - - - 99.6 - - - - - 0.4 - - - - - 0 - - - - - 564 - - - - - Luxembourg - - - - - - - - 49.0 - - - - - 6.7 - - - - - 44.3 - - - - - 345 - - - - - malta - - - - - - 97.8 - - - - - 1.1 - - - - - 1.1 - - - - - 91 - - - - - netherlands - - - - - - - 93.5 - - - - - 6.5 - - - - - 0 - - - - - 3,289 - - - - - poland - - - - - - 99.5 - - - - - 0.5 - - - - - 0 - - - - - 192 - - - - - Slovakia - - - - - - - - 99.4 - - - - - 0.6 - - - - - 
0 - - - - - 3,380 - - - - - Slovenia - - - - - - - 0 - - - - - 0.6 - - - - - 99.4 - - - - - 1,127 - - - - - Spain - - - - - 100.0 - - - - - 0 - - - - - 0 - - - - - 5,055 - - - - - Sweden - - - - - - - 30.2 - - - - - 65.3 - - - - - 4.5 - - - - - 7,106 - - - - - UnitedKingdom - - - - - - - 22.8 - - - - - 1.4 - - - - - 75.8 - - - - - 57,815 - - - - - EUTotal - - - - - - - 61.6 - - - - - 6.8 - - - - - 31.6 - - - - - 200,507 - - - - - iceland - - - - - - - 48.4 - - - - - 46.2 - - - - - 5.4 - - - - - 93 - - - - - Liechtenstein - - - - - - - - 0 - - - - - 21.4 - - - - - 78.6 - - - - - 14 - - - - - norway - - - - - - - - - 41.2 - - - - - 51.0 - - - - - 7.9 - - - - - 2,836 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017.json deleted file mode 100644 index 16d936d5..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":1,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017.pdf deleted file mode 100644 index 4f24b7fc..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018-reg.xml deleted file mode 100644 index 3134e0f7..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018-reg.xml +++ /dev/null @@ -1,262 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018-str.xml deleted file mode 100644 index 68579546..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018-str.xml +++ /dev/null @@ -1,1068 +0,0 @@ - - - - - - - Country - - - - - - - - Sample -unit - - - - - - - Sample -size - - - - - - - - 2007 - - - - - 2006 - - - - - 2005 - - - - - 2004 - - - - - 2003 - - - - - n - - - - - %Pos - - - - - - n - - - - - %Pos - - - - - - n - - - - - %Pos - - - - - - n - - - - - %Pos - - - - - - n - - - - - %Pos - - - - - - Austria - - - - - - - Single - - - - - 25g - - - - - 109 - - - - - 0.9 - - - - - 93 - - - - - 1.1 - - - - - 89 - - - - - 1.1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Germany - - - - - - - - Single - - - - - 25g - - - - - 123 - - - - - 0.8 - - - - - 290 - - - - - 0.7 - - - - - 391 - - - - - 0.5 - - - - - 454 - - - - - 2.0 - - - - - 188 - - - - - 2.7 - - - - - netherlands - - - - - - - Single - - - - - 25g - - - - - 269 - - - - - 1.1 - - - - - 397 - - - - - 0.3 - - - - - 389 - - - - - 0 - - - - - 287 - - - - - 1.1 - - - - - 227 - - - - - 0 - - - - - Spain - - - - - Single - - - - - 25g - - - - - 36 - - - - - 0 - - - - - 40 - - - - - 0 - - - - - 107 - - - - - 0 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Total(4MSs) - - - - - - 537 - - - - - 0.9 - - - - - 820 - - - - - 0.5 - - - - - 976 - - - - - 0.3 - - - - - 741 - - - - - 1.7 - - - - - 415 - - - - - 1.2 - - - -
- - - - - Country - - - - - - - - Sample -unit - - - - - - - Sample -size - - - - - - - - 2007 - - - - - 2006 - - - - - 2005 - - - - - 2004 - - - - - 2003 - - - - - n - - - - - %Pos - - - - - - n - - - - - %Pos - - - - - - n - - - - - %Pos - - - - - - n - - - - - %Pos - - - - - - n - - - - - %Pos - - - - - - Estonia - - - - - - Single - - - - - 25g - - - - - - - - - - - - - - - - - 42 - - - - - 0 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Germany - - - - - - - - Single - - - - - 25g - - - - - 35 - - - - - 0 - - - - - 43 - - - - - 0 - - - - - 47 - - - - - 2.1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - hungary - - - - - - - Single - - - - - 25g - - - - - - - - - - - - - - - - - 202 - - - - - 2.5 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - italy - - - - - - Single - - - - - 25g - - - - - 334 - - - - - 2.4 - - - - - 241 - - - - - 0.4 - - - - - 394 - - - - - 0.5 - - - - - 196 - - - - - 0 - - - - - 161 - - - - - 0.6 - - - - - Luxembourg - - - - - - - - Single - - - - - 10g - - - - - 62 - - - - - 0 - - - - - 37 - - - - - 0 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - netherlands - - - - - - - Single - - - - - 25g - - - - - 264 - - - - - 0 - - - - - 936 - - - - - 0.4 - - - - - 463 - - - - - 1.1 - - - - - 847 - - - - - 0.8 - - - - - 678 - - - - - 0.2 - - - - - Romania - - - - - - Single - - - - - - - - - - - - - - - - - - - - - - - 37 - - - - - 0 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Total(7MSs) - - - - - - 695 - - - - - 1.2 - - - - - 1,538 - - - - - 0.7 - - - - - 904 - - - - - 0.9 - - - - - 1,043 - - - - - 0.6 - - - - - 839 - - - - - 0.3 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018.json deleted file mode 100644 index 1bf5fd29..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018.pdf deleted file mode 100644 index 1ba2ecaa..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019-reg.xml deleted file mode 100644 index 0693ca41..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019-reg.xml +++ /dev/null @@ -1,145 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019-str.xml deleted file mode 100644 index d5d2ac51..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019-str.xml +++ /dev/null @@ -1,524 +0,0 @@ - - - - - - - Country - - - - - - - - domestic(%) - - - - - - Imported(%) - - - - - - - - - Unknown(%) - - - - - - - Total(n) - - - - - - Austria - - - - - - - 0 - - - - - 100 - - - - - 0 - - - - - 1 - - - - - Belgium - - - - - - - 0 - - - - - 0 - - - - - 100 - - - - - 3 - - - - - Bulgaria - - - - - - 0 - - - - - 0 - - - - - 100 - - - - - 9 - - - - - Finland - - - - - - 0 - - - - - 0 - - - - - 100 - - - - - 2 - - - - - France - - - - - - - - 0 - - - - - 100 - - - - - 0 - - - - - 14 - - - - - Germany - - - - - - - - 14.3 - - - - - 76.2 - - - - - 9.5 - - - - - 21 - - - - - hungary - - - - - - - 0 - - - - - 100 - - - - - 0 - - - - - 1 - - - - - ireland - - - - - - - 0 - - - - - 0 - - - - - 100 - - - - - 7 - - - - - italy - - - - - - 0 - - - - - 0 - - - - - 100 - - - - - 76 - - - - - netherlands - - - - - - - 0 - - - - - 80.0 - - - - - 20.0 - - - - - 5 - - - - - poland - - - - - - 0 - - - - - 0 - - - - - 100 - - - - - 1 - - - - - portugal - - - - - - - 0 - - - - - 0 - - - - - 100 - - - - - 74 - - - - - Romania - - - - - - 0 - - - - - 0 - - - - - 100 - - - - - 4 - - - - - Slovenia - - - - - - - 0 - - - - - 100 - - - - - 0 - - - - - 1 - - - - - Spain - - - - - 100 - - - - - 0 - - - - - 0 - - - - - 201 - - - - - Sweden - - - - - - - 0 - - - - - 100 - - - - - 0 - - - - - 8 - - - - - UnitedKingdom - - - - - - - 0 - - - - - 46.2 - - - - - 53.9 - - - - - 13 - - - - - EUTotal - - - - - - - 46.3 - - - - - 11.6 - - - - - 42.2 - - - - - 441 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019.pdf deleted file mode 100644 index 9bad6614..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020-reg.xml deleted file mode 100644 index 735487b8..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020-reg.xml +++ /dev/null @@ -1,535 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020-str.xml deleted file mode 100644 index fc15d66f..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020-str.xml +++ /dev/null @@ -1,744 +0,0 @@ - - - - - - - Faculty cluster - - - - - - - - - - - - - - - - - - Population size - - - - - - - - - - - - - - - - - - Sample size - - - - - - - - - - - - - - Sciences - - - - - - - - - - - - 1269 (19.9%) - - - - - - - - - - - - - - - 101(20.4%) - - - - - - - - - - - - - - Social Sciences - - - - - - - - - - - - - - - - - - 3212 (50.6%) - - - - - - - - - - - - - - - 247(50.0%) - - - - - - - - - - - - - - Humanities - - - - - - - - - - - - - - 1168 (18.4%) - - - - - - - - - - - - - - - 95(19.3%) - - - - - - - - - - - - - Civil Sciences - - - - - - - - - - - - - - - - - 705 (11.1%) - - - - - - - - - - - - - - 51(10.3%) - - - - - - - - - - - -
- - - - - Facultycluster - - - - - - - - - - - - - - - - - - Female students - - - - - - - - - - - - - - - - - - Sample - - - - - - - - - - Population - - - - - - - - - - - - - - Sciences - - - - - - - - - - - - 63 (18.5%) - - - - - - - - - - - - - 597 (16.4%) - - - - - - - - - - - - - - Social Sciences - - - - - - - - - - - - - - - - - - 189 (55.6%) - - - - - - - - - - - - - - 2075 (57.0%) - - - - - - - - - - - - - - - Humanities - - - - - - - - - - - - - - 77 (22.6%) - - - - - - - - - - - - - 755 (20.7%) - - - - - - - - - - - - - - Civil Sciences - - - - - - - - - - - - - - - - - 11 (3.2%) - - - - - - - - - - - - 213 (5.9%) - - - - - - - - - - - - - Total - - - - - - - - - 340 - - - - - - - 3640 - - - - - - -
- - - - - Facultycluster - - - - - - - - - - - - - - - - - - Male students - - - - - - - - - - - - - - - - Sample - - - - - - - - - - Population - - - - - - - - - - - - - - Sciences - - - - - - - - - - - - 38 (24.7%) - - - - - - - - - - - - - 672 (24.8%) - - - - - - - - - - - - - - Social Sciences - - - - - - - - - - - - - - - - - - 58 (37.7%) - - - - - - - - - - - - - 1137 (41.9%) - - - - - - - - - - - - - - - Humanities - - - - - - - - - - - - - - 18 (11.7%) - - - - - - - - - - - - - 413 (15.2%) - - - - - - - - - - - - - - Civil Sciences - - - - - - - - - - - - - - - - - 40 (26.0%) - - - - - - - - - - - - - 492 (18.1%) - - - - - - - - - - - - - - Total - - - - - - - - - 154 - - - - - - - 2714 - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020.json deleted file mode 100644 index 71ae6a77..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":3,"numCorrectlyDetectedTables":3,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020.pdf deleted file mode 100644 index 3752173b..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021-reg.xml deleted file mode 100644 index 6c153fbd..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021-reg.xml +++ /dev/null @@ -1,1376 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021-str.xml deleted file mode 100644 index 5956f7f3..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021-str.xml +++ /dev/null @@ -1,2281 +0,0 @@ - - - - - - - Treatment/Therapy - - - - - - - - - - - - - - - - - - - - - Suffered -from - - - - - - - - - - - - - - - - Followed -treatment - - - - - - - - - - - - - - - - - - - - - Allergy problems - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 93 - - - - - - 77 - - - - - - Percentage - - - - - - - - - - - - - - 18.8% - - - - - - - - - 15.6% - - - - - - - - - Anxiety disorder - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 81 - - - - - - 29 - - - - - - Percentage - - - - - - - - - - - - - - 16.4% - - - - - - - - - 5.9% - - - - - - - - Asthma - - - - - - - - - - Count - - - - - - - - - 31 - - - - - - 22 - - - - - - Percentage - - - - - - - - - - - - - - 6.3% - - - - - - - - 4.4% - - - - - - - - Chronic fatigue -syndrome - - - - - - - - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 19 - - - - - - 4 - - - - - Percentage - - - - - - - - - - - - - - 3.8% - - - - - - - - 0.8% - - - - - - - - Depression - - - - - - - - - - - - - - Count - - - - - - - - - 48 - - - - - - 12 - - - - - - Percentage - - - - - - - - - - - - - - 9.7% - - - - - - - - 2.4% - - - - - - - - Diabetes - - - - - - - - - - - - Count - - - - - - - - - 1 - - - - - 1 - - - - - Percentage - - - - - - - - - - - - - - 0.2% - - - - - - - - 0.2% - - - - - - - - Sexually transmitted -diseases - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 7 - - - - - 7 - - - - - Percentage - - - - - - - - - - - - - - 1.4% - - - - - - - - 1.4% - - - - - - - - HIV infection - - - - - - - - - - - - - - - - Count - - - - - - - - - 0 - - - - - 0 - - - - - Percentage - - - - - 
- - - - - - - - - 0.0% - - - - - - - - 0.0% - - - - - - - - Substance abuse -problem - - - - - - - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 10 - - - - - - 0 - - - - - Percentage - - - - - - - - - - - - - - 2.0% - - - - - - - - .0% - - - - - - - Back pain - - - - - - - - - - - - Count - - - - - - - - - 202 - - - - - - - 56 - - - - - - Percentage - - - - - - - - - - - - - - 40.9% - - - - - - - - - 11.3% - - - - - - - - - Broken bone/fracture - - - - - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 12 - - - - - - 12 - - - - - - Percentage - - - - - - - - - - - - - - 2.4% - - - - - - - - 2.4% - - - - - - - - Bronchitis/ear or -sinus infection - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 104 - - - - - - - 87 - - - - - - Percentage - - - - - - - - - - - - - - 21.1% - - - - - - - - - 17.6% - - - - - - - - - Endometriosis - - - - - - - - - - - - - - - - - Count - - - - - - - - - 6 - - - - - 5 - - - - - Percentage - - - - - - - - - - - - - - 1.2% - - - - - - - - 1.0% - - - - - - -
- - - - - Difficulty - - - - - - - - - - - - - - Quite a lot - - - - - - - - - - - - - Not much - - - - - - - - - - - Not at all - - - - - - - - - - - - Alcohol use - - - - - - - - - - - - - - Count - - - - - - - - - 20 - - - - - - 56 - - - - - - 391 - - - - - - - Percentage - - - - - - - - - - - - - - 4.3% - - - - - - - - 12.0% - - - - - - - - - 83.7% - - - - - - - - - Allergies - - - - - - - - - - - - - Count - - - - - - - - - 40 - - - - - - 28 - - - - - - 398 - - - - - - - Percentage - - - - - - - - - - - - - - 8.6% - - - - - - - - 6.0% - - - - - - - - 85.4% - - - - - - - - - Chronic illness and -chronic pain - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 19 - - - - - - 15 - - - - - - 425 - - - - - - - Percentage - - - - - - - - - - - - - - 4.1% - - - - - - - - 3.3% - - - - - - - - 92.6% - - - - - - - - - Colds/flu/sinus -infections - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 89 - - - - - - 138 - - - - - - - 238 - - - - - - - Percentage - - - - - - - - - - - - - - 19.1% - - - - - - - - - 29.7% - - - - - - - - - 51.2% - - - - - - - - - Concern over friends -or family - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 125 - - - - - - - 138 - - - - - - - 203 - - - - - - - Percentage - - - - - - - - - - - - - - 26.8% - - - - - - - - - 29.6% - - - - - - - - - 43.6% - - - - - - - - - Depression/anxiety - - - - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 80 - - - - - - 98 - - - - - - 288 - - - - - - - Percentage - - - - - - - - - - - - - - 17.2% - - - - - - - - - 21.0% - - - - - - - - - 61.8% - - - - - - - - - Drug use - - - - - - - - - - - Count - - - - - - - - - 4 - - - - - 7 - - - - - 448 - - - - - - - Percentage - - - - - - - - - - - - - - .9% - - - - - - - 1.5% - - - - - - - - 97.6% - - - - - - - - - Eating disorder - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 16 - - - - - - 39 - - - - - - 409 - - - - - - - Percentage - - - 
- - - - - - - - - - - 3.4% - - - - - - - - 8.4% - - - - - - - - 88.1% - - - - - - - - - Injury - - - - - - - - - - Count - - - - - - - - - 7 - - - - - 23 - - - - - - 432 - - - - - - - Percentage - - - - - - - - - - - - - - 1.5% - - - - - - - - 5.0% - - - - - - - - 93.5% - - - - - - - - - Harassment - - - - - - - - - - - - - - Count - - - - - - - - - 8 - - - - - 7 - - - - - 446 - - - - - - - Percentage - - - - - - - - - - - - - - 1.7% - - - - - - - - 1.5% - - - - - - - - 96.7% - - - - - - - - - Relationship -difficulties - - - - - - - - - - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 108 - - - - - - - 101 - - - - - - - 257 - - - - - - - Percentage - - - - - - - - - - - - - - 23.2% - - - - - - - - - 21.7% - - - - - - - - - 55.2% - - - - - - - - - Sexually transmitted -diseases - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 4 - - - - - 6 - - - - - 449 - - - - - - - Percentage - - - - - - - - - - - - - - .9% - - - - - - - 1.3% - - - - - - - - 97.8% - - - - - - - - - Sleep difficulties - - - - - - - - - - - - - - - - - - - - - Count - - - - - - - - - 75 - - - - - - 82 - - - - - - 311 - - - - - - - Percentage - - - - - - - - - - - - - - 16.0% - - - - - - - - - 17.5% - - - - - - - - - 66.5% - - - - - - - - - Stress - - - - - - - - - - Count - - - - - - - - - 237 - - - - - - - 110 - - - - - - - 122 - - - - - - - Percentage - - - - - - - - - - - - - - 50.5% - - - - - - - - - 23.5% - - - - - - - - - 26.0% - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021.json deleted file mode 100644 index 1bf5fd29..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021.pdf deleted file mode 100644 index f313bda2..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022-reg.xml deleted file mode 100644 index 5aea1b1a..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022-reg.xml +++ /dev/null @@ -1,422 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022-str.xml deleted file mode 100644 index 01722bbd..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022-str.xml +++ /dev/null @@ -1,708 +0,0 @@ - - - - - - - Substance - - - - - - - - - - - - - Frequency of substance abuse - - - - - - - - - - - - - - - - - - - - - - - - - - - - - In the last -month - - - - - - - - - - - - - - - - - - In the last -year - - - - - - - - - - - - - - - - - More than -a year - - - - - - - - - - - - - - - - - Never - - - - - - - - - Cannabis - - - - - - - - - - - - 9.6% - - - - - - - - 4.0% - - - - - - - - 6.3% - - - - - - - - 80.1% - - - - - - - - - Inhalants - - - - - - - - - - - - - 2.7% - - - - - - - - 1.6% - - - - - - - - 4.1% - - - - - - - - 91.6% - - - - - - - - - Ecstasy - - - - - - - - - - - 2.1% - - - - - - - - 0.9% - - - - - - - - 3.0% - - - - - - - - 94.0% - - - - - - - - - Anabolic steroids - - - - - - - - - - - - - - - - - - - - 2.1% - - - - - - - - 0.2% - - - - - - - - 0.7% - - - - - - - - 97.0% - - - - - - - - - Magic mushrooms - - - - - - - - - - - - - - - - - - 2.1% - - - - - - - - 0.5% - - - - - - - - 1.1% - - - - - - - - 96.3% - - - - - - - - - Heroin - - - - - - - - - - 0.5% - - - - - - - - 0.0% - - - - - - - - 0.5% - - - - - - - - 99.0% - - - - - - - - - Amphetamines - - - - - - - - - - - - - - - - 2.5% - - - - - - - - 0.7% - - - - - - - - 0.7% - - - - - - - - 96.1% - - - - - - - - - Ketamine - - - - - - - - - - - - 2.1% - - - - - - - - 0.0% - - - - - - - - 0.5% - - - - - - - - 97.4% - - - - - - - - - LSD - - - - - - - 2.3% - - - - - - - - 0.9% - - - - - - - - 1.4% - - - - - - - - 95.4% - - - - - - - - - Cocaine - - - - - - - - - - - 4.4% - - - - - - - - 1.6% - - - - - - - - 1.8% - - - - - - - - 92.2% - - - - - - - - - GHB - - - - - - - 1.1% - - - - - - - - 0.0% - - - - - - - - 0.0% 
- - - - - - - - 98.9% - - - - - - - - - Tranquillisers - - - - - - - - - - - - - - - - - - 3.4% - - - - - - - - 1.4% - - - - - - - - 1.8% - - - - - - - - 93.4% - - - - - - - - - Ritalin - - - - - - - - - - - 1.1% - - - - - - - - 0.0% - - - - - - - - 0.0% - - - - - - - - 98.9% - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022.pdf deleted file mode 100644 index 46ec129c..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023-reg.xml deleted file mode 100644 index 5201c4e0..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023-reg.xml +++ /dev/null @@ -1,364 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023-str.xml deleted file mode 100644 index f5de3080..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023-str.xml +++ /dev/null @@ -1,523 +0,0 @@ - - - - - - - How often do you feel this way? - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Never - - - - - - - - - Sometimes - - - - - - - - - - - - - Often - - - - - - - - - Left out of things - - - - - - - - - - - - - - - - - - - 27.9% - - - - - - - - - 52.2% - - - - - - - - - 19.9% - - - - - - - - - Helpless - - - - - - - - - - - - 25.5% - - - - - - - - - 50.1% - - - - - - - - - 24.4% - - - - - - - - - Hopeless - - - - - - - - - - - - 32.4% - - - - - - - - - 43.5% - - - - - - - - - 24.1% - - - - - - - - - Tense and stressed out - - - - - - - - - - - - - - - - - - - - - - - 3.1% - - - - - - - - 31.4% - - - - - - - - - 65.5% - - - - - - - - - Overwhelmed by all you had to do - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 6.2% - - - - - - - - 27.7% - - - - - - - - - 66.1% - - - - - - - - - Exhausted (not from physical activity) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 7.2% - - - - - - - - 34.3% - - - - - - - - - 58.5% - - - - - - - - - Depressed and difficult to function - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 38.2% - - - - - - - - - 37.6% - - - - - - - - - 24.2% - - - - - - - - - Seriously considered suicide - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 85.6% - - - - - - - - - 11.3% - - - - - - - - - 3.1% - - - - - - - - Attempted suicide - - - - - - - - - - - - - - - - - - - - 97.1% - - - - - - - - - 1.9% - - - - - - - - 1.0% - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023.pdf deleted file mode 100644 index 8cad8a4f..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024-reg.xml deleted file mode 100644 index 8a359748..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024-reg.xml +++ /dev/null @@ -1,263 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024-str.xml deleted file mode 100644 index 14e55a50..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024-str.xml +++ /dev/null @@ -1,422 +0,0 @@ - - - - - - - Perceived Discrimination - - - - - - - - - - - - - - - - - - - - - - - - - - - Frequently - - - - - - - - - - - - - - Occasionally - - - - - - - - - - - - - - - - Never - - - - - - - - - Age - - - - - - - 1.5% - - - - - - - - 3.6% - - - - - - - - 94.9% - - - - - - - - - Social class - - - - - - - - - - - - - - - 0.4% - - - - - - - - 6.8% - - - - - - - - 92.8% - - - - - - - - - Physical appearance - - - - - - - - - - - - - - - - - - - - - - 0.4% - - - - - - - - 5.7% - - - - - - - - 93.8% - - - - - - - - - Disability - - - - - - - - - - - - - - 0.0% - - - - - - - - 1.1% - - - - - - - - 98.9% - - - - - - - - - Religion - - - - - - - - - - - - 0.0% - - - - - - - - 2.3% - - - - - - - - 97.7% - - - - - - - - - Ethnicity - - - - - - - - - - - - - .2% - - - - - - - 1.5% - - - - - - - - 98.3% - - - - - - - - - Gender - - - - - - - - - - .4% - - - - - - - 5.5% - - - - - - - - 94.1% - - - - - - - - - Sexual orientation - - - - - - - - - - - - - - - - - - - - - 0.0% - - - - - - - - 1.7% - - - - - - - - 98.3% - - - - - - - - - Language - - - - - - - - - - - - .6% - - - - - - - 10.6% - - - - - - - - - 88.8% - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024.pdf deleted file mode 100644 index 5e2af0e0..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025-reg.xml deleted file mode 100644 index 23ac85b8..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025-reg.xml +++ /dev/null @@ -1,1114 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025-str.xml deleted file mode 100644 index 8c30b379..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025-str.xml +++ /dev/null @@ -1,1785 +0,0 @@ - - - - - - - Gender - - - - - - - - - - How healthy do you think you are? - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Very healthy - - - - - - - - - - - - - - - Quite healthy - - - - - - - - - - - - - - - - Unhealthy - - - - - - - - - - - - - Male - - - - - - - - 36 - - - - - - 102 - - - - - - - 16 - - - - - - Female - - - - - - - - - - 33 - - - - - - 270 - - - - - - - 32 - - - - -
- - - - - Psychosomatic -Symptoms - - - - - - - - - - - - - - - - - - - - - - - - - How often do you have these symptoms - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - At least every -week - - - - - - - - - - - - - - - - - - - - About every -month - - - - - - - - - - - - - - - - - - - Rarely/Never - - - - - - - - - - - - - - - - Headache - - - - - - - - - - - - 239 - - - - - - - 119 - - - - - - - 128 - - - - - - - Stomach-ache - - - - - - - - - - - - - - - - 78 - - - - - - 120 - - - - - - - 284 - - - - - - - Back-ache - - - - - - - - - - - - - 139 - - - - - - - 126 - - - - - - - 219 - - - - - - - Neck and shoulder pain - - - - - - - - - - - - - - - - - - - - - - - 146 - - - - - - - 99 - - - - - - 241 - - - - - - - Feeling low and down - - - - - - - - - - - - - - - - - - - - - 221 - - - - - - - 131 - - - - - - - 131 - - - - - - - Feeling nervous - - - - - - - - - - - - - - - - - - 303 - - - - - - - 108 - - - - - - - 71 - - - - - - Tired and exhausted - - - - - - - - - - - - - - - - - - - - - 377 - - - - - - - 63 - - - - - - 47 - - - - - - Sleep problems - - - - - - - - - - - - - - - - - 130 - - - - - - - 48 - - - - - - 308 - - - - - - - Feeling dizzy - - - - - - - - - - - - - - - - 123 - - - - - - - 69 - - - - - - 293 - - - - - -
- - - - - Psychosomatic -Symptoms - - - - - - - - - - - - - - - - - - - - - - - - - How often have you taken medicine or tablets? - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - No - - - - - - Occasionally - - - - - - - - - - - - - - - - Frequently - - - - - - - - - - - - - - Headache - - - - - - - - - - - - 228 - - - - - - - 163 - - - - - - - 99 - - - - - - Stomach-ache - - - - - - - - - - - - - - - - 421 - - - - - - - 49 - - - - - - 16 - - - - - - Sleeping Difficulties - - - - - - - - - - - - - - - - - - - - - - - - 458 - - - - - - - 16 - - - - - - 7 - - - - - Nervousness - - - - - - - - - - - - - - - 449 - - - - - - - 20 - - - - - - 14 - - - - -
- - - - - Illnesses - - - - - - - - - - - - - Gender - - - - - - - - - - Male - - - - - - - - Female - - - - - - - - - - Allergy problems - - - - - - - - - - - - - - - - - - - 25 - - - - - - 68 - - - - - - Anxiety disorder - - - - - - - - - - - - - - - - - - - 17 - - - - - - 64 - - - - - - Asthma - - - - - - - - - - 6 - - - - - 25 - - - - - - Chronic fatigue syndrome - - - - - - - - - - - - - - - - - - - - - - - - - - 5 - - - - - 14 - - - - - - Depression - - - - - - - - - - - - - - 20 - - - - - - 28 - - - - - - Diabetes - - - - - - - - - - - - 1 - - - - - 0 - - - - - Sexually transmitted diseases - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 3 - - - - - 4 - - - - - Substance abuse problem - - - - - - - - - - - - - - - - - - - - - - - - - 5 - - - - - 5 - - - - - Back pain - - - - - - - - - - - - 46 - - - - - - 156 - - - - - - - Broken bone/fracture - - - - - - - - - - - - - - - - - - - - - - - 7 - - - - - 5 - - - - - Bronchitis/ear or sinus infection - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 23 - - - - - - 81 - - - - - - Endometriosis - - - - - - - - - - - - - - - - - 0 - - - - - 6 - - - -
- - - - - Illnesses - - - - - - - - - - - - - Course Year - - - - - - - - - - - - - - 1st year - - - - - - - - - - - 2nd year - - - - - - - - - - - 3rd-5th year - - - - - - - - - - - - - - - Allergy problems - - - - - - - - - - - - - - - - - - - 23 - - - - - - 55 - - - - - - 15 - - - - - - Anxiety disorder - - - - - - - - - - - - - - - - - - - 22 - - - - - - 42 - - - - - - 17 - - - - - - Asthma - - - - - - - - - - 9 - - - - - 17 - - - - - - 5 - - - - - Chronic fatigue syndrome - - - - - - - - - - - - - - - - - - - - - - - - - - 6 - - - - - 4 - - - - - 9 - - - - - Depression - - - - - - - - - - - - - - 19 - - - - - - 24 - - - - - - 5 - - - - - Diabetes - - - - - - - - - - - - 0 - - - - - 1 - - - - - 0 - - - - - Sexually transmitted diseases - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 2 - - - - - 4 - - - - - 1 - - - - - Substance abuse problem - - - - - - - - - - - - - - - - - - - - - - - - - 2 - - - - - 8 - - - - - 0 - - - - - Back pain - - - - - - - - - - - - 68 - - - - - - 109 - - - - - - - 25 - - - - - - Broken bone/fracture - - - - - - - - - - - - - - - - - - - - - - - 5 - - - - - 6 - - - - - 1 - - - - - Bronchitis/ear or sinus infection - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 38 - - - - - - 50 - - - - - - 16 - - - - - - Endometriosis - - - - - - - - - - - - - - - - - 2 - - - - - 3 - - - - - 1 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025.json deleted file mode 100644 index 4d74e0e7..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":5,"numCorrectlyDetectedTables":5,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025.pdf deleted file mode 100644 index cb3de2ac..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026-reg.xml deleted file mode 100644 index a1ee61c9..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026-reg.xml +++ /dev/null @@ -1,125 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026-str.xml deleted file mode 100644 index 0673aae2..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026-str.xml +++ /dev/null @@ -1,379 +0,0 @@ - - - - - - - Freq. - - - - - - Percent - - - - - - - Mean Loss - - - - - - Fraction of -Wealth Lost - - - - - - - - - - I/we kept the assets - - - - - - - - - - - 344 - - - - - 75.2 - - - - - 12196 - - - - - 17.4% - - - - - I/we sold some of the assets - - - - - - - - - - - 53 - - - - - 11.6 - - - - - 23518 - - - - - 22.5% - - - - - I/we sold all of them - - - - - - - - - - - - 61 - - - - - 13.2 - - - - - 9187 - - - - - 22.5% - - - - - Total - - - - - - 458 - - - - - 100.0 - - - - - 13153 - - - - - 18.7% - - - -
- - - - - no. of correct answers - - - - - - - - - Freq. - - - - - - Percent - - - - - - - Cum. - - - - - 0 - - - - - 138 - - - - - 6.9 - - - - - 6.9 - - - - - 1 - - - - - 178 - - - - - 8.8 - - - - - 15.7 - - - - - 2 - - - - - 626 - - - - - 31.1 - - - - - 46.8 - - - - - 3 - - - - - 1070 - - - - - 53.2 - - - - - 100.0 - - - -
- - - - - no. of correct answers - - - - - - - - - Freq. - - - - - - Percent - - - - - - - Cum. - - - - - 0 - - - - - 871 - - - - - 43.3 - - - - - 43.3 - - - - - 1 - - - - - 434 - - - - - 21.6 - - - - - 64.9 - - - - - 2 - - - - - 403 - - - - - 20.0 - - - - - 84.9 - - - - - 3 - - - - - 303 - - - - - 15.1 - - - - - 100.0 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.json deleted file mode 100644 index 71ae6a77..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":3,"numCorrectlyDetectedTables":3,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.pdf deleted file mode 100644 index d8758fe3..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-reg.xml deleted file mode 100644 index 66b9caa3..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-reg.xml +++ /dev/null @@ -1,207 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-str.xml deleted file mode 100644 index 2967565a..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-str.xml +++ /dev/null @@ -1,766 +0,0 @@ - - - - - - - Variable - - - - - - Mean - - - - - Std. Dev. - - - - - - Min - - - - - Max - - - - - Age - - - - - 50.8 - - - - - 15.9 - - - - - 21 - - - - - 90 - - - - - Men - - - - - 0.47 - - - - - 0.50 - - - - - 0 - - - - - 1 - - - - - East - - - - - 0.28 - - - - - 0.45 - - - - - 0 - - - - - 1 - - - - - Rural - - - - - 0.15 - - - - - 0.36 - - - - - 0 - - - - - 1 - - - - - Married - - - - - 0.57 - - - - - 0.50 - - - - - 0 - - - - - 1 - - - - - Single - - - - - 0.21 - - - - - 0.40 - - - - - 0 - - - - - 1 - - - - - Divorced - - - - - - 0.13 - - - - - 0.33 - - - - - 0 - - - - - 1 - - - - - Widowed - - - - - - - 0.08 - - - - - 0.26 - - - - - 0 - - - - - 1 - - - - - Separated - - - - - 0.03 - - - - - 0.16 - - - - - 0 - - - - - 1 - - - - - Partner - - - - - - 0.65 - - - - - 0.48 - - - - - 0 - - - - - 1 - - - - - Employed - - - - - - - 0.55 - - - - - 0.50 - - - - - 0 - - - - - 1 - - - - - Fulltime - - - - - - 0.34 - - - - - 0.47 - - - - - 0 - - - - - 1 - - - - - Parttime - - - - - - 0.20 - - - - - 0.40 - - - - - 0 - - - - - 1 - - - - - Unemployed - - - - - - - 0.08 - - - - - 0.28 - - - - - 0 - - - - - 1 - - - - - Homemaker - - - - - - 0.19 - - - - - 0.40 - - - - - 0 - - - - - 1 - - - - - Retired - - - - - 0.28 - - - - - 0.45 - - - - - 0 - - - - - 1 - - - - - Household size - - - - - - 2.43 - - - - - 1.22 - - - - - 1 - - - - - 9 - - - - - Households with children - - - - - - - - 0.37 - - - - - 0.48 - - - - - 0 - - - - - 1 - - - - - Number of children - - - - - - - - - - 1.67 - - - - - 1.38 - - - - - 0 - - - - - 8 - - - - - Lower secondary education - - - - - - - - - - 0.08 - - - - - 0.27 - - - - - 
0 - - - - - 1 - - - - - Upper secondary education - - - - - - - - 0.60 - - - - - 0.49 - - - - - 0 - - - - - 1 - - - - - Post secondary, non tert. education - - - - - - - - - - - - 0.12 - - - - - 0.33 - - - - - 0 - - - - - 1 - - - - - First stage tertiary education - - - - - - - - 0.17 - - - - - 0.38 - - - - - 0 - - - - - 1 - - - - - Other education - - - - - - 0.03 - - - - - 0.17 - - - - - 0 - - - - - 1 - - - - - Household income (Euro/month) - - - - - - - - 2,127 - - - - - 1,389 - - - - - 22 - - - - - 22,500 - - - - - Gross wealth - end of 2007 (Euro) - - - - - - - - - - - - 187,281 - - - - - 384,198 - - - - - 0 - - - - - 7,720,000 - - - - - Gross financial wealth - end of 2007 (Euro) - - - - - - - - - - - - - 38,855 - - - - - 114,128 - - - - - 0 - - - - - 2,870,000 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027.pdf deleted file mode 100644 index fe5682b0..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001-reg.xml deleted file mode 100644 index 46f0af1f..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001-reg.xml +++ /dev/null @@ -1,698 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001-str.xml deleted file mode 100644 index 1939b001..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001-str.xml +++ /dev/null @@ -1,2326 +0,0 @@ - - - - - - - Category - - - - - - - - 2005 - - - - - - 2010 - - - - - - Difference - - - - - - - Number - - - - - - - - Margin of -error (±) - - - - - - - - - - - - Percent - - - - - - - Margin of -error (±) - - - - - - - - - - - Number - - - - - - Margin of -error (±) - - - - - - - - Percent - - - - - - - Margin of -error (±) - - - - - - - - - - - Number - - - - - - Percent - - - - - - - All ages . . . . . . . . . . . . - - - - - - - - - 291,099 - - - - - ***** - - - - - 100.0 - - - - - (X) - - - - - 303,858 - - - - - ***** - - - - - 100.0 - - - - - (X) - - - - - **12,760 - - - - - (X) - - - - - With a disability . . . . . . . . . . . . - - - - - - 54,425 - - - - - 894 - - - - - 18.7 - - - - - 0.3 - - - - - 56,672 - - - - - 905 - - - - - 18.7 - - - - - 0.3 - - - - - *2,247 - - - - - - - - - - Severe disability. . . . . . . . . . - - - - - - - - 34,947 - - - - - 601 - - - - - 12.0 - - - - - 0.2 - - - - - 38,284 - - - - - 654 - - - - - 12.6 - - - - - 0.2 - - - - - *3,337 - - - - - *0.6 - - - - - Aged 6 and older. . . . . - - - - - - - 266,752 - - - - - 84 - - - - - 100.0 - - - - - (X) - - - - - 278,222 - - - - - 88 - - - - - 100.0 - - - - - (X) - - - - - *11,469 - - - - - (X) - - - - - Needed personal assistance. . - - - - - - 10,996 - - - - - 336 - - - - - 4.1 - - - - - 0.1 - - - - - 12,349 - - - - - 386 - - - - - 4.4 - - - - - 0.1 - - - - - *1,353 - - - - - *0.3 - - - - - Aged 15 and older. . . . 
- - - - - - - - - - - - - - - - - - 230,391 - - - - - - - - ***** - - - - - - 100.0 - - - - - - (X) - - - - - 241,682 - - - - - - - ***** - - - - - - - 100.0 - - - - - - (X) - - - - - **11,291 - - - - - - - - (X) - - - - - With a disability . . . . . . . . . . . . - - - - - - - - - - - - - - - - 49,069 - - - - - - - - 794 - - - - - 21.3 - - - - - - 0.3 - - - - - - 51,454 - - - - - - - 838 - - - - - - 21.3 - - - - - - 0.3 - - - - - - *2,385 - - - - - - - - - - - Severe disability. . . . . . . . . . - - - - - - - - - - - - - - 32,771 - - - - - - - - 567 - - - - - 14.2 - - - - - - 0.2 - - - - - - 35,683 - - - - - - - 631 - - - - - - 14.8 - - - - - - 0.3 - - - - - - *2,912 - - - - - - *0.5 - - - - - Difficulty seeing. . . . . . . . . . . . - - - - - - - - - - - - 7,793 - - - - - 350 - - - - - 3.4 - - - - - - 0.2 - - - - - - 8,077 - - - - - - 354 - - - - - - 3.3 - - - - - - 0.1 - - - - - - 284 - - - - - - - - - - - Severe . . . . . . . . . . . . . . . . . - - - - - - - - - - - - - - - 1,783 - - - - - 129 - - - - - - 0.8 - - - - - - 0.1 - - - - - - 2,010 - - - - - - 139 - - - - - - 0.8 - - - - - - 0.1 - - - - - - *228 - - - - - 0.1 - - - - - Difficulty hearing . . . . . . . . . . . - - - - - - - - - - - - - - - 7,809 - - - - - 325 - - - - - - 3.4 - - - - - - 0.1 - - - - - - 7,572 - - - - - - 320 - - - - - - 3.1 - - - - - - 0.1 - - - - - - –237 - - - - - - *–0.3 - - - - - - - Severe . . . . . . . . . . . . . . . . . - - - - - - - - - - - - - - 993 - - - - - 103 - - - - - - 0.4 - - - - - - - - - - - 1,096 - - - - - - 122 - - - - - - 0.5 - - - - - - 0.1 - - - - - - 103 - - - - - - - - - - - Aged 21 to 64 . . . . . . . - - - - - - - - 170,349 - - - - - 185 - - - - - 100.0 - - - - - (X) - - - - - 177,295 - - - - - 193 - - - - - 100.0 - - - - - (X) - - - - - *6,945 - - - - - (X) - - - - - With a disability . . . . . . . . . . . . 
- - - - - - 28,141 - - - - - 622 - - - - - 16.5 - - - - - 0.4 - - - - - 29,479 - - - - - 705 - - - - - 16.6 - - - - - 0.4 - - - - - *1,338 - - - - - 0.1 - - - - - Employed. . . . . . . . . . . . . . . - - - - - - - - 12,838 - - - - - 495 - - - - - 45.6 - - - - - 1.2 - - - - - 12,115 - - - - - 432 - - - - - 41.1 - - - - - 1.0 - - - - - *–723 - - - - - *–4.5 - - - - - Severe disability . . . . . . . . . . . - - - - - - - - 18,705 - - - - - 469 - - - - - 11.0 - - - - - 0.3 - - - - - 20,286 - - - - - 566 - - - - - 11.4 - - - - - 0.3 - - - - - *1,581 - - - - - *0.5 - - - - - Employed . . . . . . . . . . . . . - - - - - - - - 5,738 - - - - - 277 - - - - - 30.7 - - - - - 1.2 - - - - - 5,570 - - - - - 261 - - - - - 27.5 - - - - - 1.0 - - - - - –167 - - - - - *–3.2 - - - - - Nonsevere disability . . . . . . . . - - - - - - - - 9,436 - - - - - 403 - - - - - 5.5 - - - - - 0.2 - - - - - 9,193 - - - - - 374 - - - - - 5.2 - - - - - 0.2 - - - - - –243 - - - - - *–0.4 - - - - - Employed . . . . . . . . . . . . . - - - - - - - - 7,100 - - - - - 356 - - - - - 75.2 - - - - - 1.6 - - - - - 6,544 - - - - - 311 - - - - - 71.2 - - - - - 1.6 - - - - - *–556 - - - - - *–4.1 - - - - - No disability. . . . . . . . . . . . . . . - - - - - - 142,208 - - - - - 636 - - - - - 83.5 - - - - - 0.4 - - - - - 147,816 - - - - - 733 - - - - - 83.4 - - - - - 0.4 - - - - - *5,607 - - - - - –0.1 - - - - - Employed. . . . . . . . . . . . . . . - - - - - - - - 118,707 - - - - - 678 - - - - - 83.5 - - - - - 0.3 - - - - - 116,881 - - - - - 862 - - - - - 79.1 - - - - - 0.4 - - - - - *–1,826 - - - - - *–4.4 - - - - - Aged 65 and older . . . . . - - - - - - - 35,028 - - - - - ***** - - - - - 100.0 - - - - - (X) - - - - - 38,599 - - - - - ***** - - - - - 100.0 - - - - - (X) - - - - - **3,571 - - - - - (X) - - - - - With a disability . . . . . . . . . . . . 
- - - - - - 18,132 - - - - - 324 - - - - - 51.8 - - - - - 0.9 - - - - - 19,234 - - - - - 327 - - - - - 49.8 - - - - - 0.8 - - - - - *1,102 - - - - - *–1.9 - - - - - Severe disability . . . . . . . . . . . - - - - - - - - 12,942 - - - - - 273 - - - - - 36.9 - - - - - 0.8 - - - - - 14,138 - - - - - 276 - - - - - 36.6 - - - - - 0.7 - - - - - *1,196 - - - - - –0.3 - - - -
- - - - - Age-adjusted disability rate - - - - - - - - - - - - - - - - - - Unadjusted disability rate - - - - - - - - - - - - - - 2005 - - - - - - 2010 - - - - - - 2005 - - - - - 2010 - - - - - Category - - - - - - Estimate - - - - - Margin of -error (±) - - - - - - - - - - - - Estimate - - - - - - Margin of -error (±) - - - - - - - - - - - Difference - - - - - - Estimate - - - - - - Margin of -error (±) - - - - - - - - - - - Estimate - - - - - - Margin of -error (±) - - - - - - - - - - - Difference - - - - All people . . . . . . . . . - - - - - - - 18.6 - - - - - 0.3 - - - - - 18.1 - - - - - 0.3 - - - - - *–0.5 - - - - - 18.7 - - - - - 0.3 - - - - - 18.7 - - - - - 0.3 - - - - - - - - - - Male. . . . . . . . . . . . . . . . . . - - - - - - 17.9 - - - - - 0.4 - - - - - 17.6 - - - - - 0.4 - - - - - –0.3 - - - - - 17.3 - - - - - 0.4 - - - - - 17.4 - - - - - 0.4 - - - - - 0.2 - - - - - Female. . . . . . . . . . . . . . . . - - - - - - - 19.0 - - - - - 0.3 - - - - - 18.3 - - - - - 0.4 - - - - - *–0.7 - - - - - 20.1 - - - - - 0.3 - - - - - 19.8 - - - - - 0.4 - - - - - –0.2 - - - - - White alone . . . . . . . . . . . . - - - - - - 17.9 - - - - - 0.3 - - - - - 17.4 - - - - - 0.3 - - - - - *–0.5 - - - - - 18.6 - - - - - 0.3 - - - - - 18.5 - - - - - 0.3 - - - - - - - - - - Not Hispanic. . . . . . . . . . - - - - - - 18.1 - - - - - 0.4 - - - - - 17.6 - - - - - 0.4 - - - - - –0.4 - - - - - 19.7 - - - - - 0.4 - - - - - 19.8 - - - - - 0.4 - - - - - 0.1 - - - - - Black alone . . . . . . . . . . . . - - - - - - - - 23.2 - - - - - 0.7 - - - - - 22.2 - - - - - 0.7 - - - - - –1.0 - - - - - 20.4 - - - - - 0.7 - - - - - 20.3 - - - - - 0.7 - - - - - –0.2 - - - - - Not Hispanic. . . . . . . . . . - - - - - - 23.3 - - - - - 0.7 - - - - - 22.3 - - - - - 0.7 - - - - - *–1.0 - - - - - 20.7 - - - - - 0.7 - - - - - 20.7 - - - - - 0.7 - - - - - - - - - - Asian Alone. . . . . . . . . . . . 
- - - - - - 14.5 - - - - - 1.3 - - - - - 14.5 - - - - - 1.1 - - - - - - - - - - 12.4 - - - - - 1.2 - - - - - 13.0 - - - - - 1.0 - - - - - 0.6 - - - - - Not Hispanic. . . . . . . . . . - - - - - - 14.6 - - - - - 1.3 - - - - - 14.4 - - - - - 1.1 - - - - - –0.2 - - - - - 12.5 - - - - - 1.2 - - - - - 13.0 - - - - - 1.1 - - - - - 0.5 - - - - - Hispanic or Latino . . . . . . . - - - - - - 18.4 - - - - - 0.9 - - - - - 17.8 - - - - - 0.7 - - - - - –0.6 - - - - - 13.1 - - - - - 0.7 - - - - - 13.2 - - - - - 0.6 - - - - - 0.1 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001.json deleted file mode 100644 index a2697933..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":2,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001.pdf deleted file mode 100644 index ff995b66..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002-reg.xml deleted file mode 100644 index fffd80d2..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002-reg.xml +++ /dev/null @@ -1,1235 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002-str.xml deleted file mode 100644 index 644e26eb..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002-str.xml +++ /dev/null @@ -1,2966 +0,0 @@ - - - - - - - Amount borrowed - - - - - - - - - - - - - - - - Student and institutional characteristics - - - - - - - - - - - - - - - - - - Percent -who -borrowed - - - - - - - - - - - - - - - - - - - Average -amount - - - - - - - - - - - - - - - Less than -$10,000 - - - - - - - - - - - - $10,000– -14,999 - - - - - - $15,000– -29,999 - - - - - - $30,000– -54,999 - - - - - - $55,000 -or more - - - - - - - - Total - - - - - - - - - 44.8 - - - - - $33,200 - - - - - 23.2 - - - - - 10.3 - - - - - 27.0 - - - - - 20.1 - - - - - 19.4 - - - - - Type of degree-granting institution - - - - - - - - - - - - - - - - - - - - - - - - Public 4-year - - - - - - - - - - - 44.4 - - - - - 31,200 - - - - - 25.5 - - - - - 10.2 - - - - - 26.9 - - - - - 19.3 - - - - - 18.1 - - - - - Non-doctorate-granting - - - - - - - - - - - - - - - - - - - 40.0 - - - - - 25,000 - - - - - 27.3 - - - - - 11.3 - - - - - 32.4 - - - - - 18.3 - - - - - 10.7 - - - - - Doctorate-granting - - - - - - - - - - - - - - - - 46.5 - - - - - 33,900 - - - - - 24.7 - - - - - 9.7 - - - - - - 24.6 - - - - - 19.8 - - - - - 21.2 - - - - - Private not-for-profit 4-year - - - - - - - - - - - - - - - - - - - - 45.9 - - - - - 36,600 - - - - - 20.3 - - - - - 10.3 - - - - - 25.7 - - - - - 21.8 - - - - - 21.8 - - - - - Non-doctorate-granting - - - - - - - - - - - - - - - - - - - 46.1 - - - - - 31,300 - - - - - 24.8 - - - - - 8.9 - - - - - - 27.5 - - - - - 21.6 - - - - - 17.1 - - - - - Doctorate-granting - - - - - - - - - - - - - - - - 45.7 - - - - - 42,300 - - - - - 15.5 - - - - - 11.8 - - - - - 23.8 - - - - - 22.1 - - - - - 26.8 - - - - - Other - - - - - 
- - - - 41.3 - - - - - 31,900 - - - - - 11.2 - - - - - 11.2 - - - - - 43.2 - - - - - 15.7 - - - - - 18.8 - - - - - Undergraduate major - - - - - - - - - - - - - Business and management - - - - - - - - - - - - - - - - - - - 34.3 - - - - - 28,700 - - - - - 28.1 - - - - - 7.1 - - - - - - 28.9 - - - - - 19.4 - - - - - 16.5 - - - - - Education - - - - - - - - - 34.3 - - - - - 23,000 - - - - - 37.4 - - - - - 12.5 - - - - - 25.5 - - - - - 12.9 - - - - - 11.8 - - - - - Engineering, mathematics, or science - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 47.1 - - - - - 47,400 - - - - - 19.1 - - - - - 8.7 - - - - - - 19.2 - - - - - 19.7 - - - - - 33.4 - - - - - Humanities or social sciences - - - - - - - - - - - - - 52.8 - - - - - - - - 32,800 - - - - - - - - - - 20.0 - - - - - - - - 8.5 - - - - - - 31.3 - - - - - - - - 21.5 - - - - - - - - 18.7 - - - - - - - - Other - - - - - - - - - 47.4 - - - - - 28,100 - - - - - 21.6 - - - - - 14.3 - - - - - 28.0 - - - - - 22.9 - - - - - 13.2 - - - - - Amount borrowed (undergraduate) - - - - - - - - - - - - - - - - - - - - - - - - - - - Did not borrow - - - - - - - - - - 36.0 - - - - - 36,900 - - - - - 19.3 - - - - - 7.7 - - - - - - 26.5 - - - - - 22.7 - - - - - 23.9 - - - - - Less than $5,000 - - - - - - - - - - - - 45.7 - - - - - 30,300 - - - - - 29.7 - - - - - 9.5 - - - - - - 28.4 - - - - - 17.3 - - - - - 15.2 - - - - - $5,000–9,999 - - - - - 56.3 - - - - - 29,200 - - - - - 28.0 - - - - - 13.0 - - - - - 31.0 - - - - - 12.9 - - - - - 15.1 - - - - - $10,000–14,999 - - - - - 54.7 - - - - - 28,700 - - - - - 25.7 - - - - - 16.1 - - - - - 23.6 - - - - - 19.4 - - - - - 15.3 - - - - - $15,000 or more - - - - - - - 58.4 - - - - - 35,100 - - - - - 18.7 - - - - - 9.9 - - - - - - 25.3 - - - - - 24.9 - - - - - 21.3 - - - - - Highest enrollment after bachelor’s -degree by 2003 - - - - - - - - - - - - - - - - - - - Master’s degree - - - - - - - - - - 37.9 - - - - - 19,900 - - - - - 30.0 - - - - - 13.2 - - - - - 33.5 - - - - - 18.9 - - - - - 
4.5 - - - - - - Doctoral degree - - - - - - - - - - - 63.1 - - - - - 43,700 - - - - - 18.3 - - - - - 7.6 - - - - - - 21.6 - - - - - 21.1 - - - - - 31.5 - - - - - First-professional degree - - - - - - - - - - - - - - - - - - 71.8 - - - - - 69,200 - - - - - 4.6 - - - - - 2.6 - - - - - - 9.4 - - - - - 23.6 - - - - - - 59.9 - - - - - Highest degree earned by 2003 - - - - - - - - - - - - - - - - - Bachelor’s degree - - - - - - - - - - - 32.3 - - - - - 23,300 - - - - - 36.2 - - - - - 11.6 - - - - - 26.6 - - - - - 14.4 - - - - - 11.4 - - - - - Master’s degree - - - - - - - - - - 45.4 - - - - - 22,900 - - - - - 23.9 - - - - - 12.6 - - - - - 34.9 - - - - - 22.7 - - - - - 5.9 - - - - - - Doctoral degree - - - - - - - - - - - 64.3 - - - - - 44,100 - - - - - 21.4 - - - - - 9.6 - - - - - - 14.7 - - - - - 19.1 - - - - - 35.2 - - - - - First-professional degree - - - - - - - - - - - - - - - - - - - 78.8 - - - - - - - 75,500 - - - - - - - 2.0 - - - - - - - 1.1 - - - - - - - 8.2 - - - - - - - 21.6 - - - - - - - 67.1 - - - - - -
- - - - - Percent who borrowed - - - - - - - - - - - Average amount borrowed -(by borrowers) - - - - - - - - - - - - - - - - - - - Student and institutional characteristics - - - - - - - - - - - - - - - - - - Neither - - - - - - - Under- -graduate -only - - - - - - - - - - - - - - - - - Graduate -only - - - - - - - - - - - - - - Both - - - - - - - Under- -graduate -only - - - - - - - - - - - - - - - - - Graduate -only - - - - - - - - - - - - - - Both - - - - - - - Total - - - - - - - - - 31.4 - - - - - 23.7 - - - - - 17.7 - - - - - 27.2 - - - - - $9,600 - - - - - $36,900 - - - - - $41,700 - - - - - Type of degree-granting institution - - - - - - - - - - - - - - - - - - - - - - - - Public 4-year - - - - - - - - - - - 32.6 - - - - - 23.0 - - - - - 19.5 - - - - - 24.9 - - - - - 8,100 - - - - - - 36,500 - - - - - 36,900 - - - - - Non-doctorate-granting - - - - - - - - - - - - - - - - - - - - 34.4 - - - - - 25.6 - - - - - 15.9 - - - - - 24.1 - - - - - 7,800 - - - - - - 29,100 - - - - - 31,900 - - - - - Doctorate-granting - - - - - - - - - - - - - - - - 31.7 - - - - - 21.7 - - - - - 21.3 - - - - - 25.2 - - - - - 8,200 - - - - - - 39,300 - - - - - 39,300 - - - - - Private not-for-profit 4-year - - - - - - - - - - - - - - - - - - - - 29.7 - - - - - 24.2 - - - - - 15.3 - - - - - 30.8 - - - - - 12,500 - - - - - 37,500 - - - - - 48,900 - - - - - Non-doctorate-granting - - - - - - - - - - - - - - - - - - - - 27.5 - - - - - 26.1 - - - - - 14.5 - - - - - 32.0 - - - - - 12,000 - - - - - 31,500 - - - - - 43,200 - - - - - Doctorate-granting - - - - - - - - - - - - - - - - 32.1 - - - - - 22.3 - - - - - 16.1 - - - - - 29.5 - - - - - 13,000 - - - - - 43,200 - - - - - 55,500 - - - - - Other - - - - - - - - - 26.9 - - - - - 31.8 - - - - - 7.1 - - - - - - 34.2 - - - - - - - - - - - - - - - 39,600 - - - - - Undergraduate major - - - - - - - - - - - - - Business and management - - - - - - - - - - - - - - - - - - - 39.6 - - - - - 26.4 - - - - - 13.0 - - - - - 21.1 - - - - - 8,900 - - - - - 
- 33,000 - - - - - 38,800 - - - - - Education - - - - - - - - - 34.6 - - - - - 31.0 - - - - - 12.8 - - - - - 21.7 - - - - - 10,100 - - - - - 28,600 - - - - - 30,800 - - - - - Engineering, mathematics, or science - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 29.4 - - - - - 23.3 - - - - - 19.1 - - - - - 28.2 - - - - - 10,000 - - - - - 47,200 - - - - - 57,400 - - - - - Humanities or social sciences - - - - - - - - - - - - - 28.6 - - - - - - - - 18.4 - - - - - - - - 21.8 - - - - - - - - 31.1 - - - - - - - - 9,600 - - - - - - - - 38,200 - - - - - - - - - - 40,000 - - - - - - - - - - Other - - - - - - - - - 29.3 - - - - - 23.4 - - - - - 17.8 - - - - - 29.5 - - - - - 9,500 - - - - - - 31,000 - - - - - 37,400 - - - - - Amount borrowed (undergraduate) - - - - - - - - - - - - - - - - - - - - - - - - - - - Did not borrow - - - - - - - - - - 64.0 - - - - - - - - - - 36.0 - - - - - - - - - - - - - - - 36,900 - - - - - - - - - - Less than $5,000 - - - - - - - - - - - - - - - - - 54.4 - - - - - - - - - - 45.7 - - - - - 2,700 - - - - - - - - - - - 32,700 - - - - - $5,000–9,999 - - - - - - - - - - 43.7 - - - - - - - - - - 56.3 - - - - - 6,900 - - - - - - - - - - - 36,200 - - - - - $10,000–14,999 - - - - - - - - - - 45.3 - - - - - - - - - - 54.7 - - - - - 11,300 - - - - - - - - - - 40,200 - - - - - $15,000 or more - - - - - - - - - - - - 41.6 - - - - - - - - - - 58.4 - - - - - 22,400 - - - - - - - - - - 57,700 - - - - - Highest enrollment after bachelor’s degree by 2003 - - - - - - - - - - - - - - - - - - - - - - - - - - - - Master’s degree - - - - - - - - - - - 34.3 - - - - - 27.8 - - - - - 13.6 - - - - - 24.3 - - - - - 9,500 - - - - - - 22,600 - - - - - 29,000 - - - - - Doctoral degree - - - - - - - - - - - 23.6 - - - - - 13.2 - - - - - 28.3 - - - - - 34.9 - - - - - 10,400 - - - - - 44,300 - - - - - 54,900 - - - - - First-professional degree - - - - - - - - - - - - - - - - - - 20.4 - - - - - 7.6 - - - - - - 33.6 - - - - - 38.5 - - - - - - - - - - 68,000 - - - - - 
82,000 - - - - - Highest degree earned by 2003 - - - - - - - - - - - - - - - - - Bachelor’s degree - - - - - - - - - - - 35.5 - - - - - 32.0 - - - - - 10.8 - - - - - 21.7 - - - - - 9,500 - - - - - - 25,500 - - - - - 32,500 - - - - - Master’s degree - - - - - - - - - - - 31.8 - - - - - 22.8 - - - - - 17.5 - - - - - 27.9 - - - - - 9,500 - - - - - - 25,200 - - - - - 32,500 - - - - - Doctoral degree - - - - - - - - - - - 23.7 - - - - - 11.8 - - - - - 31.2 - - - - - 33.3 - - - - - - - - - - 46,600 - - - - - 52,900 - - - - - First-professional degree - - - - - - - - - - - - - - - - - - 17.8 - - - - - 3.5 - - - - - - 37.6 - - - - - 41.1 - - - - - - - - - - 73,900 - - - - - 88,900 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002.json deleted file mode 100644 index 1bf5fd29..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002.pdf deleted file mode 100644 index 115c596a..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003-reg.xml deleted file mode 100644 index 088f202a..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003-reg.xml +++ /dev/null @@ -1,76 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003-str.xml deleted file mode 100644 index 3ad4b75a..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003-str.xml +++ /dev/null @@ -1,151 +0,0 @@ - - - - - - - 1994 - - - - - 1997 - - - - - 2003 - - - - - Lowest - - - - - - - - - - $9,594 or less - - - - - - - - $22,400 or less - - - - - - - - $34,000 or less - - - - - - - - Lower middle - - - - - - - - - - - - $9,595–$17,992 - - - - - $22,401–$29,992 - - - - - $34,001–$48,000 - - - - - Upper middle - - - - - - - - - $17,993–$25,771 - - - - - $29,993–$40,888 - - - - - $48,001–$66,900 - - - - - Highest - - - - - - - - - - - Greater than $25,771 - - - - - - - - - - - Greater than $40,888 - - - - - - - - - - - Greater than $66,900 - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003.pdf deleted file mode 100644 index 08e8d69e..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004-reg.xml deleted file mode 100644 index 3876ed3d..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004-reg.xml +++ /dev/null @@ -1,352 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004-str.xml deleted file mode 100644 index 3f977542..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004-str.xml +++ /dev/null @@ -1,707 +0,0 @@ - - - - - - - Loan type - - - - - - - 12/31/2009 - - - - - - - 12/31/2010 - - - - - - - 6/30/2011 - - - - - - - $000's - - - - - - % - - - - - $000's - - - - - - % - - - - - $000's - - - - - - % - - - - - Real estate loans - - - - - - - - - - - - - - - - - 1-4 family residential mortgage - - - - - - - - - - - - - - - - - - - - - 4,151,000 - - - - - - - 25.0 - - - - - - 4,090,000 - - - - - - - - 27.5 - - - - - - 3,925,000 - - - - - - - 24.9 - - - - - - Commercial Mortgage - - - - - - - - - - - - - - - 361,000 - - - - - - 2.2 - - - - - - 331,000 - - - - - - - 2.2 - - - - - - 284,000 - - - - - - 1.8 - - - - - - Multifamily residential (5 or more) - - - - - - - - - - - - - - - - - - - - - - - - - - 380,000 - - - - - - 2.3 - - - - - - 327,000 - - - - - - - 2.2 - - - - - - 327,000 - - - - - - 2.1 - - - - - - Construction Loans - - - - - - - - - - - - - 173,000 - - - - - - 1.0 - - - - - - 148,000 - - - - - - - 1.0 - - - - - - 170,000 - - - - - - 1.1 - - - - - - Commercial & Industrial - - - - - - - - - - - - - - - - - - - - - 555,000 - - - - - - 3.3 - - - - - - 497,000 - - - - - - - 3.3 - - - - - - 438,000 - - - - - - 2.8 - - - - - - Consumer Loans - - - - - - - - - - - 63,000 - - - - - - 0.4 - - - - - - 69,000 - - - - - - - 0.5 - - - - - - 66,000 - - - - - - 0.4 - - - - - - Lease financing receivables - - - - - - - - - - - - - - - - - - - - - - - - - 3,508,000 - - - - - - - 21.1 - - - - - - 3,147,000 - - - - - - - - 21.2 - - - - - - 2,780,000 - - - - - - - 17.7 - - - - - - Other loans - - - - - - - - - - - - Loans to purchase securities - - - - - - - - - - - - - - - - 1,844,000 - - - - - - - 11.1 
- - - - - - 1,148,000 - - - - - - - - 7.7 - - - - - - 2,754,000 - - - - - - - 17.5 - - - - - - Loans to nondepository Fin.Inst. - - - - - - - - - - - - - - - - - - 4,958,000 - - - - - - - 29.9 - - - - - - 4,512,000 - - - - - - - - 30.3 - - - - - - 4,207,000 - - - - - - - 26.7 - - - - - - All other Loans - - - - - - - - - - 611,000 - - - - - - 3.7 - - - - - - 602,000 - - - - - - - 4.0 - - - - - - 799,000 - - - - - - 5.1 - - - - - - Total Gross Loans - - - - - - - - - - - - 16,604,000 - - - - - - - 100.0 - - - - - - 14,871,000 - - - - - - - - 100.0 - - - - - - 15,750,000 - - - - - - - 100.0 - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004.pdf deleted file mode 100644 index 94fe2c6c..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005-reg.xml deleted file mode 100644 index 70712c26..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005-reg.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005-str.xml deleted file mode 100644 index 51ff6a31..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005-str.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - - - - - Income level of individual or geography - - - - - - - % of the area median income - - - - - - Low-income - - - - - - Less than 50 - - - - - Moderate-income - - - - - At least 50 and less than 80 - - - - - Middle-income - - - - - - - At least 80 and less than 120 - - - - - Upper-income - - - - - 120 or more - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005.pdf deleted file mode 100644 index e54c96fe..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006-reg.xml deleted file mode 100644 index 5c55237e..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006-reg.xml +++ /dev/null @@ -1,71 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006-str.xml deleted file mode 100644 index 5c306416..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006-str.xml +++ /dev/null @@ -1,118 +0,0 @@ - - - - - - - Child Race/Ethnicity - - - - - - - - - - - - - - - - - - 3-Year-Old Cohort - - - - - - - - - - - - - - - - 4-Year-Old Cohort - - - - - - - - - - - - - - - - Hispanic - - - - - - - - - - 37.4% - - - - - 51.6% - - - - - Black - - - - - - - - 32.8% - - - - - 17.5% - - - - - White/Other - - - - - - - - - - - - - 29.8% - - - - - 30.8% - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006.pdf deleted file mode 100644 index 4e1ac750..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007-reg.xml deleted file mode 100644 index 634c112e..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007-reg.xml +++ /dev/null @@ -1,1473 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007-str.xml deleted file mode 100644 index 228b5cfa..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007-str.xml +++ /dev/null @@ -1,2419 +0,0 @@ - - - - - - - Measure - - - - - - - Age 4 -(Head Start -Year) - - - - - - - - - - - - - - - - - - - K - - - - - 1 -Grade - - - - - - - - - 3 -Grade - - - - - - - - - Language, Literacy, and Pre-Writing - - - - - - - - - - - - - - - - - - - - - - - - - - Color Identification - - - - - - - - - - - - - - - - - - 0.16 - - - - - NA - - - - - NA - - - - - NA - - - - - Pre-Writing (McCarthy Draw a Design) - - - - - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Emergent Literacy Scale (parent report) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0.31 - - - - - NA - - - - - NA - - - - - NA - - - - - Letter Naming - - - - - - - - - - - - - 0.25 - - - - - NA - - - - - NA - - - - - Test of Phonological Processing (CTOPPP Elision) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - Receptive Vocabulary (PPVT) - - - - - - - - - - - - - - - - - - - - - - - 0.09 - - - - - 0.09 - - - - - Letter-Word Identification (WJIII) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0.22 - - - - - Spelling (WJIII) - - - - - - - - - - - - - - - - - 0.15 - - - - - NA - - - - - Oral Comprehension (WJIII) - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - Pre-Academic Skills (WJIII) - - - - - - - - - - - - - - - - - - - - - - - 0.19 - - - - - NA - - - - - Phonetic Skills/ Word Attack (WJIII) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - Basic Reading (WJIII) - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - Academic Applications (WJIII) - - - - - - - - 
- - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Academic Skills (WJIII) - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Passage Comprehension (WJIII) - - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - ECLS-K Reading - - - - - - - - - - - - - NA - - - - - NA - - - - - 0.11 - - - - - Writing Sample (WJIII) - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Spanish Language - - - - - - - - - - - - - - Receptive Vocabulary (TVIP) - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - Batería WM Identificación de letras y palabras - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Math - - - - - - - One-to-One Counting (Counting Bears) - - - - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Applied Problems (WJIII) - - - - - - - - - - - - - - - - - - - - - - Quantitative Concepts (WJIII) - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - Math Reasoning (WJIII) - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - Calculation (WJIII) - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - School Performance - - - - - - - - - - - - - - - - - School Accomplishments - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - Promotion (parent report) - - - - - - - - - - - - - - - - - - - - - NA - - - - - Language and Literacy Ability - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - Math Ability - - - - - - - - - - - - NA - - - - - Math Skills - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Reading/Language Arts Skills - - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Social Studies and Science Ability - - - - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - -
- - - - - Measure - - - - - - - Age 3 -(Head Start -Year) - - - - - - - - - - - - - - - - - - Age 4 - - - - - - - K - - - - - 1 -Grade - - - - - - - - - 3 -Grade - - - - - - - - - Language, Literacy, and Pre-Writing - - - - - - - - - - - - - - - - - - - - - - - - - - Color Identification - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Pre-Writing (McCarthy Draw a Design) - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0.14 - - - - - NA - - - - - NA - - - - - NA - - - - - Emergent Literacy Scale (parent report) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0.35 - - - - - 0.16 - - - - - NA - - - - - NA - - - - - NA - - - - - Letter Naming - - - - - - - - - - - - - 0.24 - - - - - NA - - - - - NA - - - - - Test of Phonological Processing (CTOPPP Elision) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0.10 - - - - - 0.15 - - - - - NA - - - - - NA - - - - - Receptive Vocabulary (PPVT) - - - - - - - - - - - - - - - - - - - - - - - 0.18 - - - - - Letter-Word Identification (WJIII) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0.26 - - - - - Spelling (WJIII) - - - - - - - - - - - - - - - - - NA - - - - - Oral Comprehension (WJIII) - - - - - - - - - - - - - - - - - - - - - - - 0.08 - - - - - NA - - - - - Pre-Academic Skills (WJIII) - - - - - - - - - - - - - - - - - - - - - - - 0.22 - - - - - NA - - - - - Phonetic Skills/Word Attack (WJIII) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Basic Reading (WJIII) - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Academic Applications (WJIII) - - - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - NA - - - - - Academic Skills (WJIII) - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - NA - - - - - Passage Comprehension (WJIII) - - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA 
- - - - - NA - - - - - ECLS-K Reading - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Writing Sample (WJIII) - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - NA - - - - - Spanish Language - - - - - - - - - - - - - - Receptive Vocabulary (TVIP) - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - Batería WM Identificación de letras y palabras - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0.26 - - - - - Math - - - - - - - One-to-One Counting/Counting Bears - - - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Applied Problems (WJIII) - - - - - - - - - - - - - - - - - - - - - - 0.15 - - - - - Quantitative Concepts (WJIII) - - - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Math Reasoning (WJIII) - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Calculation (WJIII) - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - School Performance - - - - - - - - - - - - - - - - - School Accomplishments - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - Promotion (parent report) - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - -0.11 - - - - - - Language and Literacy Ability - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - Math Ability - - - - - - - - - - - - NA - - - - - NA - - - - - -0.19 - - - - - - Math Skills - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - NA - - - - - Reading/Language Arts Skills - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - - - NA - - - - - Social Studies and Science Ability - - - - - - - - - - - - - - - - - - - - - - - - - - - NA - - - - - NA - - - - - NA - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007.json deleted file mode 100644 index 1bf5fd29..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007.pdf deleted file mode 100644 index c2fa93d8..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008-reg.xml deleted file mode 100644 index e92b85db..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008-reg.xml +++ /dev/null @@ -1,230 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008-str.xml deleted file mode 100644 index 60f6e540..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008-str.xml +++ /dev/null @@ -1,388 +0,0 @@ - - - - - - - Age Cohort - - - - - - - - - - Head Start Group - - - - - - - - - - - - - - - Control Group - - - - - - - - - - - - - Total Sample - - - - - - - - - - - - - 3-year-olds - - - - - - - - - - 1,530 - - - - - 1,029 - - - - - 2,559 - - - - - 4-year-olds - - - - - - - - - - 1,253 - - - - - 855 - - - - - 2,108 - - - - - Total - - - - - - - 2,783 - - - - - 1,884 - - - - - 4,667 - - - -
- - - - - Sample Group - - - - - - - - - - - - Some Year 1 -Head Start -Participation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - No Year 1 -Head Start -Participation - - - - - - - - - - - - - - - - - - - - - - - - - - - - Total - - - - - - - All Randomly Assigned (N=4,667): - - - - - - - - - - - - - - - - - - - - - - - - - - 3-Year-Old Cohort - - - - - - - - - - - - - - - - Head Start Group - - - - - - - - - - - - - - - 85.1% - - - - - 14.9% - - - - - 100% - - - - - Control Group - - - - - - - - - - - - - 17.3% - - - - - 82.7% - - - - - 100% - - - - - 4-Year-Old Cohort - - - - - - - - - - - - - - - - Head Start Group - - - - - - - - - - - - - - - 79.8% - - - - - 20.2% - - - - - 100% - - - - - Control Group - - - - - - - - - - - - - 13.9% - - - - - 86.1% - - - - - 100% - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008.json deleted file mode 100644 index 1bf5fd29..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008.pdf deleted file mode 100644 index e2aaad38..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009-reg.xml deleted file mode 100644 index be2f68d0..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009-reg.xml +++ /dev/null @@ -1,230 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009-str.xml deleted file mode 100644 index d0659680..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009-str.xml +++ /dev/null @@ -1,668 +0,0 @@ - - - - - - - Cost Category - - - - - - - Total Costs -All Funds - - - - - - - - - - - - Less: -Exclusions & -Unallowables - - - - - - - - - - - Indirect -Costs - - - - - - - - Total Direct -Costs - - - - - - - Federal -Program - - - - - - Non-Federal -Programs (3) - - - - - - - - - - Salaries (a) - - - - - 1,314,000 - - - - - 373,250 - - - - - 940,750 - - - - - 141,000 - - - - - 799,750 - - - - - Fringe Benefits (b) - - - - - - - 352,000 - - - - - 99,988 - - - - - 252,012 - - - - - 37,772 - - - - - 214,240 - - - - - Consultant Services - - - - - - - - - 26,000 - - - - - 14,000 - - - - - 12,000 - - - - - 1,800 - - - - - 10,200 - - - - - Staff Travel - - - - - - - 94,000 - - - - - 20,000 - - - - - 74,000 - - - - - 11,100 - - - - - 62,900 - - - - - Bad Debts - - - - - - 10,000 - - - - - 10,000 (1) - - - - - - Office Rent - - - - - - - 170,000 - - - - - 170,000 - - - - - 150,000 - - - - - 22,500 - - - - - 127,500 - - - - - Consumable Supplies - - - - - - - - - - 161,000 - - - - - 11,000 - - - - - 68,000 - - - - - 10,200 - - - - - 57,800 - - - - - Subcontracts - - - - - - - - 175,000 - - - - - 107,000 (2) - - - - - - Purchase, Equipment Lease - - - - - - - - - - 82,000 - - - - - 22,100 (2) - - - - - - 59,900 - - - - - Telephone - - - - - - - - - - - 109,400 - - - - - 55,000 - - - - - 54,400 - - - - - 8,200 - - - - - 46,200 - - - - - Entertainment - - - - - - - 1,800 - - - - - 1,800 (1) - - - - - - Printing & Reproduction - - - - - - - - - - - 48,000 - - - - - 11,000 - - - - - 37,000 - - - - - 5,500 - - - - - 31,500 - - - - - Insurance and Bonding - - - - - - - - - - - 42,000 - - - - - 42,000 - 
- - - - Fundraising - - - - - - - 120,000 - - - - - 120,000 - - - - - 120,000 - - - - - Postage and Delivery - - - - - - - - - - - - 34,000 - - - - - 5,100 - - - - - 28,900 - - - - - 4,300 - - - - - 24,600 - - - - - Depreciation - - - - - - - - 28,800 - - - - - 8,800 - - - - - 20,000 - - - - - 3,000 - - - - - 17,000 - - - - - Allowances - - - - - - - - - - 148,000 - - - - - 148,000 (2) - - - - - - Emergency Assistance - - - - - - - - - - - - 54,000 - - - - - 54,000 (2) - - - - - - Training Materials - - - - - - - - - 82,000 - - - - - 82,000 - - - - - 12,300 - - - - - 69,700 - - - - - Participant Support Costs - - - - - - - - - - - 36,000 - - - - - 36,000 (2) - - - - - - Total Costs - - - - - - - - 3,088,000 - - - - - 378,900 - - - - - 870,038 - - - - - 1,839,062 - - - - - 257,672 - - - - - 1,581,390 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009.pdf deleted file mode 100644 index ea4ab573..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010-reg.xml deleted file mode 100644 index fefe291f..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010-reg.xml +++ /dev/null @@ -1,50 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010-str.xml deleted file mode 100644 index 32f83dfc..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010-str.xml +++ /dev/null @@ -1,164 +0,0 @@ - - - - - - - Launch: -May 21, 2009 - - - - - - 1 Year: -May 21, 2010 - - - - - - FY 2010 -Sept. 30, 2011 - - - - - - Total data sets available - - - - - 47 - - - - - 272,768 - - - - - 389,933 - - - - - Applications and mashups -developed by the public and -government - - - - - - - - - - 0 - - - - - 237 - - - - - 1,079 - - - - - Data set downloads - - - - - - - 0 - - - - - 652,412 - - - - - 2,348,928 - - - - - Nations establishing open data -sites - - - - - - 0 - - - - - 6 - - - - - 21 - - - - - States offering open data sites - - - - - - 0 - - - - - 8 - - - - - 29 - - - - - Cities in North America with -open data sites - - - - - - - - 0 - - - - - 8 - - - - - 11 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010.pdf deleted file mode 100644 index d39e02e5..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a-reg.xml deleted file mode 100644 index 421824ba..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a-reg.xml +++ /dev/null @@ -1,70 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a-str.xml deleted file mode 100644 index fa833e10..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a-str.xml +++ /dev/null @@ -1,226 +0,0 @@ - - - - - - - Program - - - - - Performance.gov - - - - - Federal Risk Authorization and Management Program -(FedRAMP) - - - - - - - Federal Cloud Computing/Data Center Consolidation - - - - - IT Dashboard - - - - - Data.gov - - - - - Challenges Platform - - - - - Citizen Engagement Platform (Apps.gov) - - - - - USASpending.gov* - - - - - Small Business Dashboard* - - - - - - FFATA Subawards Reporting System Assistance* - - - - - - - - - - - Total - - - - - $8M - - - - - $2.88M - - - - - $0.1M - - - - - $1.2M - - - - - $0 - - - - - $0 - - - - - $0.9M - - - - - $1.06M - - - - - $0.44M - - - - - $0.3M - - - - - $1.1M - - - - - Budget - - - - - *Funded by OCSIT, managed by GSA Office of Government-wide Policy - - - - - - - -
- - - - - Program - - - - - Contact Center Services - - - - - Printed Publications Services and Citizen Outreach - - - - - - Web Management and Content (USA.gov & GobiernoUSA.gov) - - - - - - - Center for Excellence in Digital Government (CEDG) - - - - - - Information Technology Services Solutions (ITSS) - - - - - - - Total - - - - - Budget - - - - - $8.6M - - - - - $3.9M - - - - - $6.1M - - - - - $4.6M - - - - - $10.8M - - - - - $34M - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a.json deleted file mode 100644 index 1bf5fd29..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a.pdf deleted file mode 100644 index bd3d80c2..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011b-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011b-reg.xml deleted file mode 100644 index 85c88eca..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011b-reg.xml +++ /dev/null @@ -1,65 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011b-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011b-str.xml deleted file mode 100644 index 650fe066..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011b-str.xml +++ /dev/null @@ -1,217 +0,0 @@ - - - - - - - Program - - - - - Performance.gov - - - - - Federal Risk Authorization and Management Program -(FedRAMP) - - - - - - - Federal Cloud Computing/Data Center Consolidation - - - - - IT Dashboard - - - - - Data.gov - - - - - Challenges Platform - - - - - Citizen Engagement Platform (Apps.gov) - - - - - USASpending.gov* - - - - - Small Business Dashboard* - - - - - - FFATA Subawards Reporting System Assistance* - - - - - - - - - - - Total - - - - - $8M - - - - - $2.88M - - - - - $0.1M - - - - - $1.2M - - - - - $0 - - - - - $0 - - - - - $0.9M - - - - - $1.06M - - - - - $0.44M - - - - - $0.3M - - - - - $1.1M - - - - - Budget - - - -
- - - - - Program - - - - - Contact Center Services - - - - - Printed Publications Services and Citizen Outreach - - - - - - Web Management and Content (USA.gov & GobiernoUSA.gov) - - - - - - - Center for Excellence in Digital Government (CEDG) - - - - - - Information Technology Services Solutions (ITSS) - - - - - - - Total - - - - - Budget - - - - - $8.6M - - - - - $3.9M - - - - - $6.1M - - - - - $4.6M - - - - - $10.8M - - - - - $34M - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012-reg.xml deleted file mode 100644 index dd21ce8b..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012-reg.xml +++ /dev/null @@ -1,311 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012-str.xml deleted file mode 100644 index edfbc0d6..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012-str.xml +++ /dev/null @@ -1,622 +0,0 @@ - - - - - - - AYP Based on 2003–04 Testing - - - - AYP Based on 2005–06 Testing - - - - 5 - - - - Yes - - - - Yes - - - - No - - - - Yes - - - - NC - - - - Yes - - - - na - - - - ND - - - - Yes - - - - No - - - - No - - - - Yes - - - - Yes - - - - Yes - - - - Yes - - - - ~100 - - - - OH - - - - No - - - - OK - - - - na - - - - Yes - - - - Yes - - - - No - - - - Yes - - - - OR - - - - na - - - - Yes - - - - No - - - - Yes - - - - PA - - - - No - - - - Yes - - - - No - - - - na - - - - Yes - - - - na - - - - No - - - - Yes - - - - PR - - - - No - - - - Yes - - - - No - - - - RI - - - - Yes - - - - na - - - - No - - - - Yes - - - - Yes - - - - na - - - - SC - - - - No - - - - No - - - - No - - - - Yes - - - - Yes - - - - 14 - - - - Yes - - - - SD - - - - Yes - - - - TN - - - - No - - - - Yes - - - - No - - - - na - - - - Yes - - - - Yes - - - - TX - - - - Yes - - - - Yes - - - - Yes - - - - UT - - - - Yes - - - - No - - - - No - - - - na - - - - na - - - - Yes - - - - No - - - - VT - - - - Yes - - - - No - - - - Yes - - - - VA - - - - Yes - - - - 53 - - - - Yes - - - - Yes - - - - WA - - - - Yes - - - - Yes - - - - No - - - - Yes - - - - Yes - - - - No - - - - Yes - - - - na - - - - WV - - - - na - - - - Yes - - - - WI - - - - Yes - - - - Yes - - - - No - - - - Yes - - - - No - - - - Yes - - - - WY - - - - No - - - - na - - - - State included scores of -students taking alternate -assessments based on -alternate achievement -standards - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - State granted -exceptions to -districts to -exceed 1%cap - - - - - - - - - - - - - - - - - - 
State included scores of -students taking alternate -assessments based on -alternate achievement -standards - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - State granted -exceptions to -districts to -exceed 1%cap - - - - - - - - - - - - - - - - - - Number of -districts -granted -exceptions - - - - - - - - - - - - - - - - Respondent -unsure - - - - - - Respondent -unsure - - - - - - Respondent -unsure - - - - - - Respondent -unsure - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012.pdf deleted file mode 100644 index f9c4e6be..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013-reg.xml deleted file mode 100644 index 965cea96..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013-reg.xml +++ /dev/null @@ -1,157 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013-str.xml deleted file mode 100644 index 260f5e01..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013-str.xml +++ /dev/null @@ -1,259 +0,0 @@ - - - - - - - General Assessment - - - - - - - Alternate Assessment -Based on Grade-Level -Achievement -Standards - - - - - - - - - - - - - - - - - - - - - - - Alternate Assessment -Based on Modified -Achievement Standards - - - - - - - - - - - - - - - - - - - - - - Alternate Assessment -Based on Alternate -Achievement -Standards - - - - - - - - - - - - - - - - - - - - - Content standards -taught and -assessed - - - - - - - - - Achievement -standards - - - - - - Participating -students - - - - - - Grade level - - - - - - Grade level - - - - - - Grade level - - - - - - Grade level extensions - - - - - - - - Grade level - - - - - - Grade level - - - - - - Modified level - - - - - - - - Alternate level - - - - - - All general education -students, most -students with -disabilities (with or -without -accommodations) - - - - - - - - - - - - - - - - - - - - - - - Students who need -alternate ways to show -mastery of grade-level -content - - - - - - - - - - - - - - - - Students with disabilities -who can make progress -toward, but may not reach, -grade-level achievement -standards in the time -frame covered by their IEP - - - - - - - - - - - - - - - - - - - - - - - - - Students with the most -significant cognitive -disabilities - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013.pdf deleted file mode 100644 index 8f5ea8e7..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014-reg.xml deleted file mode 100644 index a667b844..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014-reg.xml +++ /dev/null @@ -1,200 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014-str.xml deleted file mode 100644 index b64c9130..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014-str.xml +++ /dev/null @@ -1,354 +0,0 @@ - - - - - - - Designation Under State or District Accountability -Initiative - - - - - - - - - - - - - - Low-performing - - - - - - - - - - No special designation - - - - - - - - - - - High-performing - - - - - - - - Other/not sure - - - - - No other system (other than NCLB) - - - - - - - - - - - Schools Identified -Under NCLB -(n = 469) - - - - - - - - - - - - - - - - - - - - Schools Not Identified -Under NCLB -(n = 918) - - - - - - - - - - - - - - - - - - - 34% - - - - - 3% - - - - - 11% - - - - - 33% - - - - - 2% - - - - - 18% - - - - - 14% - - - - - 9% - - - - - 39% - - - - - 37% - - - -
- - - - - Perceived Benefit and Drawback - - - - - - - - - - - - - - - - Gives us a more complete picture of our effectiveness than a single accountability system - - - - - - - - - - - - - - - - - - - - - - - - Results in staff confusion about our targets for student achievement - - - - - - - - - - - - - - - - Reduces community support for public schools - - - - - - - - - - - - - - - - - - Allows us to focus on the goals that are most important to us - - - - - - - - - - - - Helps us make effective decisions about how to improve student achievement - - - - - - - - - - - - - - - - - - - Percent of -Districts -Agreeing -(n = 154) - - - - - - - - - - - - - - Percent of -Schools -Agreeing -(n = 832) - - - - - - - - - - - - - - - 69% - - - - - 65% - - - - - 46% - - - - - 37% - - - - - 23% - - - - - 24% - - - - - 56% - - - - - 52% - - - - - 71% - - - - - 60% - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014.json deleted file mode 100644 index 1bf5fd29..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014.pdf deleted file mode 100644 index 913b9d9a..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015-reg.xml deleted file mode 100644 index 53f41e91..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015-reg.xml +++ /dev/null @@ -1,985 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015-str.xml deleted file mode 100644 index be1d081e..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015-str.xml +++ /dev/null @@ -1,1223 +0,0 @@ - - - - - - - Item Property - - - - - Reason for Change or Deletion - - - - - Clarity or relevance - - - - - - - - Response range - - - - - - - Variability - - - - - Reproducibility - - - - - - Inter-item correlation - - - - - - Ability to detect change - - - - - - Item discrimination - - - - - - Redundancy - - - - - - Recall period - - - - - • Reported as not relevant by a large segment of the target population -• Generates an unacceptably large amount of missing data points -• Generates many questions or requests for clarification from patients as they -complete the PRO instrument -• Patients interpret items and responses in a way that is inconsistent with the -PRO instrument’s conceptual framework - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - • A high percent of patients respond at the floor (response scale’s worst end) -or ceiling (response scale’s optimal end) -• Patients note that none of the response choices applies to them -• Distribution of item responses is highly skewed - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - • All patients give the same answer (i.e., no variance) -• Most patients choose only one response choice -• Differences among patients are not detected when important differences are -known - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - • Unstable scores over time when there is no logical reason for variation from -one assessment to the next - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - • Item highly correlated (redundant) withother items in the same concept of -interest - - - - - - - - - - - - - - - - • Item is not sensitive (i.e., does not change when there is a known change in -the concepts of interest) - - - - - - - - - - - - - - - - - - - - - - • Item is highly correlated with measures of concepts other than the one it is -intended to measure -• Item does not show variability in relation to some known population -characteristics (i.e., severity level, classification of condition, or other known -characteristic) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - • Item duplicates information collected with other items that have equal or -better measurement properties - - - - - - - - - - - - - - - - - - - - - • The population, disease state, or application of the instrument can affect the -appropriateness of the recall period - - - - - - - - - - - - - - - -
- - - - - Measurement -Property - - - - - - - Type - - - - - What Is Assessed? - - - - - - - FDA Review Considerations - - - - - - - - Reliability - - - - - - - Validity - - - - - - Ability to detect -change - - - - - - - - Content validity - - - - - - - - - - - - Construct validity - - - - - - - - - - - - - - Inter-interviewer reliability -(for interviewer-administered -PROs only) - - - - - - - - - - - - - - - - - - Test-retest or intra- -interviewer reliability (for -interviewer-administered -PROs only) - - - - - - - - - - - - - - - - - - - Internal consistency - - - - - - - - - - Stability of scores over time when no change -is expected in the concept of interest - - - - - - - - - - - - - - - - - - - - - - - - - - - • Intraclass correlation coefficient -• Time period of assessment - - - - - - - - - - - - - - • Extent to which items comprising a scale -measure the same concept -• Intercorrelation of items that contribute -to a score -• Internal consistency - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - • Cronbach’s alpha for summary scores -• Item-total correlations - - - - - - - - - - - - - - - - - - - - - - - Agreement among responses when the PRO -is administered by two or more different -interviewers - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - • Interclass correlation coefficient - - - - - - - - Evidence that the instrument measures the -concept of interest including evidence from -qualitative studies that the items and domains -of an instrument are appropriate and -comprehensive relative to its intended -measurement concept, population, and use. -Testing other measurement properties will -not replace or rectify problems with content -validity. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - • Derivation of all items -• Qualitative interview schedule -• Interview or focus group transcripts -• Items derived from the transcripts -• Composition of patients used to develop content -• Cognitive interview transcripts to evaluate patient -understanding - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Evidence that relationships among items, -domains, and concepts conform to a priori -hypotheses concerning logical relationships -that should exist with measures of related -concepts or scores produced in similar or -diverse patient groups - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - • Strength of correlation testing a priori hypotheses -(discriminant and convergent validity) -• Degree to which the PRO instrument can distinguish -among groups hypothesized a priori to be different -(known groups validity) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Evidence that a PRO instrument can identify -differences in scores over time in individuals -or groups (similar to those in the clinical -trials) who have changed with respect to the -measurement concept - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - • Within person change over time -• Effect size statistic - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015.json deleted file mode 100644 index 1bf5fd29..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015.pdf deleted file mode 100644 index 47497d67..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016-reg.xml deleted file mode 100644 index 8bd3b2aa..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016-reg.xml +++ /dev/null @@ -1,393 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016-str.xml deleted file mode 100644 index 02d1e292..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016-str.xml +++ /dev/null @@ -1,475 +0,0 @@ - - - - - - - Type - - - - - Description - - - - - Visual analog -scale (VAS) - - - - - - - Anchored or -categorized -VAS - - - - - - - Likert scale - - - - - Rating scale - - - - - Recording of -events as they -occur - - - - - - - - Pictorial scale - - - - - - Checklist - - - - - A line of fixed length (usually 100 mm) with words that anchor the scale at the extreme ends -and no words describing intermediate positions. Patients are instructed to indicate the place -on the line corresponding to their perceived state. The mark’s position is measured as the -score. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - A VAS that has the addition of one or more intermediate marks positioned along the line with -reference terms assigned to eachmark to help patients identify the locations between the -scale’s ends (e.g., half-way). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - An ordered set of discrete terms or statements from which patients are asked to choose the -response that best describes their state or experience. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - A set of numerical categories from which patients are asked to choose the category that best -describes their state or experience. The ends of rating scales are anchored with words but the -categories are numbered rather than labeled with words. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Specific events are recorded as they occur using an event log that can be included in a patient -diary or other reporting system (e.g., interactive voice response system). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - A set of pictures applied to any of the other response option types. Pictorial scales are often -used in pediatric questionnaires but also have been used for patients with cognitive -impairments and for patients who are otherwise unable to speak or write. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Checklists provide a simple choice between a limited set of options, such asYes, No, and -Don’t know. Some checklists ask patients to place a mark in a space if the statement in the -item is true. Checklists are reviewed for completeness and nonredundancy. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016.pdf deleted file mode 100644 index e0361936..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017-reg.xml deleted file mode 100644 index 569e3a89..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017-reg.xml +++ /dev/null @@ -1,2203 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017-str.xml deleted file mode 100644 index f7e5fde4..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017-str.xml +++ /dev/null @@ -1,5907 +0,0 @@ - - - - - - - Year - - - - - - Total - - - - Public - - - - Private - - - - PK–12 - - - - PK–8 - - - - 9–12 - - - - PK–12 - - - - PK–8 - - - - 9–12 - - - - PK–12 - - - - PK–8 - - - - 9–12 - - - - Actual - - - - 1996 - - - - 51,544 - - - - 37,481 - - - - 14,062 - - - - 45,611 - - - - 32,762 - - - - 12,849 - - - - 5,933 - - - - 4,719 - - - - 1,213 - - - - 1997 - - - - 52,071 - - - - 37,797 - - - - 14,275 - - - - 46,127 - - - - 33,071 - - - - 13,056 - - - - 5,944 - - - - 4,726 - - - - 1,219 - - - - 1998 - - - - 52,526 - - - - 38,091 - - - - 14,435 - - - - 46,539 - - - - 33,344 - - - - 13,195 - - - - 5,988 - - - - 4,747 - - - - 1,240 - - - - 1999 - - - - 52,875 - - - - 38,251 - - - - 14,625 - - - - 46,857 - - - - 33,486 - - - - 13,371 - - - - 6,018 - - - - 4,764 - - - - 1,254 - - - - 2000 - - - - 53,373 - - - - 38,564 - - - - 14,809 - - - - 47,204 - - - - 33,686 - - - - 13,517 - - - - 6,169 - - - - 4,877 - - - - 1,292 - - - - 2001 - - - - 53,992 - - - - 38,929 - - - - 15,063 - - - - 47,672 - - - - 33,936 - - - - 13,736 - - - - 6,320 - - - - 4,993 - - - - 1,327 - - - - 2002 - - - - 54,403 - - - - 39,000 - - - - 15,404 - - - - 48,183 - - - - 34,114 - - - - 14,069 - - - - 6,220 - - - - 4,886 - - - - 1,335 - - - - 2003 - - - - 54,639 - - - - 38,962 - - - - 15,678 - - - - 48,540 - - - - 34,201 - - - - 14,339 - - - - 6,099 - - - - 4,761 - - - - 1,338 - - - - 2004 - - - - 54,882 - - - - 38,908 - - - - 15,974 - - - - 48,795 - - - - 34,178 - - - - 14,618 - - - - 6,087 - - - - 4,731 - - - - 1,356 - - - - 2005 - - - - 55,187 - - - - 38,903 - - - - 16,283 - - - - 49,113 - - - - 34,204 - - - - 14,909 - - - 
- 6,073 - - - - 4,699 - - - - 1,374 - - - - 2006 - - - - 55,307 - - - - 38,838 - - - - 16,469 - - - - 49,316 - - - - 34,235 - - - - 15,081 - - - - 5,991 - - - - 4,604 - - - - 1,388 - - - - 2007 - - - - 55,203 - - - - 38,722 - - - - 16,481 - - - - 49,293 - - - - 34,205 - - - - 15,087 - - - - 5,910 - - - - 4,517 - - - - 1,394 - - - - 2008 - - - - 54,973 - - - - 38,620 - - - - 16,353 - - - - 49,266 - - - - 34,286 - - - - 14,980 - - - - 5,707 - - - - 4,335 - - - - 1,373 - - - - 2009 - - - - 54,862 - - - - 38,569 - - - - 16,293 - - - - 49,373 - - - - 34,418 - - - - 14,955 - - - - 5,488 - - - - 4,151 - - - - 1,338 - - - - 2010 - - - - 54,876 - - - - 38,716 - - - - 16,160 - - - - 49,484 - - - - 34,625 - - - - 14,860 - - - - 5,391 - - - - 4,091 - - - - 1,300 - - - - Projected - - - - 2011 - - - - 54,956 - - - - 38,909 - - - - 16,047 - - - - 49,636 - - - - 34,849 - - - - 14,787 - - - - 5,320 - - - - 4,060 - - - - 1,260 - - - - 2012 - - - - 55,091 - - - - 39,115 - - - - 15,976 - - - - 49,828 - - - - 35,076 - - - - 14,752 - - - - 5,263 - - - - 4,039 - - - - 1,224 - - - - 2013 - - - - 55,288 - - - - 39,334 - - - - 15,954 - - - - 50,067 - - - - 35,301 - - - - 14,766 - - - - 5,221 - - - - 4,033 - - - - 1,188 - - - - 2014 - - - - 55,599 - - - - 39,539 - - - - 16,060 - - - - 50,407 - - - - 35,502 - - - - 14,905 - - - - 5,192 - - - - 4,037 - - - - 1,155 - - - - 2015 - - - - 55,957 - - - - 39,788 - - - - 16,169 - - - - 50,773 - - - - 35,735 - - - - 15,038 - - - - 5,183 - - - - 4,053 - - - - 1,130 - - - - 2016 - - - - 56,330 - - - - 40,114 - - - - 16,217 - - - - 51,146 - - - - 36,029 - - - - 15,116 - - - - 5,185 - - - - 4,085 - - - - 1,100 - - - - 2017 - - - - 56,722 - - - - 40,451 - - - - 16,271 - - - - 51,524 - - - - 36,329 - - - - 15,195 - - - - 5,198 - - - - 4,122 - - - - 1,076 - - - - 2018 - - - - 57,098 - - - - 40,797 - - - - 16,301 - - - - 51,880 - - - - 36,639 - - - - 15,241 - - - - 5,218 - - - - 4,158 - - - - 1,061 - - - - 2019 - - - - 57,507 - - - - 41,149 - - - - 16,358 - 
- - - 52,260 - - - - 36,956 - - - - 15,304 - - - - 5,247 - - - - 4,193 - - - - 1,054 - - - - 2020 - - - - 57,975 - - - - 41,506 - - - - 16,469 - - - - 52,688 - - - - 37,278 - - - - 15,410 - - - - 5,287 - - - - 4,228 - - - - 1,059 - - - - 2021 - - - - 58,444 - - - - 41,861 - - - - 16,583 - - - - 53,113 - - - - 37,598 - - - - 15,515 - - - - 5,331 - - - - 4,263 - - - - 1,068 - - -
- - - - - Year - - - - - - Total - - - - Grade - - - - PK - - - - K - - - - 1 - - - - 2 - - - - 3 - - - - 4 - - - - 5 - - - - 6 - - - - Actual - - - - 1996 - - - - 45,611 - - - - 670 - - - - 3,532 - - - - 3,770 - - - - 3,600 - - - - 3,524 - - - - 3,454 - - - - 3,453 - - - - 3,494 - - - - 1997 - - - - 46,127 - - - - 695 - - - - 3,503 - - - - 3,755 - - - - 3,689 - - - - 3,597 - - - - 3,507 - - - - 3,458 - - - - 3,492 - - - - 1998 - - - - 46,539 - - - - 729 - - - - 3,443 - - - - 3,727 - - - - 3,681 - - - - 3,696 - - - - 3,592 - - - - 3,520 - - - - 3,497 - - - - 1999 - - - - 46,857 - - - - 751 - - - - 3,397 - - - - 3,684 - - - - 3,656 - - - - 3,691 - - - - 3,686 - - - - 3,604 - - - - 3,564 - - - - 2000 - - - - 47,204 - - - - 776 - - - - 3,382 - - - - 3,636 - - - - 3,634 - - - - 3,676 - - - - 3,711 - - - - 3,707 - - - - 3,663 - - - - 2001 - - - - 47,672 - - - - 865 - - - - 3,379 - - - - 3,614 - - - - 3,593 - - - - 3,653 - - - - 3,695 - - - - 3,727 - - - - 3,769 - - - - 2002 - - - - 48,183 - - - - 915 - - - - 3,434 - - - - 3,594 - - - - 3,565 - - - - 3,623 - - - - 3,669 - - - - 3,711 - - - - 3,788 - - - - 2003 - - - - 48,540 - - - - 950 - - - - 3,503 - - - - 3,613 - - - - 3,544 - - - - 3,611 - - - - 3,619 - - - - 3,685 - - - - 3,772 - - - - 2004 - - - - 48,795 - - - - 990 - - - - 3,544 - - - - 3,663 - - - - 3,560 - - - - 3,580 - - - - 3,612 - - - - 3,635 - - - - 3,735 - - - - 2005 - - - - 49,113 - - - - 1,036 - - - - 3,619 - - - - 3,691 - - - - 3,606 - - - - 3,586 - - - - 3,578 - - - - 3,633 - - - - 3,670 - - - - 2006 - - - - 49,316 - - - - 1,084 - - - - 3,631 - - - - 3,751 - - - - 3,641 - - - - 3,627 - - - - 3,586 - - - - 3,602 - - - - 3,660 - - - - 2007 - - - - 49,293 - - - - 1,081 - - - - 3,609 - - - - 3,750 - - - - 3,704 - - - - 3,659 - - - - 3,624 - - - - 3,600 - - - - 3,628 - - - - 2008 - - - - 49,266 - - - - 1,180 - - - - 3,640 - - - - 3,708 - - - - 3,699 - - - - 3,708 - - - - 3,647 - - - - 3,629 - - - - 3,614 - - - - 2009 - - - - 49,373 - - - - 1,224 - - - - 
3,679 - - - - 3,730 - - - - 3,666 - - - - 3,708 - - - - 3,701 - - - - 3,653 - - - - 3,645 - - - - 2010 - - - - 49,484 - - - - 1,279 - - - - 3,682 - - - - 3,754 - - - - 3,701 - - - - 3,686 - - - - 3,711 - - - - 3,718 - - - - 3,682 - - - - Projected - - - - 2011 - - - - 49,636 - - - - 1,287 - - - - 3,705 - - - - 3,779 - - - - 3,724 - - - - 3,717 - - - - 3,690 - - - - 3,725 - - - - 3,744 - - - - 2012 - - - - 49,828 - - - - 1,296 - - - - 3,733 - - - - 3,804 - - - - 3,749 - - - - 3,741 - - - - 3,721 - - - - 3,703 - - - - 3,752 - - - - 2013 - - - - 50,067 - - - - 1,307 - - - - 3,765 - - - - 3,832 - - - - 3,773 - - - - 3,766 - - - - 3,744 - - - - 3,735 - - - - 3,730 - - - - 2014 - - - - 50,407 - - - - 1,320 - - - - 3,801 - - - - 3,865 - - - - 3,802 - - - - 3,790 - - - - 3,770 - - - - 3,758 - - - - 3,761 - - - - 2015 - - - - 50,773 - - - - 1,333 - - - - 3,838 - - - - 3,902 - - - - 3,835 - - - - 3,819 - - - - 3,794 - - - - 3,784 - - - - 3,785 - - - - 2016 - - - - 51,146 - - - - 1,346 - - - - 3,876 - - - - 3,941 - - - - 3,871 - - - - 3,852 - - - - 3,822 - - - - 3,808 - - - - 3,811 - - - - 2017 - - - - 51,524 - - - - 1,359 - - - - 3,912 - - - - 3,980 - - - - 3,910 - - - - 3,888 - - - - 3,855 - - - - 3,837 - - - - 3,835 - - - - 2018 - - - - 51,880 - - - - 1,370 - - - - 3,946 - - - - 4,017 - - - - 3,948 - - - - 3,927 - - - - 3,892 - - - - 3,870 - - - - 3,864 - - - - 2019 - - - - 52,260 - - - - 1,381 - - - - 3,977 - - - - 4,052 - - - - 3,985 - - - - 3,966 - - - - 3,931 - - - - 3,907 - - - - 3,897 - - - - 2020 - - - - 52,688 - - - - 1,391 - - - - 4,006 - - - - 4,085 - - - - 4,020 - - - - 4,003 - - - - 3,970 - - - - 3,946 - - - - 3,935 - - - - 2021 - - - - 53,113 - - - - 1,400 - - - - 4,033 - - - - 4,115 - - - - 4,052 - - - - 4,038 - - - - 4,007 - - - - 3,985 - - - - 3,974 - - -
- - - - - Year - - - - - - Grade - - - - 7 - - - - 8 - - - - 9 - - - - 10 - - - - 11 - - - - 12 - - - - Elementary -ungraded - - - - - - Secondary -ungraded - - - - - - Actual - - - - 1996 - - - - 3,464 - - - - 3,403 - - - - 3,801 - - - - 3,323 - - - - 2,930 - - - - 2,586 - - - - 399 - - - - 208 - - - - 1997 - - - - 3,520 - - - - 3,415 - - - - 3,819 - - - - 3,376 - - - - 2,972 - - - - 2,673 - - - - 440 - - - - 216 - - - - 1998 - - - - 3,530 - - - - 3,480 - - - - 3,856 - - - - 3,382 - - - - 3,021 - - - - 2,722 - - - - 449 - - - - 214 - - - - 1999 - - - - 3,541 - - - - 3,497 - - - - 3,935 - - - - 3,415 - - - - 3,034 - - - - 2,782 - - - - 415 - - - - 205 - - - - 2000 - - - - 3,629 - - - - 3,538 - - - - 3,963 - - - - 3,491 - - - - 3,083 - - - - 2,803 - - - - 334 - - - - 177 - - - - 2001 - - - - 3,720 - - - - 3,616 - - - - 4,012 - - - - 3,528 - - - - 3,174 - - - - 2,863 - - - - 304 - - - - 159 - - - - 2002 - - - - 3,821 - - - - 3,709 - - - - 4,105 - - - - 3,584 - - - - 3,229 - - - - 2,990 - - - - 285 - - - - 161 - - - - 2003 - - - - 3,841 - - - - 3,809 - - - - 4,190 - - - - 3,675 - - - - 3,277 - - - - 3,046 - - - - 255 - - - - 150 - - - - 2004 - - - - 3,818 - - - - 3,825 - - - - 4,281 - - - - 3,750 - - - - 3,369 - - - - 3,094 - - - - 215 - - - - 122 - - - - 2005 - - - - 3,777 - - - - 3,802 - - - - 4,287 - - - - 3,866 - - - - 3,454 - - - - 3,180 - - - - 205 - - - - 121 - - - - 2006 - - - - 3,716 - - - - 3,766 - - - - 4,260 - - - - 3,882 - - - - 3,551 - - - - 3,277 - - - - 170 - - - - 110 - - - - 2007 - - - - 3,701 - - - - 3,709 - - - - 4,200 - - - - 3,863 - - - - 3,558 - - - - 3,375 - - - - 139 - - - - 92 - - - - 2008 - - - - 3,653 - - - - 3,692 - - - - 4,123 - - - - 3,822 - - - - 3,548 - - - - 3,400 - - - - 117 - - - - 87 - - - - 2009 - - - - 3,642 - - - - 3,652 - - - - 4,081 - - - - 3,810 - - - - 3,542 - - - - 3,433 - - - - 119 - - - - 90 - - - - 2010 - - - - 3,676 - - - - 3,659 - - - - 4,008 - - - - 3,800 - - - - 3,538 - - - - 3,472 - - - - 77 - - - - 42 - - - - 
Projected - - - - 2011 - - - - 3,713 - - - - 3,687 - - - - 4,016 - - - - 3,732 - - - - 3,529 - - - - 3,469 - - - - 77 - - - - 41 - - - - 2012 - - - - 3,775 - - - - 3,724 - - - - 4,046 - - - - 3,740 - - - - 3,466 - - - - 3,459 - - - - 78 - - - - 41 - - - - 2013 - - - - 3,783 - - - - 3,786 - - - - 4,087 - - - - 3,768 - - - - 3,473 - - - - 3,397 - - - - 79 - - - - 41 - - - - 2014 - - - - 3,761 - - - - 3,794 - - - - 4,155 - - - - 3,805 - - - - 3,499 - - - - 3,404 - - - - 80 - - - - 41 - - - - 2015 - - - - 3,793 - - - - 3,772 - - - - 4,164 - - - - 3,869 - - - - 3,534 - - - - 3,430 - - - - 81 - - - - 41 - - - - 2016 - - - - 3,817 - - - - 3,804 - - - - 4,139 - - - - 3,877 - - - - 3,593 - - - - 3,464 - - - - 81 - - - - 42 - - - - 2017 - - - - 3,843 - - - - 3,828 - - - - 4,175 - - - - 3,855 - - - - 3,601 - - - - 3,522 - - - - 82 - - - - 42 - - - - 2018 - - - - 3,868 - - - - 3,854 - - - - 4,201 - - - - 3,887 - - - - 3,580 - - - - 3,530 - - - - 83 - - - - 43 - - - - 2019 - - - - 3,897 - - - - 3,879 - - - - 4,229 - - - - 3,912 - - - - 3,610 - - - - 3,509 - - - - 84 - - - - 43 - - - - 2020 - - - - 3,930 - - - - 3,908 - - - - 4,257 - - - - 3,938 - - - - 3,633 - - - - 3,539 - - - - 84 - - - - 44 - - - - 2021 - - - - 3,968 - - - - 3,941 - - - - 4,288 - - - - 3,964 - - - - 3,658 - - - - 3,561 - - - - 85 - - - - 44 - - -
- - - - - Race/ethnicity - - - - Year - - - - - - Total - - - - White - - - - Black - - - - Hispanic - - - - Asian/ -Pacific -Islander - - - - - - - - American -Indian/ -Alaska Native - - - - - - - Two or -more -races - - - - - - - - - - Actual - - - - 1996 - - - - 45,611 - - - - 29,217 - - - - 7,707 - - - - 6,429 - - - - 1,731 - - - - 527 - - - - - - - - 1997 - - - - 46,127 - - - - 29,241 - - - - 7,851 - - - - 6,705 - - - - 1,796 - - - - 535 - - - - - - - - 1998 - - - - 46,539 - - - - 29,217 - - - - 7,935 - - - - 7,007 - - - - 1,846 - - - - 534 - - - - - - - - 1999 - - - - 46,857 - - - - 29,032 - - - - 8,054 - - - - 7,337 - - - - 1,892 - - - - 542 - - - - - - - - 2000 - - - - 47,204 - - - - 28,873 - - - - 8,099 - - - - 7,733 - - - - 1,949 - - - - 550 - - - - - - - - 2001 - - - - 47,672 - - - - 28,731 - - - - 8,176 - - - - 8,175 - - - - 2,026 - - - - 563 - - - - - - - - 2002 - - - - 48,183 - - - - 28,614 - - - - 8,297 - - - - 8,601 - - - - 2,088 - - - - 583 - - - - - - - - 2003 - - - - 48,540 - - - - 28,438 - - - - 8,347 - - - - 9,018 - - - - 2,144 - - - - 593 - - - - - - - - 2004 - - - - 48,795 - - - - 28,186 - - - - 8,400 - - - - 9,415 - - - - 2,204 - - - - 591 - - - - - - - - 2005 - - - - 49,113 - - - - 28,001 - - - - 8,443 - - - - 9,794 - - - - 2,278 - - - - 598 - - - - - - - - 2006 - - - - 49,316 - - - - 27,797 - - - - 8,421 - - - - 10,171 - - - - 2,331 - - - - 595 - - - - - - - - 2007 - - - - 49,293 - - - - 27,454 - - - - 8,392 - - - - 10,457 - - - - 2,396 - - - - 594 - - - - - - - - 2008 - - - - 49,266 - - - - 27,057 - - - - 8,358 - - - - 10,569 - - - - 2,449 - - - - 589 - - - - 244 - - - - 2009 - - - - 49,373 - - - - 26,753 - - - - 8,282 - - - - 10,918 - - - - 2,492 - - - - 593 - - - - 335 - - - - 2010 - - - - 49,484 - - - - 25,932 - - - - 7,916 - - - - 11,444 - - - - 2,466 - - - - 566 - - - - 1,161 - - - - Projected - - - - 2011 - - - - 49,636 - - - - 25,755 - - - - 7,902 - - - - 11,673 - - - - 2,535 - - - - 573 - - - - 1,197 - - - - 2012 - - - - 49,828 - 
- - - 25,627 - - - - 7,877 - - - - 11,920 - - - - 2,595 - - - - 576 - - - - 1,233 - - - - 2013 - - - - 50,067 - - - - 25,525 - - - - 7,876 - - - - 12,161 - - - - 2,653 - - - - 582 - - - - 1,270 - - - - 2014 - - - - 50,407 - - - - 25,455 - - - - 7,913 - - - - 12,427 - - - - 2,716 - - - - 590 - - - - 1,306 - - - - 2015 - - - - 50,773 - - - - 25,417 - - - - 7,949 - - - - 12,688 - - - - 2,777 - - - - 600 - - - - 1,344 - - - - 2016 - - - - 51,146 - - - - 25,379 - - - - 7,991 - - - - 12,945 - - - - 2,842 - - - - 608 - - - - 1,380 - - - - 2017 - - - - 51,524 - - - - 25,350 - - - - 8,035 - - - - 13,200 - - - - 2,906 - - - - 618 - - - - 1,416 - - - - 2018 - - - - 51,880 - - - - 25,320 - - - - 8,077 - - - - 13,445 - - - - 2,959 - - - - 627 - - - - 1,452 - - - - 2019 - - - - 52,260 - - - - 25,311 - - - - 8,135 - - - - 13,676 - - - - 3,015 - - - - 638 - - - - 1,486 - - - - 2020 - - - - 52,688 - - - - 25,329 - - - - 8,200 - - - - 13,921 - - - - 3,070 - - - - 648 - - - - 1,521 - - - - 2021 - - - - 53,113 - - - - 25,338 - - - - 8,273 - - - - 14,170 - - - - 3,117 - - - - 658 - - - - 1,557 - - -
- - - - - Race/ethnicity - - - - Year - - - - - - Total - - - - White - - - - Black - - - - Hispanic - - - - Asian/ -Pacific -Islander - - - - - - - - American -Indian/ -Alaska Native - - - - - - - Two or -more -races - - - - - - - - - - Actual - - - - 1996 - - - - 32,762 - - - - 20,687 - - - - 5,664 - - - - 4,821 - - - - 1,204 - - - - 386 - - - - - - - - 1997 - - - - 33,071 - - - - 20,625 - - - - 5,782 - - - - 5,030 - - - - 1,244 - - - - 390 - - - - - - - - 1998 - - - - 33,344 - - - - 20,548 - - - - 5,861 - - - - 5,274 - - - - 1,275 - - - - 386 - - - - - - - - 1999 - - - - 33,486 - - - - 20,313 - - - - 5,948 - - - - 5,529 - - - - 1,305 - - - - 391 - - - - - - - - 2000 - - - - 33,686 - - - - 20,123 - - - - 5,980 - - - - 5,838 - - - - 1,348 - - - - 397 - - - - - - - - 2001 - - - - 33,936 - - - - 19,954 - - - - 6,002 - - - - 6,167 - - - - 1,408 - - - - 405 - - - - - - - - 2002 - - - - 34,114 - - - - 19,760 - - - - 6,040 - - - - 6,453 - - - - 1,446 - - - - 415 - - - - - - - - 2003 - - - - 34,201 - - - - 19,554 - - - - 6,013 - - - - 6,736 - - - - 1,482 - - - - 415 - - - - - - - - 2004 - - - - 34,178 - - - - 19,266 - - - - 5,992 - - - - 6,988 - - - - 1,519 - - - - 413 - - - - - - - - 2005 - - - - 34,204 - - - - 19,047 - - - - 5,953 - - - - 7,223 - - - - 1,569 - - - - 412 - - - - - - - - 2006 - - - - 34,235 - - - - 18,859 - - - - 5,880 - - - - 7,470 - - - - 1,611 - - - - 414 - - - - - - - - 2007 - - - - 34,205 - - - - 18,678 - - - - 5,821 - - - - 7,636 - - - - 1,660 - - - - 412 - - - - - - - - 2008 - - - - 34,286 - - - - 18,500 - - - - 5,793 - - - - 7,695 - - - - 1,703 - - - - 410 - - - - 185 - - - - 2009 - - - - 34,418 - - - - 18,352 - - - - 5,742 - - - - 7,924 - - - - 1,736 - - - - 414 - - - - 251 - - - - 2010 - - - - 34,625 - - - - 17,823 - - - - 5,494 - - - - 8,319 - - - - 1,710 - - - - 394 - - - - 884 - - - - Projected - - - - 2011 - - - - 34,849 - - - - 17,773 - - - - 5,530 - - - - 8,461 - - - - 1,768 - - - - 405 - - - - 911 - - - - 2012 - - - - 35,076 - - - - 
17,726 - - - - 5,548 - - - - 8,633 - - - - 1,820 - - - - 411 - - - - 938 - - - - 2013 - - - - 35,301 - - - - 17,681 - - - - 5,571 - - - - 8,800 - - - - 1,871 - - - - 417 - - - - 963 - - - - 2014 - - - - 35,502 - - - - 17,634 - - - - 5,591 - - - - 8,954 - - - - 1,911 - - - - 424 - - - - 988 - - - - 2015 - - - - 35,735 - - - - 17,613 - - - - 5,625 - - - - 9,099 - - - - 1,955 - - - - 431 - - - - 1,012 - - - - 2016 - - - - 36,029 - - - - 17,626 - - - - 5,677 - - - - 9,254 - - - - 1,997 - - - - 440 - - - - 1,037 - - - - 2017 - - - - 36,329 - - - - 17,637 - - - - 5,744 - - - - 9,405 - - - - 2,032 - - - - 448 - - - - 1,062 - - - - 2018 - - - - 36,639 - - - - 17,657 - - - - 5,817 - - - - 9,556 - - - - 2,064 - - - - 457 - - - - 1,088 - - - - 2019 - - - - 36,956 - - - - 17,671 - - - - 5,869 - - - - 9,751 - - - - 2,088 - - - - 464 - - - - 1,114 - - - - 2020 - - - - 37,278 - - - - 17,688 - - - - 5,917 - - - - 9,952 - - - - 2,111 - - - - 470 - - - - 1,140 - - - - 2021 - - - - 37,598 - - - - 17,706 - - - - 5,958 - - - - 10,158 - - - - 2,134 - - - - 475 - - - - 1,167 - - -
- - - - - Race/ethnicity - - - - - Year - - - - - - Total - - - - White - - - - Black - - - - Hispanic - - - - Asian/ -Pacific -Islander - - - - - - - - American -Indian/ -Alaska Native - - - - - - - Two or -more -races - - - - - - - - - - Actual - - - - 1996 - - - - 12,849 - - - - 8,530 - - - - 2,043 - - - - 1,608 - - - - 526 - - - - 141 - - - - - - - - 1997 - - - - 13,056 - - - - 8,616 - - - - 2,068 - - - - 1,675 - - - - 552 - - - - 145 - - - - - - - - 1998 - - - - 13,195 - - - - 8,670 - - - - 2,073 - - - - 1,732 - - - - 572 - - - - 148 - - - - - - - - 1999 - - - - 13,371 - - - - 8,719 - - - - 2,106 - - - - 1,808 - - - - 587 - - - - 151 - - - - - - - - 2000 - - - - 13,517 - - - - 8,750 - - - - 2,119 - - - - 1,894 - - - - 601 - - - - 153 - - - - - - - - 2001 - - - - 13,736 - - - - 8,777 - - - - 2,173 - - - - 2,008 - - - - 619 - - - - 158 - - - - - - - - 2002 - - - - 14,069 - - - - 8,854 - - - - 2,257 - - - - 2,148 - - - - 642 - - - - 168 - - - - - - - - 2003 - - - - 14,339 - - - - 8,884 - - - - 2,334 - - - - 2,282 - - - - 663 - - - - 177 - - - - - - - - 2004 - - - - 14,618 - - - - 8,920 - - - - 2,408 - - - - 2,427 - - - - 686 - - - - 178 - - - - - - - - 2005 - - - - 14,909 - - - - 8,954 - - - - 2,490 - - - - 2,570 - - - - 709 - - - - 186 - - - - - - - - 2006 - - - - 15,081 - - - - 8,938 - - - - 2,540 - - - - 2,701 - - - - 720 - - - - 181 - - - - - - - - 2007 - - - - 15,087 - - - - 8,776 - - - - 2,571 - - - - 2,821 - - - - 736 - - - - 183 - - - - - - - - 2008 - - - - 14,980 - - - - 8,556 - - - - 2,565 - - - - 2,874 - - - - 746 - - - - 179 - - - - 59 - - - - 2009 - - - - 14,955 - - - - 8,401 - - - - 2,540 - - - - 2,994 - - - - 757 - - - - 179 - - - - 84 - - - - 2010 - - - - 14,860 - - - - 8,109 - - - - 2,422 - - - - 3,125 - - - - 755 - - - - 171 - - - - 277 - - - - Projected - - - - 2011 - - - - 14,787 - - - - 7,982 - - - - 2,373 - - - - 3,212 - - - - 767 - - - - 168 - - - - 286 - - - - 2012 - - - - 14,752 - - - - 7,901 - - - - 2,328 - - - - 3,287 - - - - 775 - - - 
- 165 - - - - 296 - - - - 2013 - - - - 14,766 - - - - 7,844 - - - - 2,305 - - - - 3,361 - - - - 783 - - - - 165 - - - - 307 - - - - 2014 - - - - 14,905 - - - - 7,821 - - - - 2,322 - - - - 3,473 - - - - 805 - - - - 167 - - - - 318 - - - - 2015 - - - - 15,038 - - - - 7,804 - - - - 2,324 - - - - 3,588 - - - - 822 - - - - 168 - - - - 332 - - - - 2016 - - - - 15,116 - - - - 7,753 - - - - 2,314 - - - - 3,691 - - - - 845 - - - - 169 - - - - 344 - - - - 2017 - - - - 15,195 - - - - 7,713 - - - - 2,290 - - - - 3,794 - - - - 874 - - - - 170 - - - - 353 - - - - 2018 - - - - 15,241 - - - - 7,663 - - - - 2,260 - - - - 3,889 - - - - 895 - - - - 170 - - - - 364 - - - - 2019 - - - - 15,304 - - - - 7,640 - - - - 2,266 - - - - 3,925 - - - - 927 - - - - 174 - - - - 372 - - - - 2020 - - - - 15,410 - - - - 7,640 - - - - 2,283 - - - - 3,969 - - - - 958 - - - - 178 - - - - 381 - - - - 2021 - - - - 15,515 - - - - 7,633 - - - - 2,315 - - - - 4,011 - - - - 983 - - - - 182 - - - - 391 - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017.json deleted file mode 100644 index 4bed3d03..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":6,"numCorrectlyDetectedTables":6,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017.pdf deleted file mode 100644 index f1a11958..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018-reg.xml deleted file mode 100644 index c4383d35..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018-reg.xml +++ /dev/null @@ -1,2593 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018-str.xml deleted file mode 100644 index b2a52aaa..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018-str.xml +++ /dev/null @@ -1,8557 +0,0 @@ - - - - - - - Actual - - - - Projected - - - - Region and state - - - - 2003–04 - - - - 2004–05 - - - - 2005–06 - - - - 2006–07 - - - - 2007–08 - - - - 2008–09 - - - - 2009–10 - - - - 2010–11 - - - - 2011–12 - - - - 2012–13 - - - - United States - - - - 2,753,438 - - - - 2,799,250 - - - - 2,815,544 - - - - 2,893,045 - - - - 3,001,337 - - - - 3,039,015 - - - - 3,068,550 - - - - 3,103,540 - - - - 3,100,510 - - - - 3,092,290 - - - - Northeast - - - - 485,670 - - - - 503,528 - - - - 521,015 - - - - 536,697 - - - - 552,289 - - - - 552,973 - - - - 558,750 - - - - 560,500 - - - - 554,690 - - - - 549,890 - - - - Connecticut - - - - 34,573 - - - - 35,515 - - - - 36,222 - - - - 37,541 - - - - 38,419 - - - - 34,968 - - - - 38,120 - - - - 38,450 - - - - 37,100 - - - - 36,460 - - - - Maine - - - - 13,278 - - - - 13,077 - - - - 12,950 - - - - 13,151 - - - - 14,350 - - - - 14,093 - - - - 13,970 - - - - 14,030 - - - - 13,570 - - - - 13,060 - - - - Massachusetts - - - - 58,326 - - - - 59,665 - - - - 61,272 - - - - 63,903 - - - - 65,197 - - - - 65,258 - - - - 64,040 - - - - 63,820 - - - - 64,210 - - - - 63,530 - - - - New Hampshire - - - - 13,309 - - - - 13,775 - - - - 13,988 - - - - 14,452 - - - - 14,982 - - - - 14,757 - - - - 14,830 - - - - 14,300 - - - - 14,310 - - - - 14,060 - - - - New Jersey - - - - 83,826 - - - - 86,502 - - - - 90,049 - - - - 93,013 - - - - 94,994 - - - - 95,085 - - - - 96,510 - - - - 95,200 - - - - 94,280 - - - - 93,880 - - - - New York - - - - 142,526 - - - - 153,203 - - - - 161,817 - - - - 168,333 - - - - 176,310 - - - - 180,917 - - - - 182,880 - - - - 185,930 - - - - 185,910 - - 
- - 187,890 - - - - Pennsylvania - - - - 123,474 - - - - 124,758 - - - - 127,830 - - - - 128,603 - - - - 130,298 - - - - 130,658 - - - - 131,250 - - - - 132,100 - - - - 128,430 - - - - 124,830 - - - - Rhode Island - - - - 9,258 - - - - 9,881 - - - - 10,108 - - - - 10,384 - - - - 10,347 - - - - 10,028 - - - - 10,090 - - - - 9,880 - - - - 10,020 - - - - 9,650 - - - - Vermont - - - - 7,100 - - - - 7,152 - - - - 6,779 - - - - 7,317 - - - - 7,392 - - - - 7,209 - - - - 7,070 - - - - 6,790 - - - - 6,860 - - - - 6,530 - - - - Midwest - - - - 680,178 - - - - 676,786 - - - - 684,049 - - - - 702,987 - - - - 721,220 - - - - 717,536 - - - - 707,880 - - - - 702,540 - - - - 698,810 - - - - 692,640 - - - - Illinois - - - - 124,763 - - - - 123,615 - - - - 126,817 - - - - 130,220 - - - - 135,143 - - - - 131,670 - - - - 132,200 - - - - 132,670 - - - - 136,650 - - - - 135,750 - - - - Indiana - - - - 56,008 - - - - 55,444 - - - - 57,920 - - - - 59,887 - - - - 61,901 - - - - 63,663 - - - - 63,830 - - - - 65,460 - - - - 65,490 - - - - 65,990 - - - - Iowa - - - - 34,339 - - - - 33,547 - - - - 33,693 - - - - 34,127 - - - - 34,573 - - - - 33,926 - - - - 34,580 - - - - 33,710 - - - - 33,500 - - - - 32,580 - - - - Kansas - - - - 30,155 - - - - 30,355 - - - - 29,818 - - - - 30,139 - - - - 30,737 - - - - 30,368 - - - - 31,630 - - - - 31,320 - - - - 31,600 - - - - 31,380 - - - - Michigan - - - - 98,823 - - - - 101,582 - - - - 102,582 - - - - 111,838 - - - - 115,183 - - - - 112,742 - - - - 113,820 - - - - 110,300 - - - - 107,020 - - - - 107,150 - - - - Minnesota - - - - 59,096 - - - - 58,391 - - - - 58,898 - - - - 59,497 - - - - 60,409 - - - - 59,729 - - - - 60,400 - - - - 59,720 - - - - 58,770 - - - - 57,250 - - - - Missouri - - - - 57,983 - - - - 57,841 - - - - 58,417 - - - - 60,275 - - - - 61,717 - - - - 62,969 - - - - 63,640 - - - - 62,470 - - - - 62,310 - - - - 61,860 - - - - Nebraska - - - - 20,309 - - - - 19,940 - - - - 19,764 - - - - 19,873 - - - - 20,035 - - - - 19,501 - - - - 19,640 - - 
- - 19,620 - - - - 19,750 - - - - 19,660 - - - - North Dakota - - - - 7,888 - - - - 7,555 - - - - 7,192 - - - - 7,159 - - - - 6,999 - - - - 7,232 - - - - 7,160 - - - - 7,110 - - - - 7,000 - - - - 6,980 - - - - Ohio - - - - 119,029 - - - - 116,702 - - - - 117,356 - - - - 117,658 - - - - 120,758 - - - - 122,203 - - - - 107,900 - - - - 108,010 - - - - 105,130 - - - - 104,260 - - - - South Dakota - - - - 9,001 - - - - 8,585 - - - - 8,589 - - - - 8,346 - - - - 8,582 - - - - 8,123 - - - - 8,240 - - - - 8,550 - - - - 8,400 - - - - 8,280 - - - - Wisconsin - - - - 62,784 - - - - 63,229 - - - - 63,003 - - - - 63,968 - - - - 65,183 - - - - 65,410 - - - - 64,840 - - - - 63,600 - - - - 63,190 - - - - 61,500 - - - - South - - - - 946,808 - - - - 953,206 - - - - 962,327 - - - - 986,801 - - - - 1,031,773 - - - - 1,068,270 - - - - 1,087,000 - - - - 1,108,150 - - - - 1,111,310 - - - - 1,120,870 - - - - Alabama - - - - 36,464 - - - - 37,453 - - - - 37,918 - - - - 38,912 - - - - 41,346 - - - - 42,082 - - - - 43,110 - - - - 44,520 - - - - 44,570 - - - - 44,400 - - - - Arkansas - - - - 27,181 - - - - 26,621 - - - - 28,790 - - - - 27,166 - - - - 28,725 - - - - 28,057 - - - - 28,650 - - - - 28,440 - - - - 28,520 - - - - 28,170 - - - - Delaware - - - - 6,951 - - - - 6,934 - - - - 7,275 - - - - 7,205 - - - - 7,388 - - - - 7,839 - - - - 8,050 - - - - 8,190 - - - - 8,520 - - - - 8,320 - - - - District of Columbia - - - - 3,031 - - - - 2,781 - - - - 3,150 - - - - 2,944 - - - - 3,352 - - - - 3,517 - - - - 3,150 - - - - 3,260 - - - - 3,250 - - - - 3,200 - - - - Florida - - - - 131,418 - - - - 133,318 - - - - 134,686 - - - - 142,284 - - - - 149,046 - - - - 153,461 - - - - 158,070 - - - - 163,620 - - - - 159,450 - - - - 164,040 - - - - Georgia - - - - 68,550 - - - - 70,834 - - - - 73,498 - - - - 77,829 - - - - 83,505 - - - - 88,003 - - - - 89,730 - - - - 92,160 - - - - 90,130 - - - - 91,980 - - - - Kentucky - - - - 37,787 - - - - 38,399 - - - - 38,449 - - - - 39,099 - - - - 39,339 - - - - 41,851 - 
- - - 41,310 - - - - 41,930 - - - - 40,950 - - - - 40,310 - - - - Louisiana - - - - 37,019 - - - - 36,009 - - - - 33,275 - - - - 34,274 - - - - 34,401 - - - - 35,622 - - - - 34,790 - - - - 34,450 - - - - 34,700 - - - - 35,210 - - - - Maryland - - - - 52,870 - - - - 54,170 - - - - 55,536 - - - - 57,564 - - - - 59,171 - - - - 58,304 - - - - 58,560 - - - - 57,900 - - - - 58,760 - - - - 58,560 - - - - Mississippi - - - - 23,735 - - - - 23,523 - - - - 23,848 - - - - 24,186 - - - - 24,795 - - - - 24,505 - - - - 26,260 - - - - 26,930 - - - - 26,610 - - - - 26,710 - - - - North Carolina - - - - 72,126 - - - - 75,010 - - - - 76,710 - - - - 76,031 - - - - 83,307 - - - - 86,712 - - - - 84,900 - - - - 87,370 - - - - 90,280 - - - - 89,820 - - - - Oklahoma - - - - 36,799 - - - - 36,227 - - - - 36,497 - - - - 37,100 - - - - 37,630 - - - - 37,219 - - - - 38,510 - - - - 38,120 - - - - 38,170 - - - - 37,800 - - - - South Carolina - - - - 33,235 - - - - 33,439 - - - - 34,970 - - - - 35,108 - - - - 35,303 - - - - 39,114 - - - - 39,560 - - - - 39,880 - - - - 40,480 - - - - 39,960 - - - - 46,096 - - - - 47,967 - - - - 50,880 - - - - 54,502 - - - - 57,486 - - - - 60,368 - - - - 61,500 - - - - 62,520 - - - - 61,470 - - - - 60,180 - - - - 244,165 - - - - 239,717 - - - - 240,485 - - - - 241,193 - - - - 252,121 - - - - 264,275 - - - - 273,050 - - - - 279,970 - - - - 285,530 - - - - 292,940 - - - - Virginia - - - - 72,042 - - - - 73,667 - - - - 69,597 - - - - 73,997 - - - - 77,369 - - - - 79,651 - - - - 80,270 - - - - 81,600 - - - - 82,770 - - - - 81,870 - - - - West Virginia - - - - 17,339 - - - - 17,137 - - - - 16,763 - - - - 17,407 - - - - 17,489 - - - - 17,690 - - - - 17,540 - - - - 17,300 - - - - 17,150 - - - - 17,410 - - - - West - - - - 640,782 - - - - 665,730 - - - - 648,153 - - - - 666,560 - - - - 696,055 - - - - 700,236 - - - - 714,920 - - - - 732,350 - - - - 735,710 - - - - 728,890 - - - - Alaska - - - - 7,236 - - - - 6,909 - - - - 7,361 - - - - 7,666 - - - - 7,855 - - - - 8,008 - 
- - - 7,820 - - - - 7,720 - - - - 7,750 - - - - 7,450 - - - - Arizona - - - - 45,508 - - - - 59,498 - - - - 54,091 - - - - 55,954 - - - - 61,667 - - - - 62,374 - - - - 64,800 - - - - 66,490 - - - - 64,670 - - - - 61,830 - - - - California - - - - 343,480 - - - - 355,217 - - - - 343,515 - - - - 356,641 - - - - 374,561 - - - - 372,310 - - - - 375,070 - - - - 386,220 - - - - 390,270 - - - - 387,300 - - - - Colorado - - - - 44,777 - - - - 44,532 - - - - 44,424 - - - - 45,628 - - - - 46,082 - - - - 47,459 - - - - 49,780 - - - - 51,820 - - - - 52,580 - - - - 52,830 - - - - Hawaii - - - - 10,324 - - - - 10,813 - - - - 10,922 - - - - 11,063 - - - - 11,613 - - - - 11,508 - - - - 10,860 - - - - 11,070 - - - - 11,150 - - - - 10,670 - - - - Idaho - - - - 15,547 - - - - 15,768 - - - - 16,096 - - - - 16,242 - - - - 16,567 - - - - 16,807 - - - - 17,280 - - - - 17,390 - - - - 17,150 - - - - 16,840 - - - - Montana - - - - 10,500 - - - - 10,335 - - - - 10,283 - - - - 10,122 - - - - 10,396 - - - - 10,077 - - - - 9,910 - - - - 9,690 - - - - 9,560 - - - - 9,210 - - - - Nevada - - - - 15,201 - - - - 15,740 - - - - 16,455 - - - - 17,149 - - - - 18,815 - - - - 19,904 - - - - 22,190 - - - - 24,990 - - - - 25,790 - - - - 25,680 - - - - New Mexico - - - - 17,892 - - - - 17,353 - - - - 17,822 - - - - 16,131 - - - - 18,264 - - - - 17,931 - - - - 18,660 - - - - 19,080 - - - - 19,080 - - - - 18,840 - - - - Oregon - - - - 32,958 - - - - 32,602 - - - - 32,394 - - - - 33,446 - - - - 34,949 - - - - 35,138 - - - - 35,300 - - - - 35,410 - - - - 34,780 - - - - 34,730 - - - - Utah - - - - 30,252 - - - - 30,253 - - - - 29,050 - - - - 28,276 - - - - 28,167 - - - - 30,463 - - - - 31,280 - - - - 30,340 - - - - 30,590 - - - - 31,330 - - - - Washington - - - - 61,274 - - - - 61,094 - - - - 60,213 - - - - 62,801 - - - - 61,625 - - - - 62,764 - - - - 66,470 - - - - 66,580 - - - - 66,720 - - - - 66,790 - - - - Wyoming - - - - 5,833 - - - - 5,616 - - - - 5,527 - - - - 5,441 - - - - 5,494 - - - - 5,493 - - - - 
5,510 - - - - 5,570 - - - - 5,630 - - - - 5,380 - - - - Tennessee - - - - - - Texas - - - - -
- - - - - Projected—Continued - - - - Region and state - - - - 2013–14 - - - - 2014–15 - - - - 2015–16 - - - - 2016–17 - - - - 2017–18 - - - - 2018–19 - - - - 2019–20 - - - - 2020–21 - - - - 2021–22 - - - - United States - - - - 3,037,040 - - - - 3,043,290 - - - - 3,066,000 - - - - 3,096,730 - - - - 3,148,670 - - - - 3,155,320 - - - - 3,136,780 - - - - 3,163,350 - - - - 3,183,360 - - - - Northeast - - - - 548,290 - - - - 548,450 - - - - 550,630 - - - - 550,980 - - - - 554,700 - - - - 550,820 - - - - 546,620 - - - - 551,920 - - - - 552,550 - - - - Connecticut - - - - 35,540 - - - - 34,960 - - - - 35,170 - - - - 34,730 - - - - 34,410 - - - - 34,030 - - - - 33,190 - - - - 33,910 - - - - 33,110 - - - - Maine - - - - 12,640 - - - - 12,530 - - - - 12,600 - - - - 12,380 - - - - 12,300 - - - - 12,250 - - - - 12,030 - - - - 12,000 - - - - 12,200 - - - - Massachusetts - - - - 62,690 - - - - 62,050 - - - - 63,010 - - - - 62,230 - - - - 62,430 - - - - 62,130 - - - - 61,620 - - - - 62,060 - - - - 62,010 - - - - New Hampshire - - - - 13,860 - - - - 13,710 - - - - 13,530 - - - - 13,270 - - - - 13,170 - - - - 12,930 - - - - 12,880 - - - - 12,640 - - - - 12,640 - - - - New Jersey - - - - 92,220 - - - - 91,260 - - - - 91,330 - - - - 92,080 - - - - 91,940 - - - - 91,460 - - - - 90,390 - - - - 91,400 - - - - 91,170 - - - - New York - - - - 193,480 - - - - 197,730 - - - - 198,150 - - - - 198,570 - - - - 200,720 - - - - 198,170 - - - - 198,050 - - - - 198,800 - - - - 197,120 - - - - Pennsylvania - - - - 122,330 - - - - 120,950 - - - - 121,560 - - - - 123,540 - - - - 125,360 - - - - 124,920 - - - - 123,700 - - - - 126,440 - - - - 129,430 - - - - Rhode Island - - - - 9,460 - - - - 9,150 - - - - 9,270 - - - - 8,270 - - - - 8,590 - - - - 9,150 - - - - 9,040 - - - - 8,960 - - - - 9,200 - - - - Vermont - - - - 6,070 - - - - 6,110 - - - - 6,030 - - - - 5,920 - - - - 5,770 - - - - 5,780 - - - - 5,730 - - - - 5,710 - - - - 5,680 - - - - Midwest - - - - 672,600 - - - - 670,100 - - - - 674,720 - - 
- - 674,480 - - - - 685,020 - - - - 687,300 - - - - 676,690 - - - - 679,440 - - - - 685,520 - - - - Illinois - - - - 130,340 - - - - 129,730 - - - - 130,450 - - - - 129,400 - - - - 132,620 - - - - 133,660 - - - - 132,270 - - - - 132,180 - - - - 131,130 - - - - Indiana - - - - 65,940 - - - - 64,820 - - - - 64,980 - - - - 65,380 - - - - 66,330 - - - - 68,240 - - - - 65,520 - - - - 65,220 - - - - 66,520 - - - - Iowa - - - - 32,310 - - - - 32,350 - - - - 32,670 - - - - 32,850 - - - - 33,240 - - - - 33,040 - - - - 33,030 - - - - 33,290 - - - - 33,540 - - - - Kansas - - - - 31,040 - - - - 30,740 - - - - 31,780 - - - - 32,210 - - - - 32,860 - - - - 33,140 - - - - 33,010 - - - - 33,780 - - - - 33,920 - - - - Michigan - - - - 100,060 - - - - 100,910 - - - - 99,920 - - - - 98,670 - - - - 99,620 - - - - 98,160 - - - - 95,110 - - - - 93,710 - - - - 94,400 - - - - Minnesota - - - - 56,320 - - - - 56,520 - - - - 56,570 - - - - 57,270 - - - - 58,380 - - - - 59,470 - - - - 59,210 - - - - 61,240 - - - - 62,200 - - - - Missouri - - - - 60,340 - - - - 59,930 - - - - 61,740 - - - - 61,110 - - - - 61,970 - - - - 61,540 - - - - 60,860 - - - - 61,270 - - - - 62,210 - - - - Nebraska - - - - 19,500 - - - - 19,490 - - - - 19,710 - - - - 20,010 - - - - 20,660 - - - - 20,970 - - - - 21,280 - - - - 21,570 - - - - 22,280 - - - - North Dakota - - - - 6,980 - - - - 6,930 - - - - 7,050 - - - - 7,010 - - - - 6,700 - - - - 7,110 - - - - 7,130 - - - - 7,570 - - - - 7,950 - - - - Ohio - - - - 101,000 - - - - 100,270 - - - - 101,400 - - - - 101,510 - - - - 102,560 - - - - 102,320 - - - - 100,230 - - - - 99,790 - - - - 99,990 - - - - South Dakota - - - - 8,300 - - - - 8,270 - - - - 8,140 - - - - 8,300 - - - - 8,340 - - - - 8,170 - - - - 8,320 - - - - 8,520 - - - - 8,860 - - - - Wisconsin - - - - 60,460 - - - - 60,160 - - - - 60,310 - - - - 60,770 - - - - 61,760 - - - - 61,480 - - - - 60,740 - - - - 61,310 - - - - 62,540 - - - - South - - - - 1,103,910 - - - - 1,110,100 - - - - 1,126,810 - - - - 
1,146,200 - - - - 1,174,920 - - - - 1,182,420 - - - - 1,176,490 - - - - 1,180,140 - - - - 1,186,400 - - - - Alabama - - - - 42,920 - - - - 43,440 - - - - 43,980 - - - - 44,730 - - - - 45,840 - - - - 45,440 - - - - 44,260 - - - - 44,090 - - - - 44,660 - - - - Arkansas - - - - 28,540 - - - - 28,880 - - - - 28,980 - - - - 29,500 - - - - 29,720 - - - - 30,050 - - - - 29,880 - - - - 29,890 - - - - 29,990 - - - - Delaware - - - - 8,120 - - - - 7,680 - - - - 7,990 - - - - 8,310 - - - - 8,550 - - - - 8,570 - - - - 8,590 - - - - 8,880 - - - - 8,880 - - - - District of Columbia - - - - 2,970 - - - - 2,860 - - - - 2,790 - - - - 2,710 - - - - 2,850 - - - - 2,800 - - - - 2,600 - - - - 2,470 - - - - 2,520 - - - - Florida - - - - 160,580 - - - - 162,940 - - - - 161,020 - - - - 163,780 - - - - 165,240 - - - - 165,980 - - - - 163,090 - - - - 161,340 - - - - 164,090 - - - - Georgia - - - - 92,010 - - - - 91,560 - - - - 94,320 - - - - 95,340 - - - - 97,270 - - - - 98,230 - - - - 96,250 - - - - 95,490 - - - - 96,780 - - - - Kentucky - - - - 38,490 - - - - 38,790 - - - - 39,610 - - - - 40,160 - - - - 41,370 - - - - 41,490 - - - - 40,080 - - - - 40,300 - - - - 40,490 - - - - Louisiana - - - - 35,720 - - - - 33,340 - - - - 35,050 - - - - 35,420 - - - - 37,880 - - - - 36,830 - - - - 36,840 - - - - 36,360 - - - - 36,130 - - - - Maryland - - - - 56,990 - - - - 56,390 - - - - 56,410 - - - - 55,620 - - - - 56,880 - - - - 56,180 - - - - 58,350 - - - - 59,010 - - - - 60,120 - - - - Mississippi - - - - 25,720 - - - - 25,330 - - - - 25,300 - - - - 25,840 - - - - 26,800 - - - - 26,010 - - - - 25,820 - - - - 25,210 - - - - 25,890 - - - - North Carolina - - - - 88,040 - - - - 88,870 - - - - 90,870 - - - - 92,910 - - - - 95,590 - - - - 97,550 - - - - 96,500 - - - - 96,670 - - - - 88,260 - - - - Oklahoma - - - - 37,300 - - - - 37,770 - - - - 39,130 - - - - 39,390 - - - - 40,270 - - - - 40,450 - - - - 40,780 - - - - 41,540 - - - - 41,650 - - - - South Carolina - - - - 39,450 - - - - 39,520 - - - - 
40,350 - - - - 41,180 - - - - 42,880 - - - - 43,000 - - - - 41,930 - - - - 41,930 - - - - 42,690 - - - - 58,600 - - - - 58,920 - - - - 60,110 - - - - 61,570 - - - - 62,040 - - - - 62,400 - - - - 61,940 - - - - 61,870 - - - - 62,340 - - - - 291,830 - - - - 297,630 - - - - 303,120 - - - - 311,510 - - - - 320,960 - - - - 326,770 - - - - 328,560 - - - - 334,040 - - - - 338,920 - - - - Virginia - - - - 79,900 - - - - 79,520 - - - - 80,780 - - - - 81,390 - - - - 83,490 - - - - 83,660 - - - - 83,680 - - - - 84,220 - - - - 85,760 - - - - West Virginia - - - - 16,740 - - - - 16,650 - - - - 17,020 - - - - 16,850 - - - - 17,270 - - - - 17,020 - - - - 17,340 - - - - 16,840 - - - - 17,240 - - - - West - - - - 712,240 - - - - 714,640 - - - - 713,840 - - - - 725,080 - - - - 734,030 - - - - 734,790 - - - - 736,980 - - - - 751,850 - - - - 758,880 - - - - Alaska - - - - 7,390 - - - - 7,380 - - - - 7,370 - - - - 7,670 - - - - 7,710 - - - - 7,750 - - - - 7,690 - - - - 7,900 - - - - 7,990 - - - - Arizona - - - - 59,850 - - - - 58,910 - - - - 59,050 - - - - 61,440 - - - - 62,730 - - - - 63,940 - - - - 64,500 - - - - 66,130 - - - - 66,370 - - - - California - - - - 376,610 - - - - 378,640 - - - - 371,530 - - - - 375,070 - - - - 377,960 - - - - 375,740 - - - - 377,280 - - - - 385,060 - - - - 388,600 - - - - Colorado - - - - 51,330 - - - - 51,790 - - - - 53,030 - - - - 54,300 - - - - 56,210 - - - - 57,010 - - - - 57,590 - - - - 59,290 - - - - 59,110 - - - - Hawaii - - - - 10,530 - - - - 10,300 - - - - 10,390 - - - - 10,250 - - - - 10,640 - - - - 10,030 - - - - 10,500 - - - - 10,620 - - - - 10,700 - - - - Idaho - - - - 17,170 - - - - 16,800 - - - - 17,090 - - - - 17,750 - - - - 17,790 - - - - 17,950 - - - - 17,930 - - - - 17,790 - - - - 17,830 - - - - Montana - - - - 9,220 - - - - 9,040 - - - - 9,060 - - - - 9,140 - - - - 9,040 - - - - 9,270 - - - - 9,260 - - - - 9,360 - - - - 9,580 - - - - Nevada - - - - 24,580 - - - - 24,360 - - - - 25,470 - - - - 25,810 - - - - 26,240 - - - - 26,710 - - 
- - 26,440 - - - - 26,250 - - - - 26,380 - - - - New Mexico - - - - 18,480 - - - - 18,680 - - - - 18,850 - - - - 19,540 - - - - 19,690 - - - - 20,020 - - - - 20,050 - - - - 20,130 - - - - 20,550 - - - - Oregon - - - - 34,490 - - - - 34,210 - - - - 34,960 - - - - 35,180 - - - - 35,300 - - - - 35,220 - - - - 34,690 - - - - 35,190 - - - - 35,360 - - - - Utah - - - - 31,860 - - - - 32,870 - - - - 34,510 - - - - 35,660 - - - - 36,650 - - - - 36,990 - - - - 37,700 - - - - 39,040 - - - - 39,830 - - - - Washington - - - - 65,310 - - - - 66,150 - - - - 66,860 - - - - 67,520 - - - - 68,260 - - - - 68,320 - - - - 67,430 - - - - 68,880 - - - - 70,300 - - - - Wyoming - - - - 5,430 - - - - 5,510 - - - - 5,670 - - - - 5,760 - - - - 5,800 - - - - 5,830 - - - - 5,940 - - - - 6,210 - - - - 6,280 - - - - Tennessee - - - - - - Texas - - - - -
- - - - - Projected - - - - Actual 2003–04 -to 2008–09 - - - - Region and state - - - - 2008–09 to 2015–16 - - - - 2015–16 to 2021–22 - - - - 2008–09 to 2021–22 - - - - United States - - - - 10.4 - - - - 0.9 - - - - 3.8 - - - - 4.7 - - - - Northeast - - - - 13.9 - - - - -0.4 - - - - 0.3 - - - - -0.1 - - - - Connecticut - - - - 1.1 - - - - 0.6 - - - - -5.9 - - - - -5.3 - - - - Maine - - - - 6.1 - - - - -10.6 - - - - -3.2 - - - - -13.4 - - - - Massachusetts - - - - 11.9 - - - - -3.4 - - - - -1.6 - - - - -5.0 - - - - New Hampshire - - - - 10.9 - - - - -8.3 - - - - -6.6 - - - - -14.3 - - - - New Jersey - - - - 13.4 - - - - -3.9 - - - - -0.2 - - - - -4.1 - - - - New York - - - - 26.9 - - - - 9.5 - - - - -0.5 - - - - 9.0 - - - - Pennsylvania - - - - 5.8 - - - - -7.0 - - - - 6.5 - - - - -0.9 - - - - Rhode Island - - - - 8.3 - - - - -7.6 - - - - -0.8 - - - - -8.3 - - - - Vermont - - - - 1.5 - - - - -16.4 - - - - -5.8 - - - - -21.2 - - - - Midwest - - - - 5.5 - - - - -6.0 - - - - 1.6 - - - - -4.5 - - - - Illinois - - - - 5.5 - - - - -0.9 - - - - 0.5 - - - - -0.4 - - - - Indiana - - - - 13.7 - - - - 2.1 - - - - 2.4 - - - - 4.5 - - - - Iowa - - - - -1.2 - - - - -3.7 - - - - 2.7 - - - - -1.1 - - - - Kansas - - - - 0.7 - - - - 4.6 - - - - 6.7 - - - - 11.7 - - - - Michigan - - - - 14.1 - - - - -11.4 - - - - -5.5 - - - - -16.3 - - - - Minnesota - - - - 1.1 - - - - -5.3 - - - - 10.0 - - - - 4.1 - - - - Missouri - - - - 8.6 - - - - -2.0 - - - - 0.8 - - - - -1.2 - - - - Nebraska - - - - -4.0 - - - - 1.1 - - - - 13.0 - - - - 14.3 - - - - North Dakota - - - - -8.3 - - - - -2.5 - - - - 12.8 - - - - 9.9 - - - - Ohio - - - - 2.7 - - - - -17.0 - - - - -1.4 - - - - -18.2 - - - - South Dakota - - - - -9.8 - - - - 0.2 - - - - 8.8 - - - - 9.1 - - - - Wisconsin - - - - 4.2 - - - - -7.8 - - - - 3.7 - - - - -4.4 - - - - South - - - - 12.8 - - - - 5.5 - - - - 5.3 - - - - 11.1 - - - - Alabama - - - - 15.4 - - - - 4.5 - - - - 1.5 - - - - 6.1 - - - - Arkansas - - - - 3.2 - - - - 3.3 - - - - 3.5 - - 
- - 6.9 - - - - Delaware - - - - 12.8 - - - - 1.9 - - - - 11.1 - - - - 13.3 - - - - District of Columbia - - - - 16.0 - - - - -20.7 - - - - -9.7 - - - - -28.3 - - - - Florida - - - - 16.8 - - - - 4.9 - - - - 1.9 - - - - 6.9 - - - - Georgia - - - - 28.4 - - - - 7.2 - - - - 1.9 - - - - 10.0 - - - - Kentucky - - - - 10.8 - - - - -5.4 - - - - 2.2 - - - - -3.3 - - - - Louisiana - - - - -3.8 - - - - -1.6 - - - - 3.1 - - - - 1.4 - - - - Maryland - - - - 10.3 - - - - -3.2 - - - - 6.6 - - - - 3.1 - - - - Mississippi - - - - 3.2 - - - - 3.2 - - - - 2.3 - - - - 5.7 - - - - North Carolina - - - - 20.2 - - - - 4.8 - - - - -2.9 - - - - 1.8 - - - - Oklahoma - - - - 1.1 - - - - 5.1 - - - - 6.4 - - - - 11.9 - - - - South Carolina - - - - 17.7 - - - - 3.2 - - - - 5.8 - - - - 9.1 - - - - 31.0 - - - - -0.4 - - - - 3.7 - - - - 3.3 - - - - 8.2 - - - - 14.7 - - - - 11.8 - - - - 28.2 - - - - Virginia - - - - 10.6 - - - - 1.4 - - - - 6.2 - - - - 7.7 - - - - West Virginia - - - - 2.0 - - - - -3.8 - - - - 1.3 - - - - -2.5 - - - - West - - - - 9.3 - - - - 1.9 - - - - 6.3 - - - - 8.4 - - - - Alaska - - - - 10.7 - - - - -8.0 - - - - 8.4 - - - - -0.2 - - - - Arizona - - - - 37.1 - - - - -5.3 - - - - 12.4 - - - - 6.4 - - - - California - - - - 8.4 - - - - -0.2 - - - - 4.6 - - - - 4.4 - - - - Colorado - - - - 6.0 - - - - 11.7 - - - - 11.5 - - - - 24.5 - - - - Hawaii - - - - 11.5 - - - - -9.7 - - - - 3.0 - - - - -7.0 - - - - Idaho - - - - 8.1 - - - - 1.7 - - - - 4.3 - - - - 6.1 - - - - Montana - - - - -4.0 - - - - -10.1 - - - - 5.7 - - - - -4.9 - - - - Nevada - - - - 30.9 - - - - 28.0 - - - - 3.6 - - - - 32.5 - - - - New Mexico - - - - 0.2 - - - - 5.1 - - - - 9.0 - - - - 14.6 - - - - Oregon - - - - 6.6 - - - - -0.5 - - - - 1.1 - - - - 0.6 - - - - Utah - - - - 0.7 - - - - 13.3 - - - - 15.4 - - - - 30.7 - - - - Washington - - - - 2.4 - - - - 6.5 - - - - 5.1 - - - - 12.0 - - - - Wyoming - - - - -5.8 - - - - 3.2 - - - - 10.8 - - - - 14.3 - - - - Tennessee - - - - - - Texas - - - - -
- - - - - Number of teachers - - - - Number of new teacher hires - - - - Control - - - - Control - - - - Total - - - - Public - - - - Private - - - - Total - - - - Public - - - - Private - - - - Actual - - - - 1996 - - - - 3,051 - - - - 2,667 - - - - 384 - - - - - - - - - - - - - - - - 1997 - - - - 3,138 - - - - 2,746 - - - - 391 - - - - - - - - - - - - - - - - 1998 - - - - 3,230 - - - - 2,830 - - - - 400 - - - - - - - - - - - - - - - - 1999 - - - - 3,319 - - - - 2,911 - - - - 408 - - - - 305 - - - - 222 - - - - 83 - - - - 2000 - - - - 3,366 - - - - 2,941 - - - - 424 - - - - - - - - - - - - - - - - 2001 - - - - 3,440 - - - - 3,000 - - - - 441 - - - - - - - - - - - - - - - - 2002 - - - - 3,476 - - - - 3,034 - - - - 442 - - - - - - - - - - - - - - - - 2003 - - - - 3,490 - - - - 3,049 - - - - 441 - - - - 311 - - - - 236 - - - - 74 - - - - 2004 - - - - 3,536 - - - - 3,091 - - - - 445 - - - - - - - - - - - - - - - - 2005 - - - - 3,593 - - - - 3,143 - - - - 450 - - - - - - - - - - - - - - - - 2006 - - - - 3,619 - - - - 3,166 - - - - - 453 - - - - - - - - - - - - - - - - 2007 - - - - 3,634 - - - - 3,178 - - - - 456 - - - - 327 - - - - 246 - - - - 80 - - - - 2008 - - - - 3,667 - - - - 3,219 - - - - 448 - - - - 386 - - - - 310 - - - - 76 - - - - 2009 - - - - 3,647 - - - - 3,210 - - - - 437 - - - - 358 - - - - 289 - - - - 69 - - - - 2010 - - - - 3,653 - - - - 3,210 - - - - 443 - - - - 385 - - - - 301 - - - - 84 - - - - Projected - - - - 2011 - - - - 3,679 - - - - 3,246 - - - - 432 - - - - 407 - - - - 339 - - - - 69 - - - - 2012 - - - - 3,711 - - - - 3,283 - - - - 428 - - - - 416 - - - - 344 - - - - 72 - - - - 2013 - - - - 3,738 - - - - 3,312 - - - - 426 - - - - 414 - - - - 340 - - - - 74 - - - - 2014 - - - - 3,782 - - - - 3,357 - - - - 425 - - - - 434 - - - - 359 - - - - 75 - - - - 2015 - - - - 3,830 - - - - 3,403 - - - - 427 - - - - 441 - - - - 364 - - - - 78 - - - - 2016 - - - - 3,881 - - - - 3,451 - - - - 430 - - - - 448 - - - - 369 - - - - 79 - - - - 2017 - - - - 3,935 - 
- - - 3,500 - - - - 434 - - - - 454 - - - - 373 - - - - 81 - - - - 2018 - - - - 3,988 - - - - 3,549 - - - - 439 - - - - 459 - - - - 377 - - - - 82 - - - - 2019 - - - - 4,045 - - - - 3,601 - - - - 444 - - - - 467 - - - - 383 - - - - 84 - - - - 2020 - - - - 4,102 - - - - 3,651 - - - - 451 - - - - 472 - - - - 386 - - - - 86 - - - - 2021 - - - - 4,151 - - - - 3,694 - - - - 457 - - - - 470 - - - - 384 - - - - 87 - - - - Year - - - - -
- - - - - Total - - - - Public - - - - Private - - - - Actual - - - - 1996 - - - - 16.9 - - - - 17.1 - - - - 15.5 - - - - 1997 - - - - 16.6 - - - - 16.8 - - - - 15.2 - - - - 1998 - - - - 16.3 - - - - 16.4 - - - - 15.0 - - - - 1999 - - - - 15.9 - - - - 16.1 - - - - 14.7 - - - - 2000 - - - - 15.9 - - - - 16.0 - - - - 14.5 - - - - 2001 - - - - 15.7 - - - - 15.9 - - - - 14.3 - - - - 2002 - - - - 15.7 - - - - 15.9 - - - - 14.1 - - - - 2003 - - - - 15.7 - - - - 15.9 - - - - 13.8 - - - - 2004 - - - - 15.5 - - - - 15.8 - - - - 13.7 - - - - 2005 - - - - 15.4 - - - - 15.6 - - - - 13.5 - - - - 2006 - - - - 15.3 - - - - 15.6 - - - - 13.2 - - - - 2007 - - - - 15.2 - - - - 15.5 - - - - 13.0 - - - - 2008 - - - - 15.0 - - - - 15.3 - - - - 12.8 - - - - 2009 - - - - 15.0 - - - - 15.4 - - - - 12.5 - - - - 2010 - - - - 15.0 - - - - 15.2 - - - - 12.4 - - - - Projected - - - - 2011 - - - - 14.9 - - - - 15.2 - - - - 12.3 - - - - 2012 - - - - 14.8 - - - - 15.2 - - - - 12.3 - - - - 2013 - - - - 14.8 - - - - 15.1 - - - - 12.3 - - - - 2014 - - - - 14.7 - - - - 15.0 - - - - 12.2 - - - - 2015 - - - - 14.6 - - - - 14.9 - - - - 12.1 - - - - 2016 - - - - 14.5 - - - - 14.8 - - - - 12.1 - - - - 2017 - - - - 14.4 - - - - 14.7 - - - - 12.0 - - - - 2018 - - - - 14.3 - - - - 14.6 - - - - 11.9 - - - - 2019 - - - - 14.2 - - - - 14.5 - - - - 11.8 - - - - 2020 - - - - 14.1 - - - - 14.4 - - - - 11.7 - - - - 2021 - - - - 14.1 - - - - 14.4 - - - - 11.7 - - - - Year - - - - -
- - - - - Current expenditures - - - - Constant 2010–11 dollars - - - - Current dollars - - - - Fall enrollment -(In thousands) - - - - Total -(in billions) - - - - Perpupil -in fall enrollment - - - - Total -(in billions) - - - - Perpupil -in fall enrollment - - - - School year - - - - Actual - - - - 1996–97 - - - - 45,611 - - - - $375.9 - - - - $8,242 - - - - $270.2 - - - - $5,923 - - - - 1997–98 - - - - 46,127 - - - - 390.2 - - - - 8,460 - - - - 285.5 - - - - 6,189 - - - - 1998–99 - - - - 46,539 - - - - 407.0 - - - - 8,745 - - - - 302.9 - - - - 6,508 - - - - 1999–2000 - - - - 46,857 - - - - 423.0 - - - - 9,028 - - - - 323.9 - - - - 6,912 - - - - 2000–01 - - - - 47,204 - - - - 440.0 - - - - 9,321 - - - - 348.4 - - - - 7,380 - - - - 2001–02 - - - - 47,672 - - - - 457.1 - - - - 9,589 - - - - 368.4 - - - - 7,727 - - - - 2002–03 - - - - 48,183 - - - - 470.6 - - - - 9,767 - - - - 387.6 - - - - 8,044 - - - - 2003–04 - - - - 48,540 - - - - 479.2 - - - - 9,873 - - - - 403.4 - - - - 8,310 - - - - 2004–05 - - - - 48,795 - - - - 490.2 - - - - 10,047 - - - - 425.0 - - - - 8,711 - - - - 2005–06 - - - - 49,113 - - - - 499.2 - - - - 10,163 - - - - 449.1 - - - - 9,145 - - - - 2006–07 - - - - 49,262 - - - - 516.5 - - - - 10,473 - - - - 476.8 - - - - 9,669 - - - - 2007–08 - - - - 49,221 - - - - 529.4 - - - - 10,741 - - - - 506.9 - - - - 10,283 - - - - 2008–09 - - - - 49,003 - - - - 534.6 - - - - 10,909 - - - - 519.0 - - - - 10,591 - - - - Projected - - - - 2009–10 - - - - 49,373 - - - - 519.3 - - - - 10,518 - - - - 519.3 - - - - 10,518 - - - - 2010–11 - - - - 49,484 - - - - 539.5 - - - - 10,902 - - - - 539.4 - - - - 10,901 - - - - 2011–12 - - - - 49,636 - - - - 541.6 - - - - 10,912 - - - - 556.0 - - - - 11,201 - - - - 2012–13 - - - - 49,828 - - - - 548.7 - - - - 11,012 - - - - 571.4 - - - - 11,467 - - - - 2013–14 - - - - 50,067 - - - - 556.8 - - - - 11,121 - - - - 591.3 - - - - 11,810 - - - - 2014–15 - - - - 50,407 - - - - 571.3 - - - - 11,335 - - - - 619.1 - - - - 12,281 - - - - 
2015–16 - - - - 50,773 - - - - 585.6 - - - - 11,534 - - - - - - - - - - - - 2016–17 - - - - 51,146 - - - - 599.0 - - - - 11,711 - - - - - - - - - - - - 2017–18 - - - - 51,524 - - - - 612.5 - - - - 11,888 - - - - - - - - - - - - 2018–19 - - - - 51,880 - - - - 625.5 - - - - 12,058 - - - - - - - - - - - - 2019–20 - - - - 52,260 - - - - 639.8 - - - - 12,243 - - - - - - - - - - - - 2020–21 - - - - 52,688 - - - - 654.5 - - - - 12,423 - - - - - - - - - - - - 2021–22 - - - - 53,113 - - - - 665.5 - - - - 12,530 - - - - - - - - - - -
- - - - - Current expenditures - - - - Constant 2008–09 dollars - - - - Current dollars - - - - ADA -(In thousands) - - - - Total -(in billions) - - - - Total -(in billions) - - - - School year - - - - Per pupil in ADA - - - - Per pupil in ADA - - - - Actual - - - - 1996–97 - - - - 42,262 - - - - $375.9 - - - - $8,895 - - - - $270.2 - - - - $6,393 - - - - 1997–98 - - - - 42,766 - - - - 390.2 - - - - 9,125 - - - - 285.5 - - - - 6,676 - - - - 1998–99 - - - - 43,187 - - - - 407.0 - - - - 9,423 - - - - 302.9 - - - - 7,013 - - - - 1999–2000 - - - - 43,807 - - - - 423.0 - - - - 9,656 - - - - 323.9 - - - - 7,394 - - - - 2000–01 - - - - 44,076 - - - - 440.0 - - - - 9,982 - - - - 348.4 - - - - 7,904 - - - - 2001–02 - - - - 44,605 - - - - 457.1 - - - - 10,249 - - - - 368.4 - - - - 8,259 - - - - 2002–03 - - - - 45,017 - - - - 470.6 - - - - 10,454 - - - - 387.6 - - - - 8,610 - - - - 2003–04 - - - - 45,326 - - - - 479.2 - - - - 10,573 - - - - 403.4 - - - - 8,900 - - - - 2004–05 - - - - 45,625 - - - - 490.2 - - - - 10,745 - - - - 425.0 - - - - 9,316 - - - - 2005–06 - - - - 45,932 - - - - 499.2 - - - - 10,867 - - - - 449.1 - - - - 9,778 - - - - 2006–07 - - - - 46,133 - - - - 516.5 - - - - 11,196 - - - - 476.8 - - - - 10,336 - - - - 2007–08 - - - - 46,156 - - - - 529.4 - - - - 11,471 - - - - 506.9 - - - - 10,982 - - - - 2008–09 - - - - 46,213 - - - - 534.6 - - - - 11,568 - - - - 519.0 - - - - 11,231 - - - - Projected - - - - 2009–10 - - - - 46,176 - - - - 519.3 - - - - 11,246 - - - - 519.3 - - - - 11,246 - - - - 2010–11 - - - - 46,280 - - - - 539.5 - - - - 11,657 - - - - 539.4 - - - - 11,655 - - - - 2011–12 - - - - 46,422 - - - - 541.6 - - - - 11,668 - - - - 556.0 - - - - 11,976 - - - - 2012–13 - - - - 46,602 - - - - 548.7 - - - - 11,775 - - - - 571.4 - - - - 12,261 - - - - 2013–14 - - - - 46,825 - - - - 556.8 - - - - 11,890 - - - - 591.3 - - - - 12,628 - - - - 2014–15 - - - - 47,143 - - - - 571.3 - - - - 12,119 - - - - 619.1 - - - - 13,132 - - - - 2015–16 - - - - 47,486 - - - - 
585.6 - - - - 12,332 - - - - - - - - - - - - 2016–17 - - - - 47,834 - - - - 599.0 - - - - 12,522 - - - - - - - - - - - - 2017–18 - - - - 48,188 - - - - 612.5 - - - - 12,711 - - - - - - - - - - - - 2018–19 - - - - 48,521 - - - - 625.5 - - - - 12,892 - - - - - - - - - - - - 2019–20 - - - - 48,876 - - - - 639.8 - - - - 13,091 - - - - - - - - - - - - 2020–21 - - - - 49,276 - - - - 654.5 - - - - 13,283 - - - - - - - - - - - - 2021–22 - - - - 49,674 - - - - 665.5 - - - - 13,397 - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018.json deleted file mode 100644 index ed2b5b39..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":7,"numCorrectlyDetectedTables":6,"numErroneouslyDetectedTables":1,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018.pdf deleted file mode 100644 index a154cdf9..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019-reg.xml deleted file mode 100644 index 8a6e9439..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019-reg.xml +++ /dev/null @@ -1,650 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019-str.xml deleted file mode 100644 index 8c1b288a..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019-str.xml +++ /dev/null @@ -1,1535 +0,0 @@ - - - - - - - Variable - - - - Assumption - - - - Demographic assumptions - - - - Projections are consistent with the Census Bureau estimates - - - - Population - - - - 18- to 24-year-old population - - - - Census Bureau projection: average annual growth rate of 0.1% - - - - Census Bureau projection: average annual growth rate of 0.6% - - - - 25- to 29-year-old population - - - - 30- to 34-year-old population - - - - Census Bureau projection: average annual growth rate of 1.3% - - - - Census Bureau projection: average annual growth rate of 0.6% - - - - 35- to 44-year-old population - - - - Economic assumptions - - - - Annual percent changes range between -1.9% and 2.2% -with an annual growth rate of 1.4% - - - - Disposable income per capita in -constant dollars - - - - Annual percent changes range between -2.4% and 2.3% -with an annual growth rate of 1.3% - - - - Education revenue receipts from state -sources per capita in constant dollars - - - - Inflation rate - - - - Inflation rate ranges between 1.0% and 2.0% - - - - Unemployment rate (men) - - - - Remains between 17.7% and 26.8% - - - - Ages 18 and 19 - - - - Ages 20 to 24 - - - - Remains between 10.8% and 15.6% - - - - Age 25 and over - - - - Remains between 5.3% and 7.9% - - - - Unemployment rate (women) - - - - Ages 18 and 19 - - - - Remains between 14.3% and 19.6% - - - - Remains between 9.3% and 13.1% - - - - Ages 20 to 24 - - - - Remains between 5.0% and 7.3% - - - - Age 25 and over - - -
- - - - - Leadtime(years) - - - - Statistic - - - - 1 - - - - 2 - - - - 3 - - - - 4 - - - - 5 - - - - 6 - - - - 7 - - - - 8 - - - - 9 - - - - 10 - - - - Publicelementaryandsecondaryschools - - - - Prekindergarten–12enrollment - - - - 0.3 - - - - 0.6 - - - - 0.8 - - - - 1.1 - - - - 1.3 - - - - 1.4 - - - - 1.6 - - - - 1.9 - - - - 2.3 - - - - 2.6 - - - - Prekindergarten–8enrollment - - - - 0.3 - - - - 0.6 - - - - 1.0 - - - - 1.3 - - - - 1.4 - - - - 1.6 - - - - 1.9 - - - - 2.4 - - - - 2.8 - - - - 3.3 - - - - 9–12enrollment - - - - 0.4 - - - - 0.7 - - - - 0.9 - - - - 1.1 - - - - 1.2 - - - - 1.5 - - - - 1.8 - - - - 2.2 - - - - 2.4 - - - - 2.5 - - - - Highschoolgraduates - - - - 1.0 - - - - 1.0 - - - - 1.5 - - - - 1.6 - - - - 1.5 - - - - 2.0 - - - - 2.7 - - - - 3.7 - - - - 4.3 - - - - 4.3 - - - - Elementaryandsecondaryteachers - - - - 0.8 - - - - 1.4 - - - - 1.7 - - - - 2.2 - - - - 2.8 - - - - 3.4 - - - - 3.9 - - - - 4.3 - - - - 5.0 - - - - 5.9 - - - - Totalcurrentexpenditures - - - - 1.3 - - - - 2.1 - - - - 2.0 - - - - 2.1 - - - - 2.6 - - - - 3.3 - - - - 3.9 - - - - 4.1 - - - - 4.0 - - - - 4.0 - - - - Currentexpendituresperpupilinfallenrollment - - - - 1.3 - - - - 2.1 - - - - 2.0 - - - - 2.0 - - - - 2.9 - - - - 3.6 - - - - 4.3 - - - - 4.6 - - - - 5.2 - - - - 5.2 - - - - Privateelementaryandsecondaryschools - - - - Prekindergarten–12enrollment - - - - 3.4 - - - - 4.6 - - - - 3.7 - - - - 7.2 - - - - 7.7 - - - - 10.6 - - - - 9.3 - - - - 9.4 - - - - 8.1 - - - - 6.3 - - - - Prekindergarten–8enrollment - - - - 3.5 - - - - 4.9 - - - - 4.1 - - - - 8.0 - - - - 9.2 - - - - 12.1 - - - - 10.6 - - - - 10.4 - - - - 10.2 - - - - 7.9 - - - - 9–12enrollment - - - - 3.0 - - - - 3.8 - - - - 2.3 - - - - 4.3 - - - - 2.8 - - - - 5.8 - - - - 5.7 - - - - 6.1 - - - - 1.3 - - - - 1.3 - - - - Highschoolgraduates - - - - 0.9 - - - - 0.9 - - - - 1.6 - - - - 2.8 - - - - 5.0 - - - - 6.2 - - - - 4.9 - - - - 4.8 - - - - 1.6 - - - - 1.6 - - - - Postsecondarydegree-grantinginstitutions - - - - 
Totalenrollment - - - - 1.7 - - - - 2.6 - - - - 3.6 - - - - 4.7 - - - - 5.3 - - - - 6.2 - - - - 7.6 - - - - 9.4 - - - - 11.7 - - - - 13.1 - - - - Men - - - - 1.7 - - - - 3.0 - - - - 4.2 - - - - 5.5 - - - - 6.3 - - - - 7.0 - - - - 8.1 - - - - 9.8 - - - - 11.7 - - - - 13.3 - - - - Women - - - - 1.8 - - - - 2.6 - - - - 3.7 - - - - 4.3 - - - - 4.6 - - - - 5.6 - - - - 7.2 - - - - 9.0 - - - - 11.7 - - - - 12.9 - - - - 4-yearinstitutions - - - - 1.8 - - - - 3.0 - - - - 4.0 - - - - 5.4 - - - - 6.0 - - - - 7.0 - - - - 8.5 - - - - 10.6 - - - - 13.1 - - - - 14.8 - - - - 2-yearinstitutions - - - - 2.2 - - - - 3.2 - - - - 4.2 - - - - 4.8 - - - - 5.0 - - - - 5.0 - - - - 5.9 - - - - 7.1 - - - - 9.4 - - - - 10.1 - - - - White - - - - 1.1 - - - - 2.4 - - - - 3.9 - - - - 5.5 - - - - 6.7 - - - - 7.4 - - - - - - - - - - - - - - - - - - - - BlackorAfricanAmerican - - - - 4.2 - - - - 8.8 - - - - 12.5 - - - - 15.8 - - - - 19.0 - - - - 20.5 - - - - - - - - - - - - - - - - - - - - HispanicorLatino - - - - 4.2 - - - - 8.6 - - - - 12.1 - - - - 15.5 - - - - 18.9 - - - - 22.1 - - - - - - - - - - - - - - - - - - - - Asian/PacificIslander - - - - 2.6 - - - - 5.0 - - - - 5.7 - - - - 7.0 - - - - 6.0 - - - - 4.7 - - - - - - - - - - - - - - - - - - - - AmericanIndian/AlaskaNative - - - - 5.2 - - - - 4.1 - - - - 5.6 - - - - 3.3 - - - - 2.4 - - - - 5.0 - - - - - - - - - - - - - - - - - - - - Nonresidentalien - - - - 2.7 - - - - 4.8 - - - - 7.8 - - - - 9.5 - - - - 7.3 - - - - 2.1 - - - - - - - - - - - - - - - - - - -
- - - - - Yearofdata - - - - Source - - - - 2007–08 - - - - 2008–09 - - - - 2009–10 - - - - 2010–11 - - - - Enrollment,inthousands - - - - Actual - - - - 49,293 - - - - 49,266 - - - - 49,373 - - - - 49,484 - - - - Projectedenrollment,inthousands - - - - ProjectionsofEducationStatisticsto2017 - - - - 49,644 - - - - 49,825 - - - - 50,067 - - - - 50,353 - - - - ProjectionsofEducationStatisticsto2018 - - - - 49,470 - - - - 49,623 - - - - 49,788 - - - - 50,034 - - - - ProjectionsofEducationStatisticsto2019 - - - - - - - - 49,265 - - - - 49,312 - - - - 49,386 - - - - ProjectionsofEducationStatisticsto2020 - - - - - - - - - - - - 49,282 - - - - 49,306 - - - - Percentagedifferencebetweenactualandprojectedvalues - - - - ProjectionsofEducationStatisticsto2017 - - - - 0.7 - - - - 1.1 - - - - 1.4 - - - - 1.8 - - - - ProjectionsofEducationStatisticsto2018 - - - - 0.4 - - - - 0.7 - - - - 0.8 - - - - 1.1 - - - - ProjectionsofEducationStatisticsto2019 - - - - - - - - # - - - - -0.1 - - - - -0.2 - - - - ProjectionsofEducationStatisticsto2020 - - - - - - - - - - - - -0.2 - - - - -0.4 - - -
- - - - - Leadtime(years) - - - - Source - - - - 1 - - - - 2 - - - - 3 - - - - 4 - - - - Absolutevalueofpercentagedifferencebetweenactualandprojectedvalues - - - - ProjectionsofEducationStatisticsto2017 - - - - - - - - 0.7 - - - - 1.1 - - - - 1.4 - - - - ProjectionsofEducationStatisticsto2018 - - - - 0.4 - - - - 0.7 - - - - 0.8 - - - - 1.1 - - - - ProjectionsofEducationStatisticsto2019 - - - - # - - - - 0.1 - - - - 0.2 - - - - - - - - ProjectionsofEducationStatisticsto2020 - - - - 0.2 - - - - 0.4 - - - - - - - - - - - - Meanabsolutepercentageerror - - - - Example - - - - 0.2 - - - - 0.5 - - - - 0.7 - - - - 1.3 - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019.json deleted file mode 100644 index 4279ae6b..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":4,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":2,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019.pdf deleted file mode 100644 index 2316c328..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020-reg.xml deleted file mode 100644 index 549c35d2..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020-reg.xml +++ /dev/null @@ -1,3557 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020-str.xml deleted file mode 100644 index 1f4b7b1b..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020-str.xml +++ /dev/null @@ -1,4175 +0,0 @@ - - - - - - - Education system - - - - Australia - - - - 96 - - - - 95 - - - - 93 - - - - 98 - - - - 100 - - - - 4 - - - - 98 - - - - 98 - - - - 100 - - - - Austria - - - - 5 - - - - 100 - - - - 100 - - - - 100 - - - - Azerbaijan - - - - 100 - - - - 100 - - - - 7 - - - - 100 - - - - 84 - - - - 85 - - - - Belgium(French)-BEL - - - - 6 - - - - 100 - - - - 82 - - - - 77 - - - - 97 - - - - Bulgaria - - - - 97 - - - - 95 - - - - 95 - - - - 100 - - - - 100 - - - - 3 - - - - 94 - - - - Canada - - - - 100 - - - - 98 - - - - 98 - - - - 96 - - - - 10 - - - - ChineseTaipei-CHN - - - - 100 - - - - 100 - - - - 100 - - - - 1 - - - - 99 - - - - 99 - - - - 97 - - - - 99 - - - - Colombia - - - - 89 - - - - 100 - - - - 95 - - - - 2 - - - - Croatia - - - - 100 - - - - 95 - - - - 95 - - - - 99 - - - - 100 - - - - 8 - - - - CzechRepublic - - - - 100 - - - - 5 - - - - 99 - - - - 94 - - - - 94 - - - - 90 - - - - 98 - - - - 97 - - - - Denmark - - - - 100 - - - - 87 - - - - 7 - - - - 95 - - - - England-GBR - - - - 94 - - - - 87 - - - - 73 - - - - 100 - - - - 82 - - - - 2 - - - - 99 - - - - Finland - - - - 96 - - - - 95 - - - - 100 - - - - 97 - - - - 3 - - - - 100 - - - - France - - - - 98 - - - - 98 - - - - 5 - - - - 100 - - - - 97 - - - - 97 - - - - 98 - - - - 98 - - - - Georgia - - - - 96 - - - - 92 - - - - 5 - - - - Germany - - - - 95 - - - - 99 - - - - 100 - - - - 96 - - - - 2 - - - - 96 - - - - HongKong-CHN - - - - 83 - - - - 12 - - - - 100 - - - - 94 - - - - 88 - - - - 86 - - - - 96 - - - - Hungary - - - - 100 - - - - 99 - - - - 98 - - - - 97 - - - - 4 - - - - 100 - - - - Indonesia - - - - 100 - - - - 100 - 
- - - 97 - - - - 97 - - - - 3 - - - - Iran, IslamicRep. Of - - - - 100 - - - - 99 - - - - 100 - - - - 99 - - - - 100 - - - - 5 - - - - 98 - - - - Ireland - - - - 100 - - - - 95 - - - - 95 - - - - 100 - - - - 3 - - - - 99 - - - - 98 - - - - Israel - - - - 93 - - - - 100 - - - - 94 - - - - 25 - - - - 81 - - - - Italy - - - - 95 - - - - 100 - - - - 4 - - - - 98 - - - - 96 - - - - 94 - - - - Lithuania - - - - 94 - - - - 94 - - - - 100 - - - - 93 - - - - 6 - - - - 95 - - - - 100 - - - - 4 - - - - 100 - - - - 95 - - - - Malta - - - - 100 - - - - 100 - - - - Morocco - - - - 96 - - - - 99 - - - - 95 - - - - 99 - - - - 2 - - - - Netherlands - - - - 92 - - - - 100 - - - - 97 - - - - 89 - - - - 68 - - - - 4 - - - - NewZealand - - - - 99 - - - - 100 - - - - 3 - - - - 94 - - - - 93 - - - - 93 - - - - NorthernIreland-GBR - - - - 79 - - - - 93 - - - - 100 - - - - 85 - - - - 62 - - - - 4 - - - - 100 - - - - Norway - - - - 71 - - - - 4 - - - - 86 - - - - 83 - - - - 57 - - - - 100 - - - - 98 - - - - 98 - - - - 96 - - - - Oman - - - - 98 - - - - 2 - - - - Poland - - - - 100 - - - - 100 - - - - 96 - - - - 96 - - - - 100 - - - - 4 - - - - 95 - - - - 99 - - - - 100 - - - - 3 - - - - Portugal - - - - 87 - - - - 93 - - - - 100 - - - - 100 - - - - Qatar - - - - 100 - - - - 99 - - - - 99 - - - - 6 - - - - Romania - - - - 97 - - - - 100 - - - - 4 - - - - 99 - - - - 97 - - - - 100 - - - - RussianFederation - - - - 100 - - - - 5 - - - - 100 - - - - 98 - - - - 100 - - - - 98 - - - - 100 - - - - 98 - - - - SaudiArabia - - - - 98 - - - - 100 - - - - 2 - - - - 95 - - - - Singapore - - - - 6 - - - - 100 - - - - 100 - - - - 100 - - - - 96 - - - - 96 - - - - SlovakRepublic - - - - 95 - - - - 97 - - - - 100 - - - - 96 - - - - 99 - - - - 5 - - - - 100 - - - - Slovenia - - - - 97 - - - - 97 - - - - 3 - - - - 95 - - - - 96 - - - - Spain - - - - 100 - - - - 97 - - - - 96 - - - - 96 - - - - 99 - - - - 5 - - - - 99 - - - - 100 - - - - Sweden - - - - 97 - - - - 92 - - - - 91 - - - - 4 - - - - 99 - - - - 
Trinidad andTobago - - - - 1 - - - - 100 - - - - 99 - - - - 96 - - - - 95 - - - - 100 - - - - 100 - - - - 97 - - - - United Arab Emirates - - - - 97 - - - - 3 - - - - 100 - - - - 100 - - - - 80 - - - - 81 - - - - 96 - - - - 7 - - - - 85 - - - - United States - - - - Percentage of -international -desired population -coverage - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - National desired -population overall -exclusion rate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Weighted school -participation rate -before substitution - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Weighted school -participation rate -after substitution - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Weighted student -response rate - - - - - - - - - - - - - - - - - - - - - - - - - - Combined -weighted school -participation -and student -response rate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - Alberta-CAN - - - - 99 - - - - 94 - - - - 7 - - - - 100 - - - - 97 - - - - 95 - - - - Ontario-CAN - - - - 99 - - - - 96 - - - - 95 - - - - 99 - - - - 100 - - - - 8 - - - - Quebec-CAN - - - - 92 - - - - 100 - - - - 96 - - - - 96 - - - - 95 - - - - 4 - - - - Maltese-MLT - - - - 100 - - - - 100 - - - - 94 - - - - 100 - - - - 94 - - - - 4 - - - - Andalusia-ESP - - - - 5 - - - - 99 - - - - 96 - - - - 97 - - - - 100 - - - - 99 - - - - AbuDhabi-UAE - - - - 100 - - - - 99 - - - - 96 - - - - 97 - - - - 99 - - - - 3 - - - - 96 - - - - Dubai-UAE - - - - 100 - - - - 94 - - - - 99 - - - - 5 - - - - 99 - - - - 91 - - - - 96 - - - - 13 - - - - 96 - - - - 89 - - - - Florida-USA - - - - 95 - - - - Benchmarking -educationsystems - - - - - - - - - - - - - - - - - - - - - - Percentageof -international -desiredpopulation -coverage - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Nationaldesired -populationoverall -exclusionrate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Weightedschool -participationrate -beforesubstitution - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Weightedschool -participationrate -aftersubstitution - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Weightedstudent -responserate - - - - - - - - - - - - - - - - - - - - - - - - - - Combined -weightedschool -participation -andstudent -responserate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - Educationsystem - - - - 6,126 - - - - 275 - - - - Australia - - - - 6,709 - - - - 280 - - - - 284 - - - - 5 - - - - 290 - - - - 158 - - - - 158 - - - - 4,670 - - - - 4,976 - - - - Austria - - - - 160 - - - - 158 - - - - 0 - - - - 5,098 - - - - 142 - - - - 169 - - - - Azerbaijan - - - - 4,881 - - - - 170 - - - - 27 - - - - 169 - - - - 127 - - - - 115 - - - - 3,727 - - - - 3,910 - - - - 150 - - - - 150 - - - - 12 - - - - Belgium(French)-BEL - - - - 5,725 - - - - 147 - - - - Bulgaria - - - - 5,261 - - - - 5 - - - - 150 - - - - 147 - - - - 142 - - - - 1,111 - - - - Canada - - - - 23,206 - - - - 1,142 - - - - 25,707 - - - - 1,125 - - - - 1,106 - - - - 5 - - - - ChineseTaipei-CHN - - - - 4,293 - - - - 150 - - - - 150 - - - - 150 - - - - 4,376 - - - - 150 - - - - 0 - - - - Colombia - - - - 4,309 - - - - 157 - - - - 131 - - - - 150 - - - - 3,966 - - - - 19 - - - - 152 - - - - 150 - - - - 5,097 - - - - 152 - - - - 2 - - - - 4,587 - - - - 152 - - - - 152 - - - - Croatia - - - - 4,556 - - - - 178 - - - - 177 - - - - 4,895 - - - - CzechRepublic - - - - 16 - - - - 161 - - - - 180 - - - - Denmark - - - - 4,594 - - - - 4,994 - - - - 25 - - - - 232 - - - - 236 - - - - 207 - - - - 240 - - - - 20 - - - - 150 - - - - 148 - - - - England-GBR - - - - 4,243 - - - - 3,927 - - - - 109 - - - - 129 - - - - 4,640 - - - - Finland - - - - 145 - - - - 4,914 - - - - 146 - - - - 150 - - - - 4 - - - - 141 - - - - 170 - - - - 175 - - - - 4,438 - - - - 4 - - - - 174 - - - - 175 - - - - 4,638 - - - - France - - - - 177 - - - - 4,796 - - - - 180 - - - - Georgia - - - - 173 - - - - 4,958 - - - - 172 - - - - 1 - - - - Germany - - - - 197 - - - - 200 - - - - 199 - - - - 4,000 - - - - 4,229 - - - - 190 - - - - 7 - - - - 132 - - - - HongKong-CHN - - - - 3,875 - - - - 4,189 - - - - 150 - - - - 2 - - - - 130 - - - - 154 - - - - Hungary - - - - 146 - - - - 150 - - - - 149 - - - - 5,204 - - - - 150 - - - - 5,488 - - - - 3 - - - - 5,049 - - - - 158 - - - - 158 - - - - 4,791 - - - - 158 - - - - 158 - - 
- - 0 - - - - Indonesia - - - - 244 - - - - Iran,IslamicRep.Of - - - - 5,758 - - - - 244 - - - - 244 - - - - 5,932 - - - - 0 - - - - 250 - - - - 3 - - - - 4,524 - - - - 152 - - - - Ireland - - - - 148 - - - - 151 - - - - 151 - - - - 4,849 - - - - 153 - - - - 153 - - - - 4,579 - - - - 4,186 - - - - 152 - - - - 150 - - - - Israel - - - - 2 - - - - 4,529 - - - - Italy - - - - 4,189 - - - - 36 - - - - 202 - - - - 205 - - - - 205 - - - - 166 - - - - 154 - - - - 5,140 - - - - 154 - - - - 160 - - - - 145 - - - - 4,661 - - - - 9 - - - - Lithuania - - - - 3,958 - - - - 3,598 - - - - Malta - - - - 99 - - - - 96 - - - - 96 - - - - 0 - - - - 96 - - - - 284 - - - - 0 - - - - Morocco - - - - 8,381 - - - - 7,805 - - - - 287 - - - - 284 - - - - 289 - - - - Netherlands - - - - 151 - - - - 97 - - - - 4,179 - - - - 3,995 - - - - 41 - - - - 151 - - - - 138 - - - - NewZealand - - - - 192 - - - - 201 - - - - 6,192 - - - - 12 - - - - 180 - - - - 5,644 - - - - 199 - - - - 36 - - - - 136 - - - - 3,586 - - - - 3,942 - - - - 100 - - - - 160 - - - - 160 - - - - NorthernIreland-GBR - - - - Norway - - - - 120 - - - - 3,921 - - - - 3,190 - - - - 145 - - - - 150 - - - - 85 - - - - 35 - - - - 333 - - - - 10,394 - - - - 338 - - - - 327 - - - - 327 - - - - 10,840 - - - - 0 - - - - Oman - - - - Poland - - - - 150 - - - - 150 - - - - 0 - - - - 5,005 - - - - 150 - - - - 150 - - - - 5,316 - - - - 4,085 - - - - 15 - - - - 150 - - - - 150 - - - - Portugal - - - - 133 - - - - 148 - - - - 4,428 - - - - 167 - - - - 4,120 - - - - 166 - - - - 4,394 - - - - 166 - - - - 175 - - - - 0 - - - - Qatar - - - - 4,879 - - - - Romania - - - - 148 - - - - 147 - - - - 148 - - - - 4,665 - - - - 1 - - - - 150 - - - - 202 - - - - RussianFederation - - - - 202 - - - - 202 - - - - 4,693 - - - - 202 - - - - 4,461 - - - - 0 - - - - 171 - - - - SaudiArabia - - - - 175 - - - - 171 - - - - 8 - - - - 4,507 - - - - 4,625 - - - - 163 - - - - 6,687 - - - - 176 - - - - 0 - - - - 176 - - - - Singapore - - - - 6,367 - - - - 176 - - - - 
176 - - - - 5,933 - - - - 198 - - - - 197 - - - - 10 - - - - 187 - - - - 5,630 - - - - 200 - - - - SlovakRepublic - - - - Slovenia - - - - 4,674 - - - - 2 - - - - 4,512 - - - - 195 - - - - 201 - - - - 202 - - - - 193 - - - - 314 - - - - 8,580 - - - - 308 - - - - Spain - - - - 314 - - - - 9,223 - - - - 312 - - - - 4 - - - - Sweden - - - - 148 - - - - 4 - - - - 5,209 - - - - 152 - - - - 161 - - - - 4,622 - - - - 153 - - - - 150 - - - - 0 - - - - 3,948 - - - - 150 - - - - Trinidad andTobago - - - - 4,190 - - - - 149 - - - - 149 - - - - 460 - - - - 14,618 - - - - 458 - - - - 458 - - - - 478 - - - - 15,372 - - - - 0 - - - - UnitedArabEmirates - - - - 12,726 - - - - 14,253 - - - - UnitedStates - - - - 21 - - - - 349 - - - - 437 - - - - 450 - - - - 370 - - - - Schools in -original sample - - - - - - - - - - - - - - - - - - - - - - Eligible schools -in original -sample - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Schools -in original -sample that -participated - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Substitute -schools - - - - - - - - - - - - - - - - - Total -schools that -participated - - - - - - - - - - - - - - - - - - - - - - - - - - - - Sampled -students in -participating -schools - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Students -assessed - - - - - - - - - - - - - -
- - - - - Alberta-CAN - - - - 143 - - - - 150 - - - - 147 - - - - 3,789 - - - - 145 - - - - 4,292 - - - - 2 - - - - 4,932 - - - - 200 - - - - Ontario-CAN - - - - 188 - - - - 4,561 - - - - 191 - - - - 189 - - - - 1 - - - - 190 - - - - 4,529 - - - - 189 - - - - Quebec-CAN - - - - 197 - - - - 4,244 - - - - 200 - - - - 1 - - - - 99 - - - - 3,548 - - - - 95 - - - - 3,942 - - - - 95 - - - - 0 - - - - 95 - - - - Maltese-MLT - - - - Andalusia-ESP - - - - 149 - - - - 4,652 - - - - 4,333 - - - - 149 - - - - 0 - - - - 150 - - - - 150 - - - - 165 - - - - AbuDhabi-UAE - - - - 4,308 - - - - 0 - - - - 164 - - - - 168 - - - - 4,146 - - - - 164 - - - - 6,497 - - - - 138 - - - - 139 - - - - Dubai-UAE - - - - 6,061 - - - - 152 - - - - 138 - - - - 0 - - - - Florida-USA - - - - 3,052 - - - - 2,598 - - - - 77 - - - - 77 - - - - 81 - - - - 80 - - - - 0 - - - - Benchmarking -education systems - - - - - - - - - - - - - - - - - - - - - - Schools in -original sample - - - - - - - - - - - - - - - - - - - - - - Eligible schools -in original -sample - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Schools -in original -sample that -participated - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Substitute -schools - - - - - - - - - - - - - - - - - Total -schools that -participated - - - - - - - - - - - - - - - - - - - - - - - - - - - - Sampled -students in -participating -schools - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Students -assessed - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020.json deleted file mode 100644 index 11be9878..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":4,"numCorrectlyDetectedTables":4,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020.pdf deleted file mode 100644 index 39a8546c..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021-reg.xml deleted file mode 100644 index 87cb5331..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021-reg.xml +++ /dev/null @@ -1,459 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021-str.xml deleted file mode 100644 index 3dfdd591..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021-str.xml +++ /dev/null @@ -1,751 +0,0 @@ - - - - - - - Content domain and process - - - - - - - - - - - - - - - - - - - - - - - All items - - - - - - - - - - New items - - - - - - - - - - - Trend items - - - - - - - - - - - - Number - - - - - - - - - Percent - - - - - - - - Number - - - - - - - - - Percent - - - - - - - - Number - - - - - - - - - Percent - - - - - - - - Total items - - - - - - - - - - - - 135 - - - - - - - 100 - - - - - - - 60 - - - - - - 100 - - - - - - - 75 - - - - - - 100 - - - - - - - Purposes of reading - - - - - - - - - - - - - - - - - - - - Literary experience - - - - - - - - - - - - - - - - - 72 - - - - - - 53 - - - - - - 33 - - - - - - 55 - - - - - - 39 - - - - - - 52 - - - - - - Acquire and use information - - - - - - - - - - - - - - - - - - - - - - 63 - - - - - - 47 - - - - - - 27 - - - - - - 45 - - - - - - 36 - - - - - - 48 - - - - - - Processes of comprehension - - - - - - - - - - - - - - - - - - - - - - - - - - - Focuson and retrieve explicitly stated information - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 33 - - - - - - 24 - - - - - - 14 - - - - - - 23 - - - - - - 19 - - - - - - 25 - - - - - - Makes traightforward inferences - - - - - - - - - - - - - - - - - - - - - - - - - - 46 - - - - - - 34 - - - - - - 20 - - - - - - 33 - - - - - - 26 - - - - - - 35 - - - - - - Interpret and integrate ideas and information - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 38 - - - - - - 28 - - - - - - 18 - - - - - - 30 - - - - - - 20 - - - - - - 27 - - - - - - Examine and evaluate content, language, and textual elements - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 18 - - - - - - 13 - - - - - - 8 - - - - - 13 - - - - - - 10 - - - - - - 13 - - - - -
- - - - - Item Format - - - - - - - - - - - - Number -ofitems - - - - - - - - - - - - - - - Percent -ofitems - - - - - - - - - - - - - - Total - - - - - - - - 135 - - - - - - - 100 - - - - - - - Multiple choice - - - - - - - - - - - - - - - 74 - - - - - - 55 - - - - - - Constructed response - - - - - - - - - - - - - - - - - 61 - - - - - - 45 - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021.json deleted file mode 100644 index 1bf5fd29..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021.pdf deleted file mode 100644 index d586e691..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022-reg.xml deleted file mode 100644 index 3ab038d1..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022-reg.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022-str.xml deleted file mode 100644 index ada032e2..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022-str.xml +++ /dev/null @@ -1,383 +0,0 @@ - - - - - - - District Totals  - - - - - - - - FY 2007  - - - - - FY 2008  - - - - - FY 2009  - - - - - FY 2010  - - - - - FY 2011  - - - - - Investigative Matters Received by AUSAs - - - - - - - - - - - - - - - - - - - 426  - - - - - 365  - - - - - 285  - - - - - 402  - - - - - 387  - - - - - Defendants Charged - - - - - - - - - 290  - - - - - 259  - - - - - 235  - - - - - 259  - - - - - 215  - - - - - Cases Charged - - - - - - - - 217  - - - - - 197  - - - - - 173  - - - - - 177  - - - - - 168  - - - - - Defendants Sentenced - - - - - - - - - - - - 287  - - - - - 242  - - - - - 223  - - - - - 207  - - - - - 208  - - - - - No Prison Term - - - - - - - - - 148  - - - - - 107  - - - - - 126  - - - - - 121  - - - - - 102  - - - - - 1-12 Months - - - - - - - 52  - - - - - 48  - - - - - 35  - - - - - 38  - - - - - 27  - - - - - 13-24 Months - - - - - - - 37  - - - - - 45  - - - - - 29  - - - - - 27  - - - - - 33  - - - - - 25-36 Months - - - - - - - 20  - - - - - 20  - - - - - - - - - - 10  - - - - - 17  - - - - - 37-60 Months - - - - - - - 14  - - - - - 19  - - - - - 18  - - - - - - - - - - 21  - - - - - 60+ Months - - - - - - - 16  - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022.pdf deleted file mode 100644 index 1c41e738..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023-reg.xml deleted file mode 100644 index 16fccc3c..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023-reg.xml +++ /dev/null @@ -1,223 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023-str.xml deleted file mode 100644 index 9cc174a5..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023-str.xml +++ /dev/null @@ -1,610 +0,0 @@ - - - - - - - Inequality measure - - - - - - - - Year - - - - - - 1997 - - - - - 1998 - - - - - 1999 - - - - - 2000 - - - - - 2001 - - - - - 2002 - - - - - 2003 - - - - - 2004 - - - - - 2005 - - - - - 2006 - - - - - 2007 - - - - - Median household income - - - - - - - - - - - - $49,497 - - - - - - - - $51,295 - - - - - - - $52,587 - - - - - - $52,500 - - - - - - $51,356 - - - - - - $50,756 - - - - - - $50,711 - - - - - - $50,535 - - - - - - $51,093 - - - - - - $51,473 - - - - - - $52,163 - - - - - - Household income inequality (Gini index) - - - - - - - - - - - - - - - - - - - - - - - 0.4590 - - - - - - - 0.4560 - - - - - - - 0.4580 - - - - - - 0.4620 - - - - - - 0.4660 - - - - - - 0.4620 - - - - - - 0.4640 - - - - - - 0.4660 - - - - - - 0.4690 - - - - - - 0.4700 - - - - - - 0.4630 - - - - - - Between-state income inequality (Gini index) - - - - - - - - - - - - - - - - - - - - 0.0628 - - - - - 0.0636 - - - - - 0.0612 - - - - - 0.0646 - - - - - 0.0658 - - - - - 0.0671 - - - - - 0.0624 - - - - - 0.0701 - - - - - 0.0677 - - - - - 0.0713 - - - - - 0.0749 - - - - - Premature mortality (years of potential life lost before age 75 yrs/100,000 population) - - - - - - - - - - - - - - - - - - - 7108.3 - - - - - 6960.6 - - - - - 6920.0 - - - - - 6899.5 - - - - - 6940.6 - - - - - 6965.2 - - - - - 6970.7 - - - - - 6841.5 - - - - - 6912.9 - - - - - 6882.0 - - - - - 6799.5 - - - - - Between-state inequality in premature mortality (Gini index) - - - - - - - - - - - - - - - - - - - 0.0762 - - - - - 0.0785 - - - - - 0.0820 - - - - - 0.0850 - - - - - 0.0819 - - - - - 0.0861 - - - - - 0.0868 - - - - - 0.0926 - 
- - - - 0.0939 - - - - - 0.0963 - - - - - 0.0956 - - - - - Mean Health and Activities Limitation Index (HALex), ages 18–65 yrs - - - - - - - - - - - - - - - 0.8766 - - - - - 0.8762 - - - - - 0.8779 - - - - - 0.8783 - - - - - 0.8747 - - - - - 0.8722 - - - - - 0.8711 - - - - - 0.8712 - - - - - 0.8708 - - - - - 0.8684 - - - - - 0.8662 - - - - - Inequality in HALex (Giniindex), ages 18–65 yrs - - - - - - - - - - - - - 0.0928 - - - - - 0.0872 - - - - - 0.0848 - - - - - 0.0840 - - - - - 0.0871 - - - - - 0.0884 - - - - - 0.0888 - - - - - 0.0878 - - - - - 0.0886 - - - - - 0.0904 - - - - - 0.0862 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023.json deleted file mode 100644 index 79301b7d..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":4,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023.pdf deleted file mode 100644 index 6700d7d1..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024-reg.xml deleted file mode 100644 index f0fb7c73..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024-reg.xml +++ /dev/null @@ -1,1962 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024-str.xml deleted file mode 100644 index fb5abc7f..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024-str.xml +++ /dev/null @@ -1,7404 +0,0 @@ - - - - - - - Characteristic - - - - - - - - - 2007 - - - - - 2009 - - - - - total occupied -housing units - - - - - - - - Inadequate housing units - - - - - - - - Total occupied -housing units - - - - - - - - Inadequate housing units - - - - - - - - no. - - - - - - (%) - - - - - Unadjusted OR - - - - - - (95% CI) - - - - - no. - - - - - - (%) - - - - - Unadjusted OR - - - - - - (95% CI) - - - - - Sex - - - - - - Male - - - - - - 61,206 - - - - - 2,862 - - - - - (4.7) - - - - - Ref. - - - - - - - - - - - - 60,721 - - - - - 2,962 - - - - - (4.9) - - - - - Ref. - - - - - - - - - - - - Female - - - - - - 49,486 - - - - - 2,909 - - - - - (5.9) - - - - - 1.1 - - - - - (1.1–1.2) - - - - - 51,084 - - - - - 2,795 - - - - - (5.5) - - - - - 1.1 - - - - - (1.1–1.2) - - - - - Race/Ethnicity - - - - - - - - - White, non-Hispanic - - - - - - - - - 78,744 - - - - - 3,174 - - - - - (4.0) - - - - - Ref. - - - - - - - - - - - - 79,333 - - - - - 3,222 - - - - - (4.1) - - - - - Ref. 
- - - - - - - Hispanic - - - - - - 12,609 - - - - - 966 - - - - - (7.7) - - - - - 2.0 - - - - - (1.7–2.3) - - - - - 12,739 - - - - - 991 - - - - - (7.8) - - - - - 2.0 - - - - - (1.7–2.3) - - - - - Black, non-Hispanic - - - - - - - 13,437 - - - - - 1,292 - - - - - (9.6) - - - - - 2.5 - - - - - (2.2–3.0) - - - - - 13,609 - - - - - 1,228 - - - - - (9.0) - - - - - 2.3 - - - - - (2.0–2.7) - - - - - Asian/Paciic Islander - - - - - - - 4,050 - - - - - 174 - - - - - (4.3) - - - - - 1.1 - - - - - (0.8–1.5) - - - - - 4,181 - - - - - 192 - - - - - (4.6) - - - - - 1.1 - - - - - (0.8–1.5) - - - - - American Indian/Alaska -Native - - - - - - - - - - - - - 707 - - - - - 51 - - - - - (7.2) - - - - - 1.8 - - - - - (1.0–3.5) - - - - - 730 - - - - - 55 - - - - - (7.5) - - - - - 1.9 - - - - - (1.1–3.4) - - - - - Sex, by race/ethnicity - - - - - - - - - - - Male - - - - - - White, non-Hispanic - - - - - - - - - 45,116 - - - - - 1,638 - - - - - (3.6) - - - - - Ref. - - - - - - - - - - - - 44,537 - - - - - 1,704 - - - - - (3.8) - - - - - Ref. - - - - - - - - - - - - Hispanic - - - - - - 7,086 - - - - - 508 - - - - - (7.2) - - - - - 2.1 - - - - - (1.6–2.6) - - - - - 7,160 - - - - - 577 - - - - - (8.1) - - - - - 2.2 - - - - - (1.8–2.7) - - - - - Black, non-Hispanic - - - - - - - 5,545 - - - - - 548 - - - - - (9.9) - - - - - 2.9 - - - - - (2.3–3.7) - - - - - 5,520 - - - - - 512 - - - - - (9.3) - - - - - 2.6 - - - - - (2.1–3.2) - - - - - Asian/Paciic Islander - - - - - - - 2,536 - - - - - 95 - - - - - (3.7) - - - - - 1.0 - - - - - (0.7–1.6) - - - - - 2,577 - - - - - 117 - - - - - (4.6) - - - - - 1.2 - - - - - (0.8–1.7) - - - - - American Indian/ -Alaska Native - - - - - - - - - - - - - 348 - - - - - 18 - - - - - (5.3) - - - - - 1.5 - - - - - (0.6–3.9) - - - - - 352 - - - - - 27 - - - - - (7.8) - - - - - 2.1 - - - - - (0.9–4.8) - - - - - Female - - - - - - White, non-Hispanic - - - - - - - - - 33,628 - - - - - 1,536 - - - - - (4.6) - - - - - Ref. 
- - - - - - - - - - - - 34,795 - - - - - 1,518 - - - - - (4.4) - - - - - Ref. - - - - - - - - - - - - Hispanic - - - - - - 5,523 - - - - - 458 - - - - - (8.3) - - - - - 1.9 - - - - - (1.5–2.4) - - - - - 5,580 - - - - - 414 - - - - - (7.4) - - - - - 1.8 - - - - - (1.4–2.2) - - - - - Black, non-Hispanic - - - - - - - 7,892 - - - - - 744 - - - - - (9.4) - - - - - 2.2 - - - - - (1.8–2.7) - - - - - 8,090 - - - - - 716 - - - - - (8.9) - - - - - 2.1 - - - - - (1.8–2.6) - - - - - Asian/Paciic Islander - - - - - - - 1,514 - - - - - 79 - - - - - (5.2) - - - - - 1.2 - - - - - (0.7–2.0) - - - - - 1,604 - - - - - 75 - - - - - (4.7) - - - - - 1.1 - - - - - (0.7–1.8) - - - - - American Indian/ -Alaska Native - - - - - - - - - - - - - 359 - - - - - 32 - - - - - (9.0) - - - - - 2.1 - - - - - (0.9–4.8) - - - - - 378 - - - - - 27 - - - - - (7.2) - - - - - 1.7 - - - - - (0.7–3.9) - - - - - Annual income ($) - - - - - - - ≤24,999 - - - - - 46,912 - - - - - 2,771 - - - - - (9.4) - - - - - 4.9 - - - - - (4.1–5.9) - - - - - 49,240 - - - - - 2,615 - - - - - (8.5) - - - - - 3.8 - - - - - (3.2–4.6) - - - - - 25,000–49,999 - - - - - 31,170 - - - - - 1,650 - - - - - (5.3) - - - - - 2.6 - - - - - (2.2–3.2) - - - - - 29,757 - - - - - 1,711 - - - - - (5.7) - - - - - 2.5 - - - - - (2.1–3.1) - - - - - 50,000–74,999 - - - - - 18,985 - - - - - 700 - - - - - (3.7) - - - - - 1.8 - - - - - (1.4–2.3) - - - - - 18,557 - - - - - 663 - - - - - (3.6) - - - - - 1.5 - - - - - (1.2–1.9) - - - - - ≥75,000 - - - - - 31,137 - - - - - 650 - - - - - (2.1) - - - - - Ref. - - - - - - - - - - - - 32,558 - - - - - 768 - - - - - (2.4) - - - - - Ref. 
- - - - - - - - - - - - Education level - - - - - - - - - Less than high school - - - - - - 16,779 - - - - - 1,507 - - - - - (9.0) - - - - - 2.2 - - - - - (1.9–2.6) - - - - - 15,229 - - - - - 1,278 - - - - - (8.4) - - - - - 2.1 - - - - - (1.8–2.5) - - - - - High school diploma - - - - - - 30,559 - - - - - 1,564 - - - - - (5.1) - - - - - 1.2 - - - - - (1.1–1.4) - - - - - 30,692 - - - - - 1,770 - - - - - (5.8) - - - - - 1.4 - - - - - (1.3–1.6) - - - - - Any college education - - - - - - - - - 63,354 - - - - - 2,700 - - - - - (4.3) - - - - - Ref. - - - - - - - - - - - - 65,884 - - - - - 2,709 - - - - - (4.1) - - - - - Ref. - - - - - - - - - - - - U.S.Census region - - - - - - - - - Northeast - - - - - - 23,128 - - - - - 1,096 - - - - - (5.4) - - - - - 1.3 - - - - - (1.0–1.5) - - - - - 23,316 - - - - - 1,320 - - - - - (6.5) - - - - - 1.6 - - - - - (1.3–1.9) - - - - - Midwest - - - - - - - 29,202 - - - - - 1,063 - - - - - (4.2) - - - - - 1.0 - - - - - (0.8–1.2) - - - - - 29,403 - - - - - 1,092 - - - - - (4.3) - - - - - 1.0 - - - - - (0.9–1.3) - - - - - South - - - - - - 48,324 - - - - - 2,554 - - - - - (6.3) - - - - - 1.5 - - - - - (1.3–1.7) - - - - - 49,372 - - - - - 2,332 - - - - - (5.6) - - - - - 1.5 - - - - - (1.2–1.6) - - - - - West - - - - - - 27,550 - - - - - 1,058 - - - - - (4.3) - - - - - Ref. - - - - - - - - - - - - 28,021 - - - - - 1,013 - - - - - (4.2) - - - - - Ref. - - - - - - - - - - - - Disability status - - - - - - - - Yes - - - - - - 3,657 - - - - - 245 - - - - - (6.7) - - - - - 1.3 - - - - - (1.0–1.8) - - - - - 3,647 - - - - - 226 - - - - - (6.2) - - - - - 1.2 - - - - - (0.9–1.6) - - - - - No - - - - - 107,035 - - - - - 5,526 - - - - - (5.2) - - - - - Ref. - - - - - - - - - - - - 108,151 - - - - - 5,531 - - - - - (5.1) - - - - - Ref. - - - - - - - - - - - - total - - - - - - 110,692 - - - - - 5,771 - - - - - (5.2) - - - - - - - - - - - - - - - 111,800 - - - - - 5,757 - - - - - (5.2) - - - - - - - - - - - - - -
- - - - - 2007 - - - - - 2009 - - - - - Characteristic - - - - - - - - - total -occupied -housing -units - - - - - - - - - - Unhealthyhousingunits - - - - - - total -occupied -housing units - - - - - - - - - Unhealthyhousingunits - - - - - - no. - - - - - - (%) - - - - - Unadjusted -odds ratio - - - - - - - - - (95%CI) - - - - - no. - - - - - - (%) - - - - - Unadjusted -odds ratio - - - - - - - - - (95%CI) - - - - - Sex - - - - - - Male - - - - - - 61,206 - - - - - 14,037 - - - - - (22.9) - - - - - Ref. - - - - - - - - - - - - 60,721 - - - - - 13,647 - - - - - (22.5) - - - - - Ref. - - - - - - - - - - - - Female - - - - - - 49,486 - - - - - 12,303 - - - - - (24.9) - - - - - 1.1 - - - - - (1.1–1.2) - - - - - 51,084 - - - - - 12,549 - - - - - (24.6) - - - - - 1.1 - - - - - (1.1–1.2) - - - - - Race/Ethnicity - - - - - - - - - White, non-Hispanic - - - - - - - - - 78,744 - - - - - 18,446 - - - - - (23.4) - - - - - Ref. - - - - - - - - - - - - 79,333 - - - - - 17,992 - - - - - (22.7) - - - - - Ref. - - - - - - - - - - - - Hispanic - - - - - - 12,609 - - - - - 2,754 - - - - - (21.8) - - - - - 0.9 - - - - - (0.8–1.0) - - - - - 12,739 - - - - - 3,079 - - - - - (24.2) - - - - - 1.1 - - - - - (1.0–1.2) - - - - - Black, non-Hispanic - - - - - - - 13,437 - - - - - 3,849 - - - - - (28.6) - - - - - 1.3 - - - - - (1.2–1.4) - - - - - 13,609 - - - - - 3,847 - - - - - (28.3) - - - - - 1.3 - - - - - (1.2–1.5) - - - - - Asian/Paciic Islander - - - - - - - 4,050 - - - - - 705 - - - - - (17.4) - - - - - 0.7 - - - - - (0.6–0.8) - - - - - 4,181 - - - - - 720 - - - - - (17.2) - - - - - 0.7 - - - - - (0.6–0.8) - - - - - American Indian/Alaska Native - - - - - - - - - - - - 707 - - - - - 218 - - - - - (30.8) - - - - - 1.5 - - - - - (1.0–2.1) - - - - - 730 - - - - - 233 - - - - - (31.9) - - - - - 1.6 - - - - - (1.1–2.3) - - - - - Sex, by race/ethnicity - - - - - - - - - - - Male - - - - - - White, non-Hispanic - - - - - - - - - 45,116 - - - - - 10,384 - - - - - (23.0) - - - - - Ref. 
- - - - - - - - - - - - 44,537 - - - - - 9,895 - - - - - (22.2) - - - - - Ref. - - - - - - - - - - - - Hispanic - - - - - - 7,086 - - - - - 1,433 - - - - - (20.2) - - - - - 0.9 - - - - - (0.7–1.0) - - - - - 7,160 - - - - - 1,625 - - - - - (22.7) - - - - - 1.0 - - - - - (0.9–1.2) - - - - - Black, non-Hispanic - - - - - - - 5,545 - - - - - 1,524 - - - - - (27.5) - - - - - 1.3 - - - - - (1.1–1.5) - - - - - 5,520 - - - - - 1,439 - - - - - (26.1) - - - - - 1.2 - - - - - (1.1–1.4) - - - - - Asian/Paciic Islander - - - - - - - 2,536 - - - - - 398 - - - - - (15.7) - - - - - 0.6 - - - - - (0.5–0.8) - - - - - 2,577 - - - - - 433 - - - - - (16.8) - - - - - 0.7 - - - - - (0.6–0.9) - - - - - American Indian/Alaska Native - - - - - - - - - - - - 348 - - - - - 106 - - - - - (30.4) - - - - - 1.5 - - - - - (0.9–2.4) - - - - - 352 - - - - - 120 - - - - - (34.1) - - - - - 1.8 - - - - - (1.1–3.0) - - - - - Female - - - - - - White, non-Hispanic - - - - - - - - - 33,628 - - - - - 8,062 - - - - - (24.0) - - - - - Ref. - - - - - - - - - - - - 34,795 - - - - - 8,097 - - - - - (23.3) - - - - - Ref. 
- - - - - - - - - - - - Hispanic - - - - - - 5,523 - - - - - 1,321 - - - - - (23.9) - - - - - 1.0 - - - - - (0.9–1.2) - - - - - 5,580 - - - - - 1,454 - - - - - (26.1) - - - - - 1.2 - - - - - (1.0–1.3) - - - - - Black, non-Hispanic - - - - - - - 7,892 - - - - - 2,325 - - - - - (29.5) - - - - - 1.3 - - - - - (1.2–1.5) - - - - - 8,090 - - - - - 2,408 - - - - - (29.8) - - - - - 1.4 - - - - - (1.3–1.6) - - - - - Asian/Paciic Islander - - - - - - - 1,514 - - - - - 308 - - - - - (20.3) - - - - - 0.8 - - - - - (0.6–1.1) - - - - - 1,604 - - - - - 288 - - - - - (17.9) - - - - - 0.7 - - - - - (0.6–1.0) - - - - - American Indian/Alaska Native - - - - - - - - - - - - 359 - - - - - 112 - - - - - (31.2) - - - - - 1.4 - - - - - (0.8–2.5) - - - - - 378 - - - - - 113 - - - - - (29.8) - - - - - 1.4 - - - - - (0.9–2.3) - - - - - Annual income($) - - - - - - - ≤24,999 - - - - - 46,912 - - - - - 8,004 - - - - - (27.2) - - - - - 1.3 - - - - - (1.2–1.5) - - - - - 49,240 - - - - - 8,219 - - - - - (26.6) - - - - - 1.4 - - - - - (1.3–1.5) - - - - - 25,000–49,999 - - - - - 31,170 - - - - - 7,215 - - - - - (23.1) - - - - - 1.1 - - - - - (1.0–1.2) - - - - - 29,757 - - - - - 7,079 - - - - - (23.8) - - - - - 1.2 - - - - - (1.1–1.3) - - - - - 50,000–74,999 - - - - - 18,985 - - - - - 4,330 - - - - - (22.8) - - - - - 1.1 - - - - - (1.0–1.2) - - - - - 18,557 - - - - - 4,065 - - - - - (21.9) - - - - - 1.1 - - - - - (1.0–1.2) - - - - - ≥75,000 - - - - - 31,137 - - - - - 6,791 - - - - - (21.8) - - - - - Ref. - - - - - - - - - - - - 32,558 - - - - - 6,833 - - - - - (21.0) - - - - - Ref. 
- - - - - - - - - - - - Education level - - - - - - - - - Lessthan high school - - - - - - 16,779 - - - - - 4,283 - - - - - (25.5) - - - - - 1.1 - - - - - (1.0–1.2) - - - - - 15,229 - - - - - 3,795 - - - - - (24.9) - - - - - 1.1 - - - - - (1.0–1.1) - - - - - High school diploma - - - - - - 30,559 - - - - - 6,635 - - - - - (21.7) - - - - - 0.9 - - - - - (0.8–0.9) - - - - - 30,692 - - - - - 6,829 - - - - - (22.3) - - - - - 0.9 - - - - - (0.9–1.0 - - - - - Any college education - - - - - - - - - 63,354 - - - - - 15,422 - - - - - (24.3) - - - - - Ref. - - - - - - - - - - - - 65,884 - - - - - 15,572 - - - - - (23.6) - - - - - Ref. - - - - - - - - - - - - U.S. Census region - - - - - - - - - Northeast - - - - - - 23,128 - - - - - 6,390 - - - - - (31.3) - - - - - 2.0 - - - - - (1.8–2.1) - - - - - 23,316 - - - - - 5,538 - - - - - (27.1) - - - - - 1.6 - - - - - (1.4–1.7) - - - - - Midwest - - - - - - - 29,202 - - - - - 6,426 - - - - - (25.4) - - - - - 1.5 - - - - - (1.3–1.6) - - - - - 29,403 - - - - - 6,878 - - - - - (27.1) - - - - - 1.6 - - - - - (1.4–1.7) - - - - - South - - - - - - 48,324 - - - - - 8,889 - - - - - (21.9) - - - - - 1.2 - - - - - (1.1–1.3) - - - - - 49,372 - - - - - 9,088 - - - - - (21.9) - - - - - 1.2 - - - - - (1.1–1.3) - - - - - West - - - - - - 27,550 - - - - - 4,635 - - - - - (19.0) - - - - - Ref. - - - - - - - - - - - - 28,021 - - - - - 4,692 - - - - - (19.2) - - - - - Ref. - - - - - - - Disability status - - - - - - - - Yes - - - - - - 3,657 - - - - - 987 - - - - - (27.0) - - - - - 1.2 - - - - - (1.0–1.4) - - - - - 3,647 - - - - - 1,100 - - - - - (30.2) - - - - - 1.4 - - - - - (1.2–1.7) - - - - - No - - - - - 107,035 - - - - - 25,353 - - - - - (23.7) - - - - - Ref. - - - - - - - - - - - - 108,151 - - - - - 25,096 - - - - - (23.2) - - - - - Ref. - - - - - - - - - - - - total - - - - - - 110,692 - - - - - 26,196 - - - - - (23.4) - - - - - - - - - - - - - - - 111,800 - - - - - 26,340 - - - - - (23.8) - - - - - - - - - - - - - -
- - - - - Characteristics - - - - - - - - - total -occupied -housing -units - - - - - - - - - - Rodent seen in unit recently - - - - - - - - - - Leaks during preceding 12 months - - - - - - - - - no. - - - - - - (%) - - - - - Unadjusted -odds ratio - - - - - - - - - (95 %CI) - - - - - no. - - - - - - (%) - - - - - Unadjusted -odds ratio - - - - - - - - - (95%CI) - - - - - Sex - - - - - - Male - - - - - - 60,721 - - - - - 3,716 - - - - - (35.2) - - - - - Ref. - - - - - - - - - - - - 5,748 - - - - - (9.6) - - - - - Ref. - - - - - - - - - - - - Female - - - - - - 51,084 - - - - - 3,219 - - - - - (38.9) - - - - - 1.2 - - - - - (1.0–1.3) - - - - - 5,215 - - - - - (10.3) - - - - - 1.1 - - - - - (1.0–1.2) - - - - - Race/Ethnicity - - - - - - - - - White, non-Hispanic - - - - - - - - - 51,084 - - - - - 4,692 - - - - - (33.4) - - - - - Ref. - - - - - - - - - - - - 8,077 - - - - - (10.3) - - - - - Ref. - - - - - - - - - - - - Hispanic - - - - - - 79,333 - - - - - 849 - - - - - (51.3) - - - - - 2.1 - - - - - (1.7–2.6) - - - - - 997 - - - - - (7.9) - - - - - 0.7 - - - - - (0.6–0.9) - - - - - Black, non-Hispanic - - - - - - - 12,739 - - - - - 1,028 - - - - - (44.8) - - - - - 1.6 - - - - - (1.3–2.0) - - - - - 1,447 - - - - - (10.7) - - - - - 1.1 - - - - - (0.9–1.2) - - - - - Asian/Paciic Islander - - - - - - - 13,609 - - - - - 172 - - - - - (43.9) - - - - - 1.6 - - - - - (1.1–2.3) - - - - - 229 - - - - - (5.5) - - - - - 0.5 - - - - - (0.4–0.7) - - - - - American Indian/Alaska -Native - - - - - - - - - - - - - 4,181 - - - - - 77 - - - - - (53.0) - - - - - 2.3 - - - - - (1.1–4.7) - - - - - 81 - - - - - (11.2) - - - - - 1.1 - - - - - (0.7–1.8) - - - - - Sex, by race/ethnicity - - - - - - - - - - - Male - - - - - - White, non-Hispanic - - - - - - - - - 44,537 - - - - - 2,739 - - - - - (33.0) - - - - - Ref. - - - - - - - - - - - - 4,439 - - - - - (10.1) - - - - - Ref. 
- - - - - - - - - - - - Hispanic - - - - - - 7,160 - - - - - 398 - - - - - (44.8) - - - - - 1.7 - - - - - (1.2–2.2) - - - - - 545 - - - - - (7.7) - - - - - 0.7 - - - - - (0.6–0.9) - - - - - Black, non-Hispanic - - - - - - - 5,520 - - - - - 353 - - - - - (41.0) - - - - - 1.4 - - - - - (1.0–1.9) - - - - - 561 - - - - - (10.3) - - - - - 1.0 - - - - - (0.8–1.2) - - - - - Asian/Paciic Islander - - - - - - - 2,577 - - - - - 115 - - - - - (46.3) - - - - - 1.8 - - - - - (1.1–2.9) - - - - - 113 - - - - - (4.4) - - - - - 0.4 - - - - - (0.3–0.6) - - - - - American Indian/Alaska -Native - - - - - - - - - - - - - 352 - - - - - 55 - - - - - (65.1) - - - - - 3.8 - - - - - (1.3–11.1) - - - - - 42 - - - - - (11.9) - - - - - 1.2 - - - - - (0.6–2.4) - - - - - Female - - - - - - White, non-Hispanic - - - - - - - - - 34,795 - - - - - 1,953 - - - - - (34.0) - - - - - Ref. - - - - - - - - - - - - 3,639 - - - - - (10.6) - - - - - Ref. - - - - - - - - - - - - Hispanic - - - - - - 5,580 - - - - - 451 - - - - - (59.0) - - - - - 2.8 - - - - - (2.0–3.9) - - - - - 452 - - - - - (8.1) - - - - - 0.8 - - - - - (0.6–0.9) - - - - - Black, non-Hispanic - - - - - - - 8,090 - - - - - 675 - - - - - (47.1) - - - - - 1.7 - - - - - (1.3–2.2) - - - - - 885 - - - - - (11.1) - - - - - 1.1 - - - - - (0.9–1.2) - - - - - Asian/Paciic Islander - - - - - - - 1,604 - - - - - 57 - - - - - (39.9) - - - - - 1.3 - - - - - (0.7–2.5) - - - - - 116 - - - - - (7.3) - - - - - 0.7 - - - - - (0.5–1.0) - - - - - American Indian/Alaska -Native - - - - - - - - - - - - - 378 - - - - - 22 - - - - - (36.1) - - - - - 1.1 - - - - - (0.4–3.3) - - - - - 39 - - - - - (10.5) - - - - - 1.0 - - - - - (0.5–1.9) - - - - - Annualincome($) - - - - - - - ≤24,999 - - - - - 49,240 - - - - - 2,388 - - - - - (45.1) - - - - - 1.9 - - - - - (1.6–2.2) - - - - - 2,957 - - - - - (9.7) - - - - - 1.0 - - - - - (0.9–1.1) - - - - - 25,000–49,999 - - - - - 29,757 - - - - - 1,913 - - - - - (38.0) - - - - - 1.4 - - - - - (1.2–1.7) - - - - - 2,915 - - - - - 
(9.9) - - - - - 1.0 - - - - - (0.9–1.1) - - - - - 50,000–74,999 - - - - - 18,557 - - - - - 971 - - - - - (32.1) - - - - - 1.1 - - - - - (0.9–1.3) - - - - - 1,881 - - - - - (10.3) - - - - - 1.0 - - - - - (0.9–1.2) - - - - - ≥75,000 - - - - - 32,558 - - - - - 1,663 - - - - - (30.5) - - - - - Ref. - - - - - - - - - - - - 3,209 - - - - - (9.9) - - - - - Ref. - - - - - - - - - - - - Education level - - - - - - - - - Less than high school - - - - - - 15,229 - - - - - 1,270 - - - - - (44.1) - - - - - 1.5 - - - - - (1.3–1.8) - - - - - 1,297 - - - - - (8.6) - - - - - 0.8 - - - - - (0.7–0.9) - - - - - High school diploma - - - - - - 30,692 - - - - - 1,955 - - - - - (38.2) - - - - - 1.2 - - - - - (1.0–1.4) - - - - - 2,698 - - - - - (8.9) - - - - - 0.8 - - - - - (0.7–0.9) - - - - - Any college education - - - - - - - - - 65,884 - - - - - 3,709 - - - - - (34.3) - - - - - Ref. - - - - - - - - - - - - 6,969 - - - - - (10.7) - - - - - Ref. - - - - - - - - - - - - U.S. Census region - - - - - - - - - Northeast - - - - - - 23,316 - - - - - 1,850 - - - - - (43.1) - - - - - 1.6 - - - - - (1.3–2.0) - - - - - 2,285 - - - - - (11.3) - - - - - 1.8 - - - - - (1.6–2.1) - - - - - Midwest - - - - - - - 29,403 - - - - - 1,571 - - - - - (33.9) - - - - - 1.1 - - - - - (0.9–1.4) - - - - - 3,694 - - - - - (14.7) - - - - - 2.4 - - - - - (2.1–2.8) - - - - - South - - - - - - 49,372 - - - - - 2,485 - - - - - (37.3) - - - - - 1.3 - - - - - (1.0–1.6) - - - - - 3,383 - - - - - (8.3) - - - - - 1.3 - - - - - (1.1–1.5) - - - - - West - - - - - - 28,021 - - - - - 1,029 - - - - - (31.9) - - - - - Ref. - - - - - - - - - - - - 1,600 - - - - - (6.6) - - - - - Ref. - - - - - - - Disability status - - - - - - - - Yes - - - - - - 3,647 - - - - - 289 - - - - - (41.6) - - - - - 1.2 - - - - - (0.9–1.7) - - - - - 469 - - - - - (12.9) - - - - - 1.4 - - - - - (1.1–1.7) - - - - - No - - - - - 108,151 - - - - - 6,646 - - - - - (36.7) - - - - - Ref. - - - - - - - - - - - - 10,494 - - - - - (9.8) - - - - - Ref. 
- - - - - - - - - - - - total - - - - - - 111,800 - - - - - 6,935 - - - - - (36.9) - - - - - - - - - - - - - - - 10,960 - - - - - (9.9) - - - - - - - - - - - - - -
- - - - - Characteristics - - - - - - - - - Peeling paint - - - - - - - no working smoke alarm - - - - - - - - no. - - - - - - (%) - - - - - Unadjusted -odds ratio - - - - - - - - - (95% CI) - - - - - no. - - - - - - (%) - - - - - Unadjusted -odds ratio - - - - - - - - - (95% CI) - - - - - Sex - - - - - - Male - - - - - - 1,170 - - - - - (1.9) - - - - - Ref. - - - - - - - - - - - - 3,352 - - - - - (5.6) - - - - - Ref. - - - - - - - - - - - - Female - - - - - - 1,207 - - - - - (2.4) - - - - - 1.2 - - - - - (1.0–1.5) - - - - - 2,806 - - - - - (5.6) - - - - - 1.2 - - - - - (1.0–1.5) - - - - - Race/Ethnicity - - - - - - - - - White, non-Hispanic - - - - - - - - - 1,471 - - - - - (1.9) - - - - - Ref. - - - - - - - 3,542 - - - - - (4.5) - - - - - Ref. - - - - - - - - - - - - Hispanic - - - - - - 311 - - - - - (2.4) - - - - - 1.3 - - - - - (1.0–1.7) - - - - - 1,447 - - - - - (11.4) - - - - - 2.7 - - - - - (2.4–3.1) - - - - - Black, non-Hispanic - - - - - - - 480 - - - - - (3.5) - - - - - 1.9 - - - - - (1.5–2.4) - - - - - 795 - - - - - (5.9) - - - - - 1.3 - - - - - (1.1–1.6) - - - - - Asian/Paciic Islander - - - - - - - 38 - - - - - (0.9) - - - - - 0.5 - - - - - (0.3–0.9) - - - - - 212 - - - - - (5.2) - - - - - 1.2 - - - - - (0.9–1.5) - - - - - American Indian/Alaska Native - - - - - - - - - - - - 34 - - - - - (4.7) - - - - - 2.6 - - - - - (1.2–5.7) - - - - - 78 - - - - - (10.8) - - - - - 2.6 - - - - - (1.5–4.4) - - - - - Sex, by race/ethnicity - - - - - - - - - - - Male - - - - - - White, non-Hispanic - - - - - - - - - 780 - - - - - (1.8) - - - - - Ref. - - - - - - - - - - - - 1,907 - - - - - (4.3) - - - - - Ref. 
- - - - - - - - - - - - Hispanic - - - - - - 155 - - - - - (2.2) - - - - - 1.2 - - - - - (0.9–1.8) - - - - - 845 - - - - - (11.9) - - - - - 3.0 - - - - - (2.5–3.6) - - - - - Black, non-Hispanic - - - - - - - 189 - - - - - (3.4) - - - - - 2.0 - - - - - (1.4–2.9) - - - - - 403 - - - - - (7.3) - - - - - 1.8 - - - - - (1.4–2.2) - - - - - Asian/Paciic Islander - - - - - - - 21 - - - - - (0.8) - - - - - 0.5 - - - - - (0.2–1.1) - - - - - 117 - - - - - (4.6) - - - - - 1.1 - - - - - (0.7–1.6) - - - - - American Indian/Alaska Native - - - - - - - - - - - - 20 - - - - - (5.7) - - - - - 3.4 - - - - - (1.1–10.2) - - - - - 44 - - - - - (12.6) - - - - - 3.2 - - - - - (1.5–6.7) - - - - - Female - - - - - - White, non-Hispanic - - - - - - - - - 691 - - - - - (2.0) - - - - - Ref. - - - - - - - - - - - - 1,635 - - - - - (4.8) - - - - - Ref. - - - - - - - - - - - - Hispanic - - - - - - 156 - - - - - (2.8) - - - - - 1.4 - - - - - (1.0–2.0) - - - - - 601 - - - - - (10.8) - - - - - 2.4 - - - - - (2.0–3.0) - - - - - Black, non-Hispanic - - - - - - - 291 - - - - - (3.6) - - - - - 1.8 - - - - - (1.4–2.5) - - - - - 393 - - - - - (4.9) - - - - - 1.0 - - - - - (0.8–1.3) - - - - - Asian/Paciic Islander - - - - - - - 18 - - - - - (1.1) - - - - - 0.6 - - - - - (0.2–1.4) - - - - - 96 - - - - - (6.0) - - - - - 1.3 - - - - - (0.8–2.0) - - - - - American Indian/Alaska Native - - - - - - - - - - - - 14 - - - - - (3.7) - - - - - 1.9 - - - - - (0.6–5.5) - - - - - 34 - - - - - (9.1) - - - - - 2.0 - - - - - (0.9–4.3) - - - - - Annual income ($) - - - - - - - ≤24,999 - - - - - 1,969 - - - - - (4.0) - - - - - 3.4 - - - - - (2.7–4.2) - - - - - 5,679 - - - - - (12.6) - - - - - 5.4 - - - - - (4.6–6.3) - - - - - 25,000–49,999 - - - - - 639 - - - - - (2.1) - - - - - 1.8 - - - - - (1.4–2.3) - - - - - 1,826 - - - - - (6.2) - - - - - 2.5 - - - - - (2.1–3.0) - - - - - 50,000–74,999 - - - - - 332 - - - - - (1.8) - - - - - 1.5 - - - - - (1.1–2.0) - - - - - 752 - - - - - (4.1) - - - - - 1.6 - - - - - (1.3–2.0) - - - - 
- ≥75,000 - - - - - 399 - - - - - (1.2) - - - - - Ref. - - - - - - - - - - - - 843 - - - - - (2.6) - - - - - Ref. - - - - - - - - - - - - Education level - - - - - - - - - Less than high school - - - - - - 446 - - - - - (2.9) - - - - - 1.5 - - - - - (1.2–1.9) - - - - - 1,800 - - - - - (12.0) - - - - - 3.6 - - - - - (3.1–4.1) - - - - - High school diploma - - - - - - 636 - - - - - (2.1) - - - - - 1.1 - - - - - (0.9–1.3) - - - - - 1,962 - - - - - (6.5) - - - - - 1.8 - - - - - (1.6–2.1) - - - - - Any college education - - - - - - - - - 1,295 - - - - - (2.0) - - - - - Ref. - - - - - - - - - - - - 2,396 - - - - - (3.7) - - - - - Ref. - - - - - - - - - - - - U.S. Census region - - - - - - - - - Northeast - - - - - - 648 - - - - - (2.8) - - - - - 1.5 - - - - - (1.2–2.0) - - - - - 1,093 - - - - - (4.9) - - - - - 0.7 - - - - - (0.6–0.8) - - - - - Midwest - - - - - - - 980 - - - - - (3.3 - - - - - 1.9 - - - - - (1.5–2.3) - - - - - 1,694 - - - - - (6.0) - - - - - 0.8 - - - - - (0.7–1.0) - - - - - South - - - - - - 1,199 - - - - - (2.4) - - - - - 1.3 - - - - - (1.1–1.7) - - - - - 4,382 - - - - - (9.3) - - - - - 1.3 - - - - - (1.2–1.5) - - - - - West - - - - - - 512 - - - - - (1.8) - - - - - Ref. - - - - - - - - - - - - 1,931 - - - - - (7.1) - - - - - Ref. - - - - - - - - - - - - Disability status - - - - - - - - Yes - - - - - - 148 - - - - - (4.1) - - - - - 2.0 - - - - - (1.3–3.0) - - - - - 194 - - - - - (5.4) - - - - - 1.0 - - - - - (0.7–1.3) - - - - - No - - - - - 2,230 - - - - - (2.1) - - - - - Ref. - - - - - - - - - - - - 5,961 - - - - - (5.6) - - - - - Ref. - - - - - - - - - - - - total - - - - - - 2,378 - - - - - (2.1) - - - - - - - - - - - - - - - 6,157 - - - - - (5.6) - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024.json deleted file mode 100644 index e700926c..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":4,"numCorrectlyDetectedTables":4,"numErroneouslyDetectedTables":2,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024.pdf deleted file mode 100644 index e09d976f..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025-reg.xml deleted file mode 100644 index 7243285c..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025-reg.xml +++ /dev/null @@ -1,1263 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025-str.xml deleted file mode 100644 index 6fdb985e..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025-str.xml +++ /dev/null @@ -1,5218 +0,0 @@ - - - - - - - - Characteristic - - - - - - - - - Coronary heart disease - - - - - - - - - Stroke - - - - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - Sex - - - - - - Female - - - - - - 200,915 - - - - - 103.1 - - - - - (102.7–103.6) - - - - - 82,595 - - - - - 42.6 - - - - - (42.3–42.9) - - - - - Male - - - - - - 224,510 - - - - - 176.5 - - - - - (175.7–177.2) - - - - - 54,524 - - - - - 43.9 - - - - - (43.5–44.3) - - - - - Race - - - - - - - American Indian/Alaska Native - - - - - - - - - - - - 1,880 - - - - - 97.4 - - - - - (92.8–102.0) - - - - - 548 - - - - - 29.4 - - - - - (26.9–32.0) - - - - - Asian/Paciic Islander - - - - - - - 7,570 - - - - - 77.1 - - - - - (75.4–78.9) - - - - - 3,662 - - - - - 37.0 - - - - - (35.8–38.2) - - - - - Black - - - - - 44,530 - - - - - 161.6 - - - - - (160.1–163.1) - - - - - 17,045 - - - - - 61.6 - - - - - (60.7–62.6) - - - - - White - - - - - - - 371,445 - - - - - 134.2 - - - - - (133.8–134.6) - - - - - 115,864 - - - - - 41.7 - - - - - (41.5–42.0) - - - - - Ethnicity - - - - - - - Hispanic - - - - - - 20,939 - - - - - 106.4 - - - - - (104.9–107.8) - - - - - 7,005 - - - - - 34.2 - - - - - (33.4–35.0) - - - - - Non-Hispanic - - - - - - 403,588 - - - - - 136.8 - - - - - (136.4–137.3) - - - - - 129,892 - - - - - 44.0 - - - - - (43.8–44.3) - - - - - total - - - - - - 425,425 - - - - - 135.0 - - - - - (134.6–135.4) - - - - - 137,119 - - - - - 43.6 - - - - - (43.3–43.8) - - - -
- - - - - - Age group -(yrs) - - - - - - - - - Race - - - - - - - American Indian/Alaska native - - - - - - - - - - - Asian/Pacific Islander - - - - - - - Black - - - - - White - - - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - Women - - - - - - 45–54 - - - - - 47 - - - - - 21.8 - - - - - (16.0–29.0) - - - - - 91 - - - - - 8.7 - - - - - (7.0–10.6) - - - - - 1,564 - - - - - 56.0 - - - - - (53.2–58.8) - - - - - 4,316 - - - - - 24.1 - - - - - (23.4–24.8) - - - - - 55–64 - - - - - 116 - - - - - 85.5 - - - - - (69.9–101.0) - - - - - 224 - - - - - 31.9 - - - - - (27.7–36.1) - - - - - 2,636 - - - - - 147.8 - - - - - (142.1–153.4) - - - - - 10,137 - - - - - 73.8 - - - - - (72.3–75.2) - - - - - 65–74 - - - - - 164 - - - - - 234.9 - - - - - (199.0–270.9) - - - - - 527 - - - - - 132.2 - - - - - (120.9–143.5) - - - - - 3,859 - - - - - 367.2 - - - - - (355.6–378.8) - - - - - 19,287 - - - - - 221.0 - - - - - (217.9–224.1) - - - - - 75–84 - - - - - 242 - - - - - 654.1 - - - - - (571.7–736.5) - - - - - 1,056 - - - - - 448.6 - - - - - (421.5–475.7) - - - - - 6,114 - - - - - 940.8 - - - - - (917.3–964.4) - - - - - 50,538 - - - - - 740.4 - - - - - (733.9–746.8) - - - - - ≥85 - - - - - 208 - - - - - 1,271.7 - - - - - (1,098.9–1,444.5) - - - - - 1,331 - - - - - 1,665.5 - - - - - (1,576.0–1,754.9) - - - - - 7,111 - - - - - 2,599.5 - - - - - (2,539.1–2,660.0) - - - - - 89,442 - - - - - 2,761.6 - - - - - (2,743.6–2,779.7) - - - - - total - - - - - - 777 - - - - - 163.8 - - - - - (152.3–175.3) - - - - - 3,229 - - - - - 130.9 - - - - - (126.3–135.4) - - - - - 21,284 - - - - - 324.9 - - - - - (320.6–329.3) - - - - - 173,720 - - - - - 344.3 - - - - - (342.6–345.9) - - - - - Men - - - - - - 45–54 - - - - - 164 - - - - - 81.0 - - - - - (68.6–93.4) - - - - - 374 - - - - - 39.9 - - - - - (35.8–43.9) - - - - - 3,140 - - - - - 
130.9 - - - - - (126.3–135.5) - - - - - 15,294 - - - - - 86.2 - - - - - (84.8–87.5) - - - - - 55–64 - - - - - 241 - - - - - 191.7 - - - - - (167.5–215.9) - - - - - 690 - - - - - 114.0 - - - - - (105.5–122.5) - - - - - 4,890 - - - - - 340.1 - - - - - (330.6–349.7) - - - - - 27,772 - - - - - 212.7 - - - - - (210.2–215.2) - - - - - 65–74 - - - - - 256 - - - - - 424.4 - - - - - (372.4–476.4) - - - - - 858 - - - - - 261.7 - - - - - (244.2–279.2) - - - - - 5,300 - - - - - 704.9 - - - - - (685.9–723.9) - - - - - 36,434 - - - - - 483.8 - - - - - (478.9–488.8) - - - - - 75–84 - - - - - 248 - - - - - 900.6 - - - - - (788.5–1,012.7) - - - - - 1,191 - - - - - 736.4 - - - - - (694.6–778.2) - - - - - 5,384 - - - - - 1,456.9 - - - - - (1,418.0–1,495.8) - - - - - 60,452 - - - - - 1,275.5 - - - - - (1,265.3–1,285.7) - - - - - ≥85 - - - - - 113 - - - - - 1,441.7 - - - - - (1,175.9–1,707.5) - - - - - 1,045 - - - - - 2,169.9 - - - - - (2,038.3–2,301.5) - - - - - 2,973 - - - - - 2,656.7 - - - - - (2,561.2–2,752.2) - - - - - 51,632 - - - - - 3,396.0 - - - - - (3,366.7–3,425.3) - - - - - total - - - - - - 1,022 - - - - - 241.1 - - - - - (226.3–255.8) - - - - - 4,158 - - - - - 199.8 - - - - - (193.7–205.9) - - - - - 21,687 - - - - - 427.8 - - - - - (422.1–433.5) - - - - - 191,584 - - - - - 429.6 - - - - - (427.7–431.5) - - - -
- - - - - - Age -group -(yrs) - - - - - - - - - - Women - - - - - - Men - - - - - - Hispanic - - - - - - non-Hispanic - - - - - - Hispanic - - - - - - non-Hispanic - - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - 45–54 - - - - - 345 - - - - - 15.5 - - - - - (13.8–17.1) - - - - - 5,663 - - - - - 28.7 - - - - - (27.9–29.4) - - - - - 1,205 - - - - - 52.7 - - - - - (49.7–55.7) - - - - - 17,707 - - - - - 93.2 - - - - - (91.8–94.6) - - - - - 55–64 - - - - - 806 - - - - - 60.9 - - - - - (56.7–65.1) - - - - - 12,273 - - - - - 81.6 - - - - - (80.2–83.0) - - - - - 1,906 - - - - - 156.5 - - - - - (149.5–163.6) - - - - - 31,564 - - - - - 225.4 - - - - - (222.9–227.8) - - - - - 65–74 - - - - - 1,512 - - - - - 199.2 - - - - - (189.2–209.2) - - - - - 22,270 - - - - - 234.7 - - - - - (231.6–237.8) - - - - - 2,430 - - - - - 394.1 - - - - - (378.5–409.8) - - - - - 40,266 - - - - - 500.0 - - - - - (495.1–504.9) - - - - - 75–84 - - - - - 3,012 - - - - - 666.6 - - - - - (642.8–690.4) - - - - - 54,839 - - - - - 751.6 - - - - - (745.3–757.9) - - - - - 3,235 - - - - - 1,022.8 - - - - - (987.6–1,058.1) - - - - - 63,916 - - - - - 1,282.9 - - - - - (1,273.0–1,292.9) - - - - - ≥85 - - - - - 3,694 - - - - - 2,213.2 - - - - - (2,141.8–2,284.5) - - - - - 94,269 - - - - - 2,739.1 - - - - - (2,721.6–2,756.6) - - - - - 2,176 - - - - - 2,453.9 - - - - - (2,350.8–2,557.0) - - - - - 53,499 - - - - - 3,344.5 - - - - - (3,316.2–3,372.9) - - - - - total - - - - - - 9,369 - - - - - 190.0 - - - - - (186.2–193.9) - - - - - 189,314 - - - - - 344.1 - - - - - (342.5–345.6) - - - - - 10,952 - - - - - 242.0 - - - - - (237.5–246.5) - - - - - 206,952 - - - - - 434.4 - - - - - (432.5–436.2) - - - -
- - - - - - Age -group -(yrs) - - - - - - - - - - Race - - - - - - - American Indian/Alaska native - - - - - - - - - - - Asian/PaciicIslander - - - - - - - Black - - - - - White - - - - - - - no. - - - - - - Rate - - - - - - (95%CI) - - - - - no. - - - - - - Rate - - - - - - (95%CI) - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - Women - - - - - - 45–54 - - - - - 19 - - - - - - - - - - - - - - - 109 - - - - - 10.4 - - - - - (8.4–12.3) - - - - - 875 - - - - - 31.3 - - - - - (29.3–33.4) - - - - - 1,856 - - - - - 10.4 - - - - - (9.9–10.8) - - - - - 55–64 - - - - - 22 - - - - - 16.2 - - - - - (10.2–24.5) - - - - - 202 - - - - - 28.8 - - - - - (24.8–32.7) - - - - - 1,090 - - - - - 61.1 - - - - - (57.5–64.7) - - - - - 3,307 - - - - - 24.1 - - - - - (23.2–24.9) - - - - - 65–74 - - - - - 55 - - - - - 78.8 - - - - - (59.4–102.5) - - - - - 322 - - - - - 80.8 - - - - - (72.0–89.6) - - - - - 1,565 - - - - - 148.9 - - - - - (141.5–156.3) - - - - - 6,918 - - - - - 79.3 - - - - - (77.4–81.1) - - - - - 75–84 - - - - - 99 - - - - - 267.6 - - - - - (217.5–325.8) - - - - - 669 - - - - - 284.2 - - - - - (262.7–305.7) - - - - - 2,701 - - - - - 415.6 - - - - - (400.0–431.3) - - - - - 21,943 - - - - - 321.5 - - - - - (317.2–325.7) - - - - - ≥85 - - - - - 106 - - - - - 648.1 - - - - - (524.7–771.5) - - - - - 621 - - - - - 777.0 - - - - - (715.9–838.2) - - - - - 2,901 - - - - - 1,060.5 - - - - - (1,021.9–1,099.1) - - - - - 35,698 - - - - - 1,102.2 - - - - - (1,090.8–1,113.7) - - - - - total - - - - - - 301 - - - - - 63.4 - - - - - (56.3–70.6) - - - - - 1,923 - - - - - 77.9 - - - - - (74.5–81.4) - - - - - 9,132 - - - - - 139.4 - - - - - (136.5–142.3) - - - - - 69,722 - - - - - 138.2 - - - - - (137.1–139.2) - - - - - Men - - - - - - 45–54 - - - - - 33 - - - - - 16.3 - - - - - (11.2–22.9) - - - - - 126 - - - - - 13.4 - - - - - (11.1–15.8) - - - - - 1,044 - - - - - 43.5 - - - - - (40.9–46.2) - - - - - 2,279 - - - - - 12.8 - - - 
- - (12.3–13.4) - - - - - 55–64 - - - - - 44 - - - - - 35.0 - - - - - (25.4–47.0) - - - - - 220 - - - - - 36.3 - - - - - (31.5–41.1) - - - - - 1,523 - - - - - 105.9 - - - - - (100.6–111.3) - - - - - 4,110 - - - - - 31.5 - - - - - (30.5–32.4) - - - - - 65–74 - - - - - 50 - - - - - 82.9 - - - - - (61.5–109.3) - - - - - 357 - - - - - 108.9 - - - - - (97.6–120.2) - - - - - 1,644 - - - - - 218.7 - - - - - (208.1–229.2) - - - - - 7,312 - - - - - 97.1 - - - - - (94.9–99.3) - - - - - 75–84 - - - - - 48 - - - - - 174.3 - - - - - (128.5–231.1) - - - - - 477 - - - - - 294.9 - - - - - (268.5–321.4) - - - - - 1,741 - - - - - 471.1 - - - - - (449.0–493.2) - - - - - 16,041 - - - - - 338.5 - - - - - (333.2–343.7) - - - - - ≥85 - - - - - 27 - - - - - 344.5 - - - - - (227.0–501.2) - - - - - 417 - - - - - 865.9 - - - - - (782.8–949.0) - - - - - 987 - - - - - 882.0 - - - - - (827.0–937.0) - - - - - 14,311 - - - - - 941.3 - - - - - (925.9–956.7) - - - - - total - - - - - - 202 - - - - - 47.6 - - - - - (41.1–54.2) - - - - - 1,597 - - - - - 76.7 - - - - - (73.0–80.5) - - - - - 6,939 - - - - - 136.9 - - - - - (133.7–140.1) - - - - - 44,053 - - - - - 98.8 - - - - - (97.9–99.7) - - - -
- - - - - - Age -group -(yrs) - - - - - - - - - - Women - - - - - - Men - - - - - - Hispanic - - - - - - non-Hispanic - - - - - - Hispanic - - - - - - non-Hispanic - - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - 45–54 - - - - - 263 - - - - - 11.8 - - - - - (10.4–13.2) - - - - - 2,590 - - - - - 13.1 - - - - - (12.6–13.6) - - - - - 389 - - - - - 17.0 - - - - - (15.3–18.7) - - - - - 3,080 - - - - - 16.2 - - - - - (15.6–16.8) - - - - - 55–64 - - - - - 368 - - - - - 27.8 - - - - - (25.0–30.7) - - - - - 4,243 - - - - - 28.2 - - - - - (27.4–29.1) - - - - - 501 - - - - - 41.1 - - - - - (37.5–44.8) - - - - - 5,380 - - - - - 38.4 - - - - - (37.4–39.4) - - - - - 65–74 - - - - - 584 - - - - - 76.9 - - - - - (70.7–83.2) - - - - - 8,256 - - - - - 87.0 - - - - - (85.1–88.9) - - - - - 617 - - - - - 100.1 - - - - - (92.2–108.0) - - - - - 8,723 - - - - - 108.3 - - - - - (106.0–110.6) - - - - - 75–84 - - - - - 1,087 - - - - - 240.6 - - - - - (226.3–254.9) - - - - - 24,285 - - - - - 332.8 - - - - - (328.6–337.0) - - - - - 926 - - - - - 292.8 - - - - - (273.9–311.6) - - - - - 17,350 - - - - - 348.2 - - - - - (343.1–353.4) - - - - - ≥85 - - - - - 1,240 - - - - - 742.9 - - - - - (701.6–784.3) - - - - - 38,056 - - - - - 1,105.8 - - - - - (1,094.6–1,116.9) - - - - - 516 - - - - - 581.9 - - - - - (531.7–632.1) - - - - - 15,203 - - - - - 950.4 - - - - - (935.3–965.5) - - - - - total - - - - - - 3,542 - - - - - 71.8 - - - - - (69.5–74.2) - - - - - 77,430 - - - - - 140.7 - - - - - (139.7–141.7) - - - - - 2,949 - - - - - 65.2 - - - - - (62.8–67.5) - - - - - 49,736 - - - - - 104.4 - - - - - (103.5–105.3) - - - -
- - - - - - State/Area - - - - - - - - - - Coronary heart disease - - - - - - - - - Stroke - - - - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - no. - - - - - - Rate - - - - - - (95% CI) - - - - - District of Columbia - - - - - - - - 1,144 - - - - - 193.5 - - - - - (182.2–204.8) - - - - - 221 - - - - - 37.6 - - - - - (32.6–42.6) - - - - - NewYork - - - - - - - - 39,385 - - - - - 181.2 - - - - - (179.4–183.0) - - - - - 6,398 - - - - - 29.7 - - - - - (29.0–30.5) - - - - - Oklahoma - - - - - - 6,930 - - - - - 177.4 - - - - - (173.2–181.6) - - - - - 2,085 - - - - - 53.3 - - - - - (51.0–55.6) - - - - - Tennessee - - - - - - 10,602 - - - - - 167.8 - - - - - (164.6–171.0) - - - - - 3,407 - - - - - 54.6 - - - - - (52.8–56.5) - - - - - RhodeIsland - - - - - - 2,187 - - - - - 162.4 - - - - - (155.5–169.3) - - - - - 421 - - - - - 31.4 - - - - - (28.4–34.5) - - - - - Arkansas - - - - - - - - 5,100 - - - - - 160.1 - - - - - (155.7–164.5) - - - - - 1,884 - - - - - 58.8 - - - - - (56.1–61.4) - - - - - WestVirginia - - - - - - - - - - 3,548 - - - - - 158.7 - - - - - (153.4–163.9) - - - - - 1,072 - - - - - 47.6 - - - - - (44.7–50.5) - - - - - Michigan - - - - - - 16,782 - - - - - 156.6 - - - - - (154.2–158.9) - - - - - 4,752 - - - - - 44.5 - - - - - (43.3–45.8) - - - - - Missouri - - - - - - - 10,206 - - - - - 155.2 - - - - - (152.2–158.2) - - - - - 3,247 - - - - - 49.4 - - - - - (47.7–51.1) - - - - - Ohio - - - - - 19,820 - - - - - 154.0 - - - - - (151.8–156.1) - - - - - 5,828 - - - - - 45.2 - - - - - (44.1–46.4) - - - - - Kentucky - - - - - - - 6,530 - - - - - 148.6 - - - - - (145.0–152.2) - - - - - 2,197 - - - - - 50.5 - - - - - (48.3–52.6) - - - - - Mississippi - - - - - - 4,354 - - - - - 146.7 - - - - - (142.4–151.1) - - - - - 1,585 - - - - - 53.7 - - - - - (51.1–56.4) - - - - - Maryland - - - - - - - 7,744 - - - - - 141.7 - - - - - (138.5–144.9) - - - - - 2,365 - - - - - 43.6 - - - - - (41.8–45.4) - - - - - Iowa - - - - - - - 5,469 - - - - - 141.6 - - - - - 
(137.7–145.4) - - - - - 1,718 - - - - - 42.9 - - - - - (40.8–45.0) - - - - - NewJersey - - - - - 13,684 - - - - - 141.2 - - - - - (138.8–143.6) - - - - - 3,468 - - - - - 35.9 - - - - - (34.7–37.1) - - - - - Delaware - - - - - - - - - 1,305 - - - - - 140.8 - - - - - (133.2–148.5) - - - - - 384 - - - - - 41.8 - - - - - (37.6–46.0) - - - - - SouthDakota - - - - - - 1,397 - - - - - 140.0 - - - - - (132.6–147.5) - - - - - 442 - - - - - 42.4 - - - - - (38.4–46.4) - - - - - Indiana - - - - - - 9,210 - - - - - 139.7 - - - - - (136.8–142.5) - - - - - 3,238 - - - - - 49.1 - - - - - (47.4–50.8) - - - - - California - - - - - - - - 46,584 - - - - - 139.0 - - - - - (137.7–140.2) - - - - - 15,039 - - - - - 44.9 - - - - - (44.2–45.6) - - - - - Louisiana - - - - - - 5,919 - - - - - 138.3 - - - - - (134.7–141.8) - - - - - 2,195 - - - - - 52.1 - - - - - (49.9–54.3) - - - - - Pennsylvania - - - - - - - 22,030 - - - - - 136.0 - - - - - (134.2–137.8) - - - - - 7,151 - - - - - 43.6 - - - - - (42.6–44.6) - - - - - Illinois - - - - - - 17,747 - - - - - 134.8 - - - - - (132.8–136.8) - - - - - 5,989 - - - - - 45.4 - - - - - (44.3–46.6) - - - - - NorthDakota - - - - - - 1,115 - - - - - 133.7 - - - - - (125.7–141.8) - - - - - 428 - - - - - 49.2 - - - - - (44.5–54.0) - - - - - Texas - - - - - - - 25,933 - - - - - 132.2 - - - - - (130.6–133.8) - - - - - 9,366 - - - - - 48.3 - - - - - (47.3–49.3) - - - - - Florida - - - - - - - 32,868 - - - - - 129.2 - - - - - (127.8–130.6) - - - - - 8,925 - - - - - 35.3 - - - - - (34.5–36.0) - - - - - NorthCarolina - - - - - - - - 11,173 - - - - - 126.1 - - - - - (123.8–128.5) - - - - - 4,572 - - - - - 52.4 - - - - - (50.9–53.9) - - - - - Vermont - - - - - - - - 880 - - - - - 124.5 - - - - - (116.2–132.8) - - - - - 264 - - - - - 37.8 - - - - - (33.2–42.4) - - - - - Alabama - - - - - - 6,038 - - - - - 121.7 - - - - - (118.6–124.8) - - - - - 2,740 - - - - - 55.5 - - - - - (53.4–57.6) - - - - - Arizona - - - - - - - - 7,806 - - - - - 120.8 - - - - - (118.1–123.5) 
- - - - - 2,226 - - - - - 34.5 - - - - - (33.1–36.0) - - - - - Nevada - - - - - - 2,649 - - - - - 119.5 - - - - - (114.9–124.1) - - - - - 847 - - - - - 39.7 - - - - - (37.0–42.4) - - - - - SouthCarolina - - - - - - - - 5,398 - - - - - 119.2 - - - - - (116.0–122.4) - - - - - 2,291 - - - - - 51.6 - - - - - (49.5–53.8) - - - - - NewHampshire - - - - - - 1,629 - - - - - 116.3 - - - - - (110.6–121.9) - - - - - 494 - - - - - 35.4 - - - - - (32.3–38.6) - - - - - Virginia - - - - - - - - 8,486 - - - - - 115.6 - - - - - (113.1–118.0) - - - - - 3,523 - - - - - 49.0 - - - - - (47.3–50.6) - - - - - Washington - - - - - - - 7,303 - - - - - 114.7 - - - - - (112.1–117.4) - - - - - 2,725 - - - - - 42.9 - - - - - (41.3–44.6) - - - - - NewMexico - - - - - - - - 2,277 - - - - - 114.6 - - - - - (109.9–119.3) - - - - - 739 - - - - - 37.5 - - - - - (34.8–40.2) - - - - - Kansas - - - - - - 3,565 - - - - - 114.1 - - - - - (110.3–117.8) - - - - - 1,489 - - - - - 46.7 - - - - - (44.3–49.1) - - - - - Wisconsin - - - - - - - 7,183 - - - - - 113.9 - - - - - (111.3–116.6) - - - - - 2,829 - - - - - 44.3 - - - - - (42.7–46.0) - - - - - Maine - - - - - - 1,816 - - - - - 112.2 - - - - - (107.0–117.4) - - - - - 670 - - - - - 41.3 - - - - - (38.2–44.5) - - - - - Idaho - - - - - 1,565 - - - - - 110.2 - - - - - (104.7–115.7) - - - - - 725 - - - - - 51.6 - - - - - (47.9–55.4) - - - - - Connecticut - - - - - - - 4,630 - - - - - 110.0 - - - - - (106.8–113.2) - - - - - 1,547 - - - - - 36.5 - - - - - (34.6–38.3) - - - - - Georgia - - - - - - - - 8,371 - - - - - 108.7 - - - - - (106.4–111.1) - - - - - 3,889 - - - - - 51.4 - - - - - (49.8–53.1) - - - - - Wyoming - - - - - - - 561 - - - - - 107.1 - - - - - (98.2–116.1) - - - - - 236 - - - - - 45.4 - - - - - (39.6–51.3) - - - - - Massachusetts - - - - - - 8,015 - - - - - 105.6 - - - - - (103.3–108.0) - - - - - 2,880 - - - - - 37.7 - - - - - (36.3–39.0) - - - - - Oregon - - - - - - 4,070 - - - - - 99.2 - - - - - (96.1–102.3) - - - - - 1,978 - - - - - 48.0 - - - 
- - (45.9–50.1) - - - - - Montana - - - - - - - 1,093 - - - - - 99.0 - - - - - (93.0–104.9) - - - - - 461 - - - - - 41.2 - - - - - (37.4–44.9) - - - - - Colorado - - - - - - - 3,922 - - - - - 96.3 - - - - - (93.2–99.3) - - - - - 1,532 - - - - - 38.7 - - - - - (36.7–40.6) - - - - - Nebraska - - - - - - - 1,861 - - - - - 89.9 - - - - - (85.8–94.0) - - - - - 922 - - - - - 43.9 - - - - - (41.0–46.7) - - - - - Alaska - - - - - - - 351 - - - - - 87.4 - - - - - (77.7–97.2) - - - - - 177 - - - - - 46.8 - - - - - (39.5–54.1) - - - - - Hawaii - - - - - - - 1,298 - - - - - 85.2 - - - - - (80.5–89.9) - - - - - 665 - - - - - 43.2 - - - - - (39.9–46.5) - - - - - Minnesota - - - - - - 4,430 - - - - - 79.7 - - - - - (77.3–82.0) - - - - - 2,219 - - - - - 39.3 - - - - - (37.7–41.0) - - - - - Utah - - - - - - 1,462 - - - - - 77.5 - - - - - (73.5–81.5) - - - - - 674 - - - - - 36.2 - - - - - (33.5–38.9) - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025.json deleted file mode 100644 index f783d0e7..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":6,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":3,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025.pdf deleted file mode 100644 index c242c923..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026-reg.xml deleted file mode 100644 index 1cf81bcd..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026-reg.xml +++ /dev/null @@ -1,486 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026-str.xml deleted file mode 100644 index 96a4efb5..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026-str.xml +++ /dev/null @@ -1,809 +0,0 @@ - - - - - - - Fused aluminum oxide - - - - - - - - - - - - - - - - - - - - - - Silicon carbide - - - - - - - - - - - - - - 2009 - - - - - 2010 - - - - - 2009 - - - - - 2010 - - - - - United States and Canada - - - - - - - - - - - - - - - - - - - - - - - - - 60,400 - - - - - - - - - - 60,400 - - - - - - - - - - 42,600 - - - - - - - - - - 42,600 - - - - - - - - - - Argentina - - - - - - - - - - - - - - - - - - - - - - - 5,000 - - - - - - - - - 5,000 - - - - - - - - - Australia - - - - - - - - - - - - - 50,000 - - - - - - - - - - 50,000 - - - - - - - - - - - - - - - - - - - - Austria - - - - - - - - - - - 60,000 - - - - - - - - - - 60,000 - - - - - - - - - - - - - - - - - - - - Brazil - - - - - - - - - 50,000 - - - - - - - - - - 50,000 - - - - - - - - - - 43,000 - - - - - - - - - - 43,000 - - - - - - - - - - China - - - - - - - - - 700,000 - - - - - - - - - - - 700,000 - - - - - - - - - - - 455,000 - - - - - - - - - - - 455,000 - - - - - - - - - - - France - - - - - - - - - - 40,000 - - - - - - - - - - 40,000 - - - - - - - - - - 16,000 - - - - - - - - - - 16,000 - - - - - - - - - - Germany - - - - - - - - - - - 80,000 - - - - - - - - - - 80,000 - - - - - - - - - - 36,000 - - - - - - - - - - 36,000 - - - - - - - - - - India - - - - - - - - - 40,000 - - - - - - - - - - 40,000 - - - - - - - - - - 5,000 - - - - - - - - - 5,000 - - - - - - - - - Japan - - - - - - - - - 25,000 - - - - - - - - - - 25,000 - - - - - - - - - - 60,000 - - - - - - - - - - 60,000 - - - - - - - - - - Mexico - - - - - - - - - - - - - - - - - - 45,000 - - - - - - - - - - 45,000 - - - - - - - - - - Norway - - - - - - - - - - - 
- - - - - - - - - 80,000 - - - - - - - - - - 80,000 - - - - - - - - - - Venezuela - - - - - - - - - - - - - - - - - - - - - - - 30,000 - - - - - - - - - - 30,000 - - - - - - - - - - Other countries - - - - - - - - - - - - - - - - - - 80,000 - - - - - - - - - - 80,000 - - - - - - - - - - 190,000 - - - - - - - - - - - 190,000 - - - - - - - - - - - World total (rounded) - - - - - - - - - - - - - - - - - - - - - - - 1,190,000 - - - - - - - - - - - - - 1,190,000 - - - - - - - - - - - - - 1,010,000 - - - - - - - - - - - - - 1,010,000 - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026.pdf deleted file mode 100644 index d203b52b..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027-reg.xml deleted file mode 100644 index 8b7aa45d..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027-reg.xml +++ /dev/null @@ -1,280 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027-str.xml deleted file mode 100644 index 491dd177..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027-str.xml +++ /dev/null @@ -1,631 +0,0 @@ - - - - - - - Age - - - - - Enrollment - - - - - - - - - % - - - - - 14-17 - - - - - - - 231,000 - - - - - - - - 1.3 - - - - - - 18-19 - - - - - - - 3,769,000 - - - - - - - - - 21.2 - - - - - - 20-21 - - - - - - - 3,648,000 - - - - - - - - - 20.5 - - - - - - 22-24 - - - - - - - 3,193,000 - - - - - - - - - 18.0 - - - - - - 25-29 - - - - - - - 2,401,000 - - - - - - - - - 13.5 - - - - - - 30-34 - - - - - - - 1,409,000 - - - - - - - - - 7.9 - - - - - - Over 35 - - - - - - - - 3,107,000 - - - - - - - - - 17.5 - - - - - - Total - - - - - - 17,758,000 - - - - - - - - - 100 - - - - -
- - - - - Murder / Non-Negligent Manslaughter - - - - - - - - - - - - - - - - - - - - - Negligent Manslaughter - - - - - - - - - - - - - - - - Forcible Sex Offense - - - - - - - - - - - Non-Forcible Sex Offense - - - - - - - - - - - - - Robbery - - - - - - - - Aggravated Assault - - - - - - - - Burglary - - - - - - - Motor Vehicle Theft - - - - - - - - - Arson - - - - - - - 2005 - - - - - - - 28 - - - - - 33 - - - - - 3,583 - - - - - - - 55 - - - - - 5,432 - - - - - - - 5,943 - - - - - - - 37,800 - - - - - - - - 11,890 - - - - - - - - 1,219 - - - - - - - 2006 - - - - - - - 25 - - - - - 0 - - - - - 3,490 - - - - - - - 56 - - - - - 4,921 - - - - - - - 5,472 - - - - - - - 35,124 - - - - - - - - 9,811 - - - - - - - 1,086 - - - - - - - 2007 - - - - - - - 66 - - - - - 8 - - - - - 3,482 - - - - - - - 62 - - - - - 4,985 - - - - - - - 5,234 - - - - - - - 33,010 - - - - - - - - 8,744 - - - - - - - 915 - - - - - - 2008 - - - - - - - 55 - - - - - 5 - - - - - 3,287 - - - - - - - 49 - - - - - 4,562 - - - - - - - 5,026 - - - - - - - 31,851 - - - - - - - - 7,465 - - - - - - - 825 - - - - - - Total - - - - - - 174 - - - - - - 46 - - - - - 13,842 - - - - - - - 222 - - - - - - 19,900 - - - - - - - 21,675 - - - - - - - 137,785 - - - - - - - - 37,910 - - - - - - - 4,045 - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027.json deleted file mode 100644 index 1bf5fd29..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027.pdf deleted file mode 100644 index 200de4c0..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028-reg.xml deleted file mode 100644 index f7f29556..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028-reg.xml +++ /dev/null @@ -1,199 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028-str.xml deleted file mode 100644 index a1da4e60..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028-str.xml +++ /dev/null @@ -1,425 +0,0 @@ - - - - - - - Buildings - - - - - - - - n = - - - - - - % - - - - - IHE Residence - - - - - - - - - - 60 - - - - - 27.7 - - - - - - IHE Grounds & Parking Lots - - - - - - - - - - - - - - - - 58 - - - - - 26.7 - - - - - - Administrative or Academic - - - - - - - - 56 - - - - - 25.8 - - - - - - Student/Employee Services - - - - - - - - - - - - - 22 - - - - - 10.1 - - - - - - Other/Undetermined - - - - - - - - - - - - - - - 15 - - - - - 6.9 - - - - - - Multiple Facilities/Buildings - - - - - - - - - - - 6 - - - - - 2.8 - - - - - - Total - - - - - - 217 - - - - - - 100.0 - - - - - -
- - - - - Locales - - - - - n = - - - - - - % - - - - - Dorm Room or Apartment - - - - - - - - - - - - - - 48 - - - - - 30.2 - - - - - - Office(s) - - - - - - 22 - - - - - 13.8 - - - - - - Instructional Area - - - - - - - - - - 20 - - - - - 12.6 - - - - - - Non-specific/Other/Undetermined - - - - - - - - - - - - - - - - - - - - 16 - - - - - 10.1 - - - - - - Common Area - - - - - - - - - 15 - - - - - 9.4 - - - - - - Hallway(s)/Stairwell(s)/Restroom(s) - - - - - - - - 15 - - - - - 9.4 - - - - - - Student Services Locales/Cafeteria - - - - - - - 10 - - - - - 6.3 - - - - - - Multiple Locales within the Same Building - - - - - - - - - - - - - - - - - - - 7 - - - - - 4.4 - - - - - - Multiple Facilities/Buildings - - - - - - - - - - - 6 - - - - - 3.8 - - - - - - Total - - - - - 159 - - - - - - 100 - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028.json deleted file mode 100644 index a2697933..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":2,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028.pdf deleted file mode 100644 index a4b57022..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029-reg.xml deleted file mode 100644 index 79282b6f..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029-reg.xml +++ /dev/null @@ -1,123 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029-str.xml deleted file mode 100644 index 6f23abe0..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029-str.xml +++ /dev/null @@ -1,266 +0,0 @@ - - - - - - - Categories - - - - - - - - n = - - - - - - % - - - - - Related to an Intimate Relationship - - - - - - - - - - - - - - - - - 77 - - - - - 33.9 - - - - - - Retaliation for Specific Action(s) - - - - - - - - - - - - 31 - - - - - 13.7 - - - - - - Refused Advances or Obsession with the Target - - - - - - - - - - - - - - - - - - - - - - 23 - - - - - 10.1 - - - - - - Response to Academic Stress/Failure - - - - - - - - 23 - - - - - 10.1 - - - - - - Acquaintance/Stranger Based Sexual Violence - - - - - - - - - 22 - - - - - 9.7 - - - - - - Psychotic Actions - - - - - - - - - 18 - - - - - 7.9 - - - - - - Workplace Dismissal/Sanction - - - - - - - 14 - - - - - 6.2 - - - - - - Need to Kill / Specific Victimology - - - - - - - - - - - - - - 7 - - - - - 3.1 - - - - - - Draw Attention to Self/Issue(s) - - - - - - - - - 7 - - - - - 3.1 - - - - - - Bias Related - - - - - 5 - - - - - 2.2 - - - - - - Total - - - - - 227 - - - - - - 100 - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029.pdf deleted file mode 100644 index 3589dbd1..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030-reg.xml deleted file mode 100644 index 3c23fd4e..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030-reg.xml +++ /dev/null @@ -1,263 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030-str.xml deleted file mode 100644 index bb875681..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030-str.xml +++ /dev/null @@ -1,441 +0,0 @@ - - - - - - - Cycle -Name - - - - - - - - - - - - - KI -(1/km) - - - - - - - - - - - - Distance -(mi) - - - - - - - - - - - - - - - - Percent Fuel Savings - - - - - - - - - - - - - - - - - - - - - - Improved -Speed - - - - - - - - - - - - - - - - - Decreased -Accel - - - - - - - - - - - - - - - - - - Eliminate -Stops - - - - - - - - - - - - - - - - - Decreased -Idle - - - - - - - - - - - - - - - - - 2012_2 - - - - - - - - - - 3.30 - - - - - - - - 1.3 - - - - - - - 5.9% - - - - - - - - 9.5% - - - - - - - - 29.2% - - - - - - - - - 17.4% - - - - - - - - - 2145_1 - - - - - - - - - - 0.68 - - - - - - - - 11.2 - - - - - - - - 2.4% - - - - - - - - 0.1% - - - - - - - - 9.5% - - - - - - - - 2.7% - - - - - - - - 4234_1 - - - - - - - - - - 0.59 - - - - - - - - 58.7 - - - - - - - - 8.5% - - - - - - - - 1.3% - - - - - - - - 8.5% - - - - - - - - 3.3% - - - - - - - - 2032_2 - - - - - - - - - - 0.17 - - - - - - - - 57.8 - - - - - - - - 21.7% - - - - - - - - - 0.3% - - - - - - - - 2.7% - - - - - - - - 1.2% - - - - - - - - 4171_1 - - - - - - - - - - 0.07 - - - - - - - - 173.9 - - - - - - - - - 58.1% - - - - - - - - - 1.6% - - - - - - - - 2.1% - - - - - - - - 0.5% - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf deleted file mode 100644 index 17dea229..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a-reg.xml deleted file mode 100644 index 24afb34d..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a-reg.xml +++ /dev/null @@ -1,394 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a-str.xml deleted file mode 100644 index 0aae7b97..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a-str.xml +++ /dev/null @@ -1,583 +0,0 @@ - - - - - - - Accel/decel -reduction and -smoothing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Speed reduction/ -optimization - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Idle time reduction - - - - - - - - - - - - - - - - - - - - - Per cycle fuel savings potential - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Med-low - - - - - - - - - - 5% - - - - - - 8% - - - - - - 0.5% - - - - - - - - Med-high - - - - - - - - - - - - 15% - - - - - - - 15% - - - - - - - 2% - - - - - - Very high - - - - - - - - - - - - 30% - - - - - - - 35% - - - - - - - 10% - - - - - - - Frequency of opportunity -occurrence ingeneral population - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Med-low - - - - - - - - - - 30% - - - - - - - 20% - - - - - - - 30% - - - - - - - Med-high - - - - - - - - - - - - 15% - - - - - - - 15% - - - - - - - 15% - - - - - - - Very high - - - - - - - - - - - - 8% - - - - - - 10% - - - - - - - 5% - - - - - - Combined savings opportunity -(per cycle magnitude *frequency -of occurrence) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Med-low - - - - - - - - - - 1.5% - - - - - - - - 1.6% - - - - - - - - 0.2% - - - - - - - - Med-high - - - - - - - - - - - - 2.3% - - - - - - - - 2.3% - - - - - - - - 0.3% - - - - - - - - Very high - - - - - - - - - - - - 2.4% - - - - - - - - 3.5% - - - - - - - - 0.5% - - - - - - - - Total - - - - - - - - - 6% - - - - - - 7% - - - - - - 1% - - - - 
-
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a.pdf deleted file mode 100644 index 6765bbda..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031b-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031b-reg.xml deleted file mode 100644 index 24afb34d..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031b-reg.xml +++ /dev/null @@ -1,394 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031b-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031b-str.xml deleted file mode 100644 index 056f4da6..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031b-str.xml +++ /dev/null @@ -1,583 +0,0 @@ - - - - - - - Accel/decel -reduction and -smoothing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Speed reduction/ -optimization - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Idle time reduction - - - - - - - - - - - - - - - - - - - - - Per cycle fuel savings potential - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Med-low - - - - - - - - - - 5% - - - - - - 8% - - - - - - 0.5% - - - - - - - - Med-high - - - - - - - - - - - - 15% - - - - - - - 15% - - - - - - - 2% - - - - - - Very high - - - - - - - - - - - - 30% - - - - - - - 35% - - - - - - - 10% - - - - - - - Frequency of opportunity -occurrence ingeneral population - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Med-low - - - - - - - - - - 30% - - - - - - - 20% - - - - - - - 30% - - - - - - - Med-high - - - - - - - - - - - - 15% - - - - - - - 15% - - - - - - - 15% - - - - - - - Very high - - - - - - - - - - - - 8% - - - - - - 10% - - - - - - - 5% - - - - - - Combined savings opportunity -(per cycle magnitude *frequency -of occurrence) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Med-low - - - - - - - - - - 1.5% - - - - - - - - 1.6% - - - - - - - - 0.2% - - - - - - - - Med-high - - - - - - - - - - - - 2.3% - - - - - - - - 2.3% - - - - - - - - 0.3% - - - - - - - - Very high - - - - - - - - - - - - 2.4% - - - - - - - - 3.5% - - - - - - - - 0.5% - - - - - - - - Total - - - - - - - - - 6% - - - - - - 7% - - - - - - 1% - - - - 
-
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032-reg.xml deleted file mode 100644 index c2e8e4d7..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032-reg.xml +++ /dev/null @@ -1,143 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032-str.xml deleted file mode 100644 index b77a32ed..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032-str.xml +++ /dev/null @@ -1,231 +0,0 @@ - - - - - - - Source - - - - - Definition - - - - - - Examples - - - - - Stationary: - - - - - - - Major - - - - - - - Emissions of 10 tons per year or -more of any one air toxic, or 25 -tons per year or more of any -combination of air toxics - - - - - - - - - - - - - - - - - - - - Utilities, refineries, steel -manufacturers, chemical -manufacturers - - - - - - - - - - - - Area - - - - - Emissions of less than 10 tons per -year of any one air toxic pollutant, -or less than 25 tons per year of -any combination of air toxics - - - - - - - - - - - - - - - - - - - Dry cleaners, gas stations, -auto body refinishing paint -shops, decorative chromium -electroplating operations - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Mobile: - - - - - On-road - - - - - Emissions from motorized vehicles -normally operated on public -roadways - - - - - - - - - - - - - - - - - - - - Cars, buses, sport-utility -vehicles, light- and heavy- -duty trucks - - - - - - - - - - - Non-road - - - - - Emissions from a diverse -collection of engines, equipment, -vehicles, and vessels operated off -public roads - - - - - - - - - - - - - - - - - - - - - - - - - Construction and agricultural -equipment, personal -watercraft, lawn and garden -equipment - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032.pdf deleted file mode 100644 index b7d668d1..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033-reg.xml deleted file mode 100644 index 8c2143c3..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033-reg.xml +++ /dev/null @@ -1,192 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033-str.xml deleted file mode 100644 index 3ed428ef..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033-str.xml +++ /dev/null @@ -1,878 +0,0 @@ - - - - - - - Age (years) - - - - - Non-Hispanic white - - - - - Non-Hispanic black - - - - - Mexican American - - - - - Other - - - - - Total -population - - - - - - Male - - - - - Female - - - - - Male - - - - - Female - - - - - Male - - - - - Female - - - - - Male - - - - - Female - - - - - 2-11 months - - - - - 1,087,948 - - - - - 1,022,490 - - - - - 292,652 - - - - - 255,744 - - - - - 188,980 - - - - - 150,760 - - - - - 165,949 - - - - - 185,667 - - - - - 3,350,188 - - - - - 1-2 - - - - - 2,586,688 - - - - - 2,568,738 - - - - - 647,701 - - - - - 639,327 - - - - - 409,038 - - - - - 392,640 - - - - - 446,166 - - - - - 312,164 - - - - - 8,002,463 - - - - - 3-5 - - - - - 3,867,692 - - - - - 3,576,723 - - - - - 935,862 - - - - - 938,510 - - - - - 563,286 - - - - - 563,183 - - - - - 581,558 - - - - - 628,177 - - - - - 11,654,990 - - - - - 6-11 - - - - - 7,808,033 - - - - - 7,401,349 - - - - - 1,770,525 - - - - - 1,732,954 - - - - - 998,192 - - - - - 999,217 - - - - - 972,969 - - - - - 843,937 - - - - - 22,527,176 - - - - - 12-19 - - - - - 9,795,497 - - - - - 9,208,607 - - - - - 2,191,327 - - - - - 2,218,406 - - - - - 1,180,160 - - - - - 1,173,272 - - - - - 1,249,752 - - - - - 1,364,492 - - - - - 28,381,514 - - - - - 20-29 - - - - - 13,340,788 - - - - - 14,032,118 - - - - - 2,194,990 - - - - - 2,776,284 - - - - - 1,785,795 - - - - - 1,462,678 - - - - - 1,967,497 - - - - - 1,614,120 - - - - - 39,174,269 - - - - - 30-39 - - - - - 15,492,738 - - - - - 15,745,424 - - - - - 2,433,567 - - - - - 2,902,296 - - - - - 1,318,832 - - - - - 1,170,452 - - - - - 1,803,778 - - - - - 1,851,752 - - - - - 
42,718,838 - - - - - 40-49 - - - - - 12,895,086 - - - - - 12,644,242 - - - - - 1,641,005 - - - - - 1,995,794 - - - - - 795,346 - - - - - 757,632 - - - - - 910,861 - - - - - 1,547,516 - - - - - 33,187,483 - - - - - 50-59 - - - - - 8,551,440 - - - - - 9,112,707 - - - - - 937,867 - - - - - 1,166,482 - - - - - 380,932 - - - - - 410,833 - - - - - 757,342 - - - - - 681,281 - - - - - 21,998,882 - - - - - 60-69 - - - - - 7,740,932 - - - - - 8,915,681 - - - - - 773,533 - - - - - 1,015,525 - - - - - 252,188 - - - - - 326,141 - - - - - 462,520 - - - - - 613,319 - - - - - 20,099,840 - - - - - 70-79 - - - - - 5,033,323 - - - - - 7,049,276 - - - - - 435,122 - - - - - 642,775 - - - - - 116,067 - - - - - 122,989 - - - - - 229,588 - - - - - 235,166 - - - - - 13,864,305 - - - - - 80+ - - - - - 1,857,333 - - - - - 3,545,878 - - - - - 138,000 - - - - - 338,819 - - - - - 45,313 - - - - - 52,006 - - - - - 71,673 - - - - - 88,032 - - - - - 6,137,053 - - - - - All - - - - - 90,057,499 - - - - - 94,823,234 - - - - - 14,392,149 - - - - - 16,622,916 - - - - - 8,034,129 - - - - - 7,581,802 - - - - - 9,619,653 - - - - - 9,965,622 - - - - - 251,097,002 - - - -
- - - - - Age Group - - - - - Proportion - - - - - 20-29 - - - - - 0.2650 - - - - - 30-39 - - - - - 0.2046 - - - - - 40-49 - - - - - 0.1477 - - - - - 50-59 - - - - - 0.1514 - - - - - 60-69 - - - - - 0.1225 - - - - - 70-79 - - - - - 0.0752 - - - - - 80 + - - - - - 0.0336 - - - -
- - - - - Age Group - - - - - Proportion - - - - - 20-29 - - - - - 0.2834 - - - - - 30-39 - - - - - 0.2188 - - - - - 40-49 - - - - - 0.1579 - - - - - 50-59 - - - - - 0.1618 - - - - - 60-74 - - - - - 0.1781 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033.json deleted file mode 100644 index 8bca2b65..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":3,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":2,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033.pdf deleted file mode 100644 index c38adc7a..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034-reg.xml deleted file mode 100644 index f79cf5b6..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034-reg.xml +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034-str.xml deleted file mode 100644 index 6536521a..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034-str.xml +++ /dev/null @@ -1,1462 +0,0 @@ - - - - - - - Proportion - - - - - Design effect - - - - - 1.0 - - - - - 1.1 - - - - - 1.2 - - - - - 1.3 - - - - - 1.4 - - - - - 1.5 - - - - - 1.6 - - - - - 0.99 - - - - - 800 - - - - - 880 - - - - - 960 - - - - - 1,040 - - - - - 1,120 - - - - - 1,200 - - - - - 1,280 - - - - - 0.95 - - - - - 160 - - - - - 176 - - - - - 192 - - - - - 208 - - - - - 224 - - - - - 240 - - - - - 256 - - - - - 0.90 - - - - - 80 - - - - - 88 - - - - - 96 - - - - - 104 - - - - - 112 - - - - - 120 - - - - - 128 - - - - - 0.85 - - - - - 53 - - - - - 59 - - - - - 64 - - - - - 69 - - - - - 75 - - - - - 80 - - - - - 85 - - - - - 0.80 - - - - - 40 - - - - - 44 - - - - - 48 - - - - - 52 - - - - - 56 - - - - - 60 - - - - - 64 - - - - - 0.75 - - - - - 32 - - - - - 35 - - - - - 38 - - - - - 42 - - - - - 45 - - - - - 48 - - - - - 51 - - - - - 0.56-0.74 - - - - - 30 - - - - - 33 - - - - - 36 - - - - - 39 - - - - - 42 - - - - - 45 - - - - - 48 - - - - - 0.55 - - - - - 30 - - - - - 33 - - - - - 36 - - - - - 39 - - - - - 42 - - - - - 45 - - - - - 48 - - - - - 0.50 - - - - - 30 - - - - - 33 - - - - - 36 - - - - - 39 - - - - - 42 - - - - - 45 - - - - - 48 - - - - - 0.45 - - - - - 30 - - - - - 33 - - - - - 36 - - - - - 39 - - - - - 42 - - - - - 45 - - - - - 48 - - - - - 0.26-0.44 - - - - - 30 - - - - - 33 - - - - - 36 - - - - - 39 - - - - - 42 - - - - - 45 - - - - - 48 - - - - - 0.25 - - - - - 32 - - - - - 35 - - - - - 38 - - - - - 42 - - - - - 45 - - - - - 48 - - - - - 51 - - - - - 0.20 - - - - - 40 - - - - - 44 - - - - - 48 - - - - - 52 - - - - - 56 - - - - - 60 - - - - - 64 - - - - - 0.15 - - - - - 53 - - - - - 59 - - - - - 64 - 
- - - - 69 - - - - - 75 - - - - - 80 - - - - - 85 - - - - - 0.10 - - - - - 80 - - - - - 88 - - - - - 96 - - - - - 104 - - - - - 112 - - - - - 120 - - - - - 128 - - - - - 0.05 - - - - - 160 - - - - - 176 - - - - - 192 - - - - - 208 - - - - - 224 - - - - - 240 - - - - - 256 - - - - - 0.01 - - - - - 800 - - - - - 880 - - - - - 960 - - - - - 1,040 - - - - - 1,120 - - - - - 1,200 - - - - - 1,280 - - - -
- - - - - Proportion - - - - - Design effect - - - - - 1.7 - - - - - 1.8 - - - - - 1.9 - - - - - 2.0 - - - - - 2.5 - - - - - 3.0 - - - - - 3.5 - - - - - 0.99 - - - - - 1,360 - - - - - 1,440 - - - - - 1,520 - - - - - 1,600 - - - - - 2,000 - - - - - 2,400 - - - - - 2,800 - - - - - 0.95 - - - - - 272 - - - - - 288 - - - - - 304 - - - - - 320 - - - - - 400 - - - - - 480 - - - - - 560 - - - - - 0.90 - - - - - 136 - - - - - 144 - - - - - 152 - - - - - 160 - - - - - 200 - - - - - 240 - - - - - 280 - - - - - 0.85 - - - - - 91 - - - - - 96 - - - - - 101 - - - - - 107 - - - - - 133 - - - - - 160 - - - - - 187 - - - - - 0.80 - - - - - 68 - - - - - 72 - - - - - 76 - - - - - 80 - - - - - 100 - - - - - 120 - - - - - 140 - - - - - 0.75 - - - - - 54 - - - - - 58 - - - - - 61 - - - - - 64 - - - - - 80 - - - - - 96 - - - - - 112 - - - - - 0.56-.74 - - - - - 51 - - - - - 54 - - - - - 57 - - - - - 60 - - - - - 75 - - - - - 90 - - - - - 105 - - - - - 0.55 - - - - - 51 - - - - - 54 - - - - - 57 - - - - - 60 - - - - - 75 - - - - - 90 - - - - - 105 - - - - - 0.50 - - - - - 51 - - - - - 54 - - - - - 57 - - - - - 60 - - - - - 75 - - - - - 90 - - - - - 105 - - - - - 0.45 - - - - - 51 - - - - - 54 - - - - - 57 - - - - - 60 - - - - - 75 - - - - - 90 - - - - - 105 - - - - - 0.26-.44 - - - - - 51 - - - - - 54 - - - - - 57 - - - - - 60 - - - - - 75 - - - - - 90 - - - - - 105 - - - - - 0.25 - - - - - 54 - - - - - 58 - - - - - 61 - - - - - 64 - - - - - 80 - - - - - 96 - - - - - 112 - - - - - 0.20 - - - - - 68 - - - - - 72 - - - - - 76 - - - - - 80 - - - - - 100 - - - - - 120 - - - - - 140 - - - - - 0.15 - - - - - 91 - - - - - 96 - - - - - 101 - - - - - 107 - - - - - 133 - - - - - 160 - - - - - 187 - - - - - 0.10 - - - - - 136 - - - - - 144 - - - - - 152 - - - - - 160 - - - - - 200 - - - - - 240 - - - - - 280 - - - - - 0.05 - - - - - 272 - - - - - 288 - - - - - 304 - - - - - 320 - - - - - 400 - - - - - 480 - - - - - 560 - - - - - 0.01 - - - - - 1,360 - - - - - 1,440 - - - - - 1,520 - - - - - 1,600 - 
- - - - 2,000 - - - - - 2,400 - - - - - 2,800 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034.json deleted file mode 100644 index eb903a1d..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":2,"numCorrectlyDetectedTables":0,"numErroneouslyDetectedTables":1,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034.pdf deleted file mode 100644 index 8264e2a5..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a-reg.xml deleted file mode 100644 index 5a3876ac..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a-reg.xml +++ /dev/null @@ -1,334 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a-str.xml deleted file mode 100644 index 0835c671..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a-str.xml +++ /dev/null @@ -1,1576 +0,0 @@ - - - - - - - Age groups - - - - - U.S. population - - - - - Proportion -(total) - - - - - - Proportion -(20+ years) - - - - - - Total - - - - - Under 1 year - - - - - 0.0156 - - - - - 3,533,692 - - - - - 1 - 2 years - - - - - 0.0287 - - - - - 6,493,373 - - - - - 3 - 5 years - - - - - 0.0419 - - - - - 9,483,880 - - - - - 6 - 11 years - - - - - 0.0920 - - - - - 20,834,439 - - - - - 12 - 19 years - - - - - 0.1418 - - - - - 32,113,079 - - - - - 20 - 29 years - - - - - 0.1803 - - - - - 0.2650 - - - - - 40,839,623 - - - - - 30 - 39 years - - - - - 0.1392 - - - - - 0.2046 - - - - - 31,526,222 - - - - - 40 - 49 years - - - - - 0.1005 - - - - - 0.1477 - - - - - 22,759,163 - - - - - 50 - 59 years - - - - - 0.1030 - - - - - 0.1514 - - - - - 23,325,286 - - - - - 60 - 69 years - - - - - 0.0833 - - - - - 0.1225 - - - - - 18,870,102 - - - - - 70 - 79 years - - - - - 0.0512 - - - - - 0.0752 - - - - - 11,591,846 - - - - - 80 years plus - - - - - 0.0228 - - - - - 0.0336 - - - - - 5,175,100 - - - - - Total - - - - - 226,545,805 - - - -
- - - - - - Age - - - - - Total -population - - - - - - Under 1 year - - - - - 3,533,692 - - - - - 1 years - - - - - 3,269,557 - - - - - 2 years - - - - - 3,223,816 - - - - - 3 years - - - - - 3,179,441 - - - - - 4 years - - - - - 3,141,748 - - - - - 5 years - - - - - 3,162,691 - - - - - 5 years - - - - - 3,109,095 - - - - - 7 years - - - - - 3,273,052 - - - - - 8 years - - - - - 3,394,998 - - - - - 9 years - - - - - 3,760,120 - - - - - 10 years - - - - - 3,716,530 - - - - - 11 years - - - - - 3,580,644 - - - - - 12 years - - - - - 3,518,982 - - - - - 13 years - - - - - 3,643,189 - - - - - 14 years - - - - - 3,782,784 - - - - - 15 years - - - - - 4,059,898 - - - - - 16 years - - - - - 4,180,875 - - - - - 17 years - - - - - 4,223,848 - - - - - 18 years - - - - - 4,251,779 - - - - - 19 years - - - - - 4,451,724 - - - - - 20 years - - - - - 4,387,100 - - - - - 21 years - - - - - 4,285,763 - - - - - 22 years - - - - - 4,284,351 - - - - - 23 years - - - - - 4,199,711 - - - - - 24 years - - - - - 4,161,779 - - - - - 25 years - - - - - 4,116,218 - - - - - 26 years - - - - - 3,977,515 - - - - - 27 years - - - - - 3,931,620 - - - - - 28 years - - - - - 3,708,968 - - - - - 29 years - - - - - 3,786,598 - - - - - 30 years - - - - - 3,726,525 - - - - - 31 years - - - - - 3,607,610 - - - - - 32 years - - - - - 3,712,217 - - - - - 33 years - - - - - 3,653,921 - - - - - 34 years - - - - - 2,860,647 - - - - - 35 years - - - - - 2,902,331 - - - - - 36 years - - - - - 2,929,040 - - - - - 37 years - - - - - 2,982,533 - - - - - 38 years - - - - - 2,598,636 - - - - - 39 years - - - - - 2,552,762 - - - - - - - Age - - - - - Total -population - - - - - - 40 years - - - - - 2,468,083 - - - - - 41 years - - - - - 2,375,849 - - - - - 42 years - - - - - 2,325,572 - - - - - 43 years - - - - - 2,237,108 - - - - - 44 years - - - - - 2,262,796 - - - - - 45 years - - - - - 2,242,318 - - - - - 46 years - - - - - 2,139,385 - - - - - 47 years - - - - - 2,222,969 - - - - - 48 years - - - - - 2,163,709 
- - - - - 49 years - - - - - 2,321,374 - - - - - 50 years - - - - - 2,347,068 - - - - - 51 years - - - - - 2,295,077 - - - - - 52 years - - - - - 2,363,152 - - - - - 53 years - - - - - 2,337,138 - - - - - 54 years - - - - - 2,367,597 - - - - - 55 years - - - - - 2,390,440 - - - - - 56 years - - - - - 2,329,790 - - - - - 57 years - - - - - 2,312,737 - - - - - 58 years - - - - - 2,330,373 - - - - - 59 years - - - - - 2,251,914 - - - - - 60 years - - - - - 2,160,937 - - - - - 61 years - - - - - 2,073,764 - - - - - 62 years - - - - - 2,008,093 - - - - - 63 years - - - - - 1,931,425 - - - - - 64 years - - - - - 1,913,402 - - - - - 65 years - - - - - 1,904,641 - - - - - 66 years - - - - - 1,813,987 - - - - - 67 years - - - - - 1,763,637 - - - - - 68 years - - - - - 1,678,740 - - - - - 69 years - - - - - 1,621,476 - - - - - 70 years - - - - - 1,516,900 - - - - - 71 years - - - - - 1,439,723 - - - - - 72 years - - - - - 1,371,235 - - - - - 73 years - - - - - 1,261,994 - - - - - 74 years - - - - - 1,208,272 - - - - - 75 years - - - - - 1,111,480 - - - - - 76 years - - - - - 1,028,927 - - - - - 77 years - - - - - 951,774 - - - - - 78 years - - - - - 828,866 - - - - - 79 years - - - - - 872,675 - - - - - - - Age - - - - - Total -population - - - - - - 80 years - - - - - 723,049 - - - - - 81 years - - - - - 640,276 - - - - - 82 years - - - - - 566,548 - - - - - 83 years - - - - - 527,982 - - - - - 84 years - - - - - 477,178 - - - - - 85 years - - - - - 412,549 - - - - - 86 years - - - - - 350,655 - - - - - 87 years - - - - - 306,906 - - - - - 88 years - - - - - 236,314 - - - - - 89 years - - - - - 213,778 - - - - - 90 years - - - - - 175,900 - - - - - 91 years - - - - - 140,003 - - - - - 92 years - - - - - 101,492 - - - - - 93 years - - - - - 78,233 - - - - - 94 years - - - - - 60,964 - - - - - 95 years - - - - - 46,219 - - - - - 96 years - - - - - 32,789 - - - - - 97 years - - - - - 23,471 - - - - - 98 years - - - - - 16,215 - - - - - 99 years - - - - - 12,385 - - - - - 100 
years - - - - - 9,663 - - - - - 101 years - - - - - 5,231 - - - - - 102 years - - - - - 3,886 - - - - - 103 years - - - - - 2,800 - - - - - 104 years - - - - - 2,015 - - - - - 105 years - - - - - 1,573 - - - - - 106 years - - - - - 1,276 - - - - - 107 years - - - - - 1,038 - - - - - 108 years - - - - - 883 - - - - - 109 years - - - - - 852 - - - - - 110 years - - - - - 819 - - - - - 111 years - - - - - 623 - - - - - 112+ years - - - - - 1,535 - - - - - Total - - - - - 226,545,805 - - - -
- - - - - - Status - - - - - Sample -size - - - - - - Percent - - - - - Weighted -Percent - - - - - - Total - - - - - 39695 - - - - - 100.0 - - - - - 100.0 - - - - - Not interviewed - - - - - 5701 - - - - - 14.4 - - - - - 18.2 - - - - - Interviewed, not examined - - - - - 2683 - - - - - 6.8 - - - - - 7.5 - - - - - MEC examined - - - - - 30818 - - - - - 77.6 - - - - - 73.4 - - - - - Home examined - - - - - 493 - - - - - 1.2 - - - - - 0.8 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a.json deleted file mode 100644 index 71ae6a77..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":3,"numCorrectlyDetectedTables":3,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a.pdf deleted file mode 100644 index a15920db..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035b-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035b-reg.xml deleted file mode 100644 index e5edf11b..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035b-reg.xml +++ /dev/null @@ -1,340 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035b-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035b-str.xml deleted file mode 100644 index 0835c671..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035b-str.xml +++ /dev/null @@ -1,1576 +0,0 @@ - - - - - - - Age groups - - - - - U.S. population - - - - - Proportion -(total) - - - - - - Proportion -(20+ years) - - - - - - Total - - - - - Under 1 year - - - - - 0.0156 - - - - - 3,533,692 - - - - - 1 - 2 years - - - - - 0.0287 - - - - - 6,493,373 - - - - - 3 - 5 years - - - - - 0.0419 - - - - - 9,483,880 - - - - - 6 - 11 years - - - - - 0.0920 - - - - - 20,834,439 - - - - - 12 - 19 years - - - - - 0.1418 - - - - - 32,113,079 - - - - - 20 - 29 years - - - - - 0.1803 - - - - - 0.2650 - - - - - 40,839,623 - - - - - 30 - 39 years - - - - - 0.1392 - - - - - 0.2046 - - - - - 31,526,222 - - - - - 40 - 49 years - - - - - 0.1005 - - - - - 0.1477 - - - - - 22,759,163 - - - - - 50 - 59 years - - - - - 0.1030 - - - - - 0.1514 - - - - - 23,325,286 - - - - - 60 - 69 years - - - - - 0.0833 - - - - - 0.1225 - - - - - 18,870,102 - - - - - 70 - 79 years - - - - - 0.0512 - - - - - 0.0752 - - - - - 11,591,846 - - - - - 80 years plus - - - - - 0.0228 - - - - - 0.0336 - - - - - 5,175,100 - - - - - Total - - - - - 226,545,805 - - - -
- - - - - - Age - - - - - Total -population - - - - - - Under 1 year - - - - - 3,533,692 - - - - - 1 years - - - - - 3,269,557 - - - - - 2 years - - - - - 3,223,816 - - - - - 3 years - - - - - 3,179,441 - - - - - 4 years - - - - - 3,141,748 - - - - - 5 years - - - - - 3,162,691 - - - - - 5 years - - - - - 3,109,095 - - - - - 7 years - - - - - 3,273,052 - - - - - 8 years - - - - - 3,394,998 - - - - - 9 years - - - - - 3,760,120 - - - - - 10 years - - - - - 3,716,530 - - - - - 11 years - - - - - 3,580,644 - - - - - 12 years - - - - - 3,518,982 - - - - - 13 years - - - - - 3,643,189 - - - - - 14 years - - - - - 3,782,784 - - - - - 15 years - - - - - 4,059,898 - - - - - 16 years - - - - - 4,180,875 - - - - - 17 years - - - - - 4,223,848 - - - - - 18 years - - - - - 4,251,779 - - - - - 19 years - - - - - 4,451,724 - - - - - 20 years - - - - - 4,387,100 - - - - - 21 years - - - - - 4,285,763 - - - - - 22 years - - - - - 4,284,351 - - - - - 23 years - - - - - 4,199,711 - - - - - 24 years - - - - - 4,161,779 - - - - - 25 years - - - - - 4,116,218 - - - - - 26 years - - - - - 3,977,515 - - - - - 27 years - - - - - 3,931,620 - - - - - 28 years - - - - - 3,708,968 - - - - - 29 years - - - - - 3,786,598 - - - - - 30 years - - - - - 3,726,525 - - - - - 31 years - - - - - 3,607,610 - - - - - 32 years - - - - - 3,712,217 - - - - - 33 years - - - - - 3,653,921 - - - - - 34 years - - - - - 2,860,647 - - - - - 35 years - - - - - 2,902,331 - - - - - 36 years - - - - - 2,929,040 - - - - - 37 years - - - - - 2,982,533 - - - - - 38 years - - - - - 2,598,636 - - - - - 39 years - - - - - 2,552,762 - - - - - - - Age - - - - - Total -population - - - - - - 40 years - - - - - 2,468,083 - - - - - 41 years - - - - - 2,375,849 - - - - - 42 years - - - - - 2,325,572 - - - - - 43 years - - - - - 2,237,108 - - - - - 44 years - - - - - 2,262,796 - - - - - 45 years - - - - - 2,242,318 - - - - - 46 years - - - - - 2,139,385 - - - - - 47 years - - - - - 2,222,969 - - - - - 48 years - - - - - 2,163,709 
- - - - - 49 years - - - - - 2,321,374 - - - - - 50 years - - - - - 2,347,068 - - - - - 51 years - - - - - 2,295,077 - - - - - 52 years - - - - - 2,363,152 - - - - - 53 years - - - - - 2,337,138 - - - - - 54 years - - - - - 2,367,597 - - - - - 55 years - - - - - 2,390,440 - - - - - 56 years - - - - - 2,329,790 - - - - - 57 years - - - - - 2,312,737 - - - - - 58 years - - - - - 2,330,373 - - - - - 59 years - - - - - 2,251,914 - - - - - 60 years - - - - - 2,160,937 - - - - - 61 years - - - - - 2,073,764 - - - - - 62 years - - - - - 2,008,093 - - - - - 63 years - - - - - 1,931,425 - - - - - 64 years - - - - - 1,913,402 - - - - - 65 years - - - - - 1,904,641 - - - - - 66 years - - - - - 1,813,987 - - - - - 67 years - - - - - 1,763,637 - - - - - 68 years - - - - - 1,678,740 - - - - - 69 years - - - - - 1,621,476 - - - - - 70 years - - - - - 1,516,900 - - - - - 71 years - - - - - 1,439,723 - - - - - 72 years - - - - - 1,371,235 - - - - - 73 years - - - - - 1,261,994 - - - - - 74 years - - - - - 1,208,272 - - - - - 75 years - - - - - 1,111,480 - - - - - 76 years - - - - - 1,028,927 - - - - - 77 years - - - - - 951,774 - - - - - 78 years - - - - - 828,866 - - - - - 79 years - - - - - 872,675 - - - - - - - Age - - - - - Total -population - - - - - - 80 years - - - - - 723,049 - - - - - 81 years - - - - - 640,276 - - - - - 82 years - - - - - 566,548 - - - - - 83 years - - - - - 527,982 - - - - - 84 years - - - - - 477,178 - - - - - 85 years - - - - - 412,549 - - - - - 86 years - - - - - 350,655 - - - - - 87 years - - - - - 306,906 - - - - - 88 years - - - - - 236,314 - - - - - 89 years - - - - - 213,778 - - - - - 90 years - - - - - 175,900 - - - - - 91 years - - - - - 140,003 - - - - - 92 years - - - - - 101,492 - - - - - 93 years - - - - - 78,233 - - - - - 94 years - - - - - 60,964 - - - - - 95 years - - - - - 46,219 - - - - - 96 years - - - - - 32,789 - - - - - 97 years - - - - - 23,471 - - - - - 98 years - - - - - 16,215 - - - - - 99 years - - - - - 12,385 - - - - - 100 
years - - - - - 9,663 - - - - - 101 years - - - - - 5,231 - - - - - 102 years - - - - - 3,886 - - - - - 103 years - - - - - 2,800 - - - - - 104 years - - - - - 2,015 - - - - - 105 years - - - - - 1,573 - - - - - 106 years - - - - - 1,276 - - - - - 107 years - - - - - 1,038 - - - - - 108 years - - - - - 883 - - - - - 109 years - - - - - 852 - - - - - 110 years - - - - - 819 - - - - - 111 years - - - - - 623 - - - - - 112+ years - - - - - 1,535 - - - - - Total - - - - - 226,545,805 - - - -
- - - - - - Status - - - - - Sample -size - - - - - - Percent - - - - - Weighted -Percent - - - - - - Total - - - - - 39695 - - - - - 100.0 - - - - - 100.0 - - - - - Not interviewed - - - - - 5701 - - - - - 14.4 - - - - - 18.2 - - - - - Interviewed, not examined - - - - - 2683 - - - - - 6.8 - - - - - 7.5 - - - - - MEC examined - - - - - 30818 - - - - - 77.6 - - - - - 73.4 - - - - - Home examined - - - - - 493 - - - - - 1.2 - - - - - 0.8 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036-reg.xml deleted file mode 100644 index 0b9eec66..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036-reg.xml +++ /dev/null @@ -1,337 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036-str.xml deleted file mode 100644 index b8fc1595..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036-str.xml +++ /dev/null @@ -1,403 +0,0 @@ - - - - - - - Item - - - - - Description - - - - - - - Tuition - - - - - - The cost of takinga course varies froms chool to school.To search for tuition costs -at different colleges, go to www.studentaid.ed.gov/myfsa. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Room and board - - - - - - - The cost of lodging and food varies froms chool to school. Go to -www.studentaid.ed.gov/myfsa to search for estimated costs. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Books and -school supplies - - - - - - - Books can be expensive. School supplies can include book bags, notebooks, pens, pencils, -paper, folders, stapler, desk organizing system (trays, penholder, etc.), computer paper, etc. -According to the College Board,the national average cost for textbooks at four-year public -colleges in 2009–10 was $1,122. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Fees - - - - - Fees will depend on the school you’re attending. This list can be obtained directly from the -school. Fees include activity fees,parking decal fees, etc. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Equipment and -room materials - - - - - - - - - Equipment may include a computer, printer, etc. Furnishings or room materials may include -such items as reading lamps, microwave, refrigerator, sheets, towels, etc. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Travel and -miscellaneous expenses - - - - - - - - If you live on campus, you’ll probably need to travel during school breaks. -Other expenses can include clothing and cellphone use. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036.pdf deleted file mode 100644 index 610e5d09..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037-reg.xml deleted file mode 100644 index cc78490d..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037-reg.xml +++ /dev/null @@ -1,648 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037-str.xml deleted file mode 100644 index 95a944ab..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037-str.xml +++ /dev/null @@ -1,1343 +0,0 @@ - - - - - - - Concentration -(ppm) - - - - - - - - - - - - - - - - - - Postnatal Day 1 - - - - - - - - - - - - - Postnatal Day 4 - - - - - - - - - - - - - Postnatal Day 7 - - - - - - - - - - - - - Postnatal Day 14 - - - - - - - - - - - - - - Postnatal Day 20 - - - - - - - - - - - - - - No. - - - - - - Body -Weight -(g) - - - - - - - - - - - - - - Weight -Relative -to -Controls -(%) - - - - - - - - - - - - - - - - - - - - - - - - - No. - - - - - - Body -Weight -(g) - - - - - - - - - - - - - - Weight -Relative -to -Controls -(%) - - - - - - - - - - - - - - - - - - - - - - - - - Body -Weight -(g) - - - - - - - - - - - - - - Weight -Relative -to -Controls -(%) - - - - - - - - - - - - - - - - - - - - - - - - - Body -Weight -(g) - - - - - - - - - - - - - - Weight -Relative -to -Controls -(%) - - - - - - - - - - - - - - - - - - - - - - - - - Body -Weight -(g) - - - - - - - - - - - - - - Weight -Relative -to -t Controls -(%) - - - - - - - - - - - - - - - - - - - - - - - - - - Male - - - - - - - 0 - - - - - 39 - - - - - - 5.8 - - - - - - 10 - - - - - - 8.8 - - - - - - 13.5 - - - - - - - - 25.7 - - - - - - - - 35.0 - - - - - - - 250 - - - - - - - 30 - - - - - - 5.9 - - - - - - 102 - - - - - - - 10 - - - - - - 9.0 - - - - - - 102 - - - - - - - 13.6 - - - - - - - - 101 - - - - - - - 26.2 - - - - - - - 102 - - - - - - - 35.8 - - - - - - - - 102 - - - - - - - 500 - - - - - - - 33 - - - - - - 6.0 - - - - - - 103 - - - - - - - 10 - - - - - - 8.6 - - - - - - 98 - - - - - - 13.0 - - - - - - - 96 - - - - - - 25.1 - - - - - - - 98 - - - - - - 34.7 - - - - - - - - 99 - - - - - - 1,000 - - - - - - - - 31 - - - 
- - - 5.8 - - - - - - 100 - - - - - - - 10 - - - - - - 8.4 - - - - - - 96 - - - - - - 13.0 - - - - - - - 96 - - - - - - 24.9 - - - - - - - - 97 - - - - - - 34.5 - - - - - - - - 99 - - - - - - 2,000 - - - - - - - - 38 - - - - - - 5.8 - - - - - - 100 - - - - - - - 10 - - - - - - 8.8 - - - - - - 100 - - - - - - - 12.9 - - - - - - - - 96 - - - - - - 24.6 - - - - - - - - 96 - - - - - - 31.6** - - - - - - - - - - 90 - - - - - - 4,000 - - - - - - - - 27 - - - - - - 5.3** - - - - - - - - - 91 - - - - - - 10 - - - - - - 7.5** - - - - - - - - - 85 - - - - - - 10.4** - - - - - - - - - - 77 - - - - - - 16.8** - - - - - - - - - - 65 - - - - - - 19.8** - - - - - - - - - - 57 - - - - - - Female - - - - - - - - - 0 - - - - - 23 - - - - - - 5.4 - - - - - - 10 - - - - - - 8.2 - - - - - - 12.7 - - - - - - - - 24.7 - - - - - - - - 33.6 - - - - - - - - 250 - - - - - - - 34 - - - - - - 5.6 - - - - - - 104 - - - - - - - 10 - - - - - - 8.5 - - - - - - 104 - - - - - - - 12.9 - - - - - - - - 102 - - - - - - - 24.7 - - - - - - - - 100 - - - - - - - 33.9 - - - - - - - - 101 - - - - - - - 500 - - - - - - - 32 - - - - - - 5.4 - - - - - - 100 - - - - - - - 10 - - - - - - 8.2 - - - - - - 100 - - - - - - - 12.7 - - - - - - - - 100 - - - - - - - 25.0 - - - - - - - 101 - - - - - - - 33.6 - - - - - - - - 100 - - - - - - - 1,000 - - - - - - - - 40 - - - - - - 5.5 - - - - - - 102 - - - - - - - 10 - - - - - - 8.0 - - - - - - 98 - - - - - - 12.4 - - - - - - - 98 - - - - - - 24.2 - - - - - - - 98 - - - - - - 32.7 - - - - - - - - 97 - - - - - - 2,000 - - - - - - - - 49 - - - - - - 5.3 - - - - - - 98 - - - - - - 10 - - - - - - 8.2 - - - - - - 100 - - - - - - - 12.4 - - - - - - - 98 - - - - - - 23.7 - - - - - - - - 96 - - - - - - 30.3** - - - - - - - - - 90 - - - - - - 4,000 - - - - - - - - 31 - - - - - - 5.0** - - - - - - - - 93 - - - - - - 10 - - - - - - 7.3* - - - - - - - 89 - - - - - - 9.9** - - - - - - - - 78 - - - - - - 16.1** - - - - - - - - - 65 - - - - - - 18.8** - - - - - - - - - - 56 - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037.json deleted file mode 100644 index 8dd87c33..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":0,"numErroneouslyDetectedTables":3,"expectedFailure":true} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037.pdf deleted file mode 100644 index a7938379..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038-reg.xml deleted file mode 100644 index fbc6ffa7..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038-reg.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038-str.xml deleted file mode 100644 index 071efd9f..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038-str.xml +++ /dev/null @@ -1,90 +0,0 @@ - - - - - - - Species - - - - - Percent of Range -Impacted - - - - - - Kingfisher - - - - - 29% - - - - - Bald Eagle - - - - - 34% - - - - - Osprey - - - - - 20% - - - - - Common Loon - - - - - 40% - - - - - Florida Panther - - - - - 100% - - - - - Mink - - - - - 35% - - - - - River Otter - - - - - 38% - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038.pdf deleted file mode 100644 index 6233b863..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039-reg.xml deleted file mode 100644 index bd44c1a8..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039-reg.xml +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039-str.xml deleted file mode 100644 index 5771af39..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039-str.xml +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - - Organism - - - - - Wildlife Criterion (pg/L) - - - - - Mink - - - - - 57 - - - - - River otter - - - - - 42 - - - - - Kingfisher - - - - - 33 - - - - - Loon - - - - - 82 - - - - - Osprey - - - - - 82 - - - - - Bald eagle - - - - - 100 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039.pdf deleted file mode 100644 index 1e00bd39..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040-reg.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040-reg.xml deleted file mode 100644 index dc36ba51..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040-reg.xml +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040-str.xml b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040-str.xml deleted file mode 100644 index b586ca16..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040-str.xml +++ /dev/null @@ -1,125 +0,0 @@ - - - - - - - Species - - - - - - - Wildlife Criterion -(pg/L) - - - - - - - - GLWQI - - - - - Mercury Study Report to Congress - - - - - - - - - - - - - - Mink - - - - - 2880 - - - - - 1038 - - - - - Otter - - - - - 1930 - - - - - 764 - - - - - Kingfisher - - - - - - - 1040 - - - - - 598 - - - - - Osprey - - - - - - - - Not done - - - - - 1498 - - - - - Eagle - - - - - - - 1920 - - - - - 1818 - - - -
-
diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040.json b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040.json deleted file mode 100644 index a55497df..00000000 --- a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040.json +++ /dev/null @@ -1 +0,0 @@ -{"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040.pdf b/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040.pdf deleted file mode 100644 index fe243399..00000000 Binary files a/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/indictb1h_14.pdf b/src/test/resources/technology/tabula/indictb1h_14.pdf deleted file mode 100644 index 8850058b..00000000 Binary files a/src/test/resources/technology/tabula/indictb1h_14.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/jpeg2000.pdf b/src/test/resources/technology/tabula/jpeg2000.pdf deleted file mode 100644 index 815a5010..00000000 Binary files a/src/test/resources/technology/tabula/jpeg2000.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/json/AnimalSounds1.json b/src/test/resources/technology/tabula/json/AnimalSounds1.json deleted file mode 100644 index 8511b786..00000000 --- a/src/test/resources/technology/tabula/json/AnimalSounds1.json +++ /dev/null @@ -1 +0,0 @@ 
-[{"extraction_method":"lattice","page_number":1,"top":0.006499578,"left":56.8,"width":241.1999969482422,"height":315.36407470703125,"right":298.0,"bottom":315.37057,"data":[[{"top":0.006499578,"left":56.8,"width":79.19999694824219,"height":95.31405639648438,"text":"Animal"},{"top":0.006499578,"left":136.0,"width":61.0,"height":95.31405639648438,"text":"Action"},{"top":0.006499578,"left":197.0,"width":101.0,"height":95.31405639648438,"text":"Result"}],[{"top":95.32056,"left":56.8,"width":79.19999694824219,"height":23.050010681152344,"text":"Cat"},{"top":95.32056,"left":136.0,"width":61.0,"height":23.050010681152344,"text":"Says"},{"top":95.32056,"left":197.0,"width":101.0,"height":23.050010681152344,"text":"Meow"}],[{"top":118.37057,"left":56.8,"width":79.19999694824219,"height":63.99999237060547,"text":"Parastratiosph\recomyiastratio\rsphecomyioid\res"},{"top":118.37057,"left":136.0,"width":61.0,"height":63.99999237060547,"text":"Says"},{"top":118.37057,"left":197.0,"width":101.0,"height":63.99999237060547,"text":"bzzzzzzz"}],[{"top":182.37056,"left":56.8,"width":79.19999694824219,"height":133.00001525878906,"text":"Fox"},{"top":182.37056,"left":136.0,"width":61.0,"height":133.00001525878906,"text":"Says"},{"top":182.37056,"left":197.0,"width":101.0,"height":133.00001525878906,"text":"Ring-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding"}]]},{"extraction_method":"lattice","page_number":1,"top":0.006499578,"left":313.35715,"width":241.55941772460938,"height":259.2640380859375,"right":554.91656,"bottom":259.27054,"data":[[{"top":0.006499578,"left":313.35715,"width":77.64285278320312,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":391.0,"width":66.0,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":457.0,"width":97.91656494140625,"height":72.26405334472656,"text":""}],[{"top":72.27055,"left":313.35715,"width":77.64285278320312,"height":23.050003051757812,"text":"Animal"},{"top":
72.27055,"left":391.0,"width":66.0,"height":23.050003051757812,"text":"Action"},{"top":72.27055,"left":457.0,"width":97.91656494140625,"height":23.050003051757812,"text":"Result"}],[{"top":95.32056,"left":313.35715,"width":77.64285278320312,"height":35.94999694824219,"text":"Dogs/wolves/\rMore dogs"},{"top":95.32056,"left":391.0,"width":66.0,"height":35.94999694824219,"text":"Says"},{"top":95.32056,"left":457.0,"width":97.91656494140625,"height":35.94999694824219,"text":"Bow-wow/\rruff-ruff"}],[{"top":131.27055,"left":313.35715,"width":77.64285278320312,"height":36.40000915527344,"text":"Donkey"},{"top":131.27055,"left":391.0,"width":66.0,"height":36.40000915527344,"text":"Says"},{"top":131.27055,"left":457.0,"width":97.91656494140625,"height":36.40000915527344,"text":"Hee-Haw Hee-\rHaw"}],[{"top":167.67056,"left":313.35715,"width":77.64285278320312,"height":91.5999755859375,"text":"Fox"},{"top":167.67056,"left":391.0,"width":66.0,"height":91.5999755859375,"text":"Says"},{"top":167.67056,"left":457.0,"width":97.91656494140625,"height":91.5999755859375,"text":"Wa-pa-pa-pa-\rpa-pa-pow\rWa-pa-pa-pa-\rpa-pow\rWa-pa-pa-pa-\rpa-pa-pow"}]]}] diff --git a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json b/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json deleted file mode 100644 index fb2d478e..00000000 --- a/src/test/resources/technology/tabula/json/argentina_diputados_voting_record.json +++ /dev/null @@ -1 +0,0 @@ -{"extraction_method":"stream","page_number":1,"top":269.875,"left":12.75,"width":548.25,"height":520.625,"right":561.0,"bottom":790.5,"data":[[{"top":281.82,"left":28.56,"width":175.21029663085938,"height":4.5,"text":"ABDALA de MATARAZZO, Norma Amanda"},{"top":281.82,"left":213.72,"width":108.25027465820312,"height":4.5,"text":"Frente Cívico por Santiago"},{"top":282.54,"left":397.56,"width":79.69027709960938,"height":4.5,"text":"Santiago del 
Estero"},{"top":281.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":298.26,"left":28.56,"width":141.71029663085938,"height":4.5,"text":"ALBRIEU, Oscar Edmundo Nicolas"},{"top":298.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":299.1,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio Negro"},{"top":298.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":314.82,"left":28.56,"width":83.75028228759766,"height":4.5,"text":"ALONSO, María Luz"},{"top":314.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":315.54,"left":397.56,"width":42.250274658203125,"height":4.5,"text":"La Pampa"},{"top":314.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":331.26,"left":28.56,"width":84.25028228759766,"height":4.5,"text":"ARENA, Celia Isabel"},{"top":331.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":332.1,"left":397.56,"width":37.690277099609375,"height":4.5,"text":"Santa Fe"},{"top":331.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":347.82,"left":28.56,"width":110.29029846191406,"height":4.5,"text":"ARREGUI, Andrés Roberto"},{"top":347.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":348.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":347.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":364.26,"left":28.56,"width":115.69029235839844,"height":4.5,"text":"AVOSCAN, Herman Horacio"},{"top":364.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":365.1,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio 
Negro"},{"top":364.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":380.82,"left":28.56,"width":95.6902847290039,"height":4.5,"text":"BALCEDO, María Ester"},{"top":380.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":381.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":380.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":397.26,"left":28.56,"width":127.69029235839844,"height":4.5,"text":"BARRANDEGUY, Raúl Enrique"},{"top":397.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":398.1,"left":397.56,"width":43.6702880859375,"height":4.5,"text":"Entre Ríos"},{"top":397.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":413.82,"left":28.56,"width":106.69029235839844,"height":4.5,"text":"BASTERRA, Luis Eugenio"},{"top":413.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":414.54,"left":397.56,"width":36.73028564453125,"height":4.5,"text":"Formosa"},{"top":413.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":430.26,"left":28.56,"width":92.21028137207031,"height":4.5,"text":"BEDANO, Nora Esther"},{"top":430.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":431.1,"left":397.56,"width":35.6602783203125,"height":4.5,"text":"Córdoba"},{"top":430.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":446.82,"left":28.56,"width":100.69029235839844,"height":4.5,"text":"BERNAL, María Eugenia"},{"top":446.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - 
PJ"},{"top":447.54,"left":397.56,"width":22.200286865234375,"height":4.5,"text":"Jujuy"},{"top":446.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":463.26,"left":28.56,"width":112.21029663085938,"height":4.5,"text":"BERTONE, Rosana Andrea"},{"top":463.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":464.1,"left":397.56,"width":67.21026611328125,"height":4.5,"text":"Tierra del Fuego"},{"top":463.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":479.82,"left":28.56,"width":114.73028564453125,"height":4.5,"text":"BIANCHI, María del Carmen"},{"top":479.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":480.54,"left":397.56,"width":74.6702880859375,"height":4.5,"text":"Cdad. Aut. Bs. As."},{"top":479.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":496.26,"left":28.56,"width":115.19029235839844,"height":4.5,"text":"BIDEGAIN, Gloria Mercedes"},{"top":496.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":497.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":496.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":512.82,"left":28.56,"width":66.25028228759766,"height":4.5,"text":"BRAWER, Mara"},{"top":512.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":513.54,"left":397.56,"width":74.6702880859375,"height":4.5,"text":"Cdad. Aut. Bs. 
As."},{"top":512.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":529.26,"left":28.56,"width":90.73028564453125,"height":4.5,"text":"BRILLO, José Ricardo"},{"top":529.26,"left":213.72,"width":121.81027221679688,"height":4.5,"text":"Movimiento Popular Neuquino"},{"top":530.1,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":529.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":545.82,"left":28.56,"width":120.73028564453125,"height":4.5,"text":"BROMBERG, Isaac Benjamín"},{"top":545.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":546.54,"left":397.56,"width":38.6602783203125,"height":4.5,"text":"Tucumán"},{"top":545.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":562.26,"left":28.56,"width":89.7702865600586,"height":4.5,"text":"BRUE, Daniel Agustín"},{"top":562.26,"left":213.72,"width":108.25027465820312,"height":4.5,"text":"Frente Cívico por Santiago"},{"top":563.1,"left":397.56,"width":79.69027709960938,"height":4.5,"text":"Santiago del Estero"},{"top":562.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":578.82,"left":28.56,"width":72.23028564453125,"height":4.5,"text":"CALCAGNO, Eric"},{"top":578.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":579.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":578.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":595.26,"left":28.56,"width":114.73028564453125,"height":4.5,"text":"CARLOTTO, Remo Gerardo"},{"top":595.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":596.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos 
Aires"},{"top":595.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":611.82,"left":28.56,"width":122.1702880859375,"height":4.5,"text":"CARMONA, Guillermo Ramón"},{"top":611.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":612.54,"left":397.56,"width":38.1602783203125,"height":4.5,"text":"Mendoza"},{"top":611.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":628.26,"left":28.56,"width":124.73028564453125,"height":4.5,"text":"CATALAN MAGNI, Julio César"},{"top":628.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":629.1,"left":397.56,"width":67.21026611328125,"height":4.5,"text":"Tierra del Fuego"},{"top":628.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":644.82,"left":28.56,"width":88.6902847290039,"height":4.5,"text":"CEJAS, Jorge Alberto"},{"top":644.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":645.54,"left":397.56,"width":41.6602783203125,"height":4.5,"text":"Rio Negro"},{"top":644.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":661.26,"left":28.56,"width":89.7702865600586,"height":4.5,"text":"CHIENO, María Elena"},{"top":661.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":662.1,"left":397.56,"width":42.72027587890625,"height":4.5,"text":"Corrientes"},{"top":661.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":677.82,"left":28.56,"width":96.25028228759766,"height":4.5,"text":"CIAMPINI, José Alberto"},{"top":677.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - 
PJ"},{"top":678.54,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":677.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":694.26,"left":28.56,"width":131.77029418945312,"height":4.5,"text":"CIGOGNA, Luis Francisco Jorge"},{"top":694.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":695.1,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":694.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":710.82,"left":28.56,"width":62.15028381347656,"height":4.5,"text":"CLERI, Marcos"},{"top":710.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":711.54,"left":397.56,"width":37.690277099609375,"height":4.5,"text":"Santa Fe"},{"top":710.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":727.26,"left":28.56,"width":101.77029418945312,"height":4.5,"text":"COMELLI, Alicia Marcela"},{"top":727.26,"left":213.72,"width":121.81027221679688,"height":4.5,"text":"Movimiento Popular Neuquino"},{"top":728.1,"left":397.56,"width":37.700286865234375,"height":4.5,"text":"Neuquén"},{"top":727.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":743.82,"left":28.56,"width":88.1902847290039,"height":4.5,"text":"CONTI, Diana Beatriz"},{"top":743.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":744.54,"left":397.56,"width":54.71026611328125,"height":4.5,"text":"Buenos Aires"},{"top":743.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":760.26,"left":28.56,"width":98.75028228759766,"height":4.5,"text":"CORDOBA, Stella Maris"},{"top":760.26,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - 
PJ"},{"top":761.1,"left":397.56,"width":38.6602783203125,"height":4.5,"text":"Tucumán"},{"top":760.26,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}],[{"top":776.82,"left":28.56,"width":105.25028991699219,"height":4.5,"text":"CURRILEN, Oscar Rubén"},{"top":776.82,"left":213.72,"width":108.23028564453125,"height":4.5,"text":"Frente para la Victoria - PJ"},{"top":777.54,"left":397.56,"width":30.2802734375,"height":4.5,"text":"Chubut"},{"top":776.82,"left":494.04,"width":54.999969482421875,"height":4.5,"text":"AFIRMATIVO"}]]} \ No newline at end of file diff --git a/src/test/resources/technology/tabula/json/schools.json b/src/test/resources/technology/tabula/json/schools.json deleted file mode 100644 index d251bb20..00000000 --- a/src/test/resources/technology/tabula/json/schools.json +++ /dev/null @@ -1 +0,0 @@ -{"extraction_method":"lattice","page_number":1,"top":54.315777,"left":16.97,"width":745.3303833007812,"height":483.9442443847656,"right":762.30035,"bottom":538.26,"data":[[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":54.315777,"left":18.588728,"width":153.91128540039062,"height":8.744216918945312,"text":"Last Name"},{"top":54.315777,"left":172.50002,"width":69.71998596191406,"height":8.744216918945312,"text":"First 
Name"},{"top":54.315777,"left":242.22,"width":104.760009765625,"height":8.744216918945312,"text":"Address"},{"top":54.315777,"left":346.98,"width":70.80001831054688,"height":8.744216918945312,"text":"City"},{"top":54.315777,"left":417.78003,"width":20.87994384765625,"height":8.744216918945312,"text":"State"},{"top":54.315777,"left":438.65997,"width":49.91998291015625,"height":8.744216918945312,"text":"Zip"},{"top":54.315777,"left":488.57996,"width":78.47998046875,"height":8.744216918945312,"text":"Occupation"},{"top":54.315777,"left":567.05994,"width":106.4400634765625,"height":8.744216918945312,"text":"Employer"},{"top":54.315777,"left":673.5,"width":45.52001953125,"height":8.744216918945312,"text":"Date"},{"top":54.315777,"left":719.02,"width":43.28033447265625,"height":8.744216918945312,"text":"Amount"}],[{"top":63.059994,"left":16.97,"width":1.6187286376953125,"height":10.799991607666016,"text":""},{"top":63.059994,"left":18.588728,"width":153.91128540039062,"height":10.799991607666016,"text":"Lidstad"},{"top":63.059994,"left":172.50002,"width":69.71998596191406,"height":10.799991607666016,"text":"Dick \u0026 Peg"},{"top":63.059994,"left":242.22,"width":104.760009765625,"height":10.799991607666016,"text":"62 Mississippi River Blvd N"},{"top":63.059994,"left":346.98,"width":70.80001831054688,"height":10.799991607666016,"text":"Saint 
Paul"},{"top":63.059994,"left":417.78003,"width":20.87994384765625,"height":10.799991607666016,"text":"MN"},{"top":63.059994,"left":438.65997,"width":49.91998291015625,"height":10.799991607666016,"text":"55104"},{"top":63.059994,"left":488.57996,"width":78.47998046875,"height":10.799991607666016,"text":"retired"},{"top":63.059994,"left":567.05994,"width":106.4400634765625,"height":10.799991607666016,"text":""},{"top":63.059994,"left":673.5,"width":45.52001953125,"height":10.799991607666016,"text":"10/12/2012"},{"top":63.059994,"left":719.02,"width":43.28033447265625,"height":10.799991607666016,"text":"60.00"}],[{"top":73.859985,"left":16.97,"width":1.6187286376953125,"height":10.800025939941406,"text":""},{"top":73.859985,"left":18.588728,"width":153.91128540039062,"height":10.800025939941406,"text":"Strom"},{"top":73.859985,"left":172.50002,"width":69.71998596191406,"height":10.800025939941406,"text":"Pam"},{"top":73.859985,"left":242.22,"width":104.760009765625,"height":10.800025939941406,"text":"1229 Hague Ave"},{"top":73.859985,"left":346.98,"width":70.80001831054688,"height":10.800025939941406,"text":"St. 
Paul"},{"top":73.859985,"left":417.78003,"width":20.87994384765625,"height":10.800025939941406,"text":"MN"},{"top":73.859985,"left":438.65997,"width":49.91998291015625,"height":10.800025939941406,"text":"55104"},{"top":73.859985,"left":488.57996,"width":78.47998046875,"height":10.800025939941406,"text":""},{"top":73.859985,"left":567.05994,"width":106.4400634765625,"height":10.800025939941406,"text":""},{"top":73.859985,"left":673.5,"width":45.52001953125,"height":10.800025939941406,"text":"9/12/2012"},{"top":73.859985,"left":719.02,"width":43.28033447265625,"height":10.800025939941406,"text":"60.00"}],[{"top":84.66001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":84.66001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Seeba"},{"top":84.66001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Louise \u0026 Paul"},{"top":84.66001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"1399 Sheldon St"},{"top":84.66001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":84.66001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":84.66001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55108"},{"top":84.66001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"BOE"},{"top":84.66001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"City of Saint Paul"},{"top":84.66001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/12/2012"},{"top":84.66001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":95.46001,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":95.46001,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Schumacher / 
Bales"},{"top":95.46001,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Douglas L. / Patricia"},{"top":95.46001,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"948 County Rd. D W"},{"top":95.46001,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"Saint Paul"},{"top":95.46001,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":95.46001,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55126"},{"top":95.46001,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":""},{"top":95.46001,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":""},{"top":95.46001,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"10/13/2012"},{"top":95.46001,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"60.00"}],[{"top":106.26,"left":16.97,"width":1.6187286376953125,"height":10.799995422363281,"text":""},{"top":106.26,"left":18.588728,"width":153.91128540039062,"height":10.799995422363281,"text":"Abrams"},{"top":106.26,"left":172.50002,"width":69.71998596191406,"height":10.799995422363281,"text":"Marjorie"},{"top":106.26,"left":242.22,"width":104.760009765625,"height":10.799995422363281,"text":"238 8th St east"},{"top":106.26,"left":346.98,"width":70.80001831054688,"height":10.799995422363281,"text":"St 
Paul"},{"top":106.26,"left":417.78003,"width":20.87994384765625,"height":10.799995422363281,"text":"MN"},{"top":106.26,"left":438.65997,"width":49.91998291015625,"height":10.799995422363281,"text":"55101"},{"top":106.26,"left":488.57996,"width":78.47998046875,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":567.05994,"width":106.4400634765625,"height":10.799995422363281,"text":"Retired"},{"top":106.26,"left":673.5,"width":45.52001953125,"height":10.799995422363281,"text":"8/8/2012"},{"top":106.26,"left":719.02,"width":43.28033447265625,"height":10.799995422363281,"text":"75.00"}],[{"top":117.06,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":117.06,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Crouse / Schroeder"},{"top":117.06,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Abigail / Jonathan"},{"top":117.06,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1545 Branston St."},{"top":117.06,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":117.06,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":117.06,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55108"},{"top":117.06,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":""},{"top":117.06,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":117.06,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":117.06,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"75.00"}],[{"top":127.859985,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":127.859985,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"O\u0027Connell"},{"top":127.859985,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Jean"},{"top":127.859985,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"888 Ivy Ave W."},{"top":127.859985,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Saint 
Paul"},{"top":127.859985,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":127.859985,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55117"},{"top":127.859985,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":""},{"top":127.859985,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":127.859985,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/30/2012"},{"top":127.859985,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":138.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":138.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Reese"},{"top":138.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheri A"},{"top":138.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"981 Davern St."},{"top":138.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":138.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":138.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55116"},{"top":138.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Public Relations"},{"top":138.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Far North 
Spirits"},{"top":138.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/3/2012"},{"top":138.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"75.00"}],[{"top":149.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":149.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Serrano"},{"top":149.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Luz Maria"},{"top":149.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"5559 Park Place Drive"},{"top":149.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Shoreview"},{"top":149.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":149.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55126"},{"top":149.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"retired"},{"top":149.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":149.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":149.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"75.00"}],[{"top":160.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":160.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Alsiddiqui"},{"top":160.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Jaber"},{"top":160.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"13056 Euclid Ave"},{"top":160.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Apple 
Valley"},{"top":160.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":160.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55124"},{"top":160.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"SPPS budget analyst"},{"top":160.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":160.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":160.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":171.06,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":171.06,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Andrastek"},{"top":171.06,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"John"},{"top":171.06,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"774 Ashland Ave"},{"top":171.06,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"St Paul"},{"top":171.06,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":171.06,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55104"},{"top":171.06,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"asst 
principal"},{"top":171.06,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":"SPPS"},{"top":171.06,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"8/22/2012"},{"top":171.06,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":181.86,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":181.86,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Anfang"},{"top":181.86,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Heather L. \u0026 Matt"},{"top":181.86,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1635 Bayard Ave"},{"top":181.86,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":181.86,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":181.86,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55116"},{"top":181.86,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"Ex Director"},{"top":181.86,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"BOMA"},{"top":181.86,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/6/2012"},{"top":181.86,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":192.65999,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":192.65999,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Aronson"},{"top":192.65999,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Roger J."},{"top":192.65999,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4852 Emerson Ave. 
S."},{"top":192.65999,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Minneapolis"},{"top":192.65999,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":192.65999,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55419"},{"top":192.65999,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Attorney at Law"},{"top":192.65999,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":""},{"top":192.65999,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/20/2012"},{"top":192.65999,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":203.46,"left":16.97,"width":1.6187286376953125,"height":10.800003051757812,"text":""},{"top":203.46,"left":18.588728,"width":153.91128540039062,"height":10.800003051757812,"text":"Aronson"},{"top":203.46,"left":172.50002,"width":69.71998596191406,"height":10.800003051757812,"text":"Roger J."},{"top":203.46,"left":242.22,"width":104.760009765625,"height":10.800003051757812,"text":"4852 Emerson Ave. 
S."},{"top":203.46,"left":346.98,"width":70.80001831054688,"height":10.800003051757812,"text":"Minneapolis"},{"top":203.46,"left":417.78003,"width":20.87994384765625,"height":10.800003051757812,"text":"MN"},{"top":203.46,"left":438.65997,"width":49.91998291015625,"height":10.800003051757812,"text":"55419"},{"top":203.46,"left":488.57996,"width":78.47998046875,"height":10.800003051757812,"text":"Attorney at Law"},{"top":203.46,"left":567.05994,"width":106.4400634765625,"height":10.800003051757812,"text":""},{"top":203.46,"left":673.5,"width":45.52001953125,"height":10.800003051757812,"text":"9/20/2012"},{"top":203.46,"left":719.02,"width":43.28033447265625,"height":10.800003051757812,"text":"100.00"}],[{"top":214.26001,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":214.26001,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Banks"},{"top":214.26001,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"Michael or Patricia"},{"top":214.26001,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1664 Van Buren Ave."},{"top":214.26001,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint Paul"},{"top":214.26001,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":214.26001,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":214.26001,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"retired social 
worker"},{"top":214.26001,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":""},{"top":214.26001,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"9/20/2012"},{"top":214.26001,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":225.06,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":225.06,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Bennett"},{"top":225.06,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"David"},{"top":225.06,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"25 Birchwood Rd."},{"top":225.06,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Mahtomedi"},{"top":225.06,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":225.06,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":225.06,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"retired"},{"top":225.06,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"9/3/2012"},{"top":225.06,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":235.86002,"left":16.97,"width":1.6187286376953125,"height":10.799972534179688,"text":""},{"top":235.86002,"left":18.588728,"width":153.91128540039062,"height":10.799972534179688,"text":"Berry"},{"top":235.86002,"left":172.50002,"width":69.71998596191406,"height":10.799972534179688,"text":"Margaret"},{"top":235.86002,"left":242.22,"width":104.760009765625,"height":10.799972534179688,"text":"1267 Pike Lake Dr"},{"top":235.86002,"left":346.98,"width":70.80001831054688,"height":10.799972534179688,"text":"New 
Brighton"},{"top":235.86002,"left":417.78003,"width":20.87994384765625,"height":10.799972534179688,"text":"MN"},{"top":235.86002,"left":438.65997,"width":49.91998291015625,"height":10.799972534179688,"text":"55112"},{"top":235.86002,"left":488.57996,"width":78.47998046875,"height":10.799972534179688,"text":"Asst Principal"},{"top":235.86002,"left":567.05994,"width":106.4400634765625,"height":10.799972534179688,"text":"Saint Paul Public Schools"},{"top":235.86002,"left":673.5,"width":45.52001953125,"height":10.799972534179688,"text":"8/8/2012"},{"top":235.86002,"left":719.02,"width":43.28033447265625,"height":10.799972534179688,"text":"100.00"}],[{"top":246.65999,"left":16.97,"width":1.6187286376953125,"height":10.800033569335938,"text":""},{"top":246.65999,"left":18.588728,"width":153.91128540039062,"height":10.800033569335938,"text":"Boyle"},{"top":246.65999,"left":172.50002,"width":69.71998596191406,"height":10.800033569335938,"text":"Matthew C. \u0026 Eliza"},{"top":246.65999,"left":242.22,"width":104.760009765625,"height":10.800033569335938,"text":"2165 Princeton Ave"},{"top":246.65999,"left":346.98,"width":70.80001831054688,"height":10.800033569335938,"text":"Saint 
Paul"},{"top":246.65999,"left":417.78003,"width":20.87994384765625,"height":10.800033569335938,"text":"MN"},{"top":246.65999,"left":438.65997,"width":49.91998291015625,"height":10.800033569335938,"text":"55105"},{"top":246.65999,"left":488.57996,"width":78.47998046875,"height":10.800033569335938,"text":""},{"top":246.65999,"left":567.05994,"width":106.4400634765625,"height":10.800033569335938,"text":""},{"top":246.65999,"left":673.5,"width":45.52001953125,"height":10.800033569335938,"text":"10/6/2012"},{"top":246.65999,"left":719.02,"width":43.28033447265625,"height":10.800033569335938,"text":"100.00"}],[{"top":257.46002,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":257.46002,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":257.46002,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John"},{"top":257.46002,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles"},{"top":257.46002,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"St. 
Paul"},{"top":257.46002,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":257.46002,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":257.46002,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BOE"},{"top":257.46002,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"retired"},{"top":257.46002,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"8/26/2012"},{"top":257.46002,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":268.26,"left":16.97,"width":1.6187286376953125,"height":10.79998779296875,"text":""},{"top":268.26,"left":18.588728,"width":153.91128540039062,"height":10.79998779296875,"text":"Brodrick"},{"top":268.26,"left":172.50002,"width":69.71998596191406,"height":10.79998779296875,"text":"John F."},{"top":268.26,"left":242.22,"width":104.760009765625,"height":10.79998779296875,"text":"1007 Charles Ave"},{"top":268.26,"left":346.98,"width":70.80001831054688,"height":10.79998779296875,"text":"Saint 
Paul"},{"top":268.26,"left":417.78003,"width":20.87994384765625,"height":10.79998779296875,"text":"MN"},{"top":268.26,"left":438.65997,"width":49.91998291015625,"height":10.79998779296875,"text":"55104"},{"top":268.26,"left":488.57996,"width":78.47998046875,"height":10.79998779296875,"text":"BoE"},{"top":268.26,"left":567.05994,"width":106.4400634765625,"height":10.79998779296875,"text":"SPPS"},{"top":268.26,"left":673.5,"width":45.52001953125,"height":10.79998779296875,"text":"10/4/2012"},{"top":268.26,"left":719.02,"width":43.28033447265625,"height":10.79998779296875,"text":"100.00"}],[{"top":279.06,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":279.06,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Brown"},{"top":279.06,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Mike"},{"top":279.06,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1440 Goodrich Ave"},{"top":279.06,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":279.06,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":279.06,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":279.06,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":279.06,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":279.06,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"9/23/2012"},{"top":279.06,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":289.84,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":289.84,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Cacy"},{"top":289.84,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Jill"},{"top":289.84,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"1409 Smith Ave So"},{"top":289.84,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"West St. 
Paul"},{"top":289.84,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":289.84,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55118"},{"top":289.84,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Administrator"},{"top":289.84,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"SPPS"},{"top":289.84,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"9/12/2012"},{"top":289.84,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":300.65997,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":300.65997,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cardwell"},{"top":300.65997,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Eileen"},{"top":300.65997,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4172 Bridgewater Ct"},{"top":300.65997,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Vadnais 
Height"},{"top":300.65997,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":300.65997,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55127"},{"top":300.65997,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Retired"},{"top":300.65997,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/3/2012"},{"top":300.65997,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":311.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":311.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carlstrom"},{"top":311.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Cheryl"},{"top":311.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"4244 Oakmede Ln"},{"top":311.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"St Paul"},{"top":311.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":311.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55110"},{"top":311.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Satff"},{"top":311.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public 
Schools"},{"top":311.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/8/2012"},{"top":311.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":322.26,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":322.26,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Carter"},{"top":322.26,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Melvin W. \u0026 Willet"},{"top":322.26,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"405 Western Ave N"},{"top":322.26,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":322.26,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":322.26,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55103"},{"top":322.26,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Cty Commissioner"},{"top":322.26,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Ramsey Cty"},{"top":322.26,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/8/2012"},{"top":322.26,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":333.06003,"left":16.97,"width":1.6187286376953125,"height":10.8199462890625,"text":""},{"top":333.06003,"left":18.588728,"width":153.91128540039062,"height":10.8199462890625,"text":"Caruso"},{"top":333.06003,"left":172.50002,"width":69.71998596191406,"height":10.8199462890625,"text":"Sarah"},{"top":333.06003,"left":242.22,"width":104.760009765625,"height":10.8199462890625,"text":"2615 Newton Ave 
S"},{"top":333.06003,"left":346.98,"width":70.80001831054688,"height":10.8199462890625,"text":"Minneapolis"},{"top":333.06003,"left":417.78003,"width":20.87994384765625,"height":10.8199462890625,"text":"MN"},{"top":333.06003,"left":438.65997,"width":49.91998291015625,"height":10.8199462890625,"text":"55405"},{"top":333.06003,"left":488.57996,"width":78.47998046875,"height":10.8199462890625,"text":"CEO"},{"top":333.06003,"left":567.05994,"width":106.4400634765625,"height":10.8199462890625,"text":"United Way"},{"top":333.06003,"left":673.5,"width":45.52001953125,"height":10.8199462890625,"text":"9/12/2012"},{"top":333.06003,"left":719.02,"width":43.28033447265625,"height":10.8199462890625,"text":"100.00"}],[{"top":343.87997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":343.87997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Casey /Trewartha"},{"top":343.87997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Kerry F. 
/ Kelly A."},{"top":343.87997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"2266 Goodrich Ave"},{"top":343.87997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":343.87997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":343.87997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":343.87997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":343.87997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":343.87997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/4/2012"},{"top":343.87997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":354.65997,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":354.65997,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cassidy"},{"top":354.65997,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Paul D."},{"top":354.65997,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1990 Dayton Ave"},{"top":354.65997,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":354.65997,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":354.65997,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55104"},{"top":354.65997,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":""},{"top":354.65997,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":354.65997,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/6/2012"},{"top":354.65997,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":365.43997,"left":16.97,"width":1.6187286376953125,"height":10.820037841796875,"text":""},{"top":365.43997,"left":18.588728,"width":153.91128540039062,"height":10.820037841796875,"text":"Causey"},{"top":365.43997,"left":172.50002,"width":69.71998596191406,"height":10.820037841796875,"text":"Christopher"},{"top":365.43997,"left":242.22,"width":104.760009765625,"height":10.820037841796875,"text":"2181 Doswell Avenue"},{"top":365.43997,"left":346.98,"width":70.80001831054688,"height":10.820037841796875,"text":"Saint 
Paul"},{"top":365.43997,"left":417.78003,"width":20.87994384765625,"height":10.820037841796875,"text":"MN"},{"top":365.43997,"left":438.65997,"width":49.91998291015625,"height":10.820037841796875,"text":"55108"},{"top":365.43997,"left":488.57996,"width":78.47998046875,"height":10.820037841796875,"text":"finance"},{"top":365.43997,"left":567.05994,"width":106.4400634765625,"height":10.820037841796875,"text":""},{"top":365.43997,"left":673.5,"width":45.52001953125,"height":10.820037841796875,"text":"9/3/2012"},{"top":365.43997,"left":719.02,"width":43.28033447265625,"height":10.820037841796875,"text":"100.00"}],[{"top":376.26,"left":16.97,"width":1.6187286376953125,"height":10.819976806640625,"text":""},{"top":376.26,"left":18.588728,"width":153.91128540039062,"height":10.819976806640625,"text":"Christiansen"},{"top":376.26,"left":172.50002,"width":69.71998596191406,"height":10.819976806640625,"text":"Lena"},{"top":376.26,"left":242.22,"width":104.760009765625,"height":10.819976806640625,"text":"2453 Como Ave"},{"top":376.26,"left":346.98,"width":70.80001831054688,"height":10.819976806640625,"text":"St Paul"},{"top":376.26,"left":417.78003,"width":20.87994384765625,"height":10.819976806640625,"text":"MN"},{"top":376.26,"left":438.65997,"width":49.91998291015625,"height":10.819976806640625,"text":"55108"},{"top":376.26,"left":488.57996,"width":78.47998046875,"height":10.819976806640625,"text":"Principal"},{"top":376.26,"left":567.05994,"width":106.4400634765625,"height":10.819976806640625,"text":"Saint Paul Public 
Schools"},{"top":376.26,"left":673.5,"width":45.52001953125,"height":10.819976806640625,"text":"8/8/2012"},{"top":376.26,"left":719.02,"width":43.28033447265625,"height":10.819976806640625,"text":"100.00"}],[{"top":387.08,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":387.08,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Clapp"},{"top":387.08,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Agustus (Bill)"},{"top":387.08,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"757 Osceola Ave #1"},{"top":387.08,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint Paul"},{"top":387.08,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":387.08,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55105"},{"top":387.08,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"retired"},{"top":387.08,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":""},{"top":387.08,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/2/2012"},{"top":387.08,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":397.86,"left":16.97,"width":1.6187286376953125,"height":10.779998779296875,"text":""},{"top":397.86,"left":18.588728,"width":153.91128540039062,"height":10.779998779296875,"text":"Cohen"},{"top":397.86,"left":172.50002,"width":69.71998596191406,"height":10.779998779296875,"text":"Brad A."},{"top":397.86,"left":242.22,"width":104.760009765625,"height":10.779998779296875,"text":"1460 Raymond Ave"},{"top":397.86,"left":346.98,"width":70.80001831054688,"height":10.779998779296875,"text":"Saint 
Paul"},{"top":397.86,"left":417.78003,"width":20.87994384765625,"height":10.779998779296875,"text":"MN"},{"top":397.86,"left":438.65997,"width":49.91998291015625,"height":10.779998779296875,"text":"55108"},{"top":397.86,"left":488.57996,"width":78.47998046875,"height":10.779998779296875,"text":"academic technology"},{"top":397.86,"left":567.05994,"width":106.4400634765625,"height":10.779998779296875,"text":"UMN"},{"top":397.86,"left":673.5,"width":45.52001953125,"height":10.779998779296875,"text":"10/17/2012"},{"top":397.86,"left":719.02,"width":43.28033447265625,"height":10.779998779296875,"text":"100.00"}],[{"top":408.63998,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":408.63998,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Commers"},{"top":408.63998,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Beth"},{"top":408.63998,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2294 Commonwealth Ave"},{"top":408.63998,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St Paul"},{"top":408.63998,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":408.63998,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":408.63998,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":"Self 
Employed"},{"top":408.63998,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":"Homemaker"},{"top":408.63998,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"8/1/2012"},{"top":408.63998,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":419.46,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":419.46,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Couture"},{"top":419.46,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Steven"},{"top":419.46,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"7950 Victoria Way"},{"top":419.46,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Louise Park"},{"top":419.46,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":419.46,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55426"},{"top":419.46,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Principal"},{"top":419.46,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"Saint Paul Public Schools"},{"top":419.46,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"8/10/2012"},{"top":419.46,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":430.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":430.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Crawford"},{"top":430.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Lydia P."},{"top":430.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"1140 Edgcumbe Rd"},{"top":430.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint 
Paul"},{"top":430.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":430.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":430.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":430.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":430.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"9/15/2012"},{"top":430.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":441.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":441.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Cudahy / Ricker"},{"top":441.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Robert \u0026 Mary C"},{"top":441.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"616 Cherokee Ave."},{"top":441.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint Paul"},{"top":441.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":441.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55107"},{"top":441.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"Teacher/Union Presid"},{"top":441.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"SPPS/SPFT"},{"top":441.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"9/18/2012"},{"top":441.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":451.85995,"left":16.97,"width":1.6187286376953125,"height":10.800018310546875,"text":""},{"top":451.85995,"left":18.588728,"width":153.91128540039062,"height":10.800018310546875,"text":"Cudahy / 
Ricker"},{"top":451.85995,"left":172.50002,"width":69.71998596191406,"height":10.800018310546875,"text":"Robert \u0026 Mary C"},{"top":451.85995,"left":242.22,"width":104.760009765625,"height":10.800018310546875,"text":"616 Cherokee Ave."},{"top":451.85995,"left":346.98,"width":70.80001831054688,"height":10.800018310546875,"text":"Saint Paul"},{"top":451.85995,"left":417.78003,"width":20.87994384765625,"height":10.800018310546875,"text":"MN"},{"top":451.85995,"left":438.65997,"width":49.91998291015625,"height":10.800018310546875,"text":"55107"},{"top":451.85995,"left":488.57996,"width":78.47998046875,"height":10.800018310546875,"text":"Teacher/Union Presid"},{"top":451.85995,"left":567.05994,"width":106.4400634765625,"height":10.800018310546875,"text":"SPPS/SPFT"},{"top":451.85995,"left":673.5,"width":45.52001953125,"height":10.800018310546875,"text":"10/6/2012"},{"top":451.85995,"left":719.02,"width":43.28033447265625,"height":10.800018310546875,"text":"100.00"}],[{"top":462.65997,"left":16.97,"width":1.6187286376953125,"height":10.82000732421875,"text":""},{"top":462.65997,"left":18.588728,"width":153.91128540039062,"height":10.82000732421875,"text":"Currie"},{"top":462.65997,"left":172.50002,"width":69.71998596191406,"height":10.82000732421875,"text":"Elisabeth"},{"top":462.65997,"left":242.22,"width":104.760009765625,"height":10.82000732421875,"text":"2274 Hillside Ave"},{"top":462.65997,"left":346.98,"width":70.80001831054688,"height":10.82000732421875,"text":"St. 
Paul"},{"top":462.65997,"left":417.78003,"width":20.87994384765625,"height":10.82000732421875,"text":"MN"},{"top":462.65997,"left":438.65997,"width":49.91998291015625,"height":10.82000732421875,"text":"55108"},{"top":462.65997,"left":488.57996,"width":78.47998046875,"height":10.82000732421875,"text":""},{"top":462.65997,"left":567.05994,"width":106.4400634765625,"height":10.82000732421875,"text":""},{"top":462.65997,"left":673.5,"width":45.52001953125,"height":10.82000732421875,"text":"9/12/2012"},{"top":462.65997,"left":719.02,"width":43.28033447265625,"height":10.82000732421875,"text":"100.00"}],[{"top":473.47998,"left":16.97,"width":1.6187286376953125,"height":10.780029296875,"text":""},{"top":473.47998,"left":18.588728,"width":153.91128540039062,"height":10.780029296875,"text":"Doane"},{"top":473.47998,"left":172.50002,"width":69.71998596191406,"height":10.780029296875,"text":"Paul V \u0026 Helen R"},{"top":473.47998,"left":242.22,"width":104.760009765625,"height":10.780029296875,"text":"444 Portland Ave"},{"top":473.47998,"left":346.98,"width":70.80001831054688,"height":10.780029296875,"text":"Sant Paul"},{"top":473.47998,"left":417.78003,"width":20.87994384765625,"height":10.780029296875,"text":"MN"},{"top":473.47998,"left":438.65997,"width":49.91998291015625,"height":10.780029296875,"text":"55102"},{"top":473.47998,"left":488.57996,"width":78.47998046875,"height":10.780029296875,"text":"Ex director"},{"top":473.47998,"left":567.05994,"width":106.4400634765625,"height":10.780029296875,"text":"St Paul Teachers\u0027 Retirement 
A"},{"top":473.47998,"left":673.5,"width":45.52001953125,"height":10.780029296875,"text":"10/3/2012"},{"top":473.47998,"left":719.02,"width":43.28033447265625,"height":10.780029296875,"text":"100.00"}],[{"top":484.26,"left":16.97,"width":1.6187286376953125,"height":10.800048828125,"text":""},{"top":484.26,"left":18.588728,"width":153.91128540039062,"height":10.800048828125,"text":"Dougherty"},{"top":484.26,"left":172.50002,"width":69.71998596191406,"height":10.800048828125,"text":"Richards S \u0026 Patrici"},{"top":484.26,"left":242.22,"width":104.760009765625,"height":10.800048828125,"text":"812 Goodrich Ave"},{"top":484.26,"left":346.98,"width":70.80001831054688,"height":10.800048828125,"text":"Saint Paul"},{"top":484.26,"left":417.78003,"width":20.87994384765625,"height":10.800048828125,"text":"MN"},{"top":484.26,"left":438.65997,"width":49.91998291015625,"height":10.800048828125,"text":"55105"},{"top":484.26,"left":488.57996,"width":78.47998046875,"height":10.800048828125,"text":""},{"top":484.26,"left":567.05994,"width":106.4400634765625,"height":10.800048828125,"text":""},{"top":484.26,"left":673.5,"width":45.52001953125,"height":10.800048828125,"text":"10/4/2012"},{"top":484.26,"left":719.02,"width":43.28033447265625,"height":10.800048828125,"text":"100.00"}],[{"top":495.06006,"left":16.97,"width":1.6187286376953125,"height":10.799896240234375,"text":""},{"top":495.06006,"left":18.588728,"width":153.91128540039062,"height":10.799896240234375,"text":"Driscoll"},{"top":495.06006,"left":172.50002,"width":69.71998596191406,"height":10.799896240234375,"text":"Joe \u0026 Jill"},{"top":495.06006,"left":242.22,"width":104.760009765625,"height":10.799896240234375,"text":"2383 Bourne Ave"},{"top":495.06006,"left":346.98,"width":70.80001831054688,"height":10.799896240234375,"text":"Saint 
Paul"},{"top":495.06006,"left":417.78003,"width":20.87994384765625,"height":10.799896240234375,"text":"MN"},{"top":495.06006,"left":438.65997,"width":49.91998291015625,"height":10.799896240234375,"text":"55108"},{"top":495.06006,"left":488.57996,"width":78.47998046875,"height":10.799896240234375,"text":"HR Manager"},{"top":495.06006,"left":567.05994,"width":106.4400634765625,"height":10.799896240234375,"text":"LexisNexis"},{"top":495.06006,"left":673.5,"width":45.52001953125,"height":10.799896240234375,"text":"10/18/2012"},{"top":495.06006,"left":719.02,"width":43.28033447265625,"height":10.799896240234375,"text":"100.00"}],[{"top":505.85995,"left":16.97,"width":1.6187286376953125,"height":10.780059814453125,"text":""},{"top":505.85995,"left":18.588728,"width":153.91128540039062,"height":10.780059814453125,"text":"Dubaille / Haugee"},{"top":505.85995,"left":172.50002,"width":69.71998596191406,"height":10.780059814453125,"text":"Florence M. /Eric"},{"top":505.85995,"left":242.22,"width":104.760009765625,"height":10.780059814453125,"text":"1009 Edmund Ave"},{"top":505.85995,"left":346.98,"width":70.80001831054688,"height":10.780059814453125,"text":"Saint 
Paul"},{"top":505.85995,"left":417.78003,"width":20.87994384765625,"height":10.780059814453125,"text":"MN"},{"top":505.85995,"left":438.65997,"width":49.91998291015625,"height":10.780059814453125,"text":"55104"},{"top":505.85995,"left":488.57996,"width":78.47998046875,"height":10.780059814453125,"text":"Florence‐teacher"},{"top":505.85995,"left":567.05994,"width":106.4400634765625,"height":10.780059814453125,"text":"SPPS"},{"top":505.85995,"left":673.5,"width":45.52001953125,"height":10.780059814453125,"text":"10/3/2012"},{"top":505.85995,"left":719.02,"width":43.28033447265625,"height":10.780059814453125,"text":"100.00"}],[{"top":516.64,"left":16.97,"width":1.6187286376953125,"height":10.83990478515625,"text":""},{"top":516.64,"left":18.588728,"width":153.91128540039062,"height":10.83990478515625,"text":"Eaton"},{"top":516.64,"left":172.50002,"width":69.71998596191406,"height":10.83990478515625,"text":"Jim"},{"top":516.64,"left":242.22,"width":104.760009765625,"height":10.83990478515625,"text":"2133 Berkeley Ave"},{"top":516.64,"left":346.98,"width":70.80001831054688,"height":10.83990478515625,"text":"St Paul"},{"top":516.64,"left":417.78003,"width":20.87994384765625,"height":10.83990478515625,"text":"MN"},{"top":516.64,"left":438.65997,"width":49.91998291015625,"height":10.83990478515625,"text":"55105"},{"top":516.64,"left":488.57996,"width":78.47998046875,"height":10.83990478515625,"text":"Principal"},{"top":516.64,"left":567.05994,"width":106.4400634765625,"height":10.83990478515625,"text":"Saint Paul Public Schools"},{"top":516.64,"left":673.5,"width":45.52001953125,"height":10.83990478515625,"text":"8/23/2012"},{"top":516.64,"left":719.02,"width":43.28033447265625,"height":10.83990478515625,"text":"100.00"}],[{"top":527.4799,"left":16.97,"width":1.6187286376953125,"height":10.78009033203125,"text":""},{"top":527.4799,"left":18.588728,"width":153.91128540039062,"height":10.78009033203125,"text":"Eaves 
/Alger"},{"top":527.4799,"left":172.50002,"width":69.71998596191406,"height":10.78009033203125,"text":"Patricia / Stuart"},{"top":527.4799,"left":242.22,"width":104.760009765625,"height":10.78009033203125,"text":"1143 Portladn Ave."},{"top":527.4799,"left":346.98,"width":70.80001831054688,"height":10.78009033203125,"text":"Saint Paul"},{"top":527.4799,"left":417.78003,"width":20.87994384765625,"height":10.78009033203125,"text":"MN"},{"top":527.4799,"left":438.65997,"width":49.91998291015625,"height":10.78009033203125,"text":"55104"},{"top":527.4799,"left":488.57996,"width":78.47998046875,"height":10.78009033203125,"text":""},{"top":527.4799,"left":567.05994,"width":106.4400634765625,"height":10.78009033203125,"text":""},{"top":527.4799,"left":673.5,"width":45.52001953125,"height":10.78009033203125,"text":"10/3/2012"},{"top":527.4799,"left":719.02,"width":43.28033447265625,"height":10.78009033203125,"text":"100.00"}]]} diff --git a/src/test/resources/technology/tabula/json/spanning_cells.json b/src/test/resources/technology/tabula/json/spanning_cells.json deleted file mode 100644 index 7f68ee99..00000000 --- a/src/test/resources/technology/tabula/json/spanning_cells.json +++ /dev/null @@ -1 +0,0 @@ -[{"extraction_method":"lattice","page_number":1,"top":146.51932,"left":119.78943,"width":355.312255859375,"height":270.5516052246094,"right":475.10168,"bottom":417.07092,"data":[[{"top":146.51932,"left":119.78943,"width":355.312255859375,"height":12.938491821289062,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.78943,"width":121.92680358886719,"height":12.785446166992188,"text":"Volume servers 
in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663360595703125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37115,"width":46.73052978515625,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.78943,"width":121.92680358886719,"height":12.832427978515625,"text":"Server closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663360595703125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37115,"width":46.73052978515625,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.78943,"width":121.92680358886719,"height":12.891036987304688,"text":"Server rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663360595703125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37115,"width":46.73052978515625,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.78943,"width":121.92680358886719,"height":12.890411376953125,"text":"Localized data 
centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663360595703125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37115,"width":46.73052978515625,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.78943,"width":121.92680358886719,"height":12.890716552734375,"text":"Mid-tier data centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663360595703125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37115,"width":46.73052978515625,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.78943,"width":121.92680358886719,"height":12.785018920898438,"text":"Enterprise-class data centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785018920898438,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785018920898438,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785018920898438,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663360595703125,"height":12.785018920898438,"text":"1,677"},{"top":223.74785,"left":428.37115,"width":46.73052978515625,"height":12.785018920898438,"text":"1,693"}],[{"top":236.53287,"left":119.78943,"width":355.312255859375,"height":13.1025390625,"text":"Best practice 
scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.6354,"left":119.78943,"width":121.92680358886719,"height":12.72686767578125,"text":"Volume servers in:"},{"top":249.6354,"left":241.71623,"width":46.66426086425781,"height":12.72686767578125,"text":"2007"},{"top":249.6354,"left":288.3805,"width":46.663482666015625,"height":12.72686767578125,"text":"2008"},{"top":249.6354,"left":335.04398,"width":46.663818359375,"height":12.72686767578125,"text":"2009"},{"top":249.6354,"left":381.7078,"width":46.663360595703125,"height":12.72686767578125,"text":"2010"},{"top":249.6354,"left":428.37115,"width":46.73052978515625,"height":12.72686767578125,"text":"2011"}],[{"top":262.36227,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Server closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.78943,"width":121.92680358886719,"height":12.8914794921875,"text":"Server 
rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663360595703125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37115,"width":46.73052978515625,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.78943,"width":121.92680358886719,"height":12.890167236328125,"text":"Localized data centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663360595703125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37115,"width":46.73052978515625,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.78943,"width":121.92680358886719,"height":12.890472412109375,"text":"Mid-tier data centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663360595703125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37115,"width":46.73052978515625,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.78943,"width":121.92680358886719,"height":12.7855224609375,"text":"Enterprise-class data 
centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.7855224609375,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.7855224609375,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.7855224609375,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663360595703125,"height":12.7855224609375,"text":"1,334"},{"top":313.92487,"left":428.37115,"width":46.73052978515625,"height":12.7855224609375,"text":"1,371"}],[{"top":326.7104,"left":119.78943,"width":355.312255859375,"height":13.0440673828125,"text":"State-of-the-art scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75446,"left":119.78943,"width":121.92680358886719,"height":12.784912109375,"text":"Volume servers in:"},{"top":339.75446,"left":241.71623,"width":46.66426086425781,"height":12.784912109375,"text":"2007"},{"top":339.75446,"left":288.3805,"width":46.663482666015625,"height":12.784912109375,"text":"2008"},{"top":339.75446,"left":335.04398,"width":46.663818359375,"height":12.784912109375,"text":"2009"},{"top":339.75446,"left":381.7078,"width":46.663360595703125,"height":12.784912109375,"text":"2010"},{"top":339.75446,"left":428.37115,"width":46.73052978515625,"height":12.784912109375,"text":"2011"}],[{"top":352.53937,"left":119.78943,"width":121.92680358886719,"height":12.89111328125,"text":"Server 
closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663360595703125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37115,"width":46.73052978515625,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.78943,"width":121.92680358886719,"height":12.8902587890625,"text":"Server rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663360595703125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37115,"width":46.73052978515625,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.78943,"width":121.92680358886719,"height":12.891143798828125,"text":"Localized data centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663360595703125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37115,"width":46.73052978515625,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.78943,"width":121.92680358886719,"height":12.89068603515625,"text":"Mid-tier data 
centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663360595703125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37115,"width":46.73052978515625,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.78943,"width":121.92680358886719,"height":12.968353271484375,"text":"Enterprise-class data centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968353271484375,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968353271484375,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968353271484375,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663360595703125,"height":12.968353271484375,"text":"1,424"},{"top":404.10257,"left":428.37115,"width":46.73052978515625,"height":12.968353271484375,"text":"1,485"}]]},{"extraction_method":"lattice","page_number":1,"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.52508544921875,"right":464.1174,"bottom":589.7847,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.880615234375,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.14026,"left":130.8136,"width":99.93739318847656,"height":12.78485107421875,"text":"Server 
class"},{"top":551.14026,"left":230.75099,"width":46.66316223144531,"height":12.78485107421875,"text":"2007"},{"top":551.14026,"left":277.41415,"width":46.663726806640625,"height":12.78485107421875,"text":"2008"},{"top":551.14026,"left":324.07788,"width":46.663970947265625,"height":12.78485107421875,"text":"2009"},{"top":551.14026,"left":370.74185,"width":46.663177490234375,"height":12.78485107421875,"text":"2010"},{"top":551.14026,"left":417.40503,"width":46.712371826171875,"height":12.78485107421875,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.9691162109375,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.9691162109375,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.9691162109375,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.9691162109375,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.9691162109375,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.9691162109375,"text":"98,864"}]]}] diff --git a/src/test/resources/technology/tabula/json/spanning_cells_basic.json b/src/test/resources/technology/tabula/json/spanning_cells_basic.json deleted file mode 100644 index 6efaef5b..00000000 --- 
a/src/test/resources/technology/tabula/json/spanning_cells_basic.json +++ /dev/null @@ -1 +0,0 @@ -[{"extraction_method":"lattice","page_number":1,"top":150.56,"left":119.789444,"width":355.31939697265625,"height":266.5108642578125,"right":475.10883,"bottom":417.07086,"data":[[{"top":150.56,"left":119.789444,"width":355.31939697265625,"height":8.897811889648438,"text":"Improved operation scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":159.45781,"left":119.789444,"width":121.92678833007812,"height":12.785446166992188,"text":"Volume servers in:"},{"top":159.45781,"left":241.71623,"width":46.66426086425781,"height":12.785446166992188,"text":"2007"},{"top":159.45781,"left":288.3805,"width":46.663482666015625,"height":12.785446166992188,"text":"2008"},{"top":159.45781,"left":335.04398,"width":46.663818359375,"height":12.785446166992188,"text":"2009"},{"top":159.45781,"left":381.7078,"width":46.663330078125,"height":12.785446166992188,"text":"2010"},{"top":159.45781,"left":428.37112,"width":46.73773193359375,"height":12.785446166992188,"text":"2011"}],[{"top":172.24326,"left":119.789444,"width":121.92678833007812,"height":12.832427978515625,"text":"Server 
closets"},{"top":172.24326,"left":241.71623,"width":46.66426086425781,"height":12.832427978515625,"text":"1,505"},{"top":172.24326,"left":288.3805,"width":46.663482666015625,"height":12.832427978515625,"text":"1,580"},{"top":172.24326,"left":335.04398,"width":46.663818359375,"height":12.832427978515625,"text":"1,643"},{"top":172.24326,"left":381.7078,"width":46.663330078125,"height":12.832427978515625,"text":"1,673"},{"top":172.24326,"left":428.37112,"width":46.73773193359375,"height":12.832427978515625,"text":"1,689"}],[{"top":185.07568,"left":119.789444,"width":121.92678833007812,"height":12.891036987304688,"text":"Server rooms"},{"top":185.07568,"left":241.71623,"width":46.66426086425781,"height":12.891036987304688,"text":"1,512"},{"top":185.07568,"left":288.3805,"width":46.663482666015625,"height":12.891036987304688,"text":"1,586"},{"top":185.07568,"left":335.04398,"width":46.663818359375,"height":12.891036987304688,"text":"1,646"},{"top":185.07568,"left":381.7078,"width":46.663330078125,"height":12.891036987304688,"text":"1,677"},{"top":185.07568,"left":428.37112,"width":46.73773193359375,"height":12.891036987304688,"text":"1,693"}],[{"top":197.96672,"left":119.789444,"width":121.92678833007812,"height":12.890411376953125,"text":"Localized data centers"},{"top":197.96672,"left":241.71623,"width":46.66426086425781,"height":12.890411376953125,"text":"1,512"},{"top":197.96672,"left":288.3805,"width":46.663482666015625,"height":12.890411376953125,"text":"1,586"},{"top":197.96672,"left":335.04398,"width":46.663818359375,"height":12.890411376953125,"text":"1,646"},{"top":197.96672,"left":381.7078,"width":46.663330078125,"height":12.890411376953125,"text":"1,677"},{"top":197.96672,"left":428.37112,"width":46.73773193359375,"height":12.890411376953125,"text":"1,693"}],[{"top":210.85713,"left":119.789444,"width":121.92678833007812,"height":12.890716552734375,"text":"Mid-tier data 
centers"},{"top":210.85713,"left":241.71623,"width":46.66426086425781,"height":12.890716552734375,"text":"1,512"},{"top":210.85713,"left":288.3805,"width":46.663482666015625,"height":12.890716552734375,"text":"1,586"},{"top":210.85713,"left":335.04398,"width":46.663818359375,"height":12.890716552734375,"text":"1,646"},{"top":210.85713,"left":381.7078,"width":46.663330078125,"height":12.890716552734375,"text":"1,677"},{"top":210.85713,"left":428.37112,"width":46.73773193359375,"height":12.890716552734375,"text":"1,693"}],[{"top":223.74785,"left":119.789444,"width":121.92678833007812,"height":12.785003662109375,"text":"Enterprise-class data centers"},{"top":223.74785,"left":241.71623,"width":46.66426086425781,"height":12.785003662109375,"text":"1,512"},{"top":223.74785,"left":288.3805,"width":46.663482666015625,"height":12.785003662109375,"text":"1,586"},{"top":223.74785,"left":335.04398,"width":46.663818359375,"height":12.785003662109375,"text":"1,646"},{"top":223.74785,"left":381.7078,"width":46.663330078125,"height":12.785003662109375,"text":"1,677"},{"top":223.74785,"left":428.37112,"width":46.73773193359375,"height":12.785003662109375,"text":"1,693"}],[{"top":236.53285,"left":119.789444,"width":355.31939697265625,"height":13.1025390625,"text":"Best practice scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":249.63539,"left":119.789444,"width":121.92678833007812,"height":12.726882934570312,"text":"Volume servers 
in:"},{"top":249.63539,"left":241.71623,"width":46.66426086425781,"height":12.726882934570312,"text":"2007"},{"top":249.63539,"left":288.3805,"width":46.663482666015625,"height":12.726882934570312,"text":"2008"},{"top":249.63539,"left":335.04398,"width":46.663818359375,"height":12.726882934570312,"text":"2009"},{"top":249.63539,"left":381.7078,"width":46.663330078125,"height":12.726882934570312,"text":"2010"},{"top":249.63539,"left":428.37112,"width":46.73773193359375,"height":12.726882934570312,"text":"2011"}],[{"top":262.36227,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Server closets"},{"top":262.36227,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,456"},{"top":262.36227,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,439"},{"top":262.36227,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,386"},{"top":262.36227,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,296"},{"top":262.36227,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,326"}],[{"top":275.25275,"left":119.789444,"width":121.92678833007812,"height":12.8914794921875,"text":"Server rooms"},{"top":275.25275,"left":241.71623,"width":46.66426086425781,"height":12.8914794921875,"text":"1,465"},{"top":275.25275,"left":288.3805,"width":46.663482666015625,"height":12.8914794921875,"text":"1,472"},{"top":275.25275,"left":335.04398,"width":46.663818359375,"height":12.8914794921875,"text":"1,427"},{"top":275.25275,"left":381.7078,"width":46.663330078125,"height":12.8914794921875,"text":"1,334"},{"top":275.25275,"left":428.37112,"width":46.73773193359375,"height":12.8914794921875,"text":"1,371"}],[{"top":288.14423,"left":119.789444,"width":121.92678833007812,"height":12.890167236328125,"text":"Localized data 
centers"},{"top":288.14423,"left":241.71623,"width":46.66426086425781,"height":12.890167236328125,"text":"1,465"},{"top":288.14423,"left":288.3805,"width":46.663482666015625,"height":12.890167236328125,"text":"1,471"},{"top":288.14423,"left":335.04398,"width":46.663818359375,"height":12.890167236328125,"text":"1,426"},{"top":288.14423,"left":381.7078,"width":46.663330078125,"height":12.890167236328125,"text":"1,334"},{"top":288.14423,"left":428.37112,"width":46.73773193359375,"height":12.890167236328125,"text":"1,371"}],[{"top":301.0344,"left":119.789444,"width":121.92678833007812,"height":12.890472412109375,"text":"Mid-tier data centers"},{"top":301.0344,"left":241.71623,"width":46.66426086425781,"height":12.890472412109375,"text":"1,465"},{"top":301.0344,"left":288.3805,"width":46.663482666015625,"height":12.890472412109375,"text":"1,471"},{"top":301.0344,"left":335.04398,"width":46.663818359375,"height":12.890472412109375,"text":"1,426"},{"top":301.0344,"left":381.7078,"width":46.663330078125,"height":12.890472412109375,"text":"1,334"},{"top":301.0344,"left":428.37112,"width":46.73773193359375,"height":12.890472412109375,"text":"1,371"}],[{"top":313.92487,"left":119.789444,"width":121.92678833007812,"height":12.785552978515625,"text":"Enterprise-class data centers"},{"top":313.92487,"left":241.71623,"width":46.66426086425781,"height":12.785552978515625,"text":"1,465"},{"top":313.92487,"left":288.3805,"width":46.663482666015625,"height":12.785552978515625,"text":"1,471"},{"top":313.92487,"left":335.04398,"width":46.663818359375,"height":12.785552978515625,"text":"1,426"},{"top":313.92487,"left":381.7078,"width":46.663330078125,"height":12.785552978515625,"text":"1,334"},{"top":313.92487,"left":428.37112,"width":46.73773193359375,"height":12.785552978515625,"text":"1,371"}],[{"top":326.71042,"left":119.789444,"width":355.31939697265625,"height":13.04400634765625,"text":"State-of-the-art 
scenario"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":339.75443,"left":119.789444,"width":121.92678833007812,"height":12.784942626953125,"text":"Volume servers in:"},{"top":339.75443,"left":241.71623,"width":46.66426086425781,"height":12.784942626953125,"text":"2007"},{"top":339.75443,"left":288.3805,"width":46.663482666015625,"height":12.784942626953125,"text":"2008"},{"top":339.75443,"left":335.04398,"width":46.663818359375,"height":12.784942626953125,"text":"2009"},{"top":339.75443,"left":381.7078,"width":46.663330078125,"height":12.784942626953125,"text":"2010"},{"top":339.75443,"left":428.37112,"width":46.73773193359375,"height":12.784942626953125,"text":"2011"}],[{"top":352.53937,"left":119.789444,"width":121.92678833007812,"height":12.89111328125,"text":"Server closets"},{"top":352.53937,"left":241.71623,"width":46.66426086425781,"height":12.89111328125,"text":"1,485"},{"top":352.53937,"left":288.3805,"width":46.663482666015625,"height":12.89111328125,"text":"1,471"},{"top":352.53937,"left":335.04398,"width":46.663818359375,"height":12.89111328125,"text":"1,424"},{"top":352.53937,"left":381.7078,"width":46.663330078125,"height":12.89111328125,"text":"1,315"},{"top":352.53937,"left":428.37112,"width":46.73773193359375,"height":12.89111328125,"text":"1,349"}],[{"top":365.43048,"left":119.789444,"width":121.92678833007812,"height":12.8902587890625,"text":"Server 
rooms"},{"top":365.43048,"left":241.71623,"width":46.66426086425781,"height":12.8902587890625,"text":"1,495"},{"top":365.43048,"left":288.3805,"width":46.663482666015625,"height":12.8902587890625,"text":"1,573"},{"top":365.43048,"left":335.04398,"width":46.663818359375,"height":12.8902587890625,"text":"1,586"},{"top":365.43048,"left":381.7078,"width":46.663330078125,"height":12.8902587890625,"text":"1,424"},{"top":365.43048,"left":428.37112,"width":46.73773193359375,"height":12.8902587890625,"text":"1,485"}],[{"top":378.32074,"left":119.789444,"width":121.92678833007812,"height":12.891143798828125,"text":"Localized data centers"},{"top":378.32074,"left":241.71623,"width":46.66426086425781,"height":12.891143798828125,"text":"1,495"},{"top":378.32074,"left":288.3805,"width":46.663482666015625,"height":12.891143798828125,"text":"1,572"},{"top":378.32074,"left":335.04398,"width":46.663818359375,"height":12.891143798828125,"text":"1,585"},{"top":378.32074,"left":381.7078,"width":46.663330078125,"height":12.891143798828125,"text":"1,424"},{"top":378.32074,"left":428.37112,"width":46.73773193359375,"height":12.891143798828125,"text":"1,485"}],[{"top":391.21188,"left":119.789444,"width":121.92678833007812,"height":12.89068603515625,"text":"Mid-tier data centers"},{"top":391.21188,"left":241.71623,"width":46.66426086425781,"height":12.89068603515625,"text":"1,495"},{"top":391.21188,"left":288.3805,"width":46.663482666015625,"height":12.89068603515625,"text":"1,572"},{"top":391.21188,"left":335.04398,"width":46.663818359375,"height":12.89068603515625,"text":"1,585"},{"top":391.21188,"left":381.7078,"width":46.663330078125,"height":12.89068603515625,"text":"1,424"},{"top":391.21188,"left":428.37112,"width":46.73773193359375,"height":12.89068603515625,"text":"1,485"}],[{"top":404.10257,"left":119.789444,"width":121.92678833007812,"height":12.968292236328125,"text":"Enterprise-class data 
centers"},{"top":404.10257,"left":241.71623,"width":46.66426086425781,"height":12.968292236328125,"text":"1,495"},{"top":404.10257,"left":288.3805,"width":46.663482666015625,"height":12.968292236328125,"text":"1,572"},{"top":404.10257,"left":335.04398,"width":46.663818359375,"height":12.968292236328125,"text":"1,585"},{"top":404.10257,"left":381.7078,"width":46.663330078125,"height":12.968292236328125,"text":"1,424"},{"top":404.10257,"left":428.37112,"width":46.73773193359375,"height":12.968292236328125,"text":"1,485"}]]},{"extraction_method":"lattice","page_number":1,"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":51.5423583984375,"right":464.1174,"bottom":589.802,"data":[[{"top":538.25964,"left":130.8136,"width":333.3038024902344,"height":12.88055419921875,"text":"All alternative scenarios"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":551.1402,"left":130.8136,"width":99.93739318847656,"height":12.784912109375,"text":"Server 
class"},{"top":551.1402,"left":230.75099,"width":46.66316223144531,"height":12.784912109375,"text":"2007"},{"top":551.1402,"left":277.41415,"width":46.663726806640625,"height":12.784912109375,"text":"2008"},{"top":551.1402,"left":324.07788,"width":46.663970947265625,"height":12.784912109375,"text":"2009"},{"top":551.1402,"left":370.74185,"width":46.663177490234375,"height":12.784912109375,"text":"2010"},{"top":551.1402,"left":417.40503,"width":46.712371826171875,"height":12.784912109375,"text":"2011"}],[{"top":563.9251,"left":130.8136,"width":99.93739318847656,"height":12.8905029296875,"text":"Mid-range"},{"top":563.9251,"left":230.75099,"width":46.66316223144531,"height":12.8905029296875,"text":"4,921"},{"top":563.9251,"left":277.41415,"width":46.663726806640625,"height":12.8905029296875,"text":"5,467"},{"top":563.9251,"left":324.07788,"width":46.663970947265625,"height":12.8905029296875,"text":"6,152"},{"top":563.9251,"left":370.74185,"width":46.663177490234375,"height":12.8905029296875,"text":"6,649"},{"top":563.9251,"left":417.40503,"width":46.712371826171875,"height":12.8905029296875,"text":"7,185"}],[{"top":576.8156,"left":130.8136,"width":99.93739318847656,"height":12.98638916015625,"text":"High-end"},{"top":576.8156,"left":230.75099,"width":46.66316223144531,"height":12.98638916015625,"text":"76,295"},{"top":576.8156,"left":277.41415,"width":46.663726806640625,"height":12.98638916015625,"text":"81,624"},{"top":576.8156,"left":324.07788,"width":46.663970947265625,"height":12.98638916015625,"text":"86,849"},{"top":576.8156,"left":370.74185,"width":46.663177490234375,"height":12.98638916015625,"text":"92,662"},{"top":576.8156,"left":417.40503,"width":46.712371826171875,"height":12.98638916015625,"text":"98,864"}]]}] diff --git a/src/test/resources/technology/tabula/json/twotables.json b/src/test/resources/technology/tabula/json/twotables.json deleted file mode 100644 index 35a4c50f..00000000 --- a/src/test/resources/technology/tabula/json/twotables.json +++ 
/dev/null @@ -1 +0,0 @@ -[{"extraction_method":"lattice","page_number":1,"top":111.79087,"left":67.41156,"width":342.44476318359375,"height":174.0704345703125,"right":409.85632,"bottom":285.8613,"data":[[{"top":111.79087,"left":67.41156,"width":85.61141967773438,"height":28.536293029785156,"text":""},{"top":111.79087,"left":153.02298,"width":256.8333435058594,"height":14.267105102539062,"text":"株主資本"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":126.057976,"left":153.02298,"width":51.367401123046875,"height":14.269187927246094,"text":"資本金"},{"top":126.057976,"left":204.39038,"width":51.36669921875,"height":14.269187927246094,"text":"資本剰余金"},{"top":126.057976,"left":255.75708,"width":51.3656005859375,"height":14.269187927246094,"text":"利益剰余金"},{"top":126.057976,"left":307.12268,"width":51.366668701171875,"height":14.269187927246094,"text":"自己株式"},{"top":126.057976,"left":358.48935,"width":51.366973876953125,"height":14.269187927246094,"text":"株主資本合計"}],[{"top":140.32716,"left":67.41156,"width":85.61141967773438,"height":14.265777587890625,"text":"当期首残高"},{"top":140.32716,"left":153.02298,"width":51.367401123046875,"height":14.265777587890625,"text":"5,664"},{"top":140.32716,"left":204.39038,"width":51.36669921875,"height":14.265777587890625,"text":"749"},{"top":140.32716,"left":255.75708,"width":51.3656005859375,"height":14.265777587890625,"text":"12,017"},{"top":140.32716,"left":307.12268,"width":51.366668701171875,"height":14.265777587890625,"text":"△747"},{"top":140.32716,"left":358.48935,"width":51.366973876953125,"height":14.265777587890625,"text":"17,683"}],[{"top":154.59294,"left":67.41156,"width":85.61141967773438,"height":14.26910400390625,"text":"当期変動額"},{"top":154.59294,"left":153.02298,"width":51.367401123
046875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":204.39038,"width":51.36669921875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":255.75708,"width":51.3656005859375,"height":14.26910400390625,"text":""},{"top":154.59294,"left":307.12268,"width":51.366668701171875,"height":14.26910400390625,"text":""},{"top":154.59294,"left":358.48935,"width":51.366973876953125,"height":14.26910400390625,"text":""}],[{"top":168.86205,"left":67.41156,"width":85.61141967773438,"height":14.268997192382812,"text":"剰余金の配当"},{"top":168.86205,"left":153.02298,"width":51.367401123046875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":204.39038,"width":51.36669921875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":255.75708,"width":51.3656005859375,"height":14.268997192382812,"text":"△525"},{"top":168.86205,"left":307.12268,"width":51.366668701171875,"height":14.268997192382812,"text":""},{"top":168.86205,"left":358.48935,"width":51.366973876953125,"height":14.268997192382812,"text":"△525"}],[{"top":183.13104,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期純利益"},{"top":183.13104,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":"1,269"},{"top":183.13104,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":183.13104,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":"1,269"}],[{"top":197.4002,"left":67.41156,"width":85.61141967773438,"height":14.268936157226562,"text":"自己株式の取得"},{"top":197.4002,"left":153.02298,"width":51.367401123046875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":204.39038,"width":51.36669921875,"height":14.268936157226562,"text":""},{"top":197.4002,"left":255.75708,"width":51.3656005859375,"h
eight":14.268936157226562,"text":""},{"top":197.4002,"left":307.12268,"width":51.366668701171875,"height":14.268936157226562,"text":"△0"},{"top":197.4002,"left":358.48935,"width":51.366973876953125,"height":14.268936157226562,"text":"△0"}],[{"top":211.66914,"left":67.41156,"width":85.61141967773438,"height":22.82952880859375,"text":"持分法の適用範囲\rの変動"},{"top":211.66914,"left":153.02298,"width":51.367401123046875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":204.39038,"width":51.36669921875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":255.75708,"width":51.3656005859375,"height":22.82952880859375,"text":"85"},{"top":211.66914,"left":307.12268,"width":51.366668701171875,"height":22.82952880859375,"text":""},{"top":211.66914,"left":358.48935,"width":51.366973876953125,"height":22.82952880859375,"text":"85"}],[{"top":234.49867,"left":67.41156,"width":85.61141967773438,"height":22.829910278320312,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":234.49867,"left":153.02298,"width":51.367401123046875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":204.39038,"width":51.36669921875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":255.75708,"width":51.3656005859375,"height":22.829910278320312,"text":""},{"top":234.49867,"left":307.12268,"width":51.366668701171875,"height":22.829910278320312,"text":""},{"top":234.49867,"left":358.48935,"width":51.366973876953125,"height":22.829910278320312,"text":""}],[{"top":257.32858,"left":67.41156,"width":85.61141967773438,"height":14.2686767578125,"text":"当期変動額合計"},{"top":257.32858,"left":153.02298,"width":51.367401123046875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":204.39038,"width":51.36669921875,"height":14.2686767578125,"text":"―"},{"top":257.32858,"left":255.75708,"width":51.3656005859375,"height":14.2686767578125,"text":"829"},{"top":257.32858,"left":307.12268,"width":51.366668701171875,"height":14.2686767578125,"text":"△0"},{"top":257.32858,"left":358.48935,"width":5
1.366973876953125,"height":14.2686767578125,"text":"829"}],[{"top":271.59726,"left":67.41156,"width":85.61141967773438,"height":14.2640380859375,"text":"当期末残高"},{"top":271.59726,"left":153.02298,"width":51.367401123046875,"height":14.2640380859375,"text":"5,664"},{"top":271.59726,"left":204.39038,"width":51.36669921875,"height":14.2640380859375,"text":"749"},{"top":271.59726,"left":255.75708,"width":51.3656005859375,"height":14.2640380859375,"text":"12,846"},{"top":271.59726,"left":307.12268,"width":51.366668701171875,"height":14.2640380859375,"text":"△747"},{"top":271.59726,"left":358.48935,"width":51.366973876953125,"height":14.2640380859375,"text":"18,512"}]]},{"extraction_method":"lattice","page_number":1,"top":312.25272,"left":67.41156,"width":445.17803955078125,"height":191.19696044921875,"right":512.5896,"bottom":503.44968,"data":[[{"top":312.25272,"left":67.41156,"width":85.61141967773438,"height":45.658233642578125,"text":""},{"top":312.25272,"left":153.02298,"width":256.8333435058594,"height":14.267730712890625,"text":"その他の包括利益累計額"},{"top":312.25272,"left":409.85632,"width":51.365631103515625,"height":45.658233642578125,"text":"少数株主持分"},{"top":312.25272,"left":461.22195,"width":51.367645263671875,"height":45.658233642578125,"text":"純資産合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":326.52045,"left":153.02298,"width":51.367401123046875,"height":31.3905029296875,"text":"その他有価証券\r評価差額金"},{"top":326.52045,"left":204.39038,"width":51.36669921875,"height":31.3905029296875,"text":"繰延ヘッジ\r損益"},{"top":326.52045,"left":255.75708,"width":51.3656005859375,"height":31.3905029296875,"text":"為替換算\r調整勘定"},{"top":326.52045,"left":307.12268,"width":51.366668701171875,"height":31.3905029296875,"text":"退職給付に係る\r調整累計額"},{"top
":326.52045,"left":358.48935,"width":51.366973876953125,"height":31.3905029296875,"text":"その他の\r包括利益\r累計額合計"},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""},{"top":0.0,"left":0.0,"width":0.0,"height":0.0,"text":""}],[{"top":357.91095,"left":67.41156,"width":85.61141967773438,"height":14.268768310546875,"text":"当期首残高"},{"top":357.91095,"left":153.02298,"width":51.367401123046875,"height":14.268768310546875,"text":"△669"},{"top":357.91095,"left":204.39038,"width":51.36669921875,"height":14.268768310546875,"text":"61"},{"top":357.91095,"left":255.75708,"width":51.3656005859375,"height":14.268768310546875,"text":"△109"},{"top":357.91095,"left":307.12268,"width":51.366668701171875,"height":14.268768310546875,"text":"―"},{"top":357.91095,"left":358.48935,"width":51.366973876953125,"height":14.268768310546875,"text":"△717"},{"top":357.91095,"left":409.85632,"width":51.365631103515625,"height":14.268768310546875,"text":"246"},{"top":357.91095,"left":461.22195,"width":51.367645263671875,"height":14.268768310546875,"text":"17,212"}],[{"top":372.17972,"left":67.41156,"width":85.61141967773438,"height":14.2691650390625,"text":"当期変動額"},{"top":372.17972,"left":153.02298,"width":51.367401123046875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":204.39038,"width":51.36669921875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":255.75708,"width":51.3656005859375,"height":14.2691650390625,"text":""},{"top":372.17972,"left":307.12268,"width":51.366668701171875,"height":14.2691650390625,"text":""},{"top":372.17972,"left":358.48935,"width":51.366973876953125,"height":14.2691650390625,"text":""},{"top":372.17972,"left":409.85632,"width":51.365631103515625,"height":14.2691650390625,"text":""},{"top":372.17972,"left":461.22195,"width":51.367645263671875,"height":14.2691650390625,"text":""}],[{"top":386.44888,"left":67.41156,"width":85.61141967773438,"height":14.268646240234375,"text":"剰余金の配当"},{"top":386.44888,"left":153.02298,"width":51.367401123046875,"
height":14.268646240234375,"text":""},{"top":386.44888,"left":204.39038,"width":51.36669921875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":255.75708,"width":51.3656005859375,"height":14.268646240234375,"text":""},{"top":386.44888,"left":307.12268,"width":51.366668701171875,"height":14.268646240234375,"text":""},{"top":386.44888,"left":358.48935,"width":51.366973876953125,"height":14.268646240234375,"text":""},{"top":386.44888,"left":409.85632,"width":51.365631103515625,"height":14.268646240234375,"text":""},{"top":386.44888,"left":461.22195,"width":51.367645263671875,"height":14.268646240234375,"text":"△525"}],[{"top":400.71753,"left":67.41156,"width":85.61141967773438,"height":14.26812744140625,"text":"当期純利益"},{"top":400.71753,"left":153.02298,"width":51.367401123046875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":204.39038,"width":51.36669921875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":255.75708,"width":51.3656005859375,"height":14.26812744140625,"text":""},{"top":400.71753,"left":307.12268,"width":51.366668701171875,"height":14.26812744140625,"text":""},{"top":400.71753,"left":358.48935,"width":51.366973876953125,"height":14.26812744140625,"text":""},{"top":400.71753,"left":409.85632,"width":51.365631103515625,"height":14.26812744140625,"text":""},{"top":400.71753,"left":461.22195,"width":51.367645263671875,"height":14.26812744140625,"text":"1,269"}],[{"top":414.98566,"left":67.41156,"width":85.61141967773438,"height":14.26678466796875,"text":"自己株式の取得"},{"top":414.98566,"left":153.02298,"width":51.367401123046875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":204.39038,"width":51.36669921875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":255.75708,"width":51.3656005859375,"height":14.26678466796875,"text":""},{"top":414.98566,"left":307.12268,"width":51.366668701171875,"height":14.26678466796875,"text":""},{"top":414.98566,"left":358.48935,"width":51.366973876953125,"height":14
.26678466796875,"text":""},{"top":414.98566,"left":409.85632,"width":51.365631103515625,"height":14.26678466796875,"text":""},{"top":414.98566,"left":461.22195,"width":51.367645263671875,"height":14.26678466796875,"text":"△0"}],[{"top":429.25244,"left":67.41156,"width":85.61141967773438,"height":22.8292236328125,"text":"持分法の適用範囲\rの変動"},{"top":429.25244,"left":153.02298,"width":51.367401123046875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":204.39038,"width":51.36669921875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":255.75708,"width":51.3656005859375,"height":22.8292236328125,"text":""},{"top":429.25244,"left":307.12268,"width":51.366668701171875,"height":22.8292236328125,"text":""},{"top":429.25244,"left":358.48935,"width":51.366973876953125,"height":22.8292236328125,"text":""},{"top":429.25244,"left":409.85632,"width":51.365631103515625,"height":22.8292236328125,"text":""},{"top":429.25244,"left":461.22195,"width":51.367645263671875,"height":22.8292236328125,"text":"85"}],[{"top":452.08167,"left":67.41156,"width":85.61141967773438,"height":22.830596923828125,"text":"株主資本以外の項目\rの当期変動額(純額)"},{"top":452.08167,"left":153.02298,"width":51.367401123046875,"height":22.830596923828125,"text":"556"},{"top":452.08167,"left":204.39038,"width":51.36669921875,"height":22.830596923828125,"text":"80"},{"top":452.08167,"left":255.75708,"width":51.3656005859375,"height":22.830596923828125,"text":"5"},{"top":452.08167,"left":307.12268,"width":51.366668701171875,"height":22.830596923828125,"text":"―"},{"top":452.08167,"left":358.48935,"width":51.366973876953125,"height":22.830596923828125,"text":"642"},{"top":452.08167,"left":409.85632,"width":51.365631103515625,"height":22.830596923828125,"text":"△0"},{"top":452.08167,"left":461.22195,"width":51.367645263671875,"height":22.830596923828125,"text":"642"}],[{"top":474.91226,"left":67.41156,"width":85.61141967773438,"height":14.26776123046875,"text":"当期変動額合計"},{"top":474.91226,"left":153.02298,"width":51
.367401123046875,"height":14.26776123046875,"text":"556"},{"top":474.91226,"left":204.39038,"width":51.36669921875,"height":14.26776123046875,"text":"80"},{"top":474.91226,"left":255.75708,"width":51.3656005859375,"height":14.26776123046875,"text":"5"},{"top":474.91226,"left":307.12268,"width":51.366668701171875,"height":14.26776123046875,"text":"―"},{"top":474.91226,"left":358.48935,"width":51.366973876953125,"height":14.26776123046875,"text":"642"},{"top":474.91226,"left":409.85632,"width":51.365631103515625,"height":14.26776123046875,"text":"△0"},{"top":474.91226,"left":461.22195,"width":51.367645263671875,"height":14.26776123046875,"text":"1,471"}],[{"top":489.18002,"left":67.41156,"width":85.61141967773438,"height":14.2696533203125,"text":"当期末残高"},{"top":489.18002,"left":153.02298,"width":51.367401123046875,"height":14.2696533203125,"text":"△113"},{"top":489.18002,"left":204.39038,"width":51.36669921875,"height":14.2696533203125,"text":"142"},{"top":489.18002,"left":255.75708,"width":51.3656005859375,"height":14.2696533203125,"text":"△104"},{"top":489.18002,"left":307.12268,"width":51.366668701171875,"height":14.2696533203125,"text":"―"},{"top":489.18002,"left":358.48935,"width":51.366973876953125,"height":14.2696533203125,"text":"△75"},{"top":489.18002,"left":409.85632,"width":51.365631103515625,"height":14.2696533203125,"text":"245"},{"top":489.18002,"left":461.22195,"width":51.367645263671875,"height":14.2696533203125,"text":"18,683"}]]}] diff --git a/src/test/resources/technology/tabula/labor.pdf b/src/test/resources/technology/tabula/labor.pdf deleted file mode 100644 index 9e322812..00000000 Binary files a/src/test/resources/technology/tabula/labor.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/m27.pdf b/src/test/resources/technology/tabula/m27.pdf deleted file mode 100644 index cecd7b6b..00000000 Binary files a/src/test/resources/technology/tabula/m27.pdf and /dev/null differ diff --git 
a/src/test/resources/technology/tabula/mednine.pdf b/src/test/resources/technology/tabula/mednine.pdf deleted file mode 100644 index 0a3f36c1..00000000 Binary files a/src/test/resources/technology/tabula/mednine.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/npe_issue_206.pdf b/src/test/resources/technology/tabula/npe_issue_206.pdf deleted file mode 100644 index 352e77ba..00000000 Binary files a/src/test/resources/technology/tabula/npe_issue_206.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/offense.pdf b/src/test/resources/technology/tabula/offense.pdf deleted file mode 100644 index 08ed7263..00000000 Binary files a/src/test/resources/technology/tabula/offense.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/puertos1.pdf b/src/test/resources/technology/tabula/puertos1.pdf deleted file mode 100644 index beb80c42..00000000 Binary files a/src/test/resources/technology/tabula/puertos1.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/rotated_page.pdf b/src/test/resources/technology/tabula/rotated_page.pdf deleted file mode 100644 index df532a23..00000000 Binary files a/src/test/resources/technology/tabula/rotated_page.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/schools.pdf b/src/test/resources/technology/tabula/schools.pdf deleted file mode 100644 index eef50e25..00000000 Binary files a/src/test/resources/technology/tabula/schools.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/should_detect_rulings.pdf b/src/test/resources/technology/tabula/should_detect_rulings.pdf deleted file mode 100644 index 55539eea..00000000 Binary files a/src/test/resources/technology/tabula/should_detect_rulings.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/sort_exception.pdf b/src/test/resources/technology/tabula/sort_exception.pdf deleted file mode 100644 index 5f844627..00000000 Binary files 
a/src/test/resources/technology/tabula/sort_exception.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/spanning_cells.pdf b/src/test/resources/technology/tabula/spanning_cells.pdf deleted file mode 100755 index 378fb1e0..00000000 Binary files a/src/test/resources/technology/tabula/spanning_cells.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf b/src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf deleted file mode 100644 index aa221b98..00000000 Binary files a/src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/sydney_disclosure_contract.pdf b/src/test/resources/technology/tabula/sydney_disclosure_contract.pdf deleted file mode 100644 index 70f4215a..00000000 Binary files a/src/test/resources/technology/tabula/sydney_disclosure_contract.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/twotables.pdf b/src/test/resources/technology/tabula/twotables.pdf deleted file mode 100644 index 42921a97..00000000 Binary files a/src/test/resources/technology/tabula/twotables.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/us-007.pdf b/src/test/resources/technology/tabula/us-007.pdf deleted file mode 100644 index 45b3de3c..00000000 Binary files a/src/test/resources/technology/tabula/us-007.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/us-017.pdf b/src/test/resources/technology/tabula/us-017.pdf deleted file mode 100644 index 64158f5f..00000000 Binary files a/src/test/resources/technology/tabula/us-017.pdf and /dev/null differ diff --git a/src/test/resources/technology/tabula/us-020.pdf b/src/test/resources/technology/tabula/us-020.pdf deleted file mode 100644 index 39a8546c..00000000 Binary files a/src/test/resources/technology/tabula/us-020.pdf and /dev/null differ diff --git 
a/src/test/resources/technology/tabula/us-024.pdf b/src/test/resources/technology/tabula/us-024.pdf deleted file mode 100644 index 1a43f84c..00000000 Binary files a/src/test/resources/technology/tabula/us-024.pdf and /dev/null differ diff --git a/stylesheet.css b/stylesheet.css new file mode 100644 index 00000000..6ea9e516 --- /dev/null +++ b/stylesheet.css @@ -0,0 +1,29 @@ +/* Javadoc style sheet */ + +/* Define colors, fonts and other style attributes here to override the defaults */ + +/* Page background color */ +body { background-color: #FFFFFF; color:#000000 } + +/* Headings */ +h1 { font-size: 145% } + +/* Table colors */ +.TableHeadingColor { background: #CCCCFF; color:#000000 } /* Dark mauve */ +.TableSubHeadingColor { background: #EEEEFF; color:#000000 } /* Light mauve */ +.TableRowColor { background: #FFFFFF; color:#000000 } /* White */ + +/* Font used in left-hand frame lists */ +.FrameTitleFont { font-size: 100%; font-family: Helvetica, Arial, sans-serif; color:#000000 } +.FrameHeadingFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } +.FrameItemFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } + +/* Navigation bar fonts and colors */ +.NavBarCell1 { background-color:#EEEEFF; color:#000000} /* Light mauve */ +.NavBarCell1Rev { background-color:#00008B; color:#FFFFFF} /* Dark Blue */ +.NavBarFont1 { font-family: Arial, Helvetica, sans-serif; color:#000000;color:#000000;} +.NavBarFont1Rev { font-family: Arial, Helvetica, sans-serif; color:#FFFFFF;color:#FFFFFF;} + +.NavBarCell2 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} +.NavBarCell3 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} + diff --git a/technology/tabula/Cell.html b/technology/tabula/Cell.html new file mode 100644 index 00000000..c9862709 --- /dev/null +++ b/technology/tabula/Cell.html @@ -0,0 +1,518 @@ + + + + + + + +Cell (tabula-extractor 
0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class Cell

+
+java.lang.Object
+  extended by java.awt.geom.RectangularShape
+      extended by java.awt.geom.Rectangle2D
+          extended by java.awt.geom.Rectangle2D.Float
+              extended by technology.tabula.Rectangle
+                  extended by technology.tabula.RectangularTextContainer<TextChunk>
+                      extended by technology.tabula.Cell
+
+
+
All Implemented Interfaces:
Shape, Serializable, Cloneable, Comparable<Rectangle>
+
+
+
+
public class Cell
extends RectangularTextContainer<TextChunk>
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.awt.geom.Rectangle2D
Rectangle2D.Double, Rectangle2D.Float
+  + + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D.Float
height, width, x, y
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D
OUT_BOTTOM, OUT_LEFT, OUT_RIGHT, OUT_TOP
+  + + + + + + + + + + + + + +
+Constructor Summary
Cell(float top, + float left, + float width, + float height) + +
+           
Cell(Point2D topLeft, + Point2D bottomRight) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ StringgetText() + +
+           
+ StringgetText(boolean useLineReturns) + +
+           
+ List<TextChunk>getTextElements() + +
+           
+ booleanisPlaceholder() + +
+           
+ booleanisSpanning() + +
+           
+ voidsetPlaceholder(boolean placeholder) + +
+           
+ voidsetSpanning(boolean spanning) + +
+           
+ voidsetTextElements(List<TextChunk> textElements) + +
+           
+ + + + + + + +
Methods inherited from class technology.tabula.RectangularTextContainer
merge, toString
+ + + + + + + +
Methods inherited from class technology.tabula.Rectangle
boundingBoxOf, compareTo, getArea, getBottom, getLeft, getPoints, getRight, getTop, horizontallyOverlaps, horizontalOverlapRatio, merge, overlapRatio, setBottom, setLeft, setRight, setTop, verticallyOverlaps, verticalOverlap, verticalOverlapRatio
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D.Float
createIntersection, createUnion, getBounds2D, getHeight, getWidth, getX, getY, isEmpty, outcode, setRect, setRect, setRect
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D
add, add, add, contains, contains, equals, getPathIterator, getPathIterator, hashCode, intersect, intersects, intersectsLine, intersectsLine, outcode, setFrame, union
+ + + + + + + +
Methods inherited from class java.awt.geom.RectangularShape
clone, contains, contains, getBounds, getCenterX, getCenterY, getFrame, getMaxX, getMaxY, getMinX, getMinY, intersects, setFrame, setFrame, setFrameFromCenter, setFrameFromCenter, setFrameFromDiagonal, setFrameFromDiagonal
+ + + + + + + +
Methods inherited from class java.lang.Object
finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+Cell

+
+public Cell(float top,
+            float left,
+            float width,
+            float height)
+
+
+
+ +

+Cell

+
+public Cell(Point2D topLeft,
+            Point2D bottomRight)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getText

+
+public String getText(boolean useLineReturns)
+
+
+
Specified by:
getText in class RectangularTextContainer<TextChunk>
+
+
+
+
+
+
+ +

+getText

+
+public String getText()
+
+
+
Specified by:
getText in class RectangularTextContainer<TextChunk>
+
+
+
+
+
+
+ +

+isSpanning

+
+public boolean isSpanning()
+
+
+
+
+
+
+ +

+setSpanning

+
+public void setSpanning(boolean spanning)
+
+
+
+
+
+
+ +

+isPlaceholder

+
+public boolean isPlaceholder()
+
+
+
+
+
+
+ +

+setPlaceholder

+
+public void setPlaceholder(boolean placeholder)
+
+
+
+
+
+
+ +

+getTextElements

+
+public List<TextChunk> getTextElements()
+
+
+
Specified by:
getTextElements in class RectangularTextContainer<TextChunk>
+
+
+
+
+
+
+ +

+setTextElements

+
+public void setTextElements(List<TextChunk> textElements)
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/CohenSutherlandClipping.html b/technology/tabula/CohenSutherlandClipping.html new file mode 100644 index 00000000..f8421667 --- /dev/null +++ b/technology/tabula/CohenSutherlandClipping.html @@ -0,0 +1,304 @@ + + + + + + + +CohenSutherlandClipping (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class CohenSutherlandClipping

+
+java.lang.Object
+  extended by technology.tabula.CohenSutherlandClipping
+
+
+
+
public final class CohenSutherlandClipping
extends Object
+ + +

+Implements the well known Cohen Sutherland line + clipping algorithm (line against clip rectangle). +

+ +

+


+ +

+ + + + + + + + + + + + + + +
+Constructor Summary
CohenSutherlandClipping() + +
+          Creates a Cohen Sutherland clipper with clip rect (0, 0, 0, 0).
CohenSutherlandClipping(Rectangle2D clip) + +
+          Creates a Cohen Sutherland clipper with the given clip rectangle.
+  + + + + + + + + + + + + + + + +
+Method Summary
+ booleanclip(Line2D.Float line) + +
+          Clips a given line against the clip rectangle.
+ voidsetClip(Rectangle2D clip) + +
+          Sets the clip rectangle.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+CohenSutherlandClipping

+
+public CohenSutherlandClipping()
+
+
Creates a Cohen Sutherland clipper with clip rect (0, 0, 0, 0). +

+

+
+ +

+CohenSutherlandClipping

+
+public CohenSutherlandClipping(Rectangle2D clip)
+
+
Creates a Cohen Sutherland clipper with the given clip rectangle. +

+

+
Parameters:
clip - the clip rectangle to use
+
+ + + + + + + + +
+Method Detail
+ +

+setClip

+
+public void setClip(Rectangle2D clip)
+
+
Sets the clip rectangle. +

+

+
Parameters:
clip - the clip rectangle
+
+
+
+ +

+clip

+
+public boolean clip(Line2D.Float line)
+
+
Clips a given line against the clip rectangle. + The modification (if needed) is done in place. +

+

+
Parameters:
line - the line to clip +
Returns:
true if line is clipped, false if line is + totally outside the clip rect.
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/CommandLineApp.html b/technology/tabula/CommandLineApp.html new file mode 100644 index 00000000..a642debc --- /dev/null +++ b/technology/tabula/CommandLineApp.html @@ -0,0 +1,275 @@ + + + + + + + +CommandLineApp (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class CommandLineApp

+
+java.lang.Object
+  extended by technology.tabula.CommandLineApp
+
+
+
+
public class CommandLineApp
extends Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
CommandLineApp() + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+static voidmain(String[] args) + +
+           
+static List<Float>parseFloatList(String option) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+CommandLineApp

+
+public CommandLineApp()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(String[] args)
+
+
+
+
+
+
+ +

+parseFloatList

+
+public static List<Float> parseFloatList(String option)
+                                  throws org.apache.commons.cli.ParseException
+
+
+ +
Throws: +
org.apache.commons.cli.ParseException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/DummyGraphics2D.html b/technology/tabula/DummyGraphics2D.html new file mode 100644 index 00000000..f4c91ff4 --- /dev/null +++ b/technology/tabula/DummyGraphics2D.html @@ -0,0 +1,2024 @@ + + + + + + + +DummyGraphics2D (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class DummyGraphics2D

+
+java.lang.Object
+  extended by java.awt.Graphics
+      extended by java.awt.Graphics2D
+          extended by technology.tabula.DummyGraphics2D
+
+
+
+
public class DummyGraphics2D
extends Graphics2D
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
DummyGraphics2D() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidaddRenderingHints(Map<?,?> hints) + +
+           
+ voidclearRect(int x, + int y, + int width, + int height) + +
+           
+ voidclip(Shape s) + +
+           
+ voidclipRect(int x, + int y, + int width, + int height) + +
+           
+ voidcopyArea(int x, + int y, + int width, + int height, + int dx, + int dy) + +
+           
+ Graphicscreate() + +
+           
+ voiddispose() + +
+           
+ voiddraw(Shape s) + +
+           
+ voiddrawArc(int x, + int y, + int width, + int height, + int startAngle, + int arcAngle) + +
+           
+ voiddrawGlyphVector(GlyphVector g, + float x, + float y) + +
+           
+ voiddrawImage(BufferedImage img, + BufferedImageOp op, + int x, + int y) + +
+           
+ booleandrawImage(Image img, + AffineTransform xform, + ImageObserver obs) + +
+           
+ booleandrawImage(Image img, + int x, + int y, + Color bgcolor, + ImageObserver observer) + +
+           
+ booleandrawImage(Image img, + int x, + int y, + ImageObserver observer) + +
+           
+ booleandrawImage(Image img, + int x, + int y, + int width, + int height, + Color bgcolor, + ImageObserver observer) + +
+           
+ booleandrawImage(Image img, + int x, + int y, + int width, + int height, + ImageObserver observer) + +
+           
+ booleandrawImage(Image img, + int dx1, + int dy1, + int dx2, + int dy2, + int sx1, + int sy1, + int sx2, + int sy2, + Color bgcolor, + ImageObserver observer) + +
+           
+ booleandrawImage(Image img, + int dx1, + int dy1, + int dx2, + int dy2, + int sx1, + int sy1, + int sx2, + int sy2, + ImageObserver observer) + +
+           
+ voiddrawLine(int x1, + int y1, + int x2, + int y2) + +
+           
+ voiddrawOval(int x, + int y, + int width, + int height) + +
+           
+ voiddrawPolygon(int[] xPoints, + int[] yPoints, + int nPoints) + +
+           
+ voiddrawPolyline(int[] xPoints, + int[] yPoints, + int nPoints) + +
+           
+ voiddrawRenderableImage(RenderableImage img, + AffineTransform xform) + +
+           
+ voiddrawRenderedImage(RenderedImage img, + AffineTransform xform) + +
+           
+ voiddrawRoundRect(int x, + int y, + int width, + int height, + int arcWidth, + int arcHeight) + +
+           
+ voiddrawString(AttributedCharacterIterator iterator, + float x, + float y) + +
+           
+ voiddrawString(AttributedCharacterIterator iterator, + int x, + int y) + +
+           
+ voiddrawString(String str, + float x, + float y) + +
+           
+ voiddrawString(String str, + int x, + int y) + +
+           
+ voidfill(Shape s) + +
+           
+ voidfillArc(int x, + int y, + int width, + int height, + int startAngle, + int arcAngle) + +
+           
+ voidfillOval(int x, + int y, + int width, + int height) + +
+           
+ voidfillPolygon(int[] xPoints, + int[] yPoints, + int nPoints) + +
+           
+ voidfillRect(int x, + int y, + int width, + int height) + +
+           
+ voidfillRoundRect(int x, + int y, + int width, + int height, + int arcWidth, + int arcHeight) + +
+           
+ ColorgetBackground() + +
+           
+ ShapegetClip() + +
+           
+ RectanglegetClipBounds() + +
+           
+ ColorgetColor() + +
+           
+ CompositegetComposite() + +
+           
+ GraphicsConfigurationgetDeviceConfiguration() + +
+           
+ FontgetFont() + +
+           
+ FontMetricsgetFontMetrics(Font f) + +
+           
+ FontRenderContextgetFontRenderContext() + +
+           
+ PaintgetPaint() + +
+           
+ ObjectgetRenderingHint(RenderingHints.Key hintKey) + +
+           
+ RenderingHintsgetRenderingHints() + +
+           
+ StrokegetStroke() + +
+           
+ AffineTransformgetTransform() + +
+           
+ booleanhit(Rectangle rect, + Shape s, + boolean onStroke) + +
+           
+ voidrotate(double theta) + +
+           
+ voidrotate(double theta, + double x, + double y) + +
+           
+ voidscale(double sx, + double sy) + +
+           
+ voidsetBackground(Color color) + +
+           
+ voidsetClip(int x, + int y, + int width, + int height) + +
+           
+ voidsetClip(Shape clip) + +
+           
+ voidsetColor(Color c) + +
+           
+ voidsetComposite(Composite comp) + +
+           
+ voidsetFont(Font font) + +
+           
+ voidsetPaint(Paint paint) + +
+           
+ voidsetPaintMode() + +
+           
+ voidsetRenderingHint(RenderingHints.Key hintKey, + Object hintValue) + +
+           
+ voidsetRenderingHints(Map<?,?> hints) + +
+           
+ voidsetStroke(Stroke s) + +
+           
+ voidsetTransform(AffineTransform Tx) + +
+           
+ voidsetXORMode(Color c1) + +
+           
+ voidshear(double shx, + double shy) + +
+           
+ voidtransform(AffineTransform Tx) + +
+           
+ voidtranslate(double tx, + double ty) + +
+           
+ voidtranslate(int x, + int y) + +
+           
+ + + + + + + +
Methods inherited from class java.awt.Graphics2D
draw3DRect, fill3DRect
+ + + + + + + +
Methods inherited from class java.awt.Graphics
create, drawBytes, drawChars, drawPolygon, drawRect, fillPolygon, finalize, getClipBounds, getClipRect, getFontMetrics, hitClip, toString
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+DummyGraphics2D

+
+public DummyGraphics2D()
+
+
+ + + + + + + + +
+Method Detail
+ +

+addRenderingHints

+
+public void addRenderingHints(Map<?,?> hints)
+
+
+
Specified by:
addRenderingHints in class Graphics2D
+
+
+
+
+
+
+ +

+clip

+
+public void clip(Shape s)
+
+
+
Specified by:
clip in class Graphics2D
+
+
+
+
+
+
+ +

+draw

+
+public void draw(Shape s)
+
+
+
Specified by:
draw in class Graphics2D
+
+
+
+
+
+
+ +

+drawGlyphVector

+
+public void drawGlyphVector(GlyphVector g,
+                            float x,
+                            float y)
+
+
+
Specified by:
drawGlyphVector in class Graphics2D
+
+
+
+
+
+
+ +

+drawImage

+
+public boolean drawImage(Image img,
+                         AffineTransform xform,
+                         ImageObserver obs)
+
+
+
Specified by:
drawImage in class Graphics2D
+
+
+
+
+
+
+ +

+drawImage

+
+public void drawImage(BufferedImage img,
+                      BufferedImageOp op,
+                      int x,
+                      int y)
+
+
+
Specified by:
drawImage in class Graphics2D
+
+
+
+
+
+
+ +

+drawRenderableImage

+
+public void drawRenderableImage(RenderableImage img,
+                                AffineTransform xform)
+
+
+
Specified by:
drawRenderableImage in class Graphics2D
+
+
+
+
+
+
+ +

+drawRenderedImage

+
+public void drawRenderedImage(RenderedImage img,
+                              AffineTransform xform)
+
+
+
Specified by:
drawRenderedImage in class Graphics2D
+
+
+
+
+
+
+ +

+drawString

+
+public void drawString(String str,
+                       int x,
+                       int y)
+
+
+
Specified by:
drawString in class Graphics2D
+
+
+
+
+
+
+ +

+drawString

+
+public void drawString(String str,
+                       float x,
+                       float y)
+
+
+
Specified by:
drawString in class Graphics2D
+
+
+
+
+
+
+ +

+drawString

+
+public void drawString(AttributedCharacterIterator iterator,
+                       int x,
+                       int y)
+
+
+
Specified by:
drawString in class Graphics2D
+
+
+
+
+
+
+ +

+drawString

+
+public void drawString(AttributedCharacterIterator iterator,
+                       float x,
+                       float y)
+
+
+
Specified by:
drawString in class Graphics2D
+
+
+
+
+
+
+ +

+fill

+
+public void fill(Shape s)
+
+
+
Specified by:
fill in class Graphics2D
+
+
+
+
+
+
+ +

+getBackground

+
+public Color getBackground()
+
+
+
Specified by:
getBackground in class Graphics2D
+
+
+
+
+
+
+ +

+getComposite

+
+public Composite getComposite()
+
+
+
Specified by:
getComposite in class Graphics2D
+
+
+
+
+
+
+ +

+getDeviceConfiguration

+
+public GraphicsConfiguration getDeviceConfiguration()
+
+
+
Specified by:
getDeviceConfiguration in class Graphics2D
+
+
+
+
+
+
+ +

+getFontRenderContext

+
+public FontRenderContext getFontRenderContext()
+
+
+
Specified by:
getFontRenderContext in class Graphics2D
+
+
+
+
+
+
+ +

+getPaint

+
+public Paint getPaint()
+
+
+
Specified by:
getPaint in class Graphics2D
+
+
+
+
+
+
+ +

+getRenderingHint

+
+public Object getRenderingHint(RenderingHints.Key hintKey)
+
+
+
Specified by:
getRenderingHint in class Graphics2D
+
+
+
+
+
+
+ +

+getRenderingHints

+
+public RenderingHints getRenderingHints()
+
+
+
Specified by:
getRenderingHints in class Graphics2D
+
+
+
+
+
+
+ +

+getStroke

+
+public Stroke getStroke()
+
+
+
Specified by:
getStroke in class Graphics2D
+
+
+
+
+
+
+ +

+getTransform

+
+public AffineTransform getTransform()
+
+
+
Specified by:
getTransform in class Graphics2D
+
+
+
+
+
+
+ +

+hit

+
+public boolean hit(Rectangle rect,
+                   Shape s,
+                   boolean onStroke)
+
+
+
Specified by:
hit in class Graphics2D
+
+
+
+
+
+
+ +

+rotate

+
+public void rotate(double theta)
+
+
+
Specified by:
rotate in class Graphics2D
+
+
+
+
+
+
+ +

+rotate

+
+public void rotate(double theta,
+                   double x,
+                   double y)
+
+
+
Specified by:
rotate in class Graphics2D
+
+
+
+
+
+
+ +

+scale

+
+public void scale(double sx,
+                  double sy)
+
+
+
Specified by:
scale in class Graphics2D
+
+
+
+
+
+
+ +

+setBackground

+
+public void setBackground(Color color)
+
+
+
Specified by:
setBackground in class Graphics2D
+
+
+
+
+
+
+ +

+setComposite

+
+public void setComposite(Composite comp)
+
+
+
Specified by:
setComposite in class Graphics2D
+
+
+
+
+
+
+ +

+setPaint

+
+public void setPaint(Paint paint)
+
+
+
Specified by:
setPaint in class Graphics2D
+
+
+
+
+
+
+ +

+setRenderingHint

+
+public void setRenderingHint(RenderingHints.Key hintKey,
+                             Object hintValue)
+
+
+
Specified by:
setRenderingHint in class Graphics2D
+
+
+
+
+
+
+ +

+setRenderingHints

+
+public void setRenderingHints(Map<?,?> hints)
+
+
+
Specified by:
setRenderingHints in class Graphics2D
+
+
+
+
+
+
+ +

+setStroke

+
+public void setStroke(Stroke s)
+
+
+
Specified by:
setStroke in class Graphics2D
+
+
+
+
+
+
+ +

+setTransform

+
+public void setTransform(AffineTransform Tx)
+
+
+
Specified by:
setTransform in class Graphics2D
+
+
+
+
+
+
+ +

+shear

+
+public void shear(double shx,
+                  double shy)
+
+
+
Specified by:
shear in class Graphics2D
+
+
+
+
+
+
+ +

+transform

+
+public void transform(AffineTransform Tx)
+
+
+
Specified by:
transform in class Graphics2D
+
+
+
+
+
+
+ +

+translate

+
+public void translate(int x,
+                      int y)
+
+
+
Specified by:
translate in class Graphics2D
+
+
+
+
+
+
+ +

+translate

+
+public void translate(double tx,
+                      double ty)
+
+
+
Specified by:
translate in class Graphics2D
+
+
+
+
+
+
+ +

+clearRect

+
+public void clearRect(int x,
+                      int y,
+                      int width,
+                      int height)
+
+
+
Specified by:
clearRect in class Graphics
+
+
+
+
+
+
+ +

+clipRect

+
+public void clipRect(int x,
+                     int y,
+                     int width,
+                     int height)
+
+
+
Specified by:
clipRect in class Graphics
+
+
+
+
+
+
+ +

+copyArea

+
+public void copyArea(int x,
+                     int y,
+                     int width,
+                     int height,
+                     int dx,
+                     int dy)
+
+
+
Specified by:
copyArea in class Graphics
+
+
+
+
+
+
+ +

+create

+
+public Graphics create()
+
+
+
Specified by:
create in class Graphics
+
+
+
+
+
+
+ +

+dispose

+
+public void dispose()
+
+
+
Specified by:
dispose in class Graphics
+
+
+
+
+
+
+ +

+drawArc

+
+public void drawArc(int x,
+                    int y,
+                    int width,
+                    int height,
+                    int startAngle,
+                    int arcAngle)
+
+
+
Specified by:
drawArc in class Graphics
+
+
+
+
+
+
+ +

+drawImage

+
+public boolean drawImage(Image img,
+                         int x,
+                         int y,
+                         ImageObserver observer)
+
+
+
Specified by:
drawImage in class Graphics
+
+
+
+
+
+
+ +

+drawImage

+
+public boolean drawImage(Image img,
+                         int x,
+                         int y,
+                         Color bgcolor,
+                         ImageObserver observer)
+
+
+
Specified by:
drawImage in class Graphics
+
+
+
+
+
+
+ +

+drawImage

+
+public boolean drawImage(Image img,
+                         int x,
+                         int y,
+                         int width,
+                         int height,
+                         ImageObserver observer)
+
+
+
Specified by:
drawImage in class Graphics
+
+
+
+
+
+
+ +

+drawImage

+
+public boolean drawImage(Image img,
+                         int x,
+                         int y,
+                         int width,
+                         int height,
+                         Color bgcolor,
+                         ImageObserver observer)
+
+
+
Specified by:
drawImage in class Graphics
+
+
+
+
+
+
+ +

+drawImage

+
+public boolean drawImage(Image img,
+                         int dx1,
+                         int dy1,
+                         int dx2,
+                         int dy2,
+                         int sx1,
+                         int sy1,
+                         int sx2,
+                         int sy2,
+                         ImageObserver observer)
+
+
+
Specified by:
drawImage in class Graphics
+
+
+
+
+
+
+ +

+drawImage

+
+public boolean drawImage(Image img,
+                         int dx1,
+                         int dy1,
+                         int dx2,
+                         int dy2,
+                         int sx1,
+                         int sy1,
+                         int sx2,
+                         int sy2,
+                         Color bgcolor,
+                         ImageObserver observer)
+
+
+
Specified by:
drawImage in class Graphics
+
+
+
+
+
+
+ +

+drawLine

+
+public void drawLine(int x1,
+                     int y1,
+                     int x2,
+                     int y2)
+
+
+
Specified by:
drawLine in class Graphics
+
+
+
+
+
+
+ +

+drawOval

+
+public void drawOval(int x,
+                     int y,
+                     int width,
+                     int height)
+
+
+
Specified by:
drawOval in class Graphics
+
+
+
+
+
+
+ +

+drawPolygon

+
+public void drawPolygon(int[] xPoints,
+                        int[] yPoints,
+                        int nPoints)
+
+
+
Specified by:
drawPolygon in class Graphics
+
+
+
+
+
+
+ +

+drawPolyline

+
+public void drawPolyline(int[] xPoints,
+                         int[] yPoints,
+                         int nPoints)
+
+
+
Specified by:
drawPolyline in class Graphics
+
+
+
+
+
+
+ +

+drawRoundRect

+
+public void drawRoundRect(int x,
+                          int y,
+                          int width,
+                          int height,
+                          int arcWidth,
+                          int arcHeight)
+
+
+
Specified by:
drawRoundRect in class Graphics
+
+
+
+
+
+
+ +

+fillArc

+
+public void fillArc(int x,
+                    int y,
+                    int width,
+                    int height,
+                    int startAngle,
+                    int arcAngle)
+
+
+
Specified by:
fillArc in class Graphics
+
+
+
+
+
+
+ +

+fillOval

+
+public void fillOval(int x,
+                     int y,
+                     int width,
+                     int height)
+
+
+
Specified by:
fillOval in class Graphics
+
+
+
+
+
+
+ +

+fillPolygon

+
+public void fillPolygon(int[] xPoints,
+                        int[] yPoints,
+                        int nPoints)
+
+
+
Specified by:
fillPolygon in class Graphics
+
+
+
+
+
+
+ +

+fillRect

+
+public void fillRect(int x,
+                     int y,
+                     int width,
+                     int height)
+
+
+
Specified by:
fillRect in class Graphics
+
+
+
+
+
+
+ +

+fillRoundRect

+
+public void fillRoundRect(int x,
+                          int y,
+                          int width,
+                          int height,
+                          int arcWidth,
+                          int arcHeight)
+
+
+
Specified by:
fillRoundRect in class Graphics
+
+
+
+
+
+
+ +

+getClip

+
+public Shape getClip()
+
+
+
Specified by:
getClip in class Graphics
+
+
+
+
+
+
+ +

+getClipBounds

+
+public Rectangle getClipBounds()
+
+
+
Specified by:
getClipBounds in class Graphics
+
+
+
+
+
+
+ +

+getColor

+
+public Color getColor()
+
+
+
Specified by:
getColor in class Graphics
+
+
+
+
+
+
+ +

+getFont

+
+public Font getFont()
+
+
+
Specified by:
getFont in class Graphics
+
+
+
+
+
+
+ +

+getFontMetrics

+
+public FontMetrics getFontMetrics(Font f)
+
+
+
Specified by:
getFontMetrics in class Graphics
+
+
+
+
+
+
+ +

+setClip

+
+public void setClip(Shape clip)
+
+
+
Specified by:
setClip in class Graphics
+
+
+
+
+
+
+ +

+setClip

+
+public void setClip(int x,
+                    int y,
+                    int width,
+                    int height)
+
+
+
Specified by:
setClip in class Graphics
+
+
+
+
+
+
+ +

+setColor

+
+public void setColor(Color c)
+
+
+
Specified by:
setColor in class Graphics
+
+
+
+
+
+
+ +

+setFont

+
+public void setFont(Font font)
+
+
+
Specified by:
setFont in class Graphics
+
+
+
+
+
+
+ +

+setPaintMode

+
+public void setPaintMode()
+
+
+
Specified by:
setPaintMode in class Graphics
+
+
+
+
+
+
+ +

+setXORMode

+
+public void setXORMode(Color c1)
+
+
+
Specified by:
setXORMode in class Graphics
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/HasText.html b/technology/tabula/HasText.html new file mode 100644 index 00000000..02970ee4 --- /dev/null +++ b/technology/tabula/HasText.html @@ -0,0 +1,210 @@ + + + + + + + +HasText (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Interface HasText

+
+
All Known Implementing Classes:
TextChunk, TextElement
+
+
+
+
public interface HasText
+ + +

+


+ +

+ + + + + + + + + + + + +
+Method Summary
+ StringgetText() + +
+           
+  +

+ + + + + + + + +
+Method Detail
+ +

+getText

+
+String getText()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/Line.html b/technology/tabula/Line.html new file mode 100644 index 00000000..eff5a32b --- /dev/null +++ b/technology/tabula/Line.html @@ -0,0 +1,452 @@ + + + + + + + +Line (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class Line

+
+java.lang.Object
+  extended by java.awt.geom.RectangularShape
+      extended by java.awt.geom.Rectangle2D
+          extended by java.awt.geom.Rectangle2D.Float
+              extended by technology.tabula.Rectangle
+                  extended by technology.tabula.Line
+
+
+
All Implemented Interfaces:
Shape, Serializable, Cloneable, Comparable<Rectangle>
+
+
+
+
public class Line
extends Rectangle
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.awt.geom.Rectangle2D
Rectangle2D.Double, Rectangle2D.Float
+  + + + + + + + + + + + +
+Field Summary
+static Character[]WHITE_SPACE_CHARS + +
+           
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D.Float
height, width, x, y
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D
OUT_BOTTOM, OUT_LEFT, OUT_RIGHT, OUT_TOP
+  + + + + + + + + + + +
+Constructor Summary
Line() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidaddTextChunk(int i, + TextChunk textChunk) + +
+           
+ voidaddTextChunk(TextChunk textChunk) + +
+           
+ List<TextChunk>getTextElements() + +
+           
+ voidsetTextElements(List<TextChunk> textChunks) + +
+           
+ StringtoString() + +
+           
+ + + + + + + +
Methods inherited from class technology.tabula.Rectangle
boundingBoxOf, compareTo, getArea, getBottom, getLeft, getPoints, getRight, getTop, horizontallyOverlaps, horizontalOverlapRatio, merge, overlapRatio, setBottom, setLeft, setRight, setTop, verticallyOverlaps, verticalOverlap, verticalOverlapRatio
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D.Float
createIntersection, createUnion, getBounds2D, getHeight, getWidth, getX, getY, isEmpty, outcode, setRect, setRect, setRect
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D
add, add, add, contains, contains, equals, getPathIterator, getPathIterator, hashCode, intersect, intersects, intersectsLine, intersectsLine, outcode, setFrame, union
+ + + + + + + +
Methods inherited from class java.awt.geom.RectangularShape
clone, contains, contains, getBounds, getCenterX, getCenterY, getFrame, getMaxX, getMaxY, getMinX, getMinY, intersects, setFrame, setFrame, setFrameFromCenter, setFrameFromCenter, setFrameFromDiagonal, setFrameFromDiagonal
+ + + + + + + +
Methods inherited from class java.lang.Object
finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+WHITE_SPACE_CHARS

+
+public static final Character[] WHITE_SPACE_CHARS
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+Line

+
+public Line()
+
+
+ + + + + + + + +
+Method Detail
+ +

+getTextElements

+
+public List<TextChunk> getTextElements()
+
+
+
+
+
+
+ +

+setTextElements

+
+public void setTextElements(List<TextChunk> textChunks)
+
+
+
+
+
+
+ +

+addTextChunk

+
+public void addTextChunk(int i,
+                         TextChunk textChunk)
+
+
+
+
+
+
+ +

+addTextChunk

+
+public void addTextChunk(TextChunk textChunk)
+
+
+
+
+
+
+ +

+toString

+
+public String toString()
+
+
+
Overrides:
toString in class Rectangle
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/ObjectExtractor.html b/technology/tabula/ObjectExtractor.html new file mode 100644 index 00000000..d42cc318 --- /dev/null +++ b/technology/tabula/ObjectExtractor.html @@ -0,0 +1,790 @@ + + + + + + + +ObjectExtractor (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class ObjectExtractor

+
+java.lang.Object
+  extended by org.apache.pdfbox.util.PDFStreamEngine
+      extended by org.apache.pdfbox.pdfviewer.PageDrawer
+          extended by technology.tabula.ObjectExtractor
+
+
+
+
public class ObjectExtractor
extends org.apache.pdfbox.pdfviewer.PageDrawer
+ + +

+


+ +

+ + + + + + + + + + + + + + + +
+Field Summary
+ List<Shape>clippingPaths + +
+           
+protected  Listpdf_document_pages + +
+           
+ + + + + + + +
Fields inherited from class org.apache.pdfbox.pdfviewer.PageDrawer
page, pageSize
+  + + + + + + + + + + + + + +
+Constructor Summary
ObjectExtractor(org.apache.pdfbox.pdmodel.PDDocument pdf_document) + +
+           
ObjectExtractor(org.apache.pdfbox.pdmodel.PDDocument pdf_document, + String password) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidclose() + +
+           
+ Rectangle2DcurrentClippingPath() + +
+           
+ voiddrawImage(Image awtImage, + AffineTransform at) + +
+           
+ voiddrawPage(org.apache.pdfbox.pdmodel.PDPage p) + +
+           
+ PageIteratorextract() + +
+           
+ Pageextract(int pageNumber) + +
+           
+ PageIteratorextract(Iterable<Integer> pages) + +
+           
+protected  PageextractPage(Integer page_number) + +
+           
+ voidfillPath(int windingRule) + +
+           
+ List<TextElement>getCharacters() + +
+           
+ floatgetMinCharHeight() + +
+           
+ floatgetMinCharWidth() + +
+           
+ intgetPageCount() + +
+           
+ AffineTransformgetPageTransform() + +
+           
+ List<Ruling>getRulings() + +
+           
+ booleanisDebugClippingPaths() + +
+           
+ booleanisExtractRulingLines() + +
+           
+protected  voidprocessTextPosition(org.apache.pdfbox.util.TextPosition textPosition) + +
+           
+ voidsetDebugClippingPaths(boolean debugClippingPaths) + +
+           
+ voidsetExtractRulingLines(boolean extractRulingLines) + +
+           
+ voidstrokeOrFillPath(boolean isFill) + +
+           
+ voidstrokePath() + +
+           
+ + + + + + + +
Methods inherited from class org.apache.pdfbox.pdfviewer.PageDrawer
colorChanged, dispose, drawPage, endPath, fixY, getGraphics, getLinePath, getPage, getPageSize, getStroke, setClippingPath, setClippingWindingRule, setLinePath, setStroke, SHFill_Axial, SHFill_CoonsPatch, SHFill_FreeGourad, SHFill_Function, SHFill_LatticeGourad, SHFill_Radial, SHFill_TensorPatch, shFill, SHFill, transformedPoint
+ + + + + + + +
Methods inherited from class org.apache.pdfbox.util.PDFStreamEngine
getColorSpaces, getCurrentPage, getFonts, getGraphicsStack, getGraphicsState, getGraphicsStates, getResources, getTextLineMatrix, getTextMatrix, getTotalCharCnt, getValidCharCnt, getXObjects, inspectFontEncoding, isForceParsing, processEncodedText, processOperator, processOperator, processStream, processSubStream, registerOperatorProcessor, resetEngine, setColorSpaces, setFonts, setForceParsing, setGraphicsStack, setGraphicsState, setGraphicsStates, setTextLineMatrix, setTextMatrix
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+clippingPaths

+
+public List<Shape> clippingPaths
+
+
+
+
+
+ +

+pdf_document_pages

+
+protected List pdf_document_pages
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+ObjectExtractor

+
+public ObjectExtractor(org.apache.pdfbox.pdmodel.PDDocument pdf_document)
+                throws IOException
+
+
+ +
Throws: +
IOException
+
+
+ +

+ObjectExtractor

+
+public ObjectExtractor(org.apache.pdfbox.pdmodel.PDDocument pdf_document,
+                       String password)
+                throws IOException
+
+
+ +
Throws: +
IOException
+
+ + + + + + + + +
+Method Detail
+ +

+extractPage

+
+protected Page extractPage(Integer page_number)
+                    throws IOException
+
+
+ +
Throws: +
IOException
+
+
+
+ +

+extract

+
+public PageIterator extract(Iterable<Integer> pages)
+
+
+
+
+
+
+ +

+extract

+
+public PageIterator extract()
+
+
+
+
+
+
+ +

+extract

+
+public Page extract(int pageNumber)
+
+
+
+
+
+
+ +

+close

+
+public void close()
+           throws IOException
+
+
+ +
Throws: +
IOException
+
+
+
+ +

+drawPage

+
+public void drawPage(org.apache.pdfbox.pdmodel.PDPage p)
+              throws IOException
+
+
+ +
Throws: +
IOException
+
+
+
+ +

+drawImage

+
+public void drawImage(Image awtImage,
+                      AffineTransform at)
+
+
+
Overrides:
drawImage in class org.apache.pdfbox.pdfviewer.PageDrawer
+
+
+
+
+
+
+ +

+strokeOrFillPath

+
+public void strokeOrFillPath(boolean isFill)
+
+
+
+
+
+
+ +

+strokePath

+
+public void strokePath()
+                throws IOException
+
+
+
Overrides:
strokePath in class org.apache.pdfbox.pdfviewer.PageDrawer
+
+
+ +
Throws: +
IOException
+
+
+
+ +

+fillPath

+
+public void fillPath(int windingRule)
+              throws IOException
+
+
+
Overrides:
fillPath in class org.apache.pdfbox.pdfviewer.PageDrawer
+
+
+ +
Throws: +
IOException
+
+
+
+ +

+processTextPosition

+
+protected void processTextPosition(org.apache.pdfbox.util.TextPosition textPosition)
+
+
+
Overrides:
processTextPosition in class org.apache.pdfbox.pdfviewer.PageDrawer
+
+
+
+
+
+
+ +

+getMinCharWidth

+
+public float getMinCharWidth()
+
+
+
+
+
+
+ +

+getMinCharHeight

+
+public float getMinCharHeight()
+
+
+
+
+
+
+ +

+getPageTransform

+
+public AffineTransform getPageTransform()
+
+
+
+
+
+
+ +

+currentClippingPath

+
+public Rectangle2D currentClippingPath()
+
+
+
+
+
+
+ +

+isExtractRulingLines

+
+public boolean isExtractRulingLines()
+
+
+
+
+
+
+ +

+setExtractRulingLines

+
+public void setExtractRulingLines(boolean extractRulingLines)
+
+
+
+
+
+
+ +

+getRulings

+
+public List<Ruling> getRulings()
+
+
+
+
+
+
+ +

+getCharacters

+
+public List<TextElement> getCharacters()
+
+
+
+
+
+
+ +

+isDebugClippingPaths

+
+public boolean isDebugClippingPaths()
+
+
+
+
+
+
+ +

+setDebugClippingPaths

+
+public void setDebugClippingPaths(boolean debugClippingPaths)
+
+
+
+
+
+
+ +

+getPageCount

+
+public int getPageCount()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/Page.html b/technology/tabula/Page.html new file mode 100644 index 00000000..1727bc97 --- /dev/null +++ b/technology/tabula/Page.html @@ -0,0 +1,773 @@ + + + + + + + +Page (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class Page

+
+java.lang.Object
+  extended by java.awt.geom.RectangularShape
+      extended by java.awt.geom.Rectangle2D
+          extended by java.awt.geom.Rectangle2D.Float
+              extended by technology.tabula.Rectangle
+                  extended by technology.tabula.Page
+
+
+
All Implemented Interfaces:
Shape, Serializable, Cloneable, Comparable<Rectangle>
+
+
+
+
public class Page
extends Rectangle
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.awt.geom.Rectangle2D
Rectangle2D.Double, Rectangle2D.Float
+  + + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D.Float
height, width, x, y
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D
OUT_BOTTOM, OUT_LEFT, OUT_RIGHT, OUT_TOP
+  + + + + + + + + + + + + + + + + +
+Constructor Summary
Page(float top, + float left, + float width, + float height, + int rotation, + int page_number) + +
+           
Page(float top, + float left, + float width, + float height, + int rotation, + int page_number, + List<TextElement> characters, + List<Ruling> rulings) + +
+           
Page(float top, + float left, + float width, + float height, + int rotation, + int page_number, + List<TextElement> characters, + List<Ruling> rulings, + float minCharWidth, + float minCharHeight, + technology.tabula.RectangleSpatialIndex<TextElement> index) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidaddRuling(Ruling r) + +
+           
+ PagegetArea(float top, + float left, + float bottom, + float right) + +
+           
+ PagegetArea(Rectangle area) + +
+           
+ List<Ruling>getHorizontalRulings() + +
+           
+ floatgetMinCharHeight() + +
+           
+ floatgetMinCharWidth() + +
+           
+ intgetPageNumber() + +
+           
+ IntegergetRotation() + +
+           
+ List<Ruling>getRulings() + +
+           
+ technology.tabula.RectangleSpatialIndex<TextElement>getSpatialIndex() + +
+           
+ List<TextElement>getText() + +
+           
+ List<TextElement>getText(float top, + float left, + float bottom, + float right) + +
+           
+ List<TextElement>getText(Rectangle area) + +
+           
+ RectanglegetTextBounds() + +
+          Returns the minimum bounding box that contains all the TextElements on this Page
+ List<TextElement>getTexts() + +
+           
+ List<Ruling>getUnprocessedRulings() + +
+           
+ List<Ruling>getVerticalRulings() + +
+           
+ booleanhasText() + +
+           
+ voidsnapPoints() + +
+           
+ + + + + + + +
Methods inherited from class technology.tabula.Rectangle
boundingBoxOf, compareTo, getArea, getBottom, getLeft, getPoints, getRight, getTop, horizontallyOverlaps, horizontalOverlapRatio, merge, overlapRatio, setBottom, setLeft, setRight, setTop, toString, verticallyOverlaps, verticalOverlap, verticalOverlapRatio
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D.Float
createIntersection, createUnion, getBounds2D, getHeight, getWidth, getX, getY, isEmpty, outcode, setRect, setRect, setRect
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D
add, add, add, contains, contains, equals, getPathIterator, getPathIterator, hashCode, intersect, intersects, intersectsLine, intersectsLine, outcode, setFrame, union
+ + + + + + + +
Methods inherited from class java.awt.geom.RectangularShape
clone, contains, contains, getBounds, getCenterX, getCenterY, getFrame, getMaxX, getMaxY, getMinX, getMinY, intersects, setFrame, setFrame, setFrameFromCenter, setFrameFromCenter, setFrameFromDiagonal, setFrameFromDiagonal
+ + + + + + + +
Methods inherited from class java.lang.Object
finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+Page

+
+public Page(float top,
+            float left,
+            float width,
+            float height,
+            int rotation,
+            int page_number)
+
+
+
+ +

+Page

+
+public Page(float top,
+            float left,
+            float width,
+            float height,
+            int rotation,
+            int page_number,
+            List<TextElement> characters,
+            List<Ruling> rulings)
+
+
+
+ +

+Page

+
+public Page(float top,
+            float left,
+            float width,
+            float height,
+            int rotation,
+            int page_number,
+            List<TextElement> characters,
+            List<Ruling> rulings,
+            float minCharWidth,
+            float minCharHeight,
+            technology.tabula.RectangleSpatialIndex<TextElement> index)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getArea

+
+public Page getArea(Rectangle area)
+
+
+
+
+
+
+ +

+getArea

+
+public Page getArea(float top,
+                    float left,
+                    float bottom,
+                    float right)
+
+
+
+
+
+
+ +

+getText

+
+public List<TextElement> getText()
+
+
+
+
+
+
+ +

+getText

+
+public List<TextElement> getText(Rectangle area)
+
+
+
+
+
+
+ +

+getText

+
+public List<TextElement> getText(float top,
+                                 float left,
+                                 float bottom,
+                                 float right)
+
+
+
+
+
+
+ +

+getRotation

+
+public Integer getRotation()
+
+
+
+
+
+
+ +

+getPageNumber

+
+public int getPageNumber()
+
+
+
+
+
+
+ +

+getTexts

+
+public List<TextElement> getTexts()
+
+
+
+
+
+
+ +

+getTextBounds

+
+public Rectangle getTextBounds()
+
+
Returns the minimum bounding box that contains all the TextElements on this Page +

+

+ +
Returns: the minimum bounding box that contains all the TextElements on this Page
+
+
+
+ +

+getRulings

+
+public List<Ruling> getRulings()
+
+
+
+
+
+
+ +

+getVerticalRulings

+
+public List<Ruling> getVerticalRulings()
+
+
+
+
+
+
+ +

+getHorizontalRulings

+
+public List<Ruling> getHorizontalRulings()
+
+
+
+
+
+
+ +

+addRuling

+
+public void addRuling(Ruling r)
+
+
+
+
+
+
+ +

+getUnprocessedRulings

+
+public List<Ruling> getUnprocessedRulings()
+
+
+
+
+
+
+ +

+getMinCharWidth

+
+public float getMinCharWidth()
+
+
+
+
+
+
+ +

+getMinCharHeight

+
+public float getMinCharHeight()
+
+
+
+
+
+
+ +

+getSpatialIndex

+
+public technology.tabula.RectangleSpatialIndex<TextElement> getSpatialIndex()
+
+
+
+
+
+
+ +

+hasText

+
+public boolean hasText()
+
+
+
+
+
+
+ +

+snapPoints

+
+public void snapPoints()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/PageIterator.html b/technology/tabula/PageIterator.html new file mode 100644 index 00000000..01929f29 --- /dev/null +++ b/technology/tabula/PageIterator.html @@ -0,0 +1,305 @@ + + + + + + + +PageIterator (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class PageIterator

+
+java.lang.Object
+  extended by technology.tabula.PageIterator
+
+
+
All Implemented Interfaces:
Iterator<Page>
+
+
+
+
public class PageIterator
extends Object
implements Iterator<Page>
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
PageIterator(ObjectExtractor oe, + Iterable<Integer> pages) + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ booleanhasNext() + +
+           
+ Pagenext() + +
+           
+ voidremove() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+PageIterator

+
+public PageIterator(ObjectExtractor oe,
+                    Iterable<Integer> pages)
+
+
+ + + + + + + + +
+Method Detail
+ +

+hasNext

+
+public boolean hasNext()
+
+
+
Specified by:
hasNext in interface Iterator<Page>
+
+
+
+
+
+
+ +

+next

+
+public Page next()
+
+
+
Specified by:
next in interface Iterator<Page>
+
+
+
+
+
+
+ +

+remove

+
+public void remove()
+
+
+
Specified by:
remove in interface Iterator<Page>
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/ProjectionProfile.html b/technology/tabula/ProjectionProfile.html new file mode 100644 index 00000000..b9c97c80 --- /dev/null +++ b/technology/tabula/ProjectionProfile.html @@ -0,0 +1,435 @@ + + + + + + + +ProjectionProfile (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class ProjectionProfile

+
+java.lang.Object
+  extended by technology.tabula.ProjectionProfile
+
+
+
+
public class ProjectionProfile
extends Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Field Summary
+static intDECIMAL_PLACES + +
+           
+  + + + + + + + + + + +
+Constructor Summary
ProjectionProfile(Page area, + List<? extends Rectangle> elements, + float horizontalKernelSize, + float verticalKernelSize) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static float[]filter(float[] data, + float alpha) + +
+          Simple Low pass filter
+ float[]findHorizontalSeparators(float minRowHeight) + +
+           
+ float[]findVerticalSeparators(float minColumnWidth) + +
+           
+static float[]getAutocorrelation(float[] projection) + +
+           
+static float[]getFirstDeriv(float[] projection) + +
+           
+ float[]getHorizontalProjection() + +
+           
+ float[]getVerticalProjection() + +
+           
+static float[]smooth(float[] data, + int kernelSize) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+DECIMAL_PLACES

+
+public static final int DECIMAL_PLACES
+
+
+
See Also:
Constant Field Values
+
+ + + + + + + + +
+Constructor Detail
+ +

+ProjectionProfile

+
+public ProjectionProfile(Page area,
+                         List<? extends Rectangle> elements,
+                         float horizontalKernelSize,
+                         float verticalKernelSize)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getVerticalProjection

+
+public float[] getVerticalProjection()
+
+
+
+
+
+
+ +

+getHorizontalProjection

+
+public float[] getHorizontalProjection()
+
+
+
+
+
+
+ +

+findVerticalSeparators

+
+public float[] findVerticalSeparators(float minColumnWidth)
+
+
+
+
+
+
+ +

+findHorizontalSeparators

+
+public float[] findHorizontalSeparators(float minRowHeight)
+
+
+
+
+
+
+ +

+smooth

+
+public static float[] smooth(float[] data,
+                             int kernelSize)
+
+
+
+
+
+
+ +

+filter

+
+public static float[] filter(float[] data,
+                             float alpha)
+
+
Simple Low pass filter +

+

+ +
Returns: the filtered data
+
+
+
+ +

+getAutocorrelation

+
+public static float[] getAutocorrelation(float[] projection)
+
+
+
+
+
+
+ +

+getFirstDeriv

+
+public static float[] getFirstDeriv(float[] projection)
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/QuickSort.html b/technology/tabula/QuickSort.html new file mode 100644 index 00000000..0a2d4870 --- /dev/null +++ b/technology/tabula/QuickSort.html @@ -0,0 +1,269 @@ + + + + + + + +QuickSort (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class QuickSort

+
+java.lang.Object
+  extended by technology.tabula.QuickSort
+
+
+
+
public class QuickSort
extends Object
+ + +

+see http://de.wikipedia.org/wiki/Quicksort. +

+ +

+

+
Author:
+
Uwe Pachler
+
+
+ +

+ + + + + + + + + + + + + + + + +
+Method Summary
+static + + + + +
+<T extends Comparable> +
+void
+
sort(List<T> list) + +
+          Sorts the given list using compareTo as comparator.
+static + + + + +
+<T> void
+
sort(List<T> list, + Comparator<T> cmp) + +
+          Sorts the given list using the given comparator.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Method Detail
+ +

+sort

+
+public static <T> void sort(List<T> list,
+                            Comparator<T> cmp)
+
+
Sorts the given list using the given comparator. +

+

+
Parameters:
list - list to be sorted
cmp - comparator used to compare the objects within the list
+
+
+
+ +

+sort

+
+public static <T extends Comparable> void sort(List<T> list)
+
+
Sorts the given list using compareTo as comparator. +

+

+
Parameters:
list - list to be sorted
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/Rectangle.html b/technology/tabula/Rectangle.html new file mode 100644 index 00000000..9183a79e --- /dev/null +++ b/technology/tabula/Rectangle.html @@ -0,0 +1,779 @@ + + + + + + + +Rectangle (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class Rectangle

+
+java.lang.Object
+  extended by java.awt.geom.RectangularShape
+      extended by java.awt.geom.Rectangle2D
+          extended by java.awt.geom.Rectangle2D.Float
+              extended by technology.tabula.Rectangle
+
+
+
All Implemented Interfaces:
Shape, Serializable, Cloneable, Comparable<Rectangle>
+
+
+
Direct Known Subclasses:
Line, Page, RectangularTextContainer, Table, TextElement
+
+
+
+
public class Rectangle
extends Rectangle2D.Float
implements Comparable<Rectangle>
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.awt.geom.Rectangle2D
Rectangle2D.Double, Rectangle2D.Float
+  + + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D.Float
height, width, x, y
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D
OUT_BOTTOM, OUT_LEFT, OUT_RIGHT, OUT_TOP
+  + + + + + + + + + + + + + +
+Constructor Summary
Rectangle() + +
+           
Rectangle(float top, + float left, + float width, + float height) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static RectangleboundingBoxOf(List<? extends Rectangle> rectangles) + +
+           
+ intcompareTo(Rectangle other) + +
+           
+ floatgetArea() + +
+           
+ floatgetBottom() + +
+           
+ floatgetLeft() + +
+           
+ Point2D[]getPoints() + +
+           
+ floatgetRight() + +
+           
+ floatgetTop() + +
+           
+ booleanhorizontallyOverlaps(Rectangle other) + +
+           
+ floathorizontalOverlapRatio(Rectangle other) + +
+           
+ Rectanglemerge(Rectangle other) + +
+           
+ floatoverlapRatio(Rectangle other) + +
+           
+ voidsetBottom(float bottom) + +
+           
+ voidsetLeft(float left) + +
+           
+ voidsetRight(float right) + +
+           
+ voidsetTop(float top) + +
+           
+ StringtoString() + +
+           
+ booleanverticallyOverlaps(Rectangle other) + +
+           
+ floatverticalOverlap(Rectangle other) + +
+           
+ floatverticalOverlapRatio(Rectangle other) + +
+           
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D.Float
createIntersection, createUnion, getBounds2D, getHeight, getWidth, getX, getY, isEmpty, outcode, setRect, setRect, setRect
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D
add, add, add, contains, contains, equals, getPathIterator, getPathIterator, hashCode, intersect, intersects, intersectsLine, intersectsLine, outcode, setFrame, union
+ + + + + + + +
Methods inherited from class java.awt.geom.RectangularShape
clone, contains, contains, getBounds, getCenterX, getCenterY, getFrame, getMaxX, getMaxY, getMinX, getMinY, intersects, setFrame, setFrame, setFrameFromCenter, setFrameFromCenter, setFrameFromDiagonal, setFrameFromDiagonal
+ + + + + + + +
Methods inherited from class java.lang.Object
finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+Rectangle

+
+public Rectangle()
+
+
+
+ +

+Rectangle

+
+public Rectangle(float top,
+                 float left,
+                 float width,
+                 float height)
+
+
+ + + + + + + + +
+Method Detail
+ +

+compareTo

+
+public int compareTo(Rectangle other)
+
+
+
Specified by:
compareTo in interface Comparable<Rectangle>
+
+
+
+
+
+
+ +

+getArea

+
+public float getArea()
+
+
+
+
+
+
+
+
+
+ +

+verticalOverlap

+
+public float verticalOverlap(Rectangle other)
+
+
+
+
+
+
+
+
+
+ +

+verticallyOverlaps

+
+public boolean verticallyOverlaps(Rectangle other)
+
+
+
+
+
+
+
+
+
+ +

+verticalOverlapRatio

+
+public float verticalOverlapRatio(Rectangle other)
+
+
+
+
+
+
+
+
+
+ +

+horizontallyOverlaps

+
+public boolean horizontallyOverlaps(Rectangle other)
+
+
+
+
+
+
+
+
+
+ +

+horizontalOverlapRatio

+
+public float horizontalOverlapRatio(Rectangle other)
+
+
+
+
+
+
+
+
+
+ +

+overlapRatio

+
+public float overlapRatio(Rectangle other)
+
+
+
+
+
+
+
+
+
+ +

+merge

+
+public Rectangle merge(Rectangle other)
+
+
+
+
+
+
+
+
+
+ +

+getTop

+
+public float getTop()
+
+
+
+
+
+
+
+
+
+ +

+setTop

+
+public void setTop(float top)
+
+
+
+
+
+
+
+
+
+ +

+getRight

+
+public float getRight()
+
+
+
+
+
+
+
+
+
+ +

+setRight

+
+public void setRight(float right)
+
+
+
+
+
+
+
+
+
+ +

+getLeft

+
+public float getLeft()
+
+
+
+
+
+
+
+
+
+ +

+setLeft

+
+public void setLeft(float left)
+
+
+
+
+
+
+
+
+
+ +

+getBottom

+
+public float getBottom()
+
+
+
+
+
+
+
+
+
+ +

+setBottom

+
+public void setBottom(float bottom)
+
+
+
+
+
+
+
+
+
+ +

+getPoints

+
+public Point2D[] getPoints()
+
+
+
+
+
+
+
+
+
+ +

+toString

+
+public String toString()
+
+
+
Overrides:
toString in class Rectangle2D.Float
+
+
+
+
+
+
+ +

+boundingBoxOf

+
+public static Rectangle boundingBoxOf(List<? extends Rectangle> rectangles)
+
+
+
+
+
+
Parameters:
rectangles - +
Returns:
minimum bounding box that contains all the rectangles
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/RectangularTextContainer.html b/technology/tabula/RectangularTextContainer.html new file mode 100644 index 00000000..1dc05650 --- /dev/null +++ b/technology/tabula/RectangularTextContainer.html @@ -0,0 +1,432 @@ + + + + + + + +RectangularTextContainer (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class RectangularTextContainer<T extends HasText>

+
+java.lang.Object
+  extended by java.awt.geom.RectangularShape
+      extended by java.awt.geom.Rectangle2D
+          extended by java.awt.geom.Rectangle2D.Float
+              extended by technology.tabula.Rectangle
+                  extended by technology.tabula.RectangularTextContainer<T>
+
+
+
All Implemented Interfaces:
Shape, Serializable, Cloneable, Comparable<Rectangle>
+
+
+
Direct Known Subclasses:
Cell, TextChunk
+
+
+
+
public abstract class RectangularTextContainer<T extends HasText>
extends Rectangle
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.awt.geom.Rectangle2D
Rectangle2D.Double, Rectangle2D.Float
+  + + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D.Float
height, width, x, y
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D
OUT_BOTTOM, OUT_LEFT, OUT_RIGHT, OUT_TOP
+  + + + + + + + + + + +
+Constructor Summary
RectangularTextContainer(float top, + float left, + float width, + float height) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+abstract  StringgetText() + +
+           
+abstract  StringgetText(boolean useLineReturns) + +
+           
+abstract  List<T>getTextElements() + +
+           
+ RectangularTextContainer<T>merge(RectangularTextContainer<T> other) + +
+           
+ StringtoString() + +
+           
+ + + + + + + +
Methods inherited from class technology.tabula.Rectangle
boundingBoxOf, compareTo, getArea, getBottom, getLeft, getPoints, getRight, getTop, horizontallyOverlaps, horizontalOverlapRatio, merge, overlapRatio, setBottom, setLeft, setRight, setTop, verticallyOverlaps, verticalOverlap, verticalOverlapRatio
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D.Float
createIntersection, createUnion, getBounds2D, getHeight, getWidth, getX, getY, isEmpty, outcode, setRect, setRect, setRect
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D
add, add, add, contains, contains, equals, getPathIterator, getPathIterator, hashCode, intersect, intersects, intersectsLine, intersectsLine, outcode, setFrame, union
+ + + + + + + +
Methods inherited from class java.awt.geom.RectangularShape
clone, contains, contains, getBounds, getCenterX, getCenterY, getFrame, getMaxX, getMaxY, getMinX, getMinY, intersects, setFrame, setFrame, setFrameFromCenter, setFrameFromCenter, setFrameFromDiagonal, setFrameFromDiagonal
+ + + + + + + +
Methods inherited from class java.lang.Object
finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+RectangularTextContainer

+
+public RectangularTextContainer(float top,
+                                float left,
+                                float width,
+                                float height)
+
+
+ + + + + + + + +
+Method Detail
+ +

+toString

+
+public String toString()
+
+
+
Overrides:
toString in class Rectangle
+
+
+
+
+
+
+ +

+merge

+
+public RectangularTextContainer<T> merge(RectangularTextContainer<T> other)
+
+
+
+
+
+
+ +

+getText

+
+public abstract String getText()
+
+
+
+
+
+
+ +

+getText

+
+public abstract String getText(boolean useLineReturns)
+
+
+
+
+
+
+ +

+getTextElements

+
+public abstract List<T> getTextElements()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/Ruling.html b/technology/tabula/Ruling.html new file mode 100644 index 00000000..68554d27 --- /dev/null +++ b/technology/tabula/Ruling.html @@ -0,0 +1,958 @@ + + + + + + + +Ruling (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class Ruling

+
+java.lang.Object
+  extended by java.awt.geom.Line2D
+      extended by java.awt.geom.Line2D.Float
+          extended by technology.tabula.Ruling
+
+
+
All Implemented Interfaces:
Shape, Serializable, Cloneable
+
+
+
+
public class Ruling
extends Line2D.Float
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.awt.geom.Line2D
Line2D.Double, Line2D.Float
+  + + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class java.awt.geom.Line2D.Float
x1, x2, y1, y2
+  + + + + + + + + + + + + + +
+Constructor Summary
Ruling(float top, + float left, + float width, + float height) + +
+           
Ruling(Point2D p1, + Point2D p2) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ booleancolinear(Point2D point) + +
+           
+static List<Ruling>collapseOrientedRulings(List<Ruling> lines) + +
+           
+static List<Ruling>cropRulingsToArea(List<Ruling> rulings, + Rectangle2D area) + +
+           
+ booleanequals(Object other) + +
+           
+ Rulingexpand(float amount) + +
+           
+static Map<Point2D,Ruling[]>findIntersections(List<Ruling> horizontals, + List<Ruling> verticals) + +
+           
+ doublegetAngle() + +
+           
+ floatgetBottom() + +
+           
+ floatgetEnd() + +
+           
+ floatgetHeight() + +
+           
+ floatgetLeft() + +
+           
+ floatgetPosition() + +
+           
+ floatgetRight() + +
+           
+ floatgetStart() + +
+           
+ floatgetTop() + +
+           
+ floatgetWidth() + +
+           
+ inthashCode() + +
+           
+ booleanhorizontal() + +
+           
+ Rulingintersect(Rectangle2D clip) + +
+           
+ Point2DintersectionPoint(Ruling other) + +
+           
+ doublelength() + +
+           
+ booleannearlyIntersects(Ruling another) + +
+           
+ booleanoblique() + +
+           
+ booleanperpendicularTo(Ruling other) + +
+           
+ voidsetBottom(float v) + +
+           
+ voidsetEnd(float v) + +
+           
+ voidsetLeft(float v) + +
+           
+ voidsetPosition(float v) + +
+           
+ voidsetRight(float v) + +
+           
+ voidsetStart(float v) + +
+           
+ voidsetTop(float v) + +
+           
+ StringtoString() + +
+           
+ booleanvertical() + +
+           
+ + + + + + + +
Methods inherited from class java.awt.geom.Line2D.Float
getBounds2D, getP1, getP2, getX1, getX2, getY1, getY2, setLine, setLine
+ + + + + + + +
Methods inherited from class java.awt.geom.Line2D
clone, contains, contains, contains, contains, getBounds, getPathIterator, getPathIterator, intersects, intersects, intersectsLine, intersectsLine, linesIntersect, ptLineDist, ptLineDist, ptLineDist, ptLineDistSq, ptLineDistSq, ptLineDistSq, ptSegDist, ptSegDist, ptSegDist, ptSegDistSq, ptSegDistSq, ptSegDistSq, relativeCCW, relativeCCW, relativeCCW, setLine, setLine
+ + + + + + + +
Methods inherited from class java.lang.Object
finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+Ruling

+
+public Ruling(float top,
+              float left,
+              float width,
+              float height)
+
+
+
+ +

+Ruling

+
+public Ruling(Point2D p1,
+              Point2D p2)
+
+
+ + + + + + + + +
+Method Detail
+ +

+vertical

+
+public boolean vertical()
+
+
+
+
+
+
+ +

+horizontal

+
+public boolean horizontal()
+
+
+
+
+
+
+ +

+oblique

+
+public boolean oblique()
+
+
+
+
+
+
+ +

+getPosition

+
+public float getPosition()
+
+
+
+
+
+
+ +

+setPosition

+
+public void setPosition(float v)
+
+
+
+
+
+
+ +

+getStart

+
+public float getStart()
+
+
+
+
+
+
+ +

+setStart

+
+public void setStart(float v)
+
+
+
+
+
+
+ +

+getEnd

+
+public float getEnd()
+
+
+
+
+
+
+ +

+setEnd

+
+public void setEnd(float v)
+
+
+
+
+
+
+ +

+perpendicularTo

+
+public boolean perpendicularTo(Ruling other)
+
+
+
+
+
+
+ +

+colinear

+
+public boolean colinear(Point2D point)
+
+
+
+
+
+
+ +

+nearlyIntersects

+
+public boolean nearlyIntersects(Ruling another)
+
+
+
+
+
+
+ +

+length

+
+public double length()
+
+
+
+
+
+
+ +

+intersect

+
+public Ruling intersect(Rectangle2D clip)
+
+
+
+
+
+
+ +

+expand

+
+public Ruling expand(float amount)
+
+
+
+
+
+
+ +

+intersectionPoint

+
+public Point2D intersectionPoint(Ruling other)
+
+
+
+
+
+
+ +

+equals

+
+public boolean equals(Object other)
+
+
+
Overrides:
equals in class Object
+
+
+
+
+
+
+ +

+hashCode

+
+public int hashCode()
+
+
+
Overrides:
hashCode in class Object
+
+
+
+
+
+
+ +

+getTop

+
+public float getTop()
+
+
+
+
+
+
+ +

+setTop

+
+public void setTop(float v)
+
+
+
+
+
+
+ +

+getLeft

+
+public float getLeft()
+
+
+
+
+
+
+ +

+setLeft

+
+public void setLeft(float v)
+
+
+
+
+
+
+ +

+getBottom

+
+public float getBottom()
+
+
+
+
+
+
+ +

+setBottom

+
+public void setBottom(float v)
+
+
+
+
+
+
+ +

+getRight

+
+public float getRight()
+
+
+
+
+
+
+ +

+setRight

+
+public void setRight(float v)
+
+
+
+
+
+
+ +

+getWidth

+
+public float getWidth()
+
+
+
+
+
+
+ +

+getHeight

+
+public float getHeight()
+
+
+
+
+
+
+ +

+getAngle

+
+public double getAngle()
+
+
+
+
+
+
+ +

+toString

+
+public String toString()
+
+
+
Overrides:
toString in class Object
+
+
+
+
+
+
+ +

+cropRulingsToArea

+
+public static List<Ruling> cropRulingsToArea(List<Ruling> rulings,
+                                             Rectangle2D area)
+
+
+
+
+
+
+ +

+findIntersections

+
+public static Map<Point2D,Ruling[]> findIntersections(List<Ruling> horizontals,
+                                                      List<Ruling> verticals)
+
+
+
+
+
+
+ +

+collapseOrientedRulings

+
+public static List<Ruling> collapseOrientedRulings(List<Ruling> lines)
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/Table.html b/technology/tabula/Table.html new file mode 100644 index 00000000..8584ef1c --- /dev/null +++ b/technology/tabula/Table.html @@ -0,0 +1,510 @@ + + + + + + + +Table (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class Table

+
+java.lang.Object
+  extended by java.awt.geom.RectangularShape
+      extended by java.awt.geom.Rectangle2D
+          extended by java.awt.geom.Rectangle2D.Float
+              extended by technology.tabula.Rectangle
+                  extended by technology.tabula.Table
+
+
+
All Implemented Interfaces:
Shape, Serializable, Cloneable, Comparable<Rectangle>
+
+
+
Direct Known Subclasses:
TableWithRulingLines
+
+
+
+
public class Table
extends Rectangle
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.awt.geom.Rectangle2D
Rectangle2D.Double, Rectangle2D.Float
+  + + + + + + + + + + + +
+Field Summary
+static TableEMPTY + +
+           
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D.Float
height, width, x, y
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D
OUT_BOTTOM, OUT_LEFT, OUT_RIGHT, OUT_TOP
+  + + + + + + + + + + + + + +
+Constructor Summary
Table() + +
+           
Table(Page page, + ExtractionAlgorithm extractionAlgorithm) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidadd(RectangularTextContainer tc, + int i, + int j) + +
+           
+ RectangularTextContainergetCell(int i, + int j) + +
+           
+ List<RectangularTextContainer>getCells() + +
+           
+ List<List<RectangularTextContainer>>getCols() + +
+           
+ ExtractionAlgorithmgetExtractionAlgorithm() + +
+           
+ List<List<RectangularTextContainer>>getRows() + +
+           
+ voidsetExtractionAlgorithm(ExtractionAlgorithm extractionAlgorithm) + +
+           
+ + + + + + + +
Methods inherited from class technology.tabula.Rectangle
boundingBoxOf, compareTo, getArea, getBottom, getLeft, getPoints, getRight, getTop, horizontallyOverlaps, horizontalOverlapRatio, merge, overlapRatio, setBottom, setLeft, setRight, setTop, toString, verticallyOverlaps, verticalOverlap, verticalOverlapRatio
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D.Float
createIntersection, createUnion, getBounds2D, getHeight, getWidth, getX, getY, isEmpty, outcode, setRect, setRect, setRect
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D
add, add, add, contains, contains, equals, getPathIterator, getPathIterator, hashCode, intersect, intersects, intersectsLine, intersectsLine, outcode, setFrame, union
+ + + + + + + +
Methods inherited from class java.awt.geom.RectangularShape
clone, contains, contains, getBounds, getCenterX, getCenterY, getFrame, getMaxX, getMaxY, getMinX, getMinY, intersects, setFrame, setFrame, setFrameFromCenter, setFrameFromCenter, setFrameFromDiagonal, setFrameFromDiagonal
+ + + + + + + +
Methods inherited from class java.lang.Object
finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+EMPTY

+
+public static final Table EMPTY
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+Table

+
+public Table()
+
+
+
+ +

+Table

+
+public Table(Page page,
+             ExtractionAlgorithm extractionAlgorithm)
+
+
+ + + + + + + + +
+Method Detail
+ +

+add

+
+public void add(RectangularTextContainer tc,
+                int i,
+                int j)
+
+
+
+
+
+
+ +

+getRows

+
+public List<List<RectangularTextContainer>> getRows()
+
+
+
+
+
+
+ +

+getCell

+
+public RectangularTextContainer getCell(int i,
+                                        int j)
+
+
+
+
+
+
+ +

+getCols

+
+public List<List<RectangularTextContainer>> getCols()
+
+
+
+
+
+
+ +

+setExtractionAlgorithm

+
+public void setExtractionAlgorithm(ExtractionAlgorithm extractionAlgorithm)
+
+
+
+
+
+
+ +

+getExtractionAlgorithm

+
+public ExtractionAlgorithm getExtractionAlgorithm()
+
+
+
+
+
+
+ +

+getCells

+
+public List<RectangularTextContainer> getCells()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/TableWithRulingLines.html b/technology/tabula/TableWithRulingLines.html new file mode 100644 index 00000000..96c36342 --- /dev/null +++ b/technology/tabula/TableWithRulingLines.html @@ -0,0 +1,357 @@ + + + + + + + +TableWithRulingLines (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class TableWithRulingLines

+
+java.lang.Object
+  extended by java.awt.geom.RectangularShape
+      extended by java.awt.geom.Rectangle2D
+          extended by java.awt.geom.Rectangle2D.Float
+              extended by technology.tabula.Rectangle
+                  extended by technology.tabula.Table
+                      extended by technology.tabula.TableWithRulingLines
+
+
+
All Implemented Interfaces:
Shape, Serializable, Cloneable, Comparable<Rectangle>
+
+
+
+
public class TableWithRulingLines
extends Table
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.awt.geom.Rectangle2D
Rectangle2D.Double, Rectangle2D.Float
+  + + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class technology.tabula.Table
EMPTY
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D.Float
height, width, x, y
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D
OUT_BOTTOM, OUT_LEFT, OUT_RIGHT, OUT_TOP
+  + + + + + + + + + + + + + +
+Constructor Summary
TableWithRulingLines() + +
+           
TableWithRulingLines(Rectangle area, + Page page, + List<Cell> cells, + List<Ruling> horizontalRulings, + List<Ruling> verticalRulings) + +
+           
+  + + + + + + + +
+Method Summary
+ + + + + + + +
Methods inherited from class technology.tabula.Table
add, getCell, getCells, getCols, getExtractionAlgorithm, getRows, setExtractionAlgorithm
+ + + + + + + +
Methods inherited from class technology.tabula.Rectangle
boundingBoxOf, compareTo, getArea, getBottom, getLeft, getPoints, getRight, getTop, horizontallyOverlaps, horizontalOverlapRatio, merge, overlapRatio, setBottom, setLeft, setRight, setTop, toString, verticallyOverlaps, verticalOverlap, verticalOverlapRatio
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D.Float
createIntersection, createUnion, getBounds2D, getHeight, getWidth, getX, getY, isEmpty, outcode, setRect, setRect, setRect
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D
add, add, add, contains, contains, equals, getPathIterator, getPathIterator, hashCode, intersect, intersects, intersectsLine, intersectsLine, outcode, setFrame, union
+ + + + + + + +
Methods inherited from class java.awt.geom.RectangularShape
clone, contains, contains, getBounds, getCenterX, getCenterY, getFrame, getMaxX, getMaxY, getMinX, getMinY, intersects, setFrame, setFrame, setFrameFromCenter, setFrameFromCenter, setFrameFromDiagonal, setFrameFromDiagonal
+ + + + + + + +
Methods inherited from class java.lang.Object
finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+TableWithRulingLines

+
+public TableWithRulingLines()
+
+
+
+ +

+TableWithRulingLines

+
+public TableWithRulingLines(Rectangle area,
+                            Page page,
+                            List<Cell> cells,
+                            List<Ruling> horizontalRulings,
+                            List<Ruling> verticalRulings)
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/TextChunk.html b/technology/tabula/TextChunk.html new file mode 100644 index 00000000..79246e04 --- /dev/null +++ b/technology/tabula/TextChunk.html @@ -0,0 +1,675 @@ + + + + + + + +TextChunk (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class TextChunk

+
+java.lang.Object
+  extended by java.awt.geom.RectangularShape
+      extended by java.awt.geom.Rectangle2D
+          extended by java.awt.geom.Rectangle2D.Float
+              extended by technology.tabula.Rectangle
+                  extended by technology.tabula.RectangularTextContainer<TextElement>
+                      extended by technology.tabula.TextChunk
+
+
+
All Implemented Interfaces:
Shape, Serializable, Cloneable, Comparable<Rectangle>, HasText
+
+
+
+
public class TextChunk
extends RectangularTextContainer<TextElement>
implements HasText
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.awt.geom.Rectangle2D
Rectangle2D.Double, Rectangle2D.Float
+  + + + + + + + + + + + +
+Field Summary
+static TextChunkEMPTY + +
+           
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D.Float
height, width, x, y
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D
OUT_BOTTOM, OUT_LEFT, OUT_RIGHT, OUT_TOP
+  + + + + + + + + + + + + + + + + +
+Constructor Summary
TextChunk(float top, + float left, + float width, + float height) + +
+           
TextChunk(List<TextElement> textElements) + +
+           
TextChunk(TextElement textElement) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidadd(List<TextElement> textElements) + +
+           
+ voidadd(TextElement textElement) + +
+           
+static booleanallSameChar(List<TextChunk> textChunks) + +
+           
+ StringgetText() + +
+           
+ StringgetText(boolean useLineReturns) + +
+           
+ List<TextElement>getTextElements() + +
+           
+static List<Line>groupByLines(List<TextChunk> textChunks) + +
+           
+ booleanisSameChar(Character c) + +
+           
+ booleanisSameChar(Character[] c) + +
+           
+ TextChunkmerge(TextChunk other) + +
+           
+ TextChunk[]splitAt(int i) + +
+          Splits a TextChunk in two, at the position of the i-th TextElement
+ List<TextChunk>squeeze(Character c, + int minRunLength) + +
+          Removes runs of identical TextElements in this TextChunk + For example, if the TextChunk contains this string of characters: "1234xxxxx56xx" + and c == 'x' and minRunLength == 4, this method will return a list of TextChunk + such that: ["1234", "56xx"]
+ + + + + + + +
Methods inherited from class technology.tabula.RectangularTextContainer
merge, toString
+ + + + + + + +
Methods inherited from class technology.tabula.Rectangle
boundingBoxOf, compareTo, getArea, getBottom, getLeft, getPoints, getRight, getTop, horizontallyOverlaps, horizontalOverlapRatio, merge, overlapRatio, setBottom, setLeft, setRight, setTop, verticallyOverlaps, verticalOverlap, verticalOverlapRatio
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D.Float
createIntersection, createUnion, getBounds2D, getHeight, getWidth, getX, getY, isEmpty, outcode, setRect, setRect, setRect
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D
add, add, add, contains, contains, equals, getPathIterator, getPathIterator, hashCode, intersect, intersects, intersectsLine, intersectsLine, outcode, setFrame, union
+ + + + + + + +
Methods inherited from class java.awt.geom.RectangularShape
clone, contains, contains, getBounds, getCenterX, getCenterY, getFrame, getMaxX, getMaxY, getMinX, getMinY, intersects, setFrame, setFrame, setFrameFromCenter, setFrameFromCenter, setFrameFromDiagonal, setFrameFromDiagonal
+ + + + + + + +
Methods inherited from class java.lang.Object
finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+EMPTY

+
+public static final TextChunk EMPTY
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+TextChunk

+
+public TextChunk(float top,
+                 float left,
+                 float width,
+                 float height)
+
+
+
+ +

+TextChunk

+
+public TextChunk(TextElement textElement)
+
+
+
+ +

+TextChunk

+
+public TextChunk(List<TextElement> textElements)
+
+
+ + + + + + + + +
+Method Detail
+ +

+merge

+
+public TextChunk merge(TextChunk other)
+
+
+
+
+
+
+
+
+
+ +

+add

+
+public void add(TextElement textElement)
+
+
+
+
+
+
+
+
+
+ +

+add

+
+public void add(List<TextElement> textElements)
+
+
+
+
+
+
+
+
+
+ +

+getTextElements

+
+public List<TextElement> getTextElements()
+
+
+
Specified by:
getTextElements in class RectangularTextContainer<TextElement>
+
+
+
+
+
+
+ +

+getText

+
+public String getText()
+
+
+
Specified by:
getText in interface HasText
Specified by:
getText in class RectangularTextContainer<TextElement>
+
+
+
+
+
+
+ +

+getText

+
+public String getText(boolean useLineReturns)
+
+
+
Specified by:
getText in class RectangularTextContainer<TextElement>
+
+
+
+
+
+
+ +

+isSameChar

+
+public boolean isSameChar(Character c)
+
+
+
+
+
+ +
Returns:
true if text contained in this TextChunk is the same repeated character
+
+
+
+ +

+isSameChar

+
+public boolean isSameChar(Character[] c)
+
+
+
+
+
+
+
+
+
+ +

+splitAt

+
+public TextChunk[] splitAt(int i)
+
+
Splits a TextChunk in two, at the position of the i-th TextElement +

+

+
+
+
+
Parameters:
textChunk - the TextChunk to split
i - position of the TextElement at which to split +
Returns:
Two TextChunks, contained in a TextChunk[]
+
+
+
+ +

+squeeze

+
+public List<TextChunk> squeeze(Character c,
+                               int minRunLength)
+
+
Removes runs of identical TextElements in this TextChunk + For example, if the TextChunk contains this string of characters: "1234xxxxx56xx" + and c == 'x' and minRunLength == 4, this method will return a list of TextChunk + such that: ["1234", "56xx"] +

+

+
+
+
+
Parameters:
c - the Character to remove
minRunLength - minimum run length to consider. +
Returns:
+
+
+
+ +

+allSameChar

+
+public static boolean allSameChar(List<TextChunk> textChunks)
+
+
+
+
+
+
+
+
+
+ +

+groupByLines

+
+public static List<Line> groupByLines(List<TextChunk> textChunks)
+
+
+
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/TextElement.html b/technology/tabula/TextElement.html new file mode 100644 index 00000000..09b0449f --- /dev/null +++ b/technology/tabula/TextElement.html @@ -0,0 +1,553 @@ + + + + + + + +TextElement (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class TextElement

+
+java.lang.Object
+  extended by java.awt.geom.RectangularShape
+      extended by java.awt.geom.Rectangle2D
+          extended by java.awt.geom.Rectangle2D.Float
+              extended by technology.tabula.Rectangle
+                  extended by technology.tabula.TextElement
+
+
+
All Implemented Interfaces:
Shape, Serializable, Cloneable, Comparable<Rectangle>, HasText
+
+
+
+
public class TextElement
extends Rectangle
implements HasText
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.awt.geom.Rectangle2D
Rectangle2D.Double, Rectangle2D.Float
+  + + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D.Float
height, width, x, y
+ + + + + + + +
Fields inherited from class java.awt.geom.Rectangle2D
OUT_BOTTOM, OUT_LEFT, OUT_RIGHT, OUT_TOP
+  + + + + + + + + + + + + + +
+Constructor Summary
TextElement(float y, + float x, + float width, + float height, + org.apache.pdfbox.pdmodel.font.PDFont font, + float fontSize, + String c, + float widthOfSpace) + +
+           
TextElement(float y, + float x, + float width, + float height, + org.apache.pdfbox.pdmodel.font.PDFont font, + float fontSize, + String c, + float widthOfSpace, + float dir) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ floatgetDirection() + +
+           
+ org.apache.pdfbox.pdmodel.font.PDFontgetFont() + +
+           
+ floatgetFontSize() + +
+           
+ StringgetText() + +
+           
+ floatgetWidthOfSpace() + +
+           
+static List<TextChunk>mergeWords(List<TextElement> textElements) + +
+           
+static List<TextChunk>mergeWords(List<TextElement> textElements, + List<Ruling> verticalRulings) + +
+          heuristically merge a list of TextElement into a list of TextChunk + ported from PDFBox's PDFTextStripper.writePage, with modifications.
+ StringtoString() + +
+           
+ + + + + + + +
Methods inherited from class technology.tabula.Rectangle
boundingBoxOf, compareTo, getArea, getBottom, getLeft, getPoints, getRight, getTop, horizontallyOverlaps, horizontalOverlapRatio, merge, overlapRatio, setBottom, setLeft, setRight, setTop, verticallyOverlaps, verticalOverlap, verticalOverlapRatio
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D.Float
createIntersection, createUnion, getBounds2D, getHeight, getWidth, getX, getY, isEmpty, outcode, setRect, setRect, setRect
+ + + + + + + +
Methods inherited from class java.awt.geom.Rectangle2D
add, add, add, contains, contains, equals, getPathIterator, getPathIterator, hashCode, intersect, intersects, intersectsLine, intersectsLine, outcode, setFrame, union
+ + + + + + + +
Methods inherited from class java.awt.geom.RectangularShape
clone, contains, contains, getBounds, getCenterX, getCenterY, getFrame, getMaxX, getMaxY, getMinX, getMinY, intersects, setFrame, setFrame, setFrameFromCenter, setFrameFromCenter, setFrameFromDiagonal, setFrameFromDiagonal
+ + + + + + + +
Methods inherited from class java.lang.Object
finalize, getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+TextElement

+
+public TextElement(float y,
+                   float x,
+                   float width,
+                   float height,
+                   org.apache.pdfbox.pdmodel.font.PDFont font,
+                   float fontSize,
+                   String c,
+                   float widthOfSpace)
+
+
+
+ +

+TextElement

+
+public TextElement(float y,
+                   float x,
+                   float width,
+                   float height,
+                   org.apache.pdfbox.pdmodel.font.PDFont font,
+                   float fontSize,
+                   String c,
+                   float widthOfSpace,
+                   float dir)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getText

+
+public String getText()
+
+
+
Specified by:
getText in interface HasText
+
+
+
+
+
+
+ +

+getDirection

+
+public float getDirection()
+
+
+
+
+
+
+
+
+
+ +

+getWidthOfSpace

+
+public float getWidthOfSpace()
+
+
+
+
+
+
+
+
+
+ +

+getFont

+
+public org.apache.pdfbox.pdmodel.font.PDFont getFont()
+
+
+
+
+
+
+
+
+
+ +

+getFontSize

+
+public float getFontSize()
+
+
+
+
+
+
+
+
+
+ +

+toString

+
+public String toString()
+
+
+
Overrides:
toString in class Rectangle
+
+
+
+
+
+
+ +

+mergeWords

+
+public static List<TextChunk> mergeWords(List<TextElement> textElements)
+
+
+
+
+
+
+
+
+
+ +

+mergeWords

+
+public static List<TextChunk> mergeWords(List<TextElement> textElements,
+                                         List<Ruling> verticalRulings)
+
+
+ heuristically merge a list of TextElement into a list of TextChunk + ported from PDFBox's PDFTextStripper.writePage, with modifications. + Here be dragons +

+

+
+
+
+
Parameters:
textElements -
verticalRulings - +
Returns:
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/Utils.html b/technology/tabula/Utils.html new file mode 100644 index 00000000..b0a0b46d --- /dev/null +++ b/technology/tabula/Utils.html @@ -0,0 +1,551 @@ + + + + + + + +Utils (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula +
+Class Utils

+
+java.lang.Object
+  extended by technology.tabula.Utils
+
+
+
+
public class Utils
extends Object
+ + +

+

+
Author:
+
manuel
+
+
+ +

+ + + + + + + + + + + +
+Field Summary
+protected static booleanuseQuickSort + +
+           
+  + + + + + + + + + + +
+Constructor Summary
Utils() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static Rectanglebounds(Collection<? extends Shape> shapes) + +
+           
+static booleanfeq(double f1, + double f2) + +
+           
+static booleanisNumeric(CharSequence cs) + +
+           
+static Stringjoin(String glue, + String... s) + +
+           
+static booleanoverlap(double y1, + double height1, + double y2, + double height2) + +
+           
+static booleanoverlap(double y1, + double height1, + double y2, + double height2, + double variance) + +
+           
+static List<Integer>parsePagesOption(String pagesSpec) + +
+           
+static List<Integer>range(int begin, + int end) + +
+           
+static floatround(double d, + int decimalPlace) + +
+           
+static + + + + +
+<T extends Comparable<? super T>> +
+void
+
sort(List<T> list) + +
+          Wrap Collections.sort so we can fall back to a non-stable quicksort + if we're running on JDK7+
+static + + + + +
+<T> List<List<T>>
+
transpose(List<List<T>> table) + +
+           
+static booleanwithin(double first, + double second, + double variance) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+useQuickSort

+
+protected static boolean useQuickSort
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+Utils

+
+public Utils()
+
+
+ + + + + + + + +
+Method Detail
+ +

+within

+
+public static boolean within(double first,
+                             double second,
+                             double variance)
+
+
+
+
+
+
+ +

+overlap

+
+public static boolean overlap(double y1,
+                              double height1,
+                              double y2,
+                              double height2,
+                              double variance)
+
+
+
+
+
+
+ +

+overlap

+
+public static boolean overlap(double y1,
+                              double height1,
+                              double y2,
+                              double height2)
+
+
+
+
+
+
+ +

+feq

+
+public static boolean feq(double f1,
+                          double f2)
+
+
+
+
+
+
+ +

+round

+
+public static float round(double d,
+                          int decimalPlace)
+
+
+
+
+
+
+ +

+bounds

+
+public static Rectangle bounds(Collection<? extends Shape> shapes)
+
+
+
+
+
+
+ +

+range

+
+public static List<Integer> range(int begin,
+                                  int end)
+
+
+
+
+
+
+ +

+isNumeric

+
+public static boolean isNumeric(CharSequence cs)
+
+
+
+
+
+
+ +

+join

+
+public static String join(String glue,
+                          String... s)
+
+
+
+
+
+
+ +

+transpose

+
+public static <T> List<List<T>> transpose(List<List<T>> table)
+
+
+
+
+
+
+ +

+sort

+
+public static <T extends Comparable<? super T>> void sort(List<T> list)
+
+
Wrap Collections.sort so we can fall back to a non-stable quicksort + if we're running on JDK7+ +

+

+
Parameters:
list -
+
+
+
+ +

+parsePagesOption

+
+public static List<Integer> parsePagesOption(String pagesSpec)
+                                      throws org.apache.commons.cli.ParseException
+
+
+ +
Throws: +
org.apache.commons.cli.ParseException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/Cell.html b/technology/tabula/class-use/Cell.html new file mode 100644 index 00000000..49482b03 --- /dev/null +++ b/technology/tabula/class-use/Cell.html @@ -0,0 +1,213 @@ + + + + + + + +Uses of Class technology.tabula.Cell (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.Cell

+
+ + + + + + + + + + + + + +
+Packages that use Cell
technology.tabula  
technology.tabula.extractors  
+  +

+ + + + + +
+Uses of Cell in technology.tabula
+  +

+ + + + + + + + +
Constructor parameters in technology.tabula with type arguments of type Cell
TableWithRulingLines(Rectangle area, + Page page, + List<Cell> cells, + List<Ruling> horizontalRulings, + List<Ruling> verticalRulings) + +
+           
+  +

+ + + + + +
+Uses of Cell in technology.tabula.extractors
+  +

+ + + + + + + + + +
Methods in technology.tabula.extractors that return types with arguments of type Cell
+static List<Cell>SpreadsheetExtractionAlgorithm.findCells(List<Ruling> horizontalRulingLines, + List<Ruling> verticalRulingLines) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/CohenSutherlandClipping.html b/technology/tabula/class-use/CohenSutherlandClipping.html new file mode 100644 index 00000000..d929015b --- /dev/null +++ b/technology/tabula/class-use/CohenSutherlandClipping.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.CohenSutherlandClipping (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.CohenSutherlandClipping

+
+No usage of technology.tabula.CohenSutherlandClipping +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/CommandLineApp.html b/technology/tabula/class-use/CommandLineApp.html new file mode 100644 index 00000000..fbc76cc1 --- /dev/null +++ b/technology/tabula/class-use/CommandLineApp.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.CommandLineApp (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.CommandLineApp

+
+No usage of technology.tabula.CommandLineApp +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/DummyGraphics2D.html b/technology/tabula/class-use/DummyGraphics2D.html new file mode 100644 index 00000000..23d8b7d4 --- /dev/null +++ b/technology/tabula/class-use/DummyGraphics2D.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.DummyGraphics2D (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.DummyGraphics2D

+
+No usage of technology.tabula.DummyGraphics2D +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/HasText.html b/technology/tabula/class-use/HasText.html new file mode 100644 index 00000000..6704c1d2 --- /dev/null +++ b/technology/tabula/class-use/HasText.html @@ -0,0 +1,205 @@ + + + + + + + +Uses of Interface technology.tabula.HasText (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Interface
technology.tabula.HasText

+
+ + + + + + + + + +
+Packages that use HasText
technology.tabula  
+  +

+ + + + + +
+Uses of HasText in technology.tabula
+  +

+ + + + + + + + + +
Classes in technology.tabula with type parameters of type HasText
+ classRectangularTextContainer<T extends HasText> + +
+           
+  +

+ + + + + + + + + + + + + +
Classes in technology.tabula that implement HasText
+ classTextChunk + +
+           
+ classTextElement + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/Line.html b/technology/tabula/class-use/Line.html new file mode 100644 index 00000000..3d36b750 --- /dev/null +++ b/technology/tabula/class-use/Line.html @@ -0,0 +1,210 @@ + + + + + + + +Uses of Class technology.tabula.Line (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.Line

+
+ + + + + + + + + + + + + +
+Packages that use Line
technology.tabula  
technology.tabula.extractors  
+  +

+ + + + + +
+Uses of Line in technology.tabula
+  +

+ + + + + + + + + +
Methods in technology.tabula that return types with arguments of type Line
+static List<Line>TextChunk.groupByLines(List<TextChunk> textChunks) + +
+           
+  +

+ + + + + +
+Uses of Line in technology.tabula.extractors
+  +

+ + + + + + + + + +
Method parameters in technology.tabula.extractors with type arguments of type Line
+static List<Float>BasicExtractionAlgorithm.columnPositions(List<Line> lines) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/ObjectExtractor.html b/technology/tabula/class-use/ObjectExtractor.html new file mode 100644 index 00000000..dd225535 --- /dev/null +++ b/technology/tabula/class-use/ObjectExtractor.html @@ -0,0 +1,180 @@ + + + + + + + +Uses of Class technology.tabula.ObjectExtractor (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.ObjectExtractor

+
+ + + + + + + + + +
+Packages that use ObjectExtractor
technology.tabula  
+  +

+ + + + + +
+Uses of ObjectExtractor in technology.tabula
+  +

+ + + + + + + + +
Constructors in technology.tabula with parameters of type ObjectExtractor
PageIterator(ObjectExtractor oe, + Iterable<Integer> pages) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/Page.html b/technology/tabula/class-use/Page.html new file mode 100644 index 00000000..0c9c5f86 --- /dev/null +++ b/technology/tabula/class-use/Page.html @@ -0,0 +1,351 @@ + + + + + + + +Uses of Class technology.tabula.Page (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.Page

+
+ + + + + + + + + + + + + + + + + +
+Packages that use Page
technology.tabula  
technology.tabula.debug  
technology.tabula.extractors  
+  +

+ + + + + +
+Uses of Page in technology.tabula
+  +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Methods in technology.tabula that return Page
+ PageObjectExtractor.extract(int pageNumber) + +
+           
+protected  PageObjectExtractor.extractPage(Integer page_number) + +
+           
+ PagePage.getArea(float top, + float left, + float bottom, + float right) + +
+           
+ PagePage.getArea(Rectangle area) + +
+           
+ PagePageIterator.next() + +
+           
+  +

+ + + + + + + + + + + + + + +
Constructors in technology.tabula with parameters of type Page
ProjectionProfile(Page area, + List<? extends Rectangle> elements, + float horizontalKernelSize, + float verticalKernelSize) + +
+           
Table(Page page, + ExtractionAlgorithm extractionAlgorithm) + +
+           
TableWithRulingLines(Rectangle area, + Page page, + List<Cell> cells, + List<Ruling> horizontalRulings, + List<Ruling> verticalRulings) + +
+           
+  +

+ + + + + +
+Uses of Page in technology.tabula.debug
+  +

+ + + + + + + + + +
Methods in technology.tabula.debug with parameters of type Page
+static voidDebug.debugIntersections(Graphics2D g, + Page page) + +
+           
+  +

+ + + + + +
+Uses of Page in technology.tabula.extractors
+  +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Methods in technology.tabula.extractors with parameters of type Page
+ List<? extends Table>SpreadsheetExtractionAlgorithm.extract(Page page) + +
+           
+ List<? extends Table>ExtractionAlgorithm.extract(Page page) + +
+           
+ List<Table>BasicExtractionAlgorithm.extract(Page page) + +
+           
+ List<Table>BasicExtractionAlgorithm.extract(Page page, + List<Float> verticalRulingPositions) + +
+           
+ List<? extends Table>SpreadsheetExtractionAlgorithm.extract(Page page, + List<Ruling> rulings) + +
+          Extract a list of Table from page using rulings as separators
+ booleanSpreadsheetExtractionAlgorithm.isTabular(Page page) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/PageIterator.html b/technology/tabula/class-use/PageIterator.html new file mode 100644 index 00000000..5907a6df --- /dev/null +++ b/technology/tabula/class-use/PageIterator.html @@ -0,0 +1,189 @@ + + + + + + + +Uses of Class technology.tabula.PageIterator (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.PageIterator

+
+ + + + + + + + + +
+Packages that use PageIterator
technology.tabula  
+  +

+ + + + + +
+Uses of PageIterator in technology.tabula
+  +

+ + + + + + + + + + + + + +
Methods in technology.tabula that return PageIterator
+ PageIteratorObjectExtractor.extract() + +
+           
+ PageIteratorObjectExtractor.extract(Iterable<Integer> pages) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/ProjectionProfile.html b/technology/tabula/class-use/ProjectionProfile.html new file mode 100644 index 00000000..28e3486c --- /dev/null +++ b/technology/tabula/class-use/ProjectionProfile.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.ProjectionProfile (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.ProjectionProfile

+
+No usage of technology.tabula.ProjectionProfile +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/QuickSort.html b/technology/tabula/class-use/QuickSort.html new file mode 100644 index 00000000..42163fff --- /dev/null +++ b/technology/tabula/class-use/QuickSort.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.QuickSort (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.QuickSort

+
+No usage of technology.tabula.QuickSort +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/Rectangle.html b/technology/tabula/class-use/Rectangle.html new file mode 100644 index 00000000..349bc365 --- /dev/null +++ b/technology/tabula/class-use/Rectangle.html @@ -0,0 +1,504 @@ + + + + + + + +Uses of Class technology.tabula.Rectangle (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.Rectangle

+
+ + + + + + + + + + + + + + + + + +
+Packages that use Rectangle
technology.tabula  
technology.tabula.debug  
technology.tabula.extractors  
+  +

+ + + + + +
+Uses of Rectangle in technology.tabula
+  +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Subclasses of Rectangle in technology.tabula
+ classCell + +
+           
+ classLine + +
+           
+ classPage + +
+           
+ classRectangularTextContainer<T extends HasText> + +
+           
+ classTable + +
+           
+ classTableWithRulingLines + +
+           
+ classTextChunk + +
+           
+ classTextElement + +
+           
+  +

+ + + + + + + + + + + + + + + + + + + + + +
Methods in technology.tabula that return Rectangle
+static RectangleRectangle.boundingBoxOf(List<? extends Rectangle> rectangles) + +
+           
+static RectangleUtils.bounds(Collection<? extends Shape> shapes) + +
+           
+ RectanglePage.getTextBounds() + +
+          Returns the minimum bounding box that contains all the TextElements on this Page
+ RectangleRectangle.merge(Rectangle other) + +
+           
+  +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Methods in technology.tabula with parameters of type Rectangle
+ intRectangle.compareTo(Rectangle other) + +
+           
+ PagePage.getArea(Rectangle area) + +
+           
+ List<TextElement>Page.getText(Rectangle area) + +
+           
+ booleanRectangle.horizontallyOverlaps(Rectangle other) + +
+           
+ floatRectangle.horizontalOverlapRatio(Rectangle other) + +
+           
+ RectangleRectangle.merge(Rectangle other) + +
+           
+ floatRectangle.overlapRatio(Rectangle other) + +
+           
+ booleanRectangle.verticallyOverlaps(Rectangle other) + +
+           
+ floatRectangle.verticalOverlap(Rectangle other) + +
+           
+ floatRectangle.verticalOverlapRatio(Rectangle other) + +
+           
+  +

+ + + + + + + + + +
Method parameters in technology.tabula with type arguments of type Rectangle
+static RectangleRectangle.boundingBoxOf(List<? extends Rectangle> rectangles) + +
+           
+  +

+ + + + + + + + +
Constructors in technology.tabula with parameters of type Rectangle
TableWithRulingLines(Rectangle area, + Page page, + List<Cell> cells, + List<Ruling> horizontalRulings, + List<Ruling> verticalRulings) + +
+           
+  +

+ + + + + + + + +
Constructor parameters in technology.tabula with type arguments of type Rectangle
ProjectionProfile(Page area, + List<? extends Rectangle> elements, + float horizontalKernelSize, + float verticalKernelSize) + +
+           
+  +

+ + + + + +
+Uses of Rectangle in technology.tabula.debug
+  +

+ + + + + + + + + +
Methods in technology.tabula.debug with parameters of type Rectangle
+static voidDebug.renderPage(String pdfPath, + String outPath, + int pageNumber, + Rectangle area, + boolean drawTextChunks, + boolean drawSpreadsheets, + boolean drawRulings, + boolean drawIntersections, + boolean drawColumns, + boolean drawCharacters, + boolean drawArea, + boolean drawCells, + boolean drawUnprocessedRulings, + boolean drawProjectionProfile, + boolean drawClippingPaths) + +
+           
+  +

+ + + + + +
+Uses of Rectangle in technology.tabula.extractors
+  +

+ + + + + + + + + +
Methods in technology.tabula.extractors that return types with arguments of type Rectangle
+ List<Rectangle>SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(List<? extends Rectangle> cells) + +
+           
+  +

+ + + + + + + + + +
Method parameters in technology.tabula.extractors with type arguments of type Rectangle
+ List<Rectangle>SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(List<? extends Rectangle> cells) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/RectangularTextContainer.html b/technology/tabula/class-use/RectangularTextContainer.html new file mode 100644 index 00000000..2067dcde --- /dev/null +++ b/technology/tabula/class-use/RectangularTextContainer.html @@ -0,0 +1,303 @@ + + + + + + + +Uses of Class technology.tabula.RectangularTextContainer (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.RectangularTextContainer

+
+ + + + + + + + + + + + + +
+Packages that use RectangularTextContainer
technology.tabula  
technology.tabula.json  
+  +

+ + + + + +
+Uses of RectangularTextContainer in technology.tabula
+  +

+ + + + + + + + + + + + + +
Subclasses of RectangularTextContainer in technology.tabula
+ classCell + +
+           
+ classTextChunk + +
+           
+  +

+ + + + + + + + + + + + + +
Methods in technology.tabula that return RectangularTextContainer
+ RectangularTextContainerTable.getCell(int i, + int j) + +
+           
+ RectangularTextContainer<T>RectangularTextContainer.merge(RectangularTextContainer<T> other) + +
+           
+  +

+ + + + + + + + + + + + + + + + + +
Methods in technology.tabula that return types with arguments of type RectangularTextContainer
+ List<RectangularTextContainer>Table.getCells() + +
+           
+ List<List<RectangularTextContainer>>Table.getCols() + +
+           
+ List<List<RectangularTextContainer>>Table.getRows() + +
+           
+  +

+ + + + + + + + + + + + + +
Methods in technology.tabula with parameters of type RectangularTextContainer
+ voidTable.add(RectangularTextContainer tc, + int i, + int j) + +
+           
+ RectangularTextContainer<T>RectangularTextContainer.merge(RectangularTextContainer<T> other) + +
+           
+  +

+ + + + + +
+Uses of RectangularTextContainer in technology.tabula.json
+  +

+ + + + + + + + + +
Methods in technology.tabula.json with parameters of type RectangularTextContainer
+ com.google.gson.JsonElementTextChunkSerializer.serialize(RectangularTextContainer textChunk, + Type arg1, + com.google.gson.JsonSerializationContext context) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/Ruling.html b/technology/tabula/class-use/Ruling.html new file mode 100644 index 00000000..834c0028 --- /dev/null +++ b/technology/tabula/class-use/Ruling.html @@ -0,0 +1,497 @@ + + + + + + + +Uses of Class technology.tabula.Ruling (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.Ruling

+
+ + + + + + + + + + + + + + + + + +
+Packages that use Ruling
technology.tabula  
technology.tabula.extractors  
technology.tabula.json  
+  +

+ + + + + +
+Uses of Ruling in technology.tabula
+  +

+ + + + + + + + + + + + + +
Methods in technology.tabula that return Ruling
+ RulingRuling.expand(float amount) + +
+           
+ RulingRuling.intersect(Rectangle2D clip) + +
+           
+  +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Methods in technology.tabula that return types with arguments of type Ruling
+static List<Ruling>Ruling.collapseOrientedRulings(List<Ruling> lines) + +
+           
+static List<Ruling>Ruling.cropRulingsToArea(List<Ruling> rulings, + Rectangle2D area) + +
+           
+ List<Ruling>Page.getHorizontalRulings() + +
+           
+ List<Ruling>Page.getRulings() + +
+           
+ List<Ruling>ObjectExtractor.getRulings() + +
+           
+ List<Ruling>Page.getUnprocessedRulings() + +
+           
+ List<Ruling>Page.getVerticalRulings() + +
+           
+  +

+ + + + + + + + + + + + + + + + + + + + + +
Methods in technology.tabula with parameters of type Ruling
+ voidPage.addRuling(Ruling r) + +
+           
+ Point2DRuling.intersectionPoint(Ruling other) + +
+           
+ booleanRuling.nearlyIntersects(Ruling another) + +
+           
+ booleanRuling.perpendicularTo(Ruling other) + +
+           
+  +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Method parameters in technology.tabula with type arguments of type Ruling
+static List<Ruling>Ruling.collapseOrientedRulings(List<Ruling> lines) + +
+           
+static List<Ruling>Ruling.cropRulingsToArea(List<Ruling> rulings, + Rectangle2D area) + +
+           
+static Map<Point2D,Ruling[]>Ruling.findIntersections(List<Ruling> horizontals, + List<Ruling> verticals) + +
+           
+static Map<Point2D,Ruling[]>Ruling.findIntersections(List<Ruling> horizontals, + List<Ruling> verticals) + +
+           
+static List<TextChunk>TextElement.mergeWords(List<TextElement> textElements, + List<Ruling> verticalRulings) + +
+          heuristically merge a list of TextElement into a list of TextChunk + ported from PDFBox's PDFTextStripper.writePage, with modifications.
+  +

+ + + + + + + + + + + + + + + + + +
Constructor parameters in technology.tabula with type arguments of type Ruling
Page(float top, + float left, + float width, + float height, + int rotation, + int page_number, + List<TextElement> characters, + List<Ruling> rulings) + +
+           
Page(float top, + float left, + float width, + float height, + int rotation, + int page_number, + List<TextElement> characters, + List<Ruling> rulings, + float minCharWidth, + float minCharHeight, + technology.tabula.RectangleSpatialIndex<TextElement> index) + +
+           
TableWithRulingLines(Rectangle area, + Page page, + List<Cell> cells, + List<Ruling> horizontalRulings, + List<Ruling> verticalRulings) + +
+           
TableWithRulingLines(Rectangle area, + Page page, + List<Cell> cells, + List<Ruling> horizontalRulings, + List<Ruling> verticalRulings) + +
+           
+  +

+ + + + + +
+Uses of Ruling in technology.tabula.extractors
+  +

+ + + + + + + + + + + + + + + + + +
Method parameters in technology.tabula.extractors with type arguments of type Ruling
+ List<? extends Table>SpreadsheetExtractionAlgorithm.extract(Page page, + List<Ruling> rulings) + +
+          Extract a list of Table from page using rulings as separators
+static List<Cell>SpreadsheetExtractionAlgorithm.findCells(List<Ruling> horizontalRulingLines, + List<Ruling> verticalRulingLines) + +
+           
+static List<Cell>SpreadsheetExtractionAlgorithm.findCells(List<Ruling> horizontalRulingLines, + List<Ruling> verticalRulingLines) + +
+           
+  +

+ + + + + + + + +
Constructor parameters in technology.tabula.extractors with type arguments of type Ruling
BasicExtractionAlgorithm(List<Ruling> verticalRulings) + +
+           
+  +

+ + + + + +
+Uses of Ruling in technology.tabula.json
+  +

+ + + + + + + + + +
Methods in technology.tabula.json with parameters of type Ruling
+ com.google.gson.JsonElementRulingSerializer.serialize(Ruling arg0, + Type arg1, + com.google.gson.JsonSerializationContext arg2) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/Table.html b/technology/tabula/class-use/Table.html new file mode 100644 index 00000000..6b14db65 --- /dev/null +++ b/technology/tabula/class-use/Table.html @@ -0,0 +1,339 @@ + + + + + + + +Uses of Class technology.tabula.Table (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.Table

+
+ + + + + + + + + + + + + + + + + + + + + +
+Packages that use Table
technology.tabula  
technology.tabula.extractors  
technology.tabula.json  
technology.tabula.writers  
+  +

+ + + + + +
+Uses of Table in technology.tabula
+  +

+ + + + + + + + + +
Subclasses of Table in technology.tabula
+ classTableWithRulingLines + +
+           
+  +

+ + + + + + + + + +
Fields in technology.tabula declared as Table
+static TableTable.EMPTY + +
+           
+  +

+ + + + + +
+Uses of Table in technology.tabula.extractors
+  +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Methods in technology.tabula.extractors that return types with arguments of type Table
+ List<? extends Table>SpreadsheetExtractionAlgorithm.extract(Page page) + +
+           
+ List<? extends Table>ExtractionAlgorithm.extract(Page page) + +
+           
+ List<Table>BasicExtractionAlgorithm.extract(Page page) + +
+           
+ List<Table>BasicExtractionAlgorithm.extract(Page page, + List<Float> verticalRulingPositions) + +
+           
+ List<? extends Table>SpreadsheetExtractionAlgorithm.extract(Page page, + List<Ruling> rulings) + +
+          Extract a list of Table from page using rulings as separators
+  +

+ + + + + +
+Uses of Table in technology.tabula.json
+  +

+ + + + + + + + + +
Methods in technology.tabula.json with parameters of type Table
+ com.google.gson.JsonElementTableSerializer.serialize(Table table, + Type type, + com.google.gson.JsonSerializationContext context) + +
+           
+  +

+ + + + + +
+Uses of Table in technology.tabula.writers
+  +

+ + + + + + + + + + + + + + + + + +
Methods in technology.tabula.writers with parameters of type Table
+ voidWriter.write(Appendable out, + Table table) + +
+           
+ voidJSONWriter.write(Appendable out, + Table table) + +
+           
+ voidCSVWriter.write(Appendable out, + Table table) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/TableWithRulingLines.html b/technology/tabula/class-use/TableWithRulingLines.html new file mode 100644 index 00000000..92c61faf --- /dev/null +++ b/technology/tabula/class-use/TableWithRulingLines.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.TableWithRulingLines (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.TableWithRulingLines

+
+No usage of technology.tabula.TableWithRulingLines +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/TextChunk.html b/technology/tabula/class-use/TextChunk.html new file mode 100644 index 00000000..56ab95ac --- /dev/null +++ b/technology/tabula/class-use/TextChunk.html @@ -0,0 +1,332 @@ + + + + + + + +Uses of Class technology.tabula.TextChunk (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.TextChunk

+
+ + + + + + + + + +
+Packages that use TextChunk
technology.tabula  
+  +

+ + + + + +
+Uses of TextChunk in technology.tabula
+  +

+ + + + + + + + + +
Fields in technology.tabula declared as TextChunk
+static TextChunkTextChunk.EMPTY + +
+           
+  +

+ + + + + + + + + + + + + +
Methods in technology.tabula that return TextChunk
+ TextChunkTextChunk.merge(TextChunk other) + +
+           
+ TextChunk[]TextChunk.splitAt(int i) + +
+          Splits a TextChunk in two, at the position of the i-th TextElement
+  +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Methods in technology.tabula that return types with arguments of type TextChunk
+ List<TextChunk>Line.getTextElements() + +
+           
+ List<TextChunk>Cell.getTextElements() + +
+           
+static List<TextChunk>TextElement.mergeWords(List<TextElement> textElements) + +
+           
+static List<TextChunk>TextElement.mergeWords(List<TextElement> textElements, + List<Ruling> verticalRulings) + +
+          heuristically merge a list of TextElement into a list of TextChunk + ported from PDFBox's PDFTextStripper.writePage, with modifications.
+ List<TextChunk>TextChunk.squeeze(Character c, + int minRunLength) + +
+          Removes runs of identical TextElements in this TextChunk + For example, if the TextChunk contains this string of characters: "1234xxxxx56xx" + and c == 'x' and minRunLength == 4, this method will return a list of TextChunk + such that: ["1234", "56xx"]
+  +

+ + + + + + + + + + + + + + + + + +
Methods in technology.tabula with parameters of type TextChunk
+ voidLine.addTextChunk(int i, + TextChunk textChunk) + +
+           
+ voidLine.addTextChunk(TextChunk textChunk) + +
+           
+ TextChunkTextChunk.merge(TextChunk other) + +
+           
+  +

+ + + + + + + + + + + + + + + + + + + + + +
Method parameters in technology.tabula with type arguments of type TextChunk
+static booleanTextChunk.allSameChar(List<TextChunk> textChunks) + +
+           
+static List<Line>TextChunk.groupByLines(List<TextChunk> textChunks) + +
+           
+ voidLine.setTextElements(List<TextChunk> textChunks) + +
+           
+ voidCell.setTextElements(List<TextChunk> textElements) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/TextElement.html b/technology/tabula/class-use/TextElement.html new file mode 100644 index 00000000..1d8d2682 --- /dev/null +++ b/technology/tabula/class-use/TextElement.html @@ -0,0 +1,355 @@ + + + + + + + +Uses of Class technology.tabula.TextElement (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.TextElement

+
+ + + + + + + + + +
+Packages that use TextElement
technology.tabula  
+  +

+ + + + + +
+Uses of TextElement in technology.tabula
+  +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Methods in technology.tabula that return types with arguments of type TextElement
+ List<TextElement>ObjectExtractor.getCharacters() + +
+           
+ technology.tabula.RectangleSpatialIndex<TextElement>Page.getSpatialIndex() + +
+           
+ List<TextElement>Page.getText() + +
+           
+ List<TextElement>Page.getText(float top, + float left, + float bottom, + float right) + +
+           
+ List<TextElement>Page.getText(Rectangle area) + +
+           
+ List<TextElement>TextChunk.getTextElements() + +
+           
+ List<TextElement>Page.getTexts() + +
+           
+  +

+ + + + + + + + + +
Methods in technology.tabula with parameters of type TextElement
+ voidTextChunk.add(TextElement textElement) + +
+           
+  +

+ + + + + + + + + + + + + + + + + +
Method parameters in technology.tabula with type arguments of type TextElement
+ voidTextChunk.add(List<TextElement> textElements) + +
+           
+static List<TextChunk>TextElement.mergeWords(List<TextElement> textElements) + +
+           
+static List<TextChunk>TextElement.mergeWords(List<TextElement> textElements, + List<Ruling> verticalRulings) + +
+          Heuristically merge a list of TextElement into a list of TextChunk + ported from PDFBox's PDFTextStripper.writePage, with modifications.
+  +

+ + + + + + + + +
Constructors in technology.tabula with parameters of type TextElement
TextChunk(TextElement textElement) + +
+           
+  +

+ + + + + + + + + + + + + + + + + +
Constructor parameters in technology.tabula with type arguments of type TextElement
Page(float top, + float left, + float width, + float height, + int rotation, + int page_number, + List<TextElement> characters, + List<Ruling> rulings) + +
+           
Page(float top, + float left, + float width, + float height, + int rotation, + int page_number, + List<TextElement> characters, + List<Ruling> rulings, + float minCharWidth, + float minCharHeight, + technology.tabula.RectangleSpatialIndex<TextElement> index) + +
+           
Page(float top, + float left, + float width, + float height, + int rotation, + int page_number, + List<TextElement> characters, + List<Ruling> rulings, + float minCharWidth, + float minCharHeight, + technology.tabula.RectangleSpatialIndex<TextElement> index) + +
+           
TextChunk(List<TextElement> textElements) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/class-use/Utils.html b/technology/tabula/class-use/Utils.html new file mode 100644 index 00000000..7f8ee1e3 --- /dev/null +++ b/technology/tabula/class-use/Utils.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.Utils (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.Utils

+
+No usage of technology.tabula.Utils +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/debug/Debug.html b/technology/tabula/debug/Debug.html new file mode 100644 index 00000000..e2b29bc2 --- /dev/null +++ b/technology/tabula/debug/Debug.html @@ -0,0 +1,327 @@ + + + + + + + +Debug (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula.debug +
+Class Debug

+
+java.lang.Object
+  extended by technology.tabula.debug.Debug
+
+
+
+
public class Debug
extends Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
Debug() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static voiddebugIntersections(Graphics2D g, + Page page) + +
+           
+static voidmain(String[] args) + +
+           
+static voidrenderPage(String pdfPath, + String outPath, + int pageNumber, + Rectangle area, + boolean drawTextChunks, + boolean drawSpreadsheets, + boolean drawRulings, + boolean drawIntersections, + boolean drawColumns, + boolean drawCharacters, + boolean drawArea, + boolean drawCells, + boolean drawUnprocessedRulings, + boolean drawProjectionProfile, + boolean drawClippingPaths) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+Debug

+
+public Debug()
+
+
+ + + + + + + + +
+Method Detail
+ +

+debugIntersections

+
+public static void debugIntersections(Graphics2D g,
+                                      Page page)
+
+
+
+
+
+
+ +

+renderPage

+
+public static void renderPage(String pdfPath,
+                              String outPath,
+                              int pageNumber,
+                              Rectangle area,
+                              boolean drawTextChunks,
+                              boolean drawSpreadsheets,
+                              boolean drawRulings,
+                              boolean drawIntersections,
+                              boolean drawColumns,
+                              boolean drawCharacters,
+                              boolean drawArea,
+                              boolean drawCells,
+                              boolean drawUnprocessedRulings,
+                              boolean drawProjectionProfile,
+                              boolean drawClippingPaths)
+                       throws IOException
+
+
+ +
Throws: +
IOException
+
+
+
+ +

+main

+
+public static void main(String[] args)
+                 throws IOException
+
+
+ +
Throws: +
IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/debug/class-use/Debug.html b/technology/tabula/debug/class-use/Debug.html new file mode 100644 index 00000000..47b18424 --- /dev/null +++ b/technology/tabula/debug/class-use/Debug.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.debug.Debug (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.debug.Debug

+
+No usage of technology.tabula.debug.Debug +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/debug/package-frame.html b/technology/tabula/debug/package-frame.html new file mode 100644 index 00000000..a907e1eb --- /dev/null +++ b/technology/tabula/debug/package-frame.html @@ -0,0 +1,33 @@ + + + + + + + +technology.tabula.debug (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + +technology.tabula.debug + + + + +
+Classes  + +
+Debug
+ + + + diff --git a/technology/tabula/debug/package-summary.html b/technology/tabula/debug/package-summary.html new file mode 100644 index 00000000..2267aae5 --- /dev/null +++ b/technology/tabula/debug/package-summary.html @@ -0,0 +1,158 @@ + + + + + + + +technology.tabula.debug (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package technology.tabula.debug +

+ + + + + + + + + +
+Class Summary
Debug 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/debug/package-tree.html b/technology/tabula/debug/package-tree.html new file mode 100644 index 00000000..9989b346 --- /dev/null +++ b/technology/tabula/debug/package-tree.html @@ -0,0 +1,154 @@ + + + + + + + +technology.tabula.debug Class Hierarchy (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package technology.tabula.debug +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/debug/package-use.html b/technology/tabula/debug/package-use.html new file mode 100644 index 00000000..ee1c0bf3 --- /dev/null +++ b/technology/tabula/debug/package-use.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Package technology.tabula.debug (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
technology.tabula.debug

+
+No usage of technology.tabula.debug +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/extractors/BasicExtractionAlgorithm.html b/technology/tabula/extractors/BasicExtractionAlgorithm.html new file mode 100644 index 00000000..dc03cd2c --- /dev/null +++ b/technology/tabula/extractors/BasicExtractionAlgorithm.html @@ -0,0 +1,342 @@ + + + + + + + +BasicExtractionAlgorithm (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula.extractors +
+Class BasicExtractionAlgorithm

+
+java.lang.Object
+  extended by technology.tabula.extractors.BasicExtractionAlgorithm
+
+
+
All Implemented Interfaces:
ExtractionAlgorithm
+
+
+
+
public class BasicExtractionAlgorithm
extends Object
implements ExtractionAlgorithm
+ + +

+


+ +

+ + + + + + + + + + + + + + +
+Constructor Summary
BasicExtractionAlgorithm() + +
+           
BasicExtractionAlgorithm(List<Ruling> verticalRulings) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static List<Float>columnPositions(List<Line> lines) + +
+           
+ List<Table>extract(Page page) + +
+           
+ List<Table>extract(Page page, + List<Float> verticalRulingPositions) + +
+           
+ StringtoString() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+BasicExtractionAlgorithm

+
+public BasicExtractionAlgorithm()
+
+
+
+ +

+BasicExtractionAlgorithm

+
+public BasicExtractionAlgorithm(List<Ruling> verticalRulings)
+
+
+ + + + + + + + +
+Method Detail
+ +

+extract

+
+public List<Table> extract(Page page,
+                           List<Float> verticalRulingPositions)
+
+
+
+
+
+
+
+
+
+ +

+extract

+
+public List<Table> extract(Page page)
+
+
+
Specified by:
extract in interface ExtractionAlgorithm
+
+
+
+
+
+
+ +

+toString

+
+public String toString()
+
+
+
Specified by:
toString in interface ExtractionAlgorithm
Overrides:
toString in class Object
+
+
+
+
+
+
+ +

+columnPositions

+
+public static List<Float> columnPositions(List<Line> lines)
+
+
+
+
+
+
Parameters:
lines - must be a list of lines sorted by their top attribute +
Returns:
a list of column boundaries (x axis)
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/extractors/ExtractionAlgorithm.html b/technology/tabula/extractors/ExtractionAlgorithm.html new file mode 100644 index 00000000..3fffffa0 --- /dev/null +++ b/technology/tabula/extractors/ExtractionAlgorithm.html @@ -0,0 +1,232 @@ + + + + + + + +ExtractionAlgorithm (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula.extractors +
+Interface ExtractionAlgorithm

+
+
All Known Implementing Classes:
BasicExtractionAlgorithm, SpreadsheetExtractionAlgorithm
+
+
+
+
public interface ExtractionAlgorithm
+ + +

+


+ +

+ + + + + + + + + + + + + + + + +
+Method Summary
+ List<? extends Table>extract(Page page) + +
+           
+ StringtoString() + +
+           
+  +

+ + + + + + + + +
+Method Detail
+ +

+extract

+
+List<? extends Table> extract(Page page)
+
+
+
+
+
+
+ +

+toString

+
+String toString()
+
+
+
Overrides:
toString in class Object
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.html b/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.html new file mode 100644 index 00000000..6d65ffca --- /dev/null +++ b/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.html @@ -0,0 +1,380 @@ + + + + + + + +SpreadsheetExtractionAlgorithm (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula.extractors +
+Class SpreadsheetExtractionAlgorithm

+
+java.lang.Object
+  extended by technology.tabula.extractors.SpreadsheetExtractionAlgorithm
+
+
+
All Implemented Interfaces:
ExtractionAlgorithm
+
+
+
+
public class SpreadsheetExtractionAlgorithm
extends Object
implements ExtractionAlgorithm
+ + +

+

+
Author:
+
manuel
+
+
+ +

+ + + + + + + + + + + +
+Constructor Summary
SpreadsheetExtractionAlgorithm() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ List<? extends Table>extract(Page page) + +
+           
+ List<? extends Table>extract(Page page, + List<Ruling> rulings) + +
+          Extract a list of Table from page using rulings as separators
+static List<Cell>findCells(List<Ruling> horizontalRulingLines, + List<Ruling> verticalRulingLines) + +
+           
+ List<Rectangle>findSpreadsheetsFromCells(List<? extends Rectangle> cells) + +
+           
+ booleanisTabular(Page page) + +
+           
+ StringtoString() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+SpreadsheetExtractionAlgorithm

+
+public SpreadsheetExtractionAlgorithm()
+
+
+ + + + + + + + +
+Method Detail
+ +

+extract

+
+public List<? extends Table> extract(Page page)
+
+
+
Specified by:
extract in interface ExtractionAlgorithm
+
+
+
+
+
+
+ +

+extract

+
+public List<? extends Table> extract(Page page,
+                                     List<Ruling> rulings)
+
+
Extract a list of Table from page using rulings as separators +

+

+
+
+
+
Parameters:
page -
rulings - +
Returns:
+
+
+
+ +

+isTabular

+
+public boolean isTabular(Page page)
+
+
+
+
+
+
+
+
+
+ +

+findCells

+
+public static List<Cell> findCells(List<Ruling> horizontalRulingLines,
+                                   List<Ruling> verticalRulingLines)
+
+
+
+
+
+
+
+
+
+ +

+findSpreadsheetsFromCells

+
+public List<Rectangle> findSpreadsheetsFromCells(List<? extends Rectangle> cells)
+
+
+
+
+
+
+
+
+
+ +

+toString

+
+public String toString()
+
+
+
Specified by:
toString in interface ExtractionAlgorithm
Overrides:
toString in class Object
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/extractors/class-use/BasicExtractionAlgorithm.html b/technology/tabula/extractors/class-use/BasicExtractionAlgorithm.html new file mode 100644 index 00000000..be3c61e1 --- /dev/null +++ b/technology/tabula/extractors/class-use/BasicExtractionAlgorithm.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.extractors.BasicExtractionAlgorithm (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.extractors.BasicExtractionAlgorithm

+
+No usage of technology.tabula.extractors.BasicExtractionAlgorithm +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/extractors/class-use/ExtractionAlgorithm.html b/technology/tabula/extractors/class-use/ExtractionAlgorithm.html new file mode 100644 index 00000000..5390f08d --- /dev/null +++ b/technology/tabula/extractors/class-use/ExtractionAlgorithm.html @@ -0,0 +1,249 @@ + + + + + + + +Uses of Interface technology.tabula.extractors.ExtractionAlgorithm (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Interface
technology.tabula.extractors.ExtractionAlgorithm

+
+ + + + + + + + + + + + + +
+Packages that use ExtractionAlgorithm
technology.tabula  
technology.tabula.extractors  
+  +

+ + + + + +
+Uses of ExtractionAlgorithm in technology.tabula
+  +

+ + + + + + + + + +
Methods in technology.tabula that return ExtractionAlgorithm
+ ExtractionAlgorithmTable.getExtractionAlgorithm() + +
+           
+  +

+ + + + + + + + + +
Methods in technology.tabula with parameters of type ExtractionAlgorithm
+ voidTable.setExtractionAlgorithm(ExtractionAlgorithm extractionAlgorithm) + +
+           
+  +

+ + + + + + + + +
Constructors in technology.tabula with parameters of type ExtractionAlgorithm
Table(Page page, + ExtractionAlgorithm extractionAlgorithm) + +
+           
+  +

+ + + + + +
+Uses of ExtractionAlgorithm in technology.tabula.extractors
+  +

+ + + + + + + + + + + + + +
Classes in technology.tabula.extractors that implement ExtractionAlgorithm
+ classBasicExtractionAlgorithm + +
+           
+ classSpreadsheetExtractionAlgorithm + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/extractors/class-use/SpreadsheetExtractionAlgorithm.html b/technology/tabula/extractors/class-use/SpreadsheetExtractionAlgorithm.html new file mode 100644 index 00000000..a84701ac --- /dev/null +++ b/technology/tabula/extractors/class-use/SpreadsheetExtractionAlgorithm.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.extractors.SpreadsheetExtractionAlgorithm (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.extractors.SpreadsheetExtractionAlgorithm

+
+No usage of technology.tabula.extractors.SpreadsheetExtractionAlgorithm +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/extractors/package-frame.html b/technology/tabula/extractors/package-frame.html new file mode 100644 index 00000000..b4c1b7de --- /dev/null +++ b/technology/tabula/extractors/package-frame.html @@ -0,0 +1,46 @@ + + + + + + + +technology.tabula.extractors (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + +technology.tabula.extractors + + + + +
+Interfaces  + +
+ExtractionAlgorithm
+ + + + + + +
+Classes  + +
+BasicExtractionAlgorithm +
+SpreadsheetExtractionAlgorithm
+ + + + diff --git a/technology/tabula/extractors/package-summary.html b/technology/tabula/extractors/package-summary.html new file mode 100644 index 00000000..6eaedadc --- /dev/null +++ b/technology/tabula/extractors/package-summary.html @@ -0,0 +1,176 @@ + + + + + + + +technology.tabula.extractors (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package technology.tabula.extractors +

+ + + + + + + + + +
+Interface Summary
ExtractionAlgorithm 
+  + +

+ + + + + + + + + + + + + +
+Class Summary
BasicExtractionAlgorithm 
SpreadsheetExtractionAlgorithm 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/extractors/package-tree.html b/technology/tabula/extractors/package-tree.html new file mode 100644 index 00000000..328107fd --- /dev/null +++ b/technology/tabula/extractors/package-tree.html @@ -0,0 +1,161 @@ + + + + + + + +technology.tabula.extractors Class Hierarchy (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package technology.tabula.extractors +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +

+Interface Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/extractors/package-use.html b/technology/tabula/extractors/package-use.html new file mode 100644 index 00000000..b1754b45 --- /dev/null +++ b/technology/tabula/extractors/package-use.html @@ -0,0 +1,190 @@ + + + + + + + +Uses of Package technology.tabula.extractors (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
technology.tabula.extractors

+
+ + + + + + + + + + + + + +
+Packages that use technology.tabula.extractors
technology.tabula  
technology.tabula.extractors  
+  +

+ + + + + + + + +
+Classes in technology.tabula.extractors used by technology.tabula
ExtractionAlgorithm + +
+           
+  +

+ + + + + + + + +
+Classes in technology.tabula.extractors used by technology.tabula.extractors
ExtractionAlgorithm + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/json/RulingSerializer.html b/technology/tabula/json/RulingSerializer.html new file mode 100644 index 00000000..269d84be --- /dev/null +++ b/technology/tabula/json/RulingSerializer.html @@ -0,0 +1,263 @@ + + + + + + + +RulingSerializer (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula.json +
+Class RulingSerializer

+
+java.lang.Object
+  extended by technology.tabula.json.RulingSerializer
+
+
+
All Implemented Interfaces:
com.google.gson.JsonSerializer<Ruling>
+
+
+
+
public class RulingSerializer
extends Object
implements com.google.gson.JsonSerializer<Ruling>
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
RulingSerializer() + +
+           
+  + + + + + + + + + + + +
+Method Summary
+ com.google.gson.JsonElementserialize(Ruling arg0, + Type arg1, + com.google.gson.JsonSerializationContext arg2) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+RulingSerializer

+
+public RulingSerializer()
+
+
+ + + + + + + + +
+Method Detail
+ +

+serialize

+
+public com.google.gson.JsonElement serialize(Ruling arg0,
+                                             Type arg1,
+                                             com.google.gson.JsonSerializationContext arg2)
+
+
+
Specified by:
serialize in interface com.google.gson.JsonSerializer<Ruling>
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/json/TableSerializer.html b/technology/tabula/json/TableSerializer.html new file mode 100644 index 00000000..76fa9976 --- /dev/null +++ b/technology/tabula/json/TableSerializer.html @@ -0,0 +1,263 @@ + + + + + + + +TableSerializer (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula.json +
+Class TableSerializer

+
+java.lang.Object
+  extended by technology.tabula.json.TableSerializer
+
+
+
All Implemented Interfaces:
com.google.gson.JsonSerializer<Table>
+
+
+
+
public class TableSerializer
extends Object
implements com.google.gson.JsonSerializer<Table>
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
TableSerializer() + +
+           
+  + + + + + + + + + + + +
+Method Summary
+ com.google.gson.JsonElementserialize(Table table, + Type type, + com.google.gson.JsonSerializationContext context) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+TableSerializer

+
+public TableSerializer()
+
+
+ + + + + + + + +
+Method Detail
+ +

+serialize

+
+public com.google.gson.JsonElement serialize(Table table,
+                                             Type type,
+                                             com.google.gson.JsonSerializationContext context)
+
+
+
Specified by:
serialize in interface com.google.gson.JsonSerializer<Table>
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/json/TextChunkSerializer.html b/technology/tabula/json/TextChunkSerializer.html new file mode 100644 index 00000000..7d105f31 --- /dev/null +++ b/technology/tabula/json/TextChunkSerializer.html @@ -0,0 +1,263 @@ + + + + + + + +TextChunkSerializer (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula.json +
+Class TextChunkSerializer

+
+java.lang.Object
+  extended by technology.tabula.json.TextChunkSerializer
+
+
+
All Implemented Interfaces:
com.google.gson.JsonSerializer<RectangularTextContainer>
+
+
+
+
public class TextChunkSerializer
extends Object
implements com.google.gson.JsonSerializer<RectangularTextContainer>
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
TextChunkSerializer() + +
+           
+  + + + + + + + + + + + +
+Method Summary
+ com.google.gson.JsonElementserialize(RectangularTextContainer textChunk, + Type arg1, + com.google.gson.JsonSerializationContext context) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+TextChunkSerializer

+
+public TextChunkSerializer()
+
+
+ + + + + + + + +
+Method Detail
+ +

+serialize

+
+public com.google.gson.JsonElement serialize(RectangularTextContainer textChunk,
+                                             Type arg1,
+                                             com.google.gson.JsonSerializationContext context)
+
+
+
Specified by:
serialize in interface com.google.gson.JsonSerializer<RectangularTextContainer>
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/json/class-use/RulingSerializer.html b/technology/tabula/json/class-use/RulingSerializer.html new file mode 100644 index 00000000..03131a2c --- /dev/null +++ b/technology/tabula/json/class-use/RulingSerializer.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.json.RulingSerializer (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.json.RulingSerializer

+
+No usage of technology.tabula.json.RulingSerializer +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/json/class-use/TableSerializer.html b/technology/tabula/json/class-use/TableSerializer.html new file mode 100644 index 00000000..e81aace3 --- /dev/null +++ b/technology/tabula/json/class-use/TableSerializer.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.json.TableSerializer (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.json.TableSerializer

+
+No usage of technology.tabula.json.TableSerializer +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/json/class-use/TextChunkSerializer.html b/technology/tabula/json/class-use/TextChunkSerializer.html new file mode 100644 index 00000000..6850fa9e --- /dev/null +++ b/technology/tabula/json/class-use/TextChunkSerializer.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.json.TextChunkSerializer (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.json.TextChunkSerializer

+
+No usage of technology.tabula.json.TextChunkSerializer +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/json/package-frame.html b/technology/tabula/json/package-frame.html new file mode 100644 index 00000000..d18ea4fb --- /dev/null +++ b/technology/tabula/json/package-frame.html @@ -0,0 +1,37 @@ + + + + + + + +technology.tabula.json (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + +technology.tabula.json + + + + +
+Classes  + +
+RulingSerializer +
+TableSerializer +
+TextChunkSerializer
+ + + + diff --git a/technology/tabula/json/package-summary.html b/technology/tabula/json/package-summary.html new file mode 100644 index 00000000..059c74c2 --- /dev/null +++ b/technology/tabula/json/package-summary.html @@ -0,0 +1,166 @@ + + + + + + + +technology.tabula.json (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package technology.tabula.json +

+ + + + + + + + + + + + + + + + + +
+Class Summary
RulingSerializer 
TableSerializer 
TextChunkSerializer 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/json/package-tree.html b/technology/tabula/json/package-tree.html new file mode 100644 index 00000000..2d700206 --- /dev/null +++ b/technology/tabula/json/package-tree.html @@ -0,0 +1,157 @@ + + + + + + + +technology.tabula.json Class Hierarchy (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package technology.tabula.json +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • technology.tabula.json.RulingSerializer (implements com.google.gson.JsonSerializer<T>) +
    • technology.tabula.json.TableSerializer (implements com.google.gson.JsonSerializer<T>) +
    • technology.tabula.json.TextChunkSerializer (implements com.google.gson.JsonSerializer<T>) +
    +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/json/package-use.html b/technology/tabula/json/package-use.html new file mode 100644 index 00000000..40519d46 --- /dev/null +++ b/technology/tabula/json/package-use.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Package technology.tabula.json (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
technology.tabula.json

+
+No usage of technology.tabula.json +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/package-frame.html b/technology/tabula/package-frame.html new file mode 100644 index 00000000..9674d066 --- /dev/null +++ b/technology/tabula/package-frame.html @@ -0,0 +1,78 @@ + + + + + + + +technology.tabula (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + +technology.tabula + + + + +
+Interfaces  + +
+HasText
+ + + + + + +
+Classes  + +
+Cell +
+CohenSutherlandClipping +
+CommandLineApp +
+DummyGraphics2D +
+Line +
+ObjectExtractor +
+Page +
+PageIterator +
+ProjectionProfile +
+QuickSort +
+Rectangle +
+RectangularTextContainer +
+Ruling +
+Table +
+TableWithRulingLines +
+TextChunk +
+TextElement +
+Utils
+ + + + diff --git a/technology/tabula/package-summary.html b/technology/tabula/package-summary.html new file mode 100644 index 00000000..04ce6fff --- /dev/null +++ b/technology/tabula/package-summary.html @@ -0,0 +1,241 @@ + + + + + + + +technology.tabula (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package technology.tabula +

+ + + + + + + + + +
+Interface Summary
HasText 
+  + +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Class Summary
Cell 
CohenSutherlandClippingImplements the well known Cohen Sutherland line + clipping algorithm (line against clip rectangle).
CommandLineApp 
DummyGraphics2D 
Line 
ObjectExtractor 
Page 
PageIterator 
ProjectionProfile 
QuickSortsee http://de.wikipedia.org/wiki/Quicksort.
Rectangle 
RectangularTextContainer<T extends HasText> 
Ruling 
Table 
TableWithRulingLines 
TextChunk 
TextElement 
Utils 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/package-tree.html b/technology/tabula/package-tree.html new file mode 100644 index 00000000..44e61bc3 --- /dev/null +++ b/technology/tabula/package-tree.html @@ -0,0 +1,191 @@ + + + + + + + +technology.tabula Class Hierarchy (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package technology.tabula +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +

+Interface Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/package-use.html b/technology/tabula/package-use.html new file mode 100644 index 00000000..9a274d2b --- /dev/null +++ b/technology/tabula/package-use.html @@ -0,0 +1,361 @@ + + + + + + + +Uses of Package technology.tabula (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
technology.tabula

+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
+Packages that use technology.tabula
technology.tabula  
technology.tabula.debug  
technology.tabula.extractors  
technology.tabula.json  
technology.tabula.writers  
+  +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Classes in technology.tabula used by technology.tabula
Cell + +
+           
HasText + +
+           
Line + +
+           
ObjectExtractor + +
+           
Page + +
+           
PageIterator + +
+           
Rectangle + +
+           
RectangularTextContainer + +
+           
Ruling + +
+           
Table + +
+           
TextChunk + +
+           
TextElement + +
+           
+  +

+ + + + + + + + + + + +
+Classes in technology.tabula used by technology.tabula.debug
Page + +
+           
Rectangle + +
+           
+  +

+ + + + + + + + + + + + + + + + + + + + + + + +
+Classes in technology.tabula used by technology.tabula.extractors
Cell + +
+           
Line + +
+           
Page + +
+           
Rectangle + +
+           
Ruling + +
+           
Table + +
+           
+  +

+ + + + + + + + + + + + + + +
+Classes in technology.tabula used by technology.tabula.json
RectangularTextContainer + +
+           
Ruling + +
+           
Table + +
+           
+  +

+ + + + + + + + +
+Classes in technology.tabula used by technology.tabula.writers
Table + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/writers/CSVWriter.html b/technology/tabula/writers/CSVWriter.html new file mode 100644 index 00000000..96ea1887 --- /dev/null +++ b/technology/tabula/writers/CSVWriter.html @@ -0,0 +1,267 @@ + + + + + + + +CSVWriter (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula.writers +
+Class CSVWriter

+
+java.lang.Object
+  extended by technology.tabula.writers.CSVWriter
+
+
+
All Implemented Interfaces:
Writer
+
+
+
Direct Known Subclasses:
TSVWriter
+
+
+
+
public class CSVWriter
extends Object
implements Writer
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
CSVWriter() + +
+           
+  + + + + + + + + + + + +
+Method Summary
+ voidwrite(Appendable out, + Table table) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+CSVWriter

+
+public CSVWriter()
+
+
+ + + + + + + + +
+Method Detail
+ +

+write

+
+public void write(Appendable out,
+                  Table table)
+           throws IOException
+
+
+
Specified by:
write in interface Writer
+
+
+ +
Throws: +
IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/writers/JSONWriter.html b/technology/tabula/writers/JSONWriter.html new file mode 100644 index 00000000..c156eafd --- /dev/null +++ b/technology/tabula/writers/JSONWriter.html @@ -0,0 +1,264 @@ + + + + + + + +JSONWriter (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula.writers +
+Class JSONWriter

+
+java.lang.Object
+  extended by technology.tabula.writers.JSONWriter
+
+
+
All Implemented Interfaces:
Writer
+
+
+
+
public class JSONWriter
extends Object
implements Writer
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
JSONWriter() + +
+           
+  + + + + + + + + + + + +
+Method Summary
+ voidwrite(Appendable out, + Table table) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+JSONWriter

+
+public JSONWriter()
+
+
+ + + + + + + + +
+Method Detail
+ +

+write

+
+public void write(Appendable out,
+                  Table table)
+           throws IOException
+
+
+
Specified by:
write in interface Writer
+
+
+ +
Throws: +
IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/writers/TSVWriter.html b/technology/tabula/writers/TSVWriter.html new file mode 100644 index 00000000..0c1ba6db --- /dev/null +++ b/technology/tabula/writers/TSVWriter.html @@ -0,0 +1,238 @@ + + + + + + + +TSVWriter (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula.writers +
+Class TSVWriter

+
+java.lang.Object
+  extended by technology.tabula.writers.CSVWriter
+      extended by technology.tabula.writers.TSVWriter
+
+
+
All Implemented Interfaces:
Writer
+
+
+
+
public class TSVWriter
extends CSVWriter
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
TSVWriter() + +
+           
+  + + + + + + + +
+Method Summary
+ + + + + + + +
Methods inherited from class technology.tabula.writers.CSVWriter
write
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+TSVWriter

+
+public TSVWriter()
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/writers/Writer.html b/technology/tabula/writers/Writer.html new file mode 100644 index 00000000..0eb3bda3 --- /dev/null +++ b/technology/tabula/writers/Writer.html @@ -0,0 +1,215 @@ + + + + + + + +Writer (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +technology.tabula.writers +
+Interface Writer

+
+
All Known Implementing Classes:
CSVWriter, JSONWriter, TSVWriter
+
+
+
+
public interface Writer
+ + +

+


+ +

+ + + + + + + + + + + + +
+Method Summary
+ voidwrite(Appendable out, + Table table) + +
+           
+  +

+ + + + + + + + +
+Method Detail
+ +

+write

+
+void write(Appendable out,
+           Table table)
+           throws IOException
+
+
+ +
Throws: +
IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/writers/class-use/CSVWriter.html b/technology/tabula/writers/class-use/CSVWriter.html new file mode 100644 index 00000000..0ec1ff1e --- /dev/null +++ b/technology/tabula/writers/class-use/CSVWriter.html @@ -0,0 +1,181 @@ + + + + + + + +Uses of Class technology.tabula.writers.CSVWriter (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.writers.CSVWriter

+
+ + + + + + + + + +
+Packages that use CSVWriter
technology.tabula.writers  
+  +

+ + + + + +
+Uses of CSVWriter in technology.tabula.writers
+  +

+ + + + + + + + + +
Subclasses of CSVWriter in technology.tabula.writers
+ classTSVWriter + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/writers/class-use/JSONWriter.html b/technology/tabula/writers/class-use/JSONWriter.html new file mode 100644 index 00000000..9095d3b6 --- /dev/null +++ b/technology/tabula/writers/class-use/JSONWriter.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.writers.JSONWriter (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.writers.JSONWriter

+
+No usage of technology.tabula.writers.JSONWriter +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/writers/class-use/TSVWriter.html b/technology/tabula/writers/class-use/TSVWriter.html new file mode 100644 index 00000000..2cd8e7d3 --- /dev/null +++ b/technology/tabula/writers/class-use/TSVWriter.html @@ -0,0 +1,145 @@ + + + + + + + +Uses of Class technology.tabula.writers.TSVWriter (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
technology.tabula.writers.TSVWriter

+
+No usage of technology.tabula.writers.TSVWriter +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/writers/class-use/Writer.html b/technology/tabula/writers/class-use/Writer.html new file mode 100644 index 00000000..33020281 --- /dev/null +++ b/technology/tabula/writers/class-use/Writer.html @@ -0,0 +1,197 @@ + + + + + + + +Uses of Interface technology.tabula.writers.Writer (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Interface
technology.tabula.writers.Writer

+
+ + + + + + + + + +
+Packages that use Writer
technology.tabula.writers  
+  +

+ + + + + +
+Uses of Writer in technology.tabula.writers
+  +

+ + + + + + + + + + + + + + + + + +
Classes in technology.tabula.writers that implement Writer
+ classCSVWriter + +
+           
+ classJSONWriter + +
+           
+ classTSVWriter + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/writers/package-frame.html b/technology/tabula/writers/package-frame.html new file mode 100644 index 00000000..1b39a249 --- /dev/null +++ b/technology/tabula/writers/package-frame.html @@ -0,0 +1,48 @@ + + + + + + + +technology.tabula.writers (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + +technology.tabula.writers + + + + +
+Interfaces  + +
+Writer
+ + + + + + +
+Classes  + +
+CSVWriter +
+JSONWriter +
+TSVWriter
+ + + + diff --git a/technology/tabula/writers/package-summary.html b/technology/tabula/writers/package-summary.html new file mode 100644 index 00000000..71b1229d --- /dev/null +++ b/technology/tabula/writers/package-summary.html @@ -0,0 +1,180 @@ + + + + + + + +technology.tabula.writers (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package technology.tabula.writers +

+ + + + + + + + + +
+Interface Summary
Writer 
+  + +

+ + + + + + + + + + + + + + + + + +
+Class Summary
CSVWriter 
JSONWriter 
TSVWriter 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/writers/package-tree.html b/technology/tabula/writers/package-tree.html new file mode 100644 index 00000000..24de8c45 --- /dev/null +++ b/technology/tabula/writers/package-tree.html @@ -0,0 +1,163 @@ + + + + + + + +technology.tabula.writers Class Hierarchy (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package technology.tabula.writers +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +

+Interface Hierarchy +

+
    +
  • technology.tabula.writers.Writer
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + + diff --git a/technology/tabula/writers/package-use.html b/technology/tabula/writers/package-use.html new file mode 100644 index 00000000..2b8655aa --- /dev/null +++ b/technology/tabula/writers/package-use.html @@ -0,0 +1,177 @@ + + + + + + + +Uses of Package technology.tabula.writers (tabula-extractor 0.7.4-SNAPSHOT API) + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
technology.tabula.writers

+
+ + + + + + + + + +
+Packages that use technology.tabula.writers
technology.tabula.writers  
+  +

+ + + + + + + + + + + +
+Classes in technology.tabula.writers used by technology.tabula.writers
CSVWriter + +
+           
Writer + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+Copyright © 2015. All rights reserved. + +