tables = new ArrayList<>();
diff --git a/src/main/java/technology/tabula/debug/Debug.java b/src/main/java/technology/tabula/debug/Debug.java
index 91609045..d6d257ce 100644
--- a/src/main/java/technology/tabula/debug/Debug.java
+++ b/src/main/java/technology/tabula/debug/Debug.java
@@ -16,6 +16,7 @@
import java.util.List;
import org.apache.commons.cli.*;
+import org.apache.pdfbox.Loader;
import technology.tabula.Cell;
import technology.tabula.CommandLineApp;
import technology.tabula.Line;
@@ -215,7 +216,7 @@ public static void renderPage(String pdfPath, String outPath, int pageNumber, Re
boolean drawColumns, boolean drawCharacters, boolean drawArea, boolean drawCells,
boolean drawUnprocessedRulings, boolean drawProjectionProfile, boolean drawClippingPaths,
boolean drawDetectedTables) throws IOException {
- PDDocument document = PDDocument.load(new File(pdfPath));
+ PDDocument document = Loader.loadPDF(new File(pdfPath));
ObjectExtractor oe = new ObjectExtractor(document);
@@ -349,7 +350,7 @@ public static void main(String[] args) throws IOException {
if (pages == null) {
// user specified all pages
- PDDocument document = PDDocument.load(pdfFile);
+ PDDocument document = Loader.loadPDF(pdfFile);
int numPages = document.getNumberOfPages();
pages = new ArrayList<>(numPages);
diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
index fb43622a..86639f66 100644
--- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
+++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
@@ -1,14 +1,8 @@
package technology.tabula.detectors;
-import java.awt.geom.Line2D;
-import java.awt.geom.Point2D;
-import java.awt.image.BufferedImage;
-import java.awt.image.Raster;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.*;
-
+import org.apache.pdfbox.contentstream.PDContentStream;
import org.apache.pdfbox.contentstream.operator.Operator;
+import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
@@ -16,16 +10,17 @@
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.rendering.ImageType;
-
-import technology.tabula.Line;
-import technology.tabula.Page;
-import technology.tabula.Rectangle;
-import technology.tabula.Ruling;
-import technology.tabula.TextChunk;
-import technology.tabula.TextElement;
-import technology.tabula.Utils;
+import technology.tabula.*;
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
+import java.awt.geom.Line2D;
+import java.awt.geom.Point2D;
+import java.awt.image.BufferedImage;
+import java.awt.image.Raster;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.*;
+
/**
* Created by matt on 2015-12-17.
*
@@ -799,25 +794,10 @@ private List getVerticalRulings(BufferedImage image) {
return verticalRulings;
}
-
- // taken from http://www.docjar.com/html/api/org/apache/pdfbox/examples/util/RemoveAllText.java.html
private PDDocument removeText(PDPage page) throws IOException {
PDFStreamParser parser = new PDFStreamParser(page);
parser.parse();
- List tokens = parser.getTokens();
- List newTokens = new ArrayList<>();
- for (Object token : tokens) {
- if (token instanceof Operator) {
- Operator op = (Operator) token;
- if (op.getName().equals("TJ") || op.getName().equals("Tj")) {
- //remove the one argument to this operator
- newTokens.remove(newTokens.size() - 1);
- continue;
- }
- }
- newTokens.add(token);
- }
PDDocument document = new PDDocument();
PDPage newPage = document.importPage(page);
@@ -826,9 +806,51 @@ private PDDocument removeText(PDPage page) throws IOException {
PDStream newContents = new PDStream(document);
OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE);
ContentStreamWriter writer = new ContentStreamWriter(out);
- writer.writeTokens(newTokens);
+ List tokensWithoutText = createTokensWithoutText(page);
+ writer.writeTokens(tokensWithoutText);
out.close();
newPage.setContents(newContents);
return document;
}
+
+
+    /**
+     * Copies the tokens of a content stream, leaving out the text-showing operators
+     * (SHOW_TEXT, SHOW_TEXT_ADJUSTED, SHOW_TEXT_LINE, SHOW_TEXT_LINE_AND_SPACE) and their
+     * operands. Adapted from the Apache PDFBox RemoveAllText example.
+     *
+     * @param contentStream content stream whose tokens are read
+     * @return the remaining tokens, in their original order
+     * @throws IOException if reading the next token from the stream fails
+     */
+    private static List<Object> createTokensWithoutText(PDContentStream contentStream) throws IOException {
+        PDFStreamParser parser = new PDFStreamParser(contentStream);
+        List<Object> newTokens = new ArrayList<>();
+        for (Object token = parser.parseNextToken(); token != null; token = parser.parseNextToken()) {
+            int operands = 0; // operands to discard; 0 keeps the token
+            if (token instanceof Operator) {
+                String opName = ((Operator) token).getName();
+                if (OperatorName.SHOW_TEXT_ADJUSTED.equals(opName)
+                        || OperatorName.SHOW_TEXT.equals(opName)
+                        || OperatorName.SHOW_TEXT_LINE.equals(opName)) {
+                    operands = 1;
+                } else if (OperatorName.SHOW_TEXT_LINE_AND_SPACE.equals(opName)) {
+                    operands = 3;
+                }
+            }
+            if (operands == 0) {
+                newTokens.add(token);
+                continue;
+            }
+            // A malformed stream may supply too few operands: stop at the start of the list or at
+            // the previous operator rather than throwing or deleting an unrelated token.
+            while (operands-- > 0 && !newTokens.isEmpty()
+                    && !(newTokens.get(newTokens.size() - 1) instanceof Operator)) {
+                newTokens.remove(newTokens.size() - 1);
+            }
+        }
+        return newTokens;
+    }
+
+
}
diff --git a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java
index 243cc3bf..43136ba5 100644
--- a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java
+++ b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java
@@ -20,8 +20,6 @@ public class SpreadsheetDetectionAlgorithm implements DetectionAlgorithm {
public List detect(Page page) {
List cells = SpreadsheetExtractionAlgorithm.findCells(page.getHorizontalRulings(), page.getVerticalRulings());
- SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
-
List tables = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells);
// we want tables to be returned from top to bottom on the page
diff --git a/src/test/java/technology/tabula/TestBasicExtractor.java b/src/test/java/technology/tabula/TestBasicExtractor.java
index d120546f..b56fd6ea 100644
--- a/src/test/java/technology/tabula/TestBasicExtractor.java
+++ b/src/test/java/technology/tabula/TestBasicExtractor.java
@@ -203,7 +203,7 @@ public void testCheckSqueezeDoesntBreak() throws IOException {
List> rows = table.getRows();
List firstRow = rows.get(0);
List lastRow = rows.get(rows.size() - 1);
- assertTrue(firstRow.get(0).getText().equals("Violent crime . . . . . . . . . . . . . . . . . ."));
+ assertTrue(firstRow.get(0).getText().equals("Violent crime. . . . . . . . . . . . . . . . . ."));
assertTrue(lastRow.get(lastRow.size() - 1).getText().equals("(X)"));
page.getPDDoc().close();
}
diff --git a/src/test/java/technology/tabula/TestCell.java b/src/test/java/technology/tabula/TestCell.java
index de1b8cb8..2795565c 100644
--- a/src/test/java/technology/tabula/TestCell.java
+++ b/src/test/java/technology/tabula/TestCell.java
@@ -6,6 +6,7 @@
import java.util.ArrayList;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.junit.Test;
public class TestCell {
@@ -31,7 +32,7 @@ public void testGetTextElements() {
Cell cell = new Cell(0, 0, 0, 0);
assertTrue(cell.getTextElements().isEmpty());
- TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+ TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
TextChunk tChunk = new TextChunk(tElement);
List tList = new ArrayList<>();
tList.add(tChunk);
diff --git a/src/test/java/technology/tabula/TestLine.java b/src/test/java/technology/tabula/TestLine.java
index 90df0e31..f7a6a88d 100644
--- a/src/test/java/technology/tabula/TestLine.java
+++ b/src/test/java/technology/tabula/TestLine.java
@@ -6,6 +6,7 @@
import java.util.List;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.junit.Test;
public class TestLine {
@@ -14,7 +15,7 @@ public class TestLine {
public void testSetTextElements() {
Line line = new Line();
- TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+ TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
TextChunk tChunk = new TextChunk(tElement);
List tList = new ArrayList<>();
tList.add(tChunk);
@@ -28,7 +29,7 @@ public void testSetTextElements() {
public void testAddTextChunkIntTextChunk() {
Line line = new Line();
- TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+ TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
TextChunk tChunk = new TextChunk(tElement);
line.addTextChunk(3, tChunk);
@@ -39,7 +40,7 @@ public void testAddTextChunkIntTextChunk() {
public void testLessThanAddTextChunkIntTextChunk() {
Line line = new Line();
- TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+ TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
TextChunk tChunk = new TextChunk(tElement);
line.addTextChunk(0, tChunk);
line.addTextChunk(0, tChunk);
@@ -51,7 +52,7 @@ public void testLessThanAddTextChunkIntTextChunk() {
public void testErrorAddTextChunkIntTextChunk() {
Line line = new Line();
- TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+ TextElement tElement = new TextElement(0, 0, 0, 0,new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
TextChunk tChunk = new TextChunk(tElement);
line.addTextChunk(-1, tChunk);
}
@@ -60,7 +61,7 @@ public void testErrorAddTextChunkIntTextChunk() {
public void testToString() {
Line line = new Line();
- TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+ TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
TextChunk tChunk = new TextChunk(tElement);
line.addTextChunk(0, tChunk);
line.addTextChunk(0, tChunk);
diff --git a/src/test/java/technology/tabula/TestObjectExtractor.java b/src/test/java/technology/tabula/TestObjectExtractor.java
index 9db7ad18..69864c61 100644
--- a/src/test/java/technology/tabula/TestObjectExtractor.java
+++ b/src/test/java/technology/tabula/TestObjectExtractor.java
@@ -7,6 +7,7 @@
import java.util.ArrayList;
import java.util.List;
+import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.Test;
@@ -21,7 +22,7 @@ public void testWrongPasswordRaisesException() throws IOException {
@Test(expected = IOException.class)
public void testEmptyOnEncryptedFileRaisesException() throws IOException {
- PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf"));
+ PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf"));
try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
oe.extract().next();
}
@@ -29,7 +30,7 @@ public void testEmptyOnEncryptedFileRaisesException() throws IOException {
@Test
public void testCanReadPDFWithOwnerEncryption() throws IOException {
- PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
+ PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
PageIterator pi = oe.extract();
int i = 0;
@@ -44,7 +45,7 @@ public void testCanReadPDFWithOwnerEncryption() throws IOException {
@Test
public void testGoodPassword() throws IOException {
- PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword");
+ PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword");
try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
List pages = new ArrayList<>();
PageIterator pi = oe.extract();
@@ -58,7 +59,7 @@ public void testGoodPassword() throws IOException {
@Test
public void testTextExtractionDoesNotRaise() throws IOException {
- PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/rotated_page.pdf"));
+ PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/rotated_page.pdf"));
try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
PageIterator pi = oe.extract();
@@ -70,7 +71,7 @@ public void testTextExtractionDoesNotRaise() throws IOException {
@Test
public void testShouldDetectRulings() throws IOException {
- PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf"));
+ PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf"));
try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
PageIterator pi = oe.extract();
@@ -85,7 +86,7 @@ public void testShouldDetectRulings() throws IOException {
@Test
public void testDontThrowNPEInShfill() throws IOException {
- PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/labor.pdf"));
+ PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/labor.pdf"));
try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
PageIterator pi = oe.extract();
@@ -101,7 +102,7 @@ public void testDontThrowNPEInShfill() throws IOException {
@Test
public void testExtractOnePage() throws IOException {
- PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
+ PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
assertEquals(2, pdf_document.getNumberOfPages());
try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
@@ -114,7 +115,7 @@ public void testExtractOnePage() throws IOException {
@Test(expected = IndexOutOfBoundsException.class)
public void testExtractWrongPageNumber() throws IOException {
- PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
+ PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
assertEquals(2, pdf_document.getNumberOfPages());
try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
@@ -124,7 +125,7 @@ public void testExtractWrongPageNumber() throws IOException {
@Test
public void testTextElementsContainedInPage() throws IOException {
- PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf"));
+ PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf"));
try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
Page page = oe.extractPage(1);
@@ -137,7 +138,7 @@ public void testTextElementsContainedInPage() throws IOException {
}
@Test public void testDoNotNPEInPointComparator() throws IOException {
- PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/npe_issue_206.pdf"));
+ PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/npe_issue_206.pdf"));
try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
Page p = oe.extractPage(1);
diff --git a/src/test/java/technology/tabula/TestProjectionProfile.java b/src/test/java/technology/tabula/TestProjectionProfile.java
index e7af882f..e6d93b39 100644
--- a/src/test/java/technology/tabula/TestProjectionProfile.java
+++ b/src/test/java/technology/tabula/TestProjectionProfile.java
@@ -8,6 +8,7 @@
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.junit.Before;
import org.junit.Test;
@@ -20,9 +21,10 @@ public class TestProjectionProfile {
public void setUpProjectionProfile() {
PDPage pdPage = new PDPage();
PDDocument pdDocument = new PDDocument();
-
- TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f);
- TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f);
+
+ PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+ TextElement textElement = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f);
+ TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f);
List textList = new ArrayList<>();
textList.add(textElement);
textList.add(textElement2);
diff --git a/src/test/java/technology/tabula/TestTableDetection.java b/src/test/java/technology/tabula/TestTableDetection.java
index 6e58f6a4..c13ff201 100644
--- a/src/test/java/technology/tabula/TestTableDetection.java
+++ b/src/test/java/technology/tabula/TestTableDetection.java
@@ -1,28 +1,29 @@
package technology.tabula;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.util.*;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import static org.junit.Assert.*;
-
import com.google.gson.Gson;
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
-import org.w3c.dom.*;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import technology.tabula.detectors.NurminenDetectionAlgorithm;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.*;
+import java.util.logging.Level;
+import java.util.logging.Logger;
-import org.apache.pdfbox.pdmodel.PDDocument;
-import technology.tabula.detectors.NurminenDetectionAlgorithm;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
/**
* Created by matt on 2015-12-14.
@@ -110,15 +111,10 @@ public static Collection data() {
String directoryName = "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-" + regionCode + "/";
File dir = new File(directoryName);
- File[] pdfs = dir.listFiles(new FilenameFilter() {
- @Override
- public boolean accept(File dir, String name) {
- return name.toLowerCase().endsWith(".pdf");
- }
- });
+ File[] pdfs = dir.listFiles((dir1, name) -> name.toLowerCase().endsWith(".pdf"));
for (File pdf : pdfs) {
- data.add(new Object[] {pdf});
+ data.add(new Object[]{pdf});
}
}
@@ -162,7 +158,9 @@ public void testDetectionOfTables() throws Exception {
NodeList tables = regionDocument.getElementsByTagName("table");
// tabula extractors
- PDDocument pdfDocument = PDDocument.load(this.pdf);
+
+
+ PDDocument pdfDocument = Loader.loadPDF(this.pdf);
ObjectExtractor extractor = new ObjectExtractor(pdfDocument);
// parse expected tables from the ground truth dataset
@@ -170,7 +168,7 @@ public void testDetectionOfTables() throws Exception {
int numExpectedTables = 0;
- for (int i=0; i tablesOnPage = detectionAlgorithm.detect(page);
- if (tablesOnPage.size() > 0) {
- detectedTables.put(new Integer(page.getPageNumber()), tablesOnPage);
+ if (!tablesOnPage.isEmpty()) {
+ detectedTables.put(page.getPageNumber(), tablesOnPage);
}
}
@@ -266,7 +264,7 @@ public void testDetectionOfTables() throws Exception {
System.out.println(totalErroneouslyDetectedTables + " tables incorrectly detected");
- if(this.status.isFirstRun()) {
+ if (this.status.isFirstRun()) {
// make the baseline
this.status.expectedFailure = failed;
this.status.numCorrectlyDetectedTables = this.numCorrectlyDetectedTables;
@@ -292,14 +290,14 @@ private List comparePages(Integer page, List detected, List detectedIterator = detected.iterator(); detectedIterator.hasNext();) {
+ for (Iterator detectedIterator = detected.iterator(); detectedIterator.hasNext(); ) {
Rectangle detectedTable = detectedIterator.next();
- for (int i=0; i elements = new ArrayList<>();
- elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
- elements.add(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
- elements.add(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
- elements.add(new TextElement(60f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-
- List words = TextElement.mergeWords(elements);
-
- List expectedWords = new ArrayList<>();
- expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)));
- expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)));
- expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)));
- expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)));
-
- Assert.assertEquals(expectedWords, words);
-
- }
-
- @Test
- public void mergeFourElementsIntoOneWord() {
-
- List elements = new ArrayList<>();
- elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
- elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
- elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
- elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-
- List words = TextElement.mergeWords(elements);
-
- List expectedWords = new ArrayList<>();
- TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
- textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
- textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
- textChunk.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
- expectedWords.add(textChunk);
-
- Assert.assertEquals(expectedWords, words);
-
- }
-
- @Test
- public void mergeElementsShouldBeIdempotent() {
- /*
- * a bug in TextElement.merge_words would delete the first TextElement in the array
- * it was called with. Discussion here: https://github.com/tabulapdf/tabula-java/issues/78
- */
-
- List elements = new ArrayList<>();
- elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
- elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
- elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
- elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-
- List words = TextElement.mergeWords(elements);
- List words2 = TextElement.mergeWords(elements);
- Assert.assertEquals(words, words2);
- }
-
- @Test
- public void mergeElementsWithSkippingRules() {
-
- List elements = new ArrayList<>();
- elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
- elements.add(new TextElement(0f, 17f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
- elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
- elements.add(new TextElement(0.001f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, " ", 1f, 6f));
- elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
- elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.TIMES_ROMAN, 10f, "D", 1f, 6f));
-
- List words = TextElement.mergeWords(elements);
-
- List expectedWords = new ArrayList<>();
- TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
- textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
- textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
- textChunk.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.TIMES_ROMAN, 10f, "D", 1f, 6f));
- expectedWords.add(textChunk);
-
- Assert.assertEquals(expectedWords, words);
-
- }
-
- @Test
- public void mergeTenElementsIntoTwoWords() {
-
- List elements = new ArrayList<>();
- elements.add(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f));
- elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
- elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f));
- elements.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
- elements.add(new TextElement(0f, 60f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f));
- elements.add(new TextElement(0f, 70f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f));
- elements.add(new TextElement(0f, 80f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f));
- elements.add(new TextElement(0f, 90f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
- elements.add(new TextElement(0f, 100f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-
- List words = TextElement.mergeWords(elements);
-
- List expectedWords = new ArrayList<>();
- TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f));
- textChunk.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
- textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f));
- textChunk.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
- textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, PDType1Font.HELVETICA, 1f, " ", 1f)); //Check why width=10.5?
- expectedWords.add(textChunk);
- TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f));
- textChunk2.add(new TextElement(0f, 70f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f));
- textChunk2.add(new TextElement(0f, 80f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f));
- textChunk2.add(new TextElement(0f, 90f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
- textChunk2.add(new TextElement(0f, 100f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
- expectedWords.add(textChunk2);
-
- Assert.assertEquals(2, words.size());
- Assert.assertEquals(expectedWords, words);
-
- }
-
- @Test
- public void mergeTenElementsIntoTwoLines() {
-
- List elements = new ArrayList<>();
- elements.add(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f));
- elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
- elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f));
- elements.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
- elements.add(new TextElement(20f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f));
- elements.add(new TextElement(20f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f));
- elements.add(new TextElement(20f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f));
- elements.add(new TextElement(20f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
- elements.add(new TextElement(20f, 40f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-
- List words = TextElement.mergeWords(elements);
-
- List expectedWords = new ArrayList<>();
- TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f));
- textChunk.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
- textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f));
- textChunk.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
- expectedWords.add(textChunk);
- TextChunk textChunk2 = new TextChunk(new TextElement(20f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f));
- textChunk2.add(new TextElement(20f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f));
- textChunk2.add(new TextElement(20f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f));
- textChunk2.add(new TextElement(20f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
- textChunk2.add(new TextElement(20f, 40f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
- expectedWords.add(textChunk2);
-
- Assert.assertEquals(2, words.size());
- Assert.assertEquals(expectedWords, words);
-
- }
-
-
+
+
+ @Test
+ public void createTextElement() {
+ // Uses the 8-argument constructor (no direction argument); the last assertion expects direction to read back as 0.
+ TextElement textElement = new TextElement(5f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f);
+ // Every getter must return the matching constructor argument: top=5, left=15, width=10, height=20.
+ Assert.assertNotNull(textElement);
+ Assert.assertEquals("A", textElement.getText());
+ Assert.assertEquals(1f, textElement.getFontSize(), 0);
+ Assert.assertEquals(15f, textElement.getLeft(), 0);
+ Assert.assertEquals(5f, textElement.getTop(), 0);
+ Assert.assertEquals(10f, textElement.getWidth(), 0);
+ Assert.assertEquals(20f, textElement.getHeight(), 0);
+ Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName());
+ Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
+ Assert.assertEquals(0f, textElement.getDirection(), 0);
+
+
+ }
+
+ @Test
+ public void createTextElementWithDirection() {
+ // Uses the 9-argument constructor; the extra trailing argument (6f) is the direction asserted last.
+ TextElement textElement = new TextElement(5f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f);
+ // Every getter must return the matching constructor argument: top=5, left=15, width=10, height=20.
+ Assert.assertNotNull(textElement);
+ Assert.assertEquals("A", textElement.getText());
+ Assert.assertEquals(1f, textElement.getFontSize(), 0);
+ Assert.assertEquals(15f, textElement.getLeft(), 0);
+ Assert.assertEquals(5f, textElement.getTop(), 0);
+ Assert.assertEquals(10f, textElement.getWidth(), 0);
+ Assert.assertEquals(20f, textElement.getHeight(), 0);
+ Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName());
+ Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
+ Assert.assertEquals(6f, textElement.getDirection(), 0);
+
+
+ }
+
+ @Test
+ public void mergeFourElementsIntoFourWords() {
+ // Four elements with the same left (15) but tops 0, 20, 40 and 60; one shared font instance is reused throughout.
+ List elements = new ArrayList<>();
+ PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+ elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+ elements.add(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f));
+ elements.add(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f));
+ elements.add(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f));
+
+ List words = TextElement.mergeWords(elements);
+ // Expected result: one single-element TextChunk per input element, in input order.
+ List expectedWords = new ArrayList<>();
+ expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)));
+ expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f)));
+ expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f)));
+ expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f)));
+
+ Assert.assertEquals(expectedWords, words);
+
+ }
+
+ @Test
+ public void mergeFourElementsIntoOneWord() {
+ // Four elements at top=0 with lefts 15, 25, 35, 45 and width 10, so each starts where the previous one ends.
+ List elements = new ArrayList<>();
+ PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+ elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+ elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+ elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+ elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
+
+ List words = TextElement.mergeWords(elements);
+ // Expected result: a single TextChunk holding all four elements in input order.
+ List expectedWords = new ArrayList<>();
+ TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+ textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+ textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+ textChunk.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
+ expectedWords.add(textChunk);
+
+ Assert.assertEquals(expectedWords, words);
+
+ }
+
+ @Test
+ public void mergeElementsShouldBeIdempotent() {
+ /*
+ * a bug in TextElement.merge_words would delete the first TextElement in the array
+ * it was called with. Discussion here: https://github.com/tabulapdf/tabula-java/issues/78
+ */
+ // Guard: merging the very same input list twice must produce equal results.
+ List<TextElement> elements = new ArrayList<>();
+ PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+ elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+ elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+ elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+ elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
+
+ List<TextChunk> words = TextElement.mergeWords(elements);
+ List<TextChunk> words2 = TextElement.mergeWords(elements);
+ Assert.assertEquals(words, words2);
+ }
+
+ @Test
+ public void mergeElementsWithSkippingRules() {
+ // Input: a second "A" right after the first, a " " next to "B", and a final "D" in another font and size.
+ List<TextElement> elements = new ArrayList<>();
+ PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+ elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+ elements.add(new TextElement(0f, 17f, 10f, 20f, font, 1f, "A", 1f, 6f));
+ elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+ elements.add(new TextElement(0.001f, 25f, 10f, 20f, font, 1f, " ", 1f, 6f));
+ elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+ PDFont timesRoman = new PDType1Font(Standard14Fonts.FontName.TIMES_ROMAN);
+ elements.add(new TextElement(0f, 45f, 10f, 20f, timesRoman, 10f, "D", 1f, 6f));
+
+ List<TextChunk> words = TextElement.mergeWords(elements);
+ // Expected chunk is A, B, C, D: the repeated "A" and the " " element are absent, "D" keeps its own font.
+ List<TextChunk> expectedWords = new ArrayList<>();
+ TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+ textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+ textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+ textChunk.add(new TextElement(0f, 45f, 10f, 20f, timesRoman, 10f, "D", 1f, 6f));
+ expectedWords.add(textChunk);
+
+ Assert.assertEquals(expectedWords, words);
+
+ }
+
+ @Test
+ public void mergeTenElementsIntoTwoWords() {
+ // NOTE(review): despite the name, nine elements are built: H-O-L-A (second argument 0..30) and M-U-N-D-O (60..100).
+ List<TextElement> elements = new ArrayList<>();
+ PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+ elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+ elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+ elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+ elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+ elements.add(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f));
+ elements.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f));
+ elements.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f));
+ elements.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f));
+ elements.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f));
+
+ List<TextChunk> words = TextElement.mergeWords(elements);
+ // Expected: two chunks split at the gap between 30 and 60; the first one ends with an extra " " element.
+ List<TextChunk> expectedWords = new ArrayList<>();
+ TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+ textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+ textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+ textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+ textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, font, 1f, " ", 1f)); //Check why width=10.5?
+ expectedWords.add(textChunk);
+ TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f));
+ textChunk2.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f));
+ textChunk2.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f));
+ textChunk2.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f));
+ textChunk2.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f));
+ expectedWords.add(textChunk2);
+
+ Assert.assertEquals(2, words.size());
+ Assert.assertEquals(expectedWords, words);
+
+ }
+
+ @Test
+ public void mergeTenElementsIntoTwoLines() {
+ // NOTE(review): nine elements, not ten: H-O-L-A with first argument 0 and M-U-N-D-O with first argument 20.
+ List<TextElement> elements = new ArrayList<>();
+ PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+ elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+ elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+ elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+ elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+ elements.add(new TextElement(20f, 0f, 10f, 20f, font, 1f, "M", 1f, 6f));
+ elements.add(new TextElement(20f, 10f, 10f, 20f, font, 1f, "U", 1f, 6f));
+ elements.add(new TextElement(20f, 20f, 10f, 20f, font, 1f, "N", 1f, 6f));
+ elements.add(new TextElement(20f, 30f, 10f, 20f, font, 1f, "D", 1f, 6f));
+ elements.add(new TextElement(20f, 40f, 10f, 20f, font, 1f, "O", 1f, 6f));
+
+ List<TextChunk> words = TextElement.mergeWords(elements);
+ // Expected: one chunk per group, in input order, with no " " element appended to either.
+ List<TextChunk> expectedWords = new ArrayList<>();
+ TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+ textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+ textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+ textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+ expectedWords.add(textChunk);
+ TextChunk textChunk2 = new TextChunk(new TextElement(20f, 0f, 10f, 20f, font, 1f, "M", 1f, 6f));
+ textChunk2.add(new TextElement(20f, 10f, 10f, 20f, font, 1f, "U", 1f, 6f));
+ textChunk2.add(new TextElement(20f, 20f, 10f, 20f, font, 1f, "N", 1f, 6f));
+ textChunk2.add(new TextElement(20f, 30f, 10f, 20f, font, 1f, "D", 1f, 6f));
+ textChunk2.add(new TextElement(20f, 40f, 10f, 20f, font, 1f, "O", 1f, 6f));
+ expectedWords.add(textChunk2);
+
+ Assert.assertEquals(2, words.size());
+ Assert.assertEquals(expectedWords, words);
+
+ }
+
}
diff --git a/src/test/java/technology/tabula/TestUtils.java b/src/test/java/technology/tabula/TestUtils.java
index e68411df..cb85cb7b 100644
--- a/src/test/java/technology/tabula/TestUtils.java
+++ b/src/test/java/technology/tabula/TestUtils.java
@@ -12,6 +12,7 @@
import java.util.Collections;
import java.util.List;
+import org.apache.pdfbox.Loader;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.commons.cli.ParseException;
import org.apache.pdfbox.pdmodel.PDDocument;
@@ -122,7 +123,7 @@ public void testQuickSortLongList() {
@Test
public void testJPEG2000DoesNotRaise() throws IOException {
- PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/jpeg2000.pdf"));
+ PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/jpeg2000.pdf"));
PDPage page = pdf_document.getPage(0);
Utils.pageConvertToImage(pdf_document, page, 360, ImageType.RGB);
}
diff --git a/src/test/java/technology/tabula/UtilsForTesting.java b/src/test/java/technology/tabula/UtilsForTesting.java
index 3ee8efde..8d3c91cf 100644
--- a/src/test/java/technology/tabula/UtilsForTesting.java
+++ b/src/test/java/technology/tabula/UtilsForTesting.java
@@ -7,6 +7,7 @@
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
+import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.Assert;
@@ -23,11 +24,9 @@ public static Page getAreaFromPage(String path, int page, float top, float left,
public static Page getPage(String path, int pageNumber) throws IOException {
ObjectExtractor oe = null;
try {
- PDDocument document = PDDocument
- .load(new File(path));
+ PDDocument document = Loader.loadPDF(new File(path));
oe = new ObjectExtractor(document);
- Page page = oe.extract(pageNumber);
- return page;
+ return oe.extract(pageNumber);
} finally {
if (oe != null)
oe.close();
|