package technology.tabula; import static org.junit.Assert.*; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.Test; public class TestObjectExtractor { /*@Test(expected=IOException.class) public void testWrongPasswordRaisesException() throws IOException { PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf")); ObjectExtractor oe = new ObjectExtractor(pdf_document, "wrongpass"); oe.extract().next(); }*/ @Test(expected = IOException.class) public void testEmptyOnEncryptedFileRaisesException() throws IOException { PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { oe.extract().next(); } } @Test public void testCanReadPDFWithOwnerEncryption() throws IOException { PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { PageIterator pi = oe.extract(); int i = 0; while (pi.hasNext()) { i++; pi.next(); } assertEquals(2, i); } } @Test public void testGoodPassword() throws IOException { PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword"); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { List pages = new ArrayList<>(); PageIterator pi = oe.extract(); while (pi.hasNext()) { pages.add(pi.next()); } assertEquals(1, pages.size()); } } @Test public void testTextExtractionDoesNotRaise() throws IOException { PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/rotated_page.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { PageIterator pi = oe.extract(); assertTrue(pi.hasNext()); assertNotNull(pi.next()); assertFalse(pi.hasNext()); } } @Test public void testShouldDetectRulings() throws IOException { PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { PageIterator pi = oe.extract(); Page page = pi.next(); List rulings = page.getRulings(); for (Ruling r: rulings) { assertTrue(page.contains(r.getBounds())); } } } @Test public void testDontThrowNPEInShfill() throws IOException { PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/labor.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { PageIterator pi = oe.extract(); assertTrue(pi.hasNext()); try { Page p = pi.next(); assertNotNull(p); } catch (NullPointerException e) { fail("NPE in ObjectExtractor " + e.toString()); } } } @Test public void testExtractOnePage() throws IOException { PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); assertEquals(2, pdf_document.getNumberOfPages()); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { Page page = oe.extract(2); assertNotNull(page); } } @Test(expected = IndexOutOfBoundsException.class) public void testExtractWrongPageNumber() throws IOException { PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf")); assertEquals(2, pdf_document.getNumberOfPages()); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { oe.extract(3); } } @Test public void testTextElementsContainedInPage() throws IOException { PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { Page page = oe.extractPage(1); for (TextElement te: page.getText()) { assertTrue(page.contains(te)); } } } @Test public void testDoNotNPEInPointComparator() throws IOException { PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/npe_issue_206.pdf")); try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) { Page p = oe.extractPage(1); assertNotNull(p); } catch (NullPointerException e) { fail("NPE in ObjectExtractor " + e.toString()); } } }