From bc60be27d663f40e132a21407ec81790a8aee361 Mon Sep 17 00:00:00 2001
From: young <chen.yang@goupwith.com>
Date: Wed, 6 Mar 2024 14:53:55 +0800
Subject: [PATCH 01/26] update pdfbox to 3.0.1

---
 pom.xml                                       |   2 +-
 .../technology/tabula/CommandLineApp.java     |   3 +-
 .../java/technology/tabula/debug/Debug.java   |   5 +-
 .../detectors/NurminenDetectionAlgorithm.java |   6 +-
 src/test/java/technology/tabula/TestCell.java |   3 +-
 src/test/java/technology/tabula/TestLine.java |  11 +-
 .../tabula/TestObjectExtractor.java           |  21 +-
 .../tabula/TestProjectionProfile.java         |   8 +-
 .../technology/tabula/TestTableDetection.java |   3 +-
 .../technology/tabula/TestTextElement.java    | 393 +++++++++---------
 .../java/technology/tabula/TestUtils.java     |   3 +-
 .../technology/tabula/UtilsForTesting.java    |   7 +-
 12 files changed, 238 insertions(+), 227 deletions(-)
diff --git a/pom.xml b/pom.xml
index 27a03e73..6c71b426 100644
--- a/pom.xml
+++ b/pom.xml
@@ -262,7 +262,7 @@
     <dependency>
         <groupId>org.apache.pdfbox</groupId>
         <artifactId>pdfbox</artifactId>
-        <version>2.0.28</version>
+        <version>3.0.1</version>
     </dependency>
 
     <dependency>
diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java
index 3a6773a9..1b422303 100644
--- a/src/main/java/technology/tabula/CommandLineApp.java
+++ b/src/main/java/technology/tabula/CommandLineApp.java
@@ -15,6 +15,7 @@
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.cli.DefaultParser;
+import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.pdmodel.PDDocument;
 
 import technology.tabula.detectors.DetectionAlgorithm;
@@ -158,7 +159,7 @@ public void extractFileInto(File pdfFile, File outputFile) throws ParseException
     private void extractFile(File pdfFile, Appendable outFile) throws ParseException {
         PDDocument pdfDocument = null;
         try {
-            pdfDocument = this.password == null ? PDDocument.load(pdfFile) : PDDocument.load(pdfFile, this.password);
+            pdfDocument = this.password == null ? Loader.loadPDF(pdfFile) : Loader.loadPDF(pdfFile, this.password);
             PageIterator pageIterator = getPageIterator(pdfDocument);
             List<Table> tables = new ArrayList<>();
 
diff --git a/src/main/java/technology/tabula/debug/Debug.java b/src/main/java/technology/tabula/debug/Debug.java
index 91609045..d6d257ce 100644
--- a/src/main/java/technology/tabula/debug/Debug.java
+++ b/src/main/java/technology/tabula/debug/Debug.java
@@ -16,6 +16,7 @@
 import java.util.List;
 
 import org.apache.commons.cli.*;
+import org.apache.pdfbox.Loader;
 import technology.tabula.Cell;
 import technology.tabula.CommandLineApp;
 import technology.tabula.Line;
@@ -215,7 +216,7 @@ public static void renderPage(String pdfPath, String outPath, int pageNumber, Re
                                   boolean drawColumns, boolean drawCharacters, boolean drawArea, boolean drawCells,
                                   boolean drawUnprocessedRulings, boolean drawProjectionProfile, boolean drawClippingPaths,
                                   boolean drawDetectedTables) throws IOException {
-        PDDocument document = PDDocument.load(new File(pdfPath));
+        PDDocument document = Loader.loadPDF(new File(pdfPath));
 
         ObjectExtractor oe = new ObjectExtractor(document);
 
@@ -349,7 +350,7 @@ public static void main(String[] args) throws IOException {
 
             if (pages == null) {
                 // user specified all pages
-                PDDocument document = PDDocument.load(pdfFile);
+                PDDocument document = Loader.loadPDF(pdfFile);
 
                 int numPages = document.getNumberOfPages();
                 pages = new ArrayList<>(numPages);
diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
index fb43622a..9a377854 100644
--- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
+++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
@@ -805,12 +805,12 @@ private PDDocument removeText(PDPage page) throws IOException {
 
         PDFStreamParser parser = new PDFStreamParser(page);
-        parser.parse();
-        List<Object> tokens = parser.getTokens();
+        // PDFBox 3.x: parse() returns the token list itself; getTokens() no longer exists.
+        List<Object> tokens = parser.parse();
         List<Object> newTokens = new ArrayList<>();
         for (Object token : tokens) {
             if (token instanceof Operator) {
                 Operator op = (Operator) token;
-                if (op.getName().equals("TJ") || op.getName().equals("Tj")) {
+                if ("TJ".equals(op.getName()) || "Tj".equals(op.getName())) {
                     //remove the one argument to this operator
                     newTokens.remove(newTokens.size() - 1);
                     continue;
diff --git a/src/test/java/technology/tabula/TestCell.java b/src/test/java/technology/tabula/TestCell.java
index de1b8cb8..2795565c 100644
--- a/src/test/java/technology/tabula/TestCell.java
+++ b/src/test/java/technology/tabula/TestCell.java
@@ -6,6 +6,7 @@
 import java.util.ArrayList;
 
 import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
 import org.junit.Test;
 
 public class TestCell {
@@ -31,7 +32,7 @@ public void testGetTextElements() {
 		Cell cell = new Cell(0, 0, 0, 0);
 		assertTrue(cell.getTextElements().isEmpty());
 		
-		TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+		TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
 		TextChunk tChunk = new TextChunk(tElement);
 		List<TextChunk> tList = new ArrayList<>();
 		tList.add(tChunk);
diff --git a/src/test/java/technology/tabula/TestLine.java b/src/test/java/technology/tabula/TestLine.java
index 90df0e31..f7a6a88d 100644
--- a/src/test/java/technology/tabula/TestLine.java
+++ b/src/test/java/technology/tabula/TestLine.java
@@ -6,6 +6,7 @@
 import java.util.List;
 
 import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
 import org.junit.Test;
 
 public class TestLine {
@@ -14,7 +15,7 @@ public class TestLine {
 	public void testSetTextElements() {
 		Line line = new Line();
 		
-		TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+		TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
 		TextChunk tChunk = new TextChunk(tElement);
 		List<TextChunk> tList = new ArrayList<>();
 		tList.add(tChunk);
@@ -28,7 +29,7 @@ public void testSetTextElements() {
 	public void testAddTextChunkIntTextChunk() {
 		Line line = new Line();
 		
-		TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+		TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
 		TextChunk tChunk = new TextChunk(tElement);
 		line.addTextChunk(3, tChunk);
 		
@@ -39,7 +40,7 @@ public void testAddTextChunkIntTextChunk() {
 	public void testLessThanAddTextChunkIntTextChunk() {
 		Line line = new Line();
 		
-		TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+		TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
 		TextChunk tChunk = new TextChunk(tElement);
 		line.addTextChunk(0, tChunk);
 		line.addTextChunk(0, tChunk);
@@ -51,7 +52,7 @@ public void testLessThanAddTextChunkIntTextChunk() {
 	public void testErrorAddTextChunkIntTextChunk() {
 		Line line = new Line();
 		
-		TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+		TextElement tElement = new TextElement(0, 0, 0, 0,new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
 		TextChunk tChunk = new TextChunk(tElement);
 		line.addTextChunk(-1, tChunk);
 		}
@@ -60,7 +61,7 @@ public void testErrorAddTextChunkIntTextChunk() {
 	public void testToString() {
 		Line line = new Line();
 		
-		TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+		TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
 		TextChunk tChunk = new TextChunk(tElement);
 		line.addTextChunk(0, tChunk);
 		line.addTextChunk(0, tChunk);
diff --git a/src/test/java/technology/tabula/TestObjectExtractor.java b/src/test/java/technology/tabula/TestObjectExtractor.java
index 9db7ad18..69864c61 100644
--- a/src/test/java/technology/tabula/TestObjectExtractor.java
+++ b/src/test/java/technology/tabula/TestObjectExtractor.java
@@ -7,6 +7,7 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.junit.Test;
 
@@ -21,7 +22,7 @@ public void testWrongPasswordRaisesException() throws IOException {
 
     @Test(expected = IOException.class)
     public void testEmptyOnEncryptedFileRaisesException() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf"));
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           oe.extract().next();
         }
@@ -29,7 +30,7 @@ public void testEmptyOnEncryptedFileRaisesException() throws IOException {
 
     @Test
     public void testCanReadPDFWithOwnerEncryption() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           PageIterator pi = oe.extract();
           int i = 0;
@@ -44,7 +45,7 @@ public void testCanReadPDFWithOwnerEncryption() throws IOException {
 
     @Test
     public void testGoodPassword() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword");
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword");
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           List<Page> pages = new ArrayList<>();
           PageIterator pi = oe.extract();
@@ -58,7 +59,7 @@ public void testGoodPassword() throws IOException {
 
     @Test
     public void testTextExtractionDoesNotRaise() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/rotated_page.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/rotated_page.pdf"));
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           PageIterator pi = oe.extract();
 
@@ -70,7 +71,7 @@ public void testTextExtractionDoesNotRaise() throws IOException {
 
     @Test
     public void testShouldDetectRulings() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf"));
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           PageIterator pi = oe.extract();
 
@@ -85,7 +86,7 @@ public void testShouldDetectRulings() throws IOException {
 
     @Test
     public void testDontThrowNPEInShfill() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/labor.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/labor.pdf"));
 
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           PageIterator pi = oe.extract();
@@ -101,7 +102,7 @@ public void testDontThrowNPEInShfill() throws IOException {
 
     @Test
     public void testExtractOnePage() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
         assertEquals(2, pdf_document.getNumberOfPages());
 
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
@@ -114,7 +115,7 @@ public void testExtractOnePage() throws IOException {
 
     @Test(expected = IndexOutOfBoundsException.class)
     public void testExtractWrongPageNumber() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
         assertEquals(2, pdf_document.getNumberOfPages());
 
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
@@ -124,7 +125,7 @@ public void testExtractWrongPageNumber() throws IOException {
 
     @Test
     public void testTextElementsContainedInPage() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf"));
 
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           Page page = oe.extractPage(1);
@@ -137,7 +138,7 @@ public void testTextElementsContainedInPage() throws IOException {
     }
 
     @Test public void testDoNotNPEInPointComparator() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/npe_issue_206.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/npe_issue_206.pdf"));
 
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
             Page p = oe.extractPage(1);
diff --git a/src/test/java/technology/tabula/TestProjectionProfile.java b/src/test/java/technology/tabula/TestProjectionProfile.java
index e7af882f..e6d93b39 100644
--- a/src/test/java/technology/tabula/TestProjectionProfile.java
+++ b/src/test/java/technology/tabula/TestProjectionProfile.java
@@ -8,6 +8,7 @@
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -20,9 +21,10 @@ public class TestProjectionProfile {
 	public void setUpProjectionProfile() {
 		PDPage pdPage = new PDPage();
 		PDDocument pdDocument = new PDDocument();
-		
-		TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f);
-		TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f);
+
+		PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+		TextElement textElement = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f);
+		TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f);
 		List<TextElement> textList = new ArrayList<>();
 		textList.add(textElement);
 		textList.add(textElement2);
diff --git a/src/test/java/technology/tabula/TestTableDetection.java b/src/test/java/technology/tabula/TestTableDetection.java
index 6e58f6a4..80d21350 100644
--- a/src/test/java/technology/tabula/TestTableDetection.java
+++ b/src/test/java/technology/tabula/TestTableDetection.java
@@ -11,6 +11,7 @@
 import static org.junit.Assert.*;
 
 import com.google.gson.Gson;
+import org.apache.pdfbox.Loader;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -162,7 +163,7 @@ public void testDetectionOfTables() throws Exception {
         NodeList tables = regionDocument.getElementsByTagName("table");
 
         // tabula extractors
-        PDDocument pdfDocument = PDDocument.load(this.pdf);
+        PDDocument pdfDocument = Loader.loadPDF(this.pdf);
         ObjectExtractor extractor = new ObjectExtractor(pdfDocument);
 
         // parse expected tables from the ground truth dataset
diff --git a/src/test/java/technology/tabula/TestTextElement.java b/src/test/java/technology/tabula/TestTextElement.java
index feaaa5e6..3db1ca31 100644
--- a/src/test/java/technology/tabula/TestTextElement.java
+++ b/src/test/java/technology/tabula/TestTextElement.java
@@ -3,205 +3,208 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.pdfbox.pdmodel.font.PDFont;
 import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
 import org.junit.Assert;
 import org.junit.Test;
 
 public class TestTextElement {
-	
-	
-	@Test
-	public void createTextElement() {
-		
-		TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f);
-		
-		Assert.assertNotNull(textElement);
-		Assert.assertEquals("A", textElement.getText());
-		Assert.assertEquals(1f, textElement.getFontSize(), 0);
-		Assert.assertEquals(15f, textElement.getLeft(), 0);
-		Assert.assertEquals(5f, textElement.getTop(), 0);
-		Assert.assertEquals(10f, textElement.getWidth(), 0);
-		Assert.assertEquals(20f, textElement.getHeight(), 0);
-		Assert.assertEquals(PDType1Font.HELVETICA, textElement.getFont());
-		Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
-		Assert.assertEquals(0f, textElement.getDirection(), 0);
-		
-		
-	}
-	
-	@Test
-	public void createTextElementWithDirection() {
-		
-		TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f);
-		
-		Assert.assertNotNull(textElement);
-		Assert.assertEquals("A", textElement.getText());
-		Assert.assertEquals(1f, textElement.getFontSize(), 0);
-		Assert.assertEquals(15f, textElement.getLeft(), 0);
-		Assert.assertEquals(5f, textElement.getTop(), 0);
-		Assert.assertEquals(10f, textElement.getWidth(), 0);
-		Assert.assertEquals(20f, textElement.getHeight(), 0);
-		Assert.assertEquals(PDType1Font.HELVETICA, textElement.getFont());
-		Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
-		Assert.assertEquals(6f, textElement.getDirection(), 0);
-		
-		
-	}
-	
-	@Test
-	public void mergeFourElementsIntoFourWords() {
-		
-		List<TextElement> elements = new ArrayList<>();
-		elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
-		elements.add(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
-		elements.add(new TextElement(60f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		
-		List<TextChunk> words = TextElement.mergeWords(elements);
-		
-		List<TextChunk> expectedWords = new ArrayList<>();
-		expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)));
-		expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)));
-		expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)));
-		expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)));
-		
-		Assert.assertEquals(expectedWords, words);
-		
-	}
-	
-	@Test
-	public void mergeFourElementsIntoOneWord() {
-		
-		List<TextElement> elements = new ArrayList<>();
-		elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
-		elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
-		elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		
-		List<TextChunk> words = TextElement.mergeWords(elements);
-		
-		List<TextChunk> expectedWords = new ArrayList<>();
-		TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
-		textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
-		textChunk.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		expectedWords.add(textChunk);
-		
-		Assert.assertEquals(expectedWords, words);
-		
-	}
-	
-	@Test
-	public void mergeElementsShouldBeIdempotent() {
-		/*
-	   * a bug in TextElement.merge_words would delete the first TextElement in the array
-	   * it was called with. Discussion here: https://github.com/tabulapdf/tabula-java/issues/78
-	   */
-
-		List<TextElement> elements = new ArrayList<>();
-		elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
-		elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
-		elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		
-		List<TextChunk> words = TextElement.mergeWords(elements);
-		List<TextChunk> words2 = TextElement.mergeWords(elements);
-		Assert.assertEquals(words, words2);
-	}
-
-	@Test
-	public void mergeElementsWithSkippingRules() {
-		
-		List<TextElement> elements = new ArrayList<>();
-		elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(0f, 17f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
-		elements.add(new TextElement(0.001f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, " ", 1f, 6f));
-		elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
-		elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.TIMES_ROMAN, 10f, "D", 1f, 6f));
-		
-		List<TextChunk> words = TextElement.mergeWords(elements);
-		
-		List<TextChunk> expectedWords = new ArrayList<>();
-		TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
-		textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
-		textChunk.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.TIMES_ROMAN, 10f, "D", 1f, 6f));
-		expectedWords.add(textChunk);
-		
-		Assert.assertEquals(expectedWords, words);
-		
-	}
-	
-	@Test
-	public void mergeTenElementsIntoTwoWords() {
-		
-		List<TextElement> elements = new ArrayList<>();
-		elements.add(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f));
-		elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f));
-		elements.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(0f, 60f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f));
-		elements.add(new TextElement(0f, 70f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f));
-		elements.add(new TextElement(0f, 80f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f));
-		elements.add(new TextElement(0f, 90f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		elements.add(new TextElement(0f, 100f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		
-		List<TextChunk> words = TextElement.mergeWords(elements);
-		
-		List<TextChunk> expectedWords = new ArrayList<>();
-		TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f));
-		textChunk.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f));
-		textChunk.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, PDType1Font.HELVETICA, 1f, " ", 1f)); //Check why width=10.5?
-		expectedWords.add(textChunk);
-		TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f));
-		textChunk2.add(new TextElement(0f, 70f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f));
-		textChunk2.add(new TextElement(0f, 80f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f));
-		textChunk2.add(new TextElement(0f, 90f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		textChunk2.add(new TextElement(0f, 100f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		expectedWords.add(textChunk2);
-		
-		Assert.assertEquals(2, words.size());
-		Assert.assertEquals(expectedWords, words);
-		
-	}
-	
-	@Test
-	public void mergeTenElementsIntoTwoLines() {
-		
-		List<TextElement> elements = new ArrayList<>();
-		elements.add(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f));
-		elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f));
-		elements.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(20f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f));
-		elements.add(new TextElement(20f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f));
-		elements.add(new TextElement(20f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f));
-		elements.add(new TextElement(20f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		elements.add(new TextElement(20f, 40f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		
-		List<TextChunk> words = TextElement.mergeWords(elements);
-		
-		List<TextChunk> expectedWords = new ArrayList<>();
-		TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f));
-		textChunk.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f));
-		textChunk.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		expectedWords.add(textChunk);
-		TextChunk textChunk2 = new TextChunk(new TextElement(20f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f));
-		textChunk2.add(new TextElement(20f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f));
-		textChunk2.add(new TextElement(20f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f));
-		textChunk2.add(new TextElement(20f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		textChunk2.add(new TextElement(20f, 40f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		expectedWords.add(textChunk2);
-		
-		Assert.assertEquals(2, words.size());
-		Assert.assertEquals(expectedWords, words);
-		
-	}	
-	
-	
+
+
+    @Test
+    public void createTextElement() {
+        // Reuse one font instance: separately constructed PDFBox fonts do not compare equal.
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        TextElement textElement = new TextElement(5f, 15f, 10f, 20f, font, 1f, "A", 1f);
+
+        Assert.assertNotNull(textElement);
+        Assert.assertEquals("A", textElement.getText());
+        Assert.assertEquals(1f, textElement.getFontSize(), 0);
+        Assert.assertEquals(15f, textElement.getLeft(), 0);
+        Assert.assertEquals(5f, textElement.getTop(), 0);
+        Assert.assertEquals(10f, textElement.getWidth(), 0);
+        Assert.assertEquals(20f, textElement.getHeight(), 0);
+        Assert.assertEquals(font, textElement.getFont());
+        Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
+        Assert.assertEquals(0f, textElement.getDirection(), 0);
+
+    }
+
+    @Test
+    public void createTextElementWithDirection() {
+        // Reuse one font instance: separately constructed PDFBox fonts do not compare equal.
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        TextElement textElement = new TextElement(5f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f);
+
+        Assert.assertNotNull(textElement);
+        Assert.assertEquals("A", textElement.getText());
+        Assert.assertEquals(1f, textElement.getFontSize(), 0);
+        Assert.assertEquals(15f, textElement.getLeft(), 0);
+        Assert.assertEquals(5f, textElement.getTop(), 0);
+        Assert.assertEquals(10f, textElement.getWidth(), 0);
+        Assert.assertEquals(20f, textElement.getHeight(), 0);
+        Assert.assertEquals(font, textElement.getFont());
+        Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
+        Assert.assertEquals(6f, textElement.getDirection(), 0);
+
+    }
+
+    @Test
+    public void mergeFourElementsIntoFourWords() {
+        // Share one font between actual and expected elements; distinct PDFBox font objects are not equal.
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        List<TextElement> elements = new ArrayList<>();
+        elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        elements.add(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f));
+
+        List<TextChunk> words = TextElement.mergeWords(elements);
+
+        List<TextChunk> expectedWords = new ArrayList<>();
+        expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f)));
+
+        Assert.assertEquals(expectedWords, words);
+    }
+
+    @Test
+    public void mergeFourElementsIntoOneWord() {
+        // Share one font between actual and expected elements; distinct PDFBox font objects are not equal.
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        List<TextElement> elements = new ArrayList<>();
+        elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
+
+        List<TextChunk> words = TextElement.mergeWords(elements);
+
+        List<TextChunk> expectedWords = new ArrayList<>();
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        textChunk.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
+        expectedWords.add(textChunk);
+
+        Assert.assertEquals(expectedWords, words);
+    }
+
+    @Test
+    public void mergeElementsShouldBeIdempotent() {
+        /*
+         * Regression test: a bug in TextElement.mergeWords would delete the first TextElement in the list
+         * it was called with. Discussion here: https://github.com/tabulapdf/tabula-java/issues/78
+         */
+
+        List<TextElement> elements = new ArrayList<>();
+        elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
+        elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
+        elements.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
+        // Merge the very same list twice; the second result must equal the first.
+        List<TextChunk> words = TextElement.mergeWords(elements);
+        List<TextChunk> words2 = TextElement.mergeWords(elements);
+        Assert.assertEquals(words, words2);
+    }
+
+    @Test
+    public void mergeElementsWithSkippingRules() {
+        // The second "A" (17f) and the " " element (0.001f) are part of the input but absent from the expected chunk below.
+        List<TextElement> elements = new ArrayList<>();
+        elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 17f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
+        elements.add(new TextElement(0.001f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, " ", 1f, 6f));
+        elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
+        PDFont TIMES_ROMAN = new PDType1Font(Standard14Fonts.FontName.TIMES_ROMAN);
+        elements.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f));
+
+        List<TextChunk> words = TextElement.mergeWords(elements);
+        // Expect one chunk: "A", "B", "C" in Helvetica followed by the Times-Roman "D".
+        List<TextChunk> expectedWords = new ArrayList<>();
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
+        textChunk.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
+        textChunk.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
+        textChunk.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f));
+        expectedWords.add(textChunk);
+
+        Assert.assertEquals(expectedWords, words);
+
+    }
+
+    @Test
+    public void mergeTenElementsIntoTwoWords() {
+        // NOTE(review): despite the method name, only nine elements are added below (H O L A, then M U N D O after a jump from 30f to 60f).
+        List<TextElement> elements = new ArrayList<>();
+        elements.add(new TextElement(0f, 0f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "H", 1f, 6f));
+        elements.add(new TextElement(0f, 10f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
+        elements.add(new TextElement(0f, 20f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "L", 1f, 6f));
+        elements.add(new TextElement(0f, 30f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 60f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "M", 1f, 6f));
+        elements.add(new TextElement(0f, 70f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "U", 1f, 6f));
+        elements.add(new TextElement(0f, 80f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "N", 1f, 6f));
+        elements.add(new TextElement(0f, 90f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
+        elements.add(new TextElement(0f, 100f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
+
+        List<TextChunk> words = TextElement.mergeWords(elements);
+        // Expect two chunks; the first one ends with a " " element that is not among the inputs.
+        List<TextChunk> expectedWords = new ArrayList<>();
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "H", 1f, 6f));
+        textChunk.add(new TextElement(0f, 10f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
+        textChunk.add(new TextElement(0f, 20f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "L", 1f, 6f));
+        textChunk.add(new TextElement(0f, 30f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
+        textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, " ", 1f)); // TODO: check why the expected width of this " " element is 10.5
+        expectedWords.add(textChunk);
+        TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "M", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 70f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "U", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 80f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "N", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 90f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 100f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
+        expectedWords.add(textChunk2);
+
+        Assert.assertEquals(2, words.size());
+        Assert.assertEquals(expectedWords, words);
+
+    }
+
+    @Test
+    public void mergeTenElementsIntoTwoLines() {
+        // NOTE(review): despite the method name, only nine elements are added: four with first argument 0f, five with 20f.
+        List<TextElement> elements = new ArrayList<>();
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+        elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+        elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+        elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(20f, 0f, 10f, 20f, font, 1f, "M", 1f, 6f));
+        elements.add(new TextElement(20f, 10f, 10f, 20f, font, 1f, "U", 1f, 6f));
+        elements.add(new TextElement(20f, 20f, 10f, 20f, font, 1f, "N", 1f, 6f));
+        elements.add(new TextElement(20f, 30f, 10f, 20f, font, 1f, "D", 1f, 6f));
+        elements.add(new TextElement(20f, 40f, 10f, 20f, font, 1f, "O", 1f, 6f));
+
+        List<TextChunk> words = TextElement.mergeWords(elements);
+        // Expect two chunks, one per distinct first-argument value (0f and 20f), with no extra elements inserted.
+        List<TextChunk> expectedWords = new ArrayList<>();
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+        textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+        textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+        textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        expectedWords.add(textChunk);
+        TextChunk textChunk2 = new TextChunk(new TextElement(20f, 0f, 10f, 20f, font, 1f, "M", 1f, 6f));
+        textChunk2.add(new TextElement(20f, 10f, 10f, 20f, font, 1f, "U", 1f, 6f));
+        textChunk2.add(new TextElement(20f, 20f, 10f, 20f, font, 1f, "N", 1f, 6f));
+        textChunk2.add(new TextElement(20f, 30f, 10f, 20f, font, 1f, "D", 1f, 6f));
+        textChunk2.add(new TextElement(20f, 40f, 10f, 20f, font, 1f, "O", 1f, 6f));
+        expectedWords.add(textChunk2);
+
+        Assert.assertEquals(2, words.size());
+        Assert.assertEquals(expectedWords, words);
+
+    }
+
 
 }
diff --git a/src/test/java/technology/tabula/TestUtils.java b/src/test/java/technology/tabula/TestUtils.java
index e68411df..cb85cb7b 100644
--- a/src/test/java/technology/tabula/TestUtils.java
+++ b/src/test/java/technology/tabula/TestUtils.java
@@ -12,6 +12,7 @@
 import java.util.Collections;
 import java.util.List;
 
+import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.rendering.ImageType;
 import org.apache.commons.cli.ParseException;
 import org.apache.pdfbox.pdmodel.PDDocument;
@@ -122,7 +123,7 @@ public void testQuickSortLongList() {
 
     @Test
     public void testJPEG2000DoesNotRaise() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/jpeg2000.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/jpeg2000.pdf"));
         PDPage page = pdf_document.getPage(0);
         Utils.pageConvertToImage(pdf_document, page, 360, ImageType.RGB);
     }
diff --git a/src/test/java/technology/tabula/UtilsForTesting.java b/src/test/java/technology/tabula/UtilsForTesting.java
index 3ee8efde..8d3c91cf 100644
--- a/src/test/java/technology/tabula/UtilsForTesting.java
+++ b/src/test/java/technology/tabula/UtilsForTesting.java
@@ -7,6 +7,7 @@
 import org.apache.commons.csv.CSVFormat;
 import org.apache.commons.csv.CSVParser;
 import org.apache.commons.csv.CSVPrinter;
+import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.junit.Assert;
 
@@ -23,11 +24,9 @@ public static Page getAreaFromPage(String path, int page, float top, float left,
     public static Page getPage(String path, int pageNumber) throws IOException {
         ObjectExtractor oe = null;
         try {
-            PDDocument document = PDDocument
-                    .load(new File(path));
+            PDDocument document = Loader.loadPDF(new File(path));
             oe = new ObjectExtractor(document);
-            Page page = oe.extract(pageNumber);
-            return page;
+            return oe.extract(pageNumber);
         } finally {
             if (oe != null)
                 oe.close();

From d0241fb5ff9182d7980c3ccd572cc8bb2dba9357 Mon Sep 17 00:00:00 2001
From: young <chen.yang@goupwith.com>
Date: Wed, 6 Mar 2024 14:59:35 +0800
Subject: [PATCH 02/26] remove useless variable

---
 .../tabula/detectors/SpreadsheetDetectionAlgorithm.java         | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java
index 243cc3bf..43136ba5 100644
--- a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java
+++ b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java
@@ -20,8 +20,6 @@ public class SpreadsheetDetectionAlgorithm implements DetectionAlgorithm {
     public List<Rectangle> detect(Page page) {
         List<Cell> cells = SpreadsheetExtractionAlgorithm.findCells(page.getHorizontalRulings(), page.getVerticalRulings());
 
-        SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
-
         List<Rectangle> tables = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells);
 
         // we want tables to be returned from top to bottom on the page

From 63de16a4e102b44ea370919625221561dc783e75 Mon Sep 17 00:00:00 2001
From: young <chen.yang@goupwith.com>
Date: Thu, 11 Apr 2024 14:09:16 +0800
Subject: [PATCH 03/26] exclude junit-jupiter from pdfbox

---
 pom.xml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pom.xml b/pom.xml
index 6c71b426..f60528d0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -263,6 +263,12 @@
         <groupId>org.apache.pdfbox</groupId>
         <artifactId>pdfbox</artifactId>
         <version>3.0.1</version>
+        <exclusions>
+            <exclusion>
+                <groupId>org.junit.jupiter</groupId>
+                <artifactId>junit-jupiter</artifactId>
+            </exclusion>
+        </exclusions>
     </dependency>
 
     <dependency>

From e0ee0728ca398023ab67f59626a55525de0355b0 Mon Sep 17 00:00:00 2001
From: young <chen.yang@goupwith.com>
Date: Thu, 11 Apr 2024 14:31:10 +0800
Subject: [PATCH 04/26] update pdfbox to 3.0.2

---
 pom.xml | 341 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 168 insertions(+), 173 deletions(-)

diff --git a/pom.xml b/pom.xml
index f60528d0..52943fbf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,4 +1,5 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
     <modelVersion>4.0.0</modelVersion>
     <groupId>technology.tabula</groupId>
     <artifactId>tabula</artifactId>
@@ -33,16 +34,16 @@
     </developers>
 
     <repositories>
-      <repository>
-        <id>snapshots</id>
-        <url>https://repository.apache.org/content/repositories/snapshots/</url>
-        <releases>
-          <enabled>false</enabled>
-        </releases>
-        <snapshots>
-          <enabled>true</enabled>
-        </snapshots>
-      </repository>
+        <repository>
+            <id>snapshots</id>
+            <url>https://repository.apache.org/content/repositories/snapshots/</url>
+            <releases>
+                <enabled>false</enabled>
+            </releases>
+            <snapshots>
+                <enabled>true</enabled>
+            </snapshots>
+        </repository>
     </repositories>
 
     <scm>
@@ -109,20 +110,20 @@
                 </executions>
             </plugin>
             <plugin>
-              <groupId>org.apache.maven.plugins</groupId>
-              <artifactId>maven-javadoc-plugin</artifactId>
-              <version>3.3.1</version>
-              <configuration>
-                  <source>8</source>
-              </configuration>
-              <executions>
-                <execution>
-                  <id>attach-javadocs</id>
-                  <goals>
-                    <goal>jar</goal>
-                  </goals>
-                </execution>
-              </executions>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-javadoc-plugin</artifactId>
+                <version>3.3.1</version>
+                <configuration>
+                    <source>8</source>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>attach-javadocs</id>
+                        <goals>
+                            <goal>jar</goal>
+                        </goals>
+                    </execution>
+                </executions>
             </plugin>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
@@ -159,172 +160,166 @@
                         <manifest>
                             <mainClass>technology.tabula.CommandLineApp</mainClass>
                         </manifest>
-                </archive>
-                <descriptorRefs>
-                    <descriptorRef>jar-with-dependencies</descriptorRef>
-                </descriptorRefs>
+                    </archive>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
                 </configuration>
-        </plugin>
-        <plugin>
-            <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-surefire-plugin</artifactId>
-            <version>2.22.2</version>
-            <configuration>
-                <!-- Travis build workaround -->
-                <argLine>-Xms1024m -Xmx2048m</argLine>
-            </configuration>
-        </plugin>
-        <!-- download source jars and link them when running eclipse:eclipse -->
-        <plugin>
-            <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-eclipse-plugin</artifactId>
-            <version>2.10</version>
-            <configuration>
-                <downloadSources>true</downloadSources>
-                <downloadJavadocs>true</downloadJavadocs>
-            </configuration>
-        </plugin>
-    </plugins>
-</build>
-
-<profiles>
-    <profile>
-        <id>release</id>
-        <build>
-          <plugins>
+            </plugin>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-javadoc-plugin</artifactId>
-                <version>3.3.1</version>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <version>2.22.2</version>
                 <configuration>
-                    <source>8</source>
+                    <!-- Travis build workaround -->
+                    <argLine>-Xms1024m -Xmx2048m</argLine>
                 </configuration>
-                <executions>
-                    <execution>
-                        <id>attach-javadocs</id>
-                        <goals>
-                            <goal>jar</goal>
-                        </goals>
-                    </execution>
-                </executions>
             </plugin>
-                <plugin>
-                    <groupId>org.apache.maven.plugins</groupId>
-                    <artifactId>maven-source-plugin</artifactId>
-                    <version>3.2.1</version>
-                    <executions>
-                        <execution>
-                            <id>attach-sources</id>
-                            <goals>
-                                <goal>jar-no-fork</goal>
-                            </goals>
-                        </execution>
-                    </executions>
-                </plugin>
-                <plugin>
-                    <groupId>org.apache.maven.plugins</groupId>
-                    <artifactId>maven-gpg-plugin</artifactId>
-                    <version>1.6</version>
-                    <executions>
-                        <execution>
-                            <id>sign-artifacts</id>
-                            <phase>verify</phase>
-                            <goals>
-                                <goal>sign</goal>
-                            </goals>
-                        </execution>
-                    </executions>
-                </plugin>
-            </plugins>
-        </build>
-    </profile>
-</profiles>
+            <!-- download source jars and link them when running eclipse:eclipse -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-eclipse-plugin</artifactId>
+                <version>2.10</version>
+                <configuration>
+                    <downloadSources>true</downloadSources>
+                    <downloadJavadocs>true</downloadJavadocs>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+    <profiles>
+        <profile>
+            <id>release</id>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-javadoc-plugin</artifactId>
+                        <version>3.3.1</version>
+                        <configuration>
+                            <source>8</source>
+                        </configuration>
+                        <executions>
+                            <execution>
+                                <id>attach-javadocs</id>
+                                <goals>
+                                    <goal>jar</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-source-plugin</artifactId>
+                        <version>3.2.1</version>
+                        <executions>
+                            <execution>
+                                <id>attach-sources</id>
+                                <goals>
+                                    <goal>jar-no-fork</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-gpg-plugin</artifactId>
+                        <version>1.6</version>
+                        <executions>
+                            <execution>
+                                <id>sign-artifacts</id>
+                                <phase>verify</phase>
+                                <goals>
+                                    <goal>sign</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
 
-<dependencies>
-    <dependency>
-        <groupId>org.locationtech.jts</groupId>
-        <artifactId>jts-core</artifactId>
-        <version>1.18.1</version>
-    </dependency>
+    <dependencies>
+        <dependency>
+            <groupId>org.locationtech.jts</groupId>
+            <artifactId>jts-core</artifactId>
+            <version>1.18.1</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.slf4j</groupId>
-        <artifactId>slf4j-api</artifactId>
-        <version>1.7.35</version>
-    </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>1.7.35</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.slf4j</groupId>
-        <artifactId>slf4j-simple</artifactId>
-        <version>1.7.32</version>
-    </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-simple</artifactId>
+            <version>1.7.32</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.apache.pdfbox</groupId>
-        <artifactId>pdfbox</artifactId>
-        <version>3.0.1</version>
-        <exclusions>
-            <exclusion>
-                <groupId>org.junit.jupiter</groupId>
-                <artifactId>junit-jupiter</artifactId>
-            </exclusion>
-        </exclusions>
-    </dependency>
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>pdfbox</artifactId>
+            <version>3.0.2</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.bouncycastle</groupId>
-        <artifactId>bcprov-jdk15on</artifactId>
-        <version>1.70</version>
-    </dependency>
+        <dependency>
+            <groupId>org.bouncycastle</groupId>
+            <artifactId>bcprov-jdk15on</artifactId>
+            <version>1.70</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.bouncycastle</groupId>
-        <artifactId>bcmail-jdk15on</artifactId>
-        <version>1.70</version>
-    </dependency>
+        <dependency>
+            <groupId>org.bouncycastle</groupId>
+            <artifactId>bcmail-jdk15on</artifactId>
+            <version>1.70</version>
+        </dependency>
 
-    <dependency>
-        <groupId>junit</groupId>
-        <artifactId>junit</artifactId>
-        <version>4.13.2</version>
-        <scope>test</scope>
-    </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.13.2</version>
+            <scope>test</scope>
+        </dependency>
 
-    <dependency>
-        <groupId>commons-cli</groupId>
-        <artifactId>commons-cli</artifactId>
-        <version>1.4</version>
-    </dependency>
+        <dependency>
+            <groupId>commons-cli</groupId>
+            <artifactId>commons-cli</artifactId>
+            <version>1.4</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.apache.commons</groupId>
-        <artifactId>commons-csv</artifactId>
-        <version>1.9.0</version>
-    </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-csv</artifactId>
+            <version>1.9.0</version>
+        </dependency>
 
-    <dependency>
-        <groupId>com.google.code.gson</groupId>
-        <artifactId>gson</artifactId>
-        <version>2.9.0</version>
-    </dependency>
+        <dependency>
+            <groupId>com.google.code.gson</groupId>
+            <artifactId>gson</artifactId>
+            <version>2.9.0</version>
+        </dependency>
 
-    <dependency>
-        <groupId>com.github.jai-imageio</groupId>
-        <artifactId>jai-imageio-core</artifactId>
-        <version>1.4.0</version>
-    </dependency>
+        <dependency>
+            <groupId>com.github.jai-imageio</groupId>
+            <artifactId>jai-imageio-core</artifactId>
+            <version>1.4.0</version>
+        </dependency>
 
-    <dependency>
-        <groupId>com.github.jai-imageio</groupId>
-        <artifactId>jai-imageio-jpeg2000</artifactId>
-        <version>1.4.0</version>
-    </dependency>
+        <dependency>
+            <groupId>com.github.jai-imageio</groupId>
+            <artifactId>jai-imageio-jpeg2000</artifactId>
+            <version>1.4.0</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.apache.pdfbox</groupId>
-        <artifactId>jbig2-imageio</artifactId>
-        <version>3.0.4</version>
-    </dependency>
-</dependencies>
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>jbig2-imageio</artifactId>
+            <version>3.0.4</version>
+        </dependency>
+    </dependencies>
 
 </project>

From 20b1053a24402a1e3a587ee90211661027d66484 Mon Sep 17 00:00:00 2001
From: young <chen.yang@goupwith.com>
Date: Mon, 29 Apr 2024 17:40:46 +0800
Subject: [PATCH 05/26] fix: oom for removeText

---
 .../detectors/NurminenDetectionAlgorithm.java | 86 ++++++++++++-------
 .../technology/tabula/TestTableDetection.java | 53 ++++++------
 2 files changed, 79 insertions(+), 60 deletions(-)

diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
index 9a377854..86639f66 100644
--- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
+++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
@@ -1,14 +1,8 @@
 package technology.tabula.detectors;
 
-import java.awt.geom.Line2D;
-import java.awt.geom.Point2D;
-import java.awt.image.BufferedImage;
-import java.awt.image.Raster;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.*;
-
+import org.apache.pdfbox.contentstream.PDContentStream;
 import org.apache.pdfbox.contentstream.operator.Operator;
+import org.apache.pdfbox.contentstream.operator.OperatorName;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.pdfparser.PDFStreamParser;
 import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
@@ -16,16 +10,17 @@
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.common.PDStream;
 import org.apache.pdfbox.rendering.ImageType;
-
-import technology.tabula.Line;
-import technology.tabula.Page;
-import technology.tabula.Rectangle;
-import technology.tabula.Ruling;
-import technology.tabula.TextChunk;
-import technology.tabula.TextElement;
-import technology.tabula.Utils;
+import technology.tabula.*;
 import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
 
+import java.awt.geom.Line2D;
+import java.awt.geom.Point2D;
+import java.awt.image.BufferedImage;
+import java.awt.image.Raster;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.*;
+
 /**
  * Created by matt on 2015-12-17.
  * <p>
@@ -799,25 +794,10 @@ private List<Ruling> getVerticalRulings(BufferedImage image) {
         return verticalRulings;
     }
 
-
-    // taken from http://www.docjar.com/html/api/org/apache/pdfbox/examples/util/RemoveAllText.java.html
     private PDDocument removeText(PDPage page) throws IOException {
 
         PDFStreamParser parser = new PDFStreamParser(page);
         parser.parse();
-        List<Object> newTokens = new ArrayList<>();
-        while (page.hasContents()) {
-            Object token = parser.parseNextToken();
-            if (token instanceof Operator) {
-                Operator op = (Operator) token;
-                if ("TJ".equals(op.getName()) || "Tj".equals(op.getName())) {
-                    //remove the one argument to this operator
-                    newTokens.remove(newTokens.size() - 1);
-                    continue;
-                }
-            }
-            newTokens.add(token);
-        }
 
         PDDocument document = new PDDocument();
         PDPage newPage = document.importPage(page);
@@ -826,9 +806,51 @@ private PDDocument removeText(PDPage page) throws IOException {
         PDStream newContents = new PDStream(document);
         OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE);
         ContentStreamWriter writer = new ContentStreamWriter(out);
-        writer.writeTokens(newTokens);
+        List<Object> tokensWithoutText = createTokensWithoutText(page);
+        writer.writeTokens(tokensWithoutText);
         out.close();
         newPage.setContents(newContents);
         return document;
     }
+
+
+    /** Reads {@code contentStream} token by token and collects the tokens, leaving out text-showing operators and their operands.
+     * @param contentStream content stream handed to a new {@code PDFStreamParser}
+     * @return the collected tokens in parse order; for SHOW_TEXT, SHOW_TEXT_ADJUSTED and SHOW_TEXT_LINE the operator and the one token before it are omitted, for SHOW_TEXT_LINE_AND_SPACE the operator and the three tokens before it
+     * @throws IOException if parsing the content stream fails
+     * @see <a href="https://github.com/apache/pdfbox/blob/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java#L127">PDFBox RemoveAllText example</a>
+     */
+    private static List<Object> createTokensWithoutText(PDContentStream contentStream) throws IOException {
+        PDFStreamParser parser = new PDFStreamParser(contentStream);
+        Object token = parser.parseNextToken();
+        List<Object> newTokens = new ArrayList<>();
+        while (token != null) {
+            if (token instanceof Operator) {
+                Operator op = (Operator) token;
+                String opName = op.getName();
+                if (OperatorName.SHOW_TEXT_ADJUSTED.equals(opName)
+                        || OperatorName.SHOW_TEXT.equals(opName)
+                        || OperatorName.SHOW_TEXT_LINE.equals(opName)) {
+                    // remove the argument to this operator
+                    newTokens.remove(newTokens.size() - 1);
+                    // advance to the next token without adding the operator itself to newTokens
+                    token = parser.parseNextToken();
+                    continue;
+                } else if (OperatorName.SHOW_TEXT_LINE_AND_SPACE.equals(opName)) {
+                    // remove the 3 arguments to this operator
+                    newTokens.remove(newTokens.size() - 1);
+                    newTokens.remove(newTokens.size() - 1);
+                    newTokens.remove(newTokens.size() - 1);
+                    // advance to the next token without adding the operator itself to newTokens
+                    token = parser.parseNextToken();
+                    continue;
+                }
+            }
+            newTokens.add(token);
+            token = parser.parseNextToken();
+        }
+        return newTokens;
+    }
+
+
 }
diff --git a/src/test/java/technology/tabula/TestTableDetection.java b/src/test/java/technology/tabula/TestTableDetection.java
index 80d21350..c13ff201 100644
--- a/src/test/java/technology/tabula/TestTableDetection.java
+++ b/src/test/java/technology/tabula/TestTableDetection.java
@@ -1,29 +1,29 @@
 package technology.tabula;
 
-import java.io.File;
-import java.io.FileWriter;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.util.*;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import static org.junit.Assert.*;
-
 import com.google.gson.Gson;
 import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
-import org.w3c.dom.*;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import technology.tabula.detectors.NurminenDetectionAlgorithm;
 
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.*;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 
-import org.apache.pdfbox.pdmodel.PDDocument;
-import technology.tabula.detectors.NurminenDetectionAlgorithm;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 
 /**
  * Created by matt on 2015-12-14.
@@ -111,15 +111,10 @@ public static Collection<Object[]> data() {
             String directoryName = "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-" + regionCode + "/";
             File dir = new File(directoryName);
 
-            File[] pdfs = dir.listFiles(new FilenameFilter() {
-                @Override
-                public boolean accept(File dir, String name) {
-                    return name.toLowerCase().endsWith(".pdf");
-                }
-            });
+            File[] pdfs = dir.listFiles((dir1, name) -> name.toLowerCase().endsWith(".pdf"));
 
             for (File pdf : pdfs) {
-                data.add(new Object[] {pdf});
+                data.add(new Object[]{pdf});
             }
         }
 
@@ -163,6 +158,8 @@ public void testDetectionOfTables() throws Exception {
         NodeList tables = regionDocument.getElementsByTagName("table");
 
         // tabula extractors
+
+
         PDDocument pdfDocument = Loader.loadPDF(this.pdf);
         ObjectExtractor extractor = new ObjectExtractor(pdfDocument);
 
@@ -171,7 +168,7 @@ public void testDetectionOfTables() throws Exception {
 
         int numExpectedTables = 0;
 
-        for (int i=0; i<tables.getLength(); i++) {
+        for (int i = 0; i < tables.getLength(); i++) {
 
             Element table = (Element) tables.item(i);
             Element region = (Element) table.getElementsByTagName("region").item(0);
@@ -195,7 +192,7 @@ public void testDetectionOfTables() throws Exception {
             // do some extra work to extract the page with tabula and get the dimensions from there
             Page extractedPage = extractor.extractPage(page);
 
-            float top = (float)extractedPage.getHeight() - y2;
+            float top = (float) extractedPage.getHeight() - y2;
             float left = x1;
             float width = x2 - x1;
             float height = y2 - y1;
@@ -214,8 +211,8 @@ public void testDetectionOfTables() throws Exception {
         while (pages.hasNext()) {
             Page page = pages.next();
             List<Rectangle> tablesOnPage = detectionAlgorithm.detect(page);
-            if (tablesOnPage.size() > 0) {
-                detectedTables.put(new Integer(page.getPageNumber()), tablesOnPage);
+            if (!tablesOnPage.isEmpty()) {
+                detectedTables.put(page.getPageNumber(), tablesOnPage);
             }
         }
 
@@ -267,7 +264,7 @@ public void testDetectionOfTables() throws Exception {
         System.out.println(totalErroneouslyDetectedTables + " tables incorrectly detected");
 
 
-        if(this.status.isFirstRun()) {
+        if (this.status.isFirstRun()) {
             // make the baseline
             this.status.expectedFailure = failed;
             this.status.numCorrectlyDetectedTables = this.numCorrectlyDetectedTables;
@@ -293,14 +290,14 @@ private List<String> comparePages(Integer page, List<Rectangle> detected, List<R
         // from http://www.orsigiorgio.net/wp-content/papercite-data/pdf/gho*12.pdf (comparing regions):
         // for other (e.g.“black-box”) algorithms, bounding boxes and content are used. A region is correct if it
         // contains the minimal bounding box of the ground truth without intersecting additional content.
-        for (Iterator<Rectangle> detectedIterator = detected.iterator(); detectedIterator.hasNext();) {
+        for (Iterator<Rectangle> detectedIterator = detected.iterator(); detectedIterator.hasNext(); ) {
             Rectangle detectedTable = detectedIterator.next();
 
-            for (int i=0; i<expected.size(); i++) {
+            for (int i = 0; i < expected.size(); i++) {
                 if (detectedTable.contains(expected.get(i))) {
                     // we have a candidate for the detected table, make sure it doesn't intersect any others
                     boolean intersectsOthers = false;
-                    for (int j=0; j<expected.size(); j++) {
+                    for (int j = 0; j < expected.size(); j++) {
                         if (i == j) continue;
                         if (detectedTable.intersects(expected.get(j))) {
                             intersectsOthers = true;

From a01730669761b8a62444abc066c4f5b7fb8c5e1c Mon Sep 17 00:00:00 2001
From: young <chen.yang@goupwith.com>
Date: Mon, 29 Apr 2024 17:51:40 +0800
Subject: [PATCH 06/26] fix: unit test

---
 .../technology/tabula/TestTextElement.java    | 109 +++++++++---------
 1 file changed, 57 insertions(+), 52 deletions(-)

diff --git a/src/test/java/technology/tabula/TestTextElement.java b/src/test/java/technology/tabula/TestTextElement.java
index 3db1ca31..ee0fbf3d 100644
--- a/src/test/java/technology/tabula/TestTextElement.java
+++ b/src/test/java/technology/tabula/TestTextElement.java
@@ -1,14 +1,14 @@
 package technology.tabula;
 
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.pdfbox.pdmodel.font.PDFont;
 import org.apache.pdfbox.pdmodel.font.PDType1Font;
 import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
 import org.junit.Assert;
 import org.junit.Test;
 
+import java.util.ArrayList;
+import java.util.List;
+
 public class TestTextElement {
 
 
@@ -24,7 +24,7 @@ public void createTextElement() {
         Assert.assertEquals(5f, textElement.getTop(), 0);
         Assert.assertEquals(10f, textElement.getWidth(), 0);
         Assert.assertEquals(20f, textElement.getHeight(), 0);
-        Assert.assertEquals(new PDType1Font(Standard14Fonts.FontName.HELVETICA), textElement.getFont());
+        Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName());
         Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
         Assert.assertEquals(0f, textElement.getDirection(), 0);
 
@@ -43,7 +43,7 @@ public void createTextElementWithDirection() {
         Assert.assertEquals(5f, textElement.getTop(), 0);
         Assert.assertEquals(10f, textElement.getWidth(), 0);
         Assert.assertEquals(20f, textElement.getHeight(), 0);
-        Assert.assertEquals(new PDType1Font(Standard14Fonts.FontName.HELVETICA), textElement.getFont());
+        Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName());
         Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
         Assert.assertEquals(6f, textElement.getDirection(), 0);
 
@@ -54,18 +54,19 @@ public void createTextElementWithDirection() {
     public void mergeFourElementsIntoFourWords() {
 
         List<TextElement> elements = new ArrayList<>();
-        elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        elements.add(new TextElement(20f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
-        elements.add(new TextElement(40f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
-        elements.add(new TextElement(60f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        elements.add(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f));
 
         List<TextChunk> words = TextElement.mergeWords(elements);
 
         List<TextChunk> expectedWords = new ArrayList<>();
-        expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)));
-        expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)));
-        expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)));
-        expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f)));
 
         Assert.assertEquals(expectedWords, words);
 
@@ -75,18 +76,19 @@ public void mergeFourElementsIntoFourWords() {
     public void mergeFourElementsIntoOneWord() {
 
         List<TextElement> elements = new ArrayList<>();
-        elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
-        elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
-        elements.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
 
         List<TextChunk> words = TextElement.mergeWords(elements);
 
         List<TextChunk> expectedWords = new ArrayList<>();
-        TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        textChunk.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
-        textChunk.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
-        textChunk.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        textChunk.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
         expectedWords.add(textChunk);
 
         Assert.assertEquals(expectedWords, words);
@@ -101,10 +103,11 @@ public void mergeElementsShouldBeIdempotent() {
          */
 
         List<TextElement> elements = new ArrayList<>();
-        elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
-        elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
-        elements.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
 
         List<TextChunk> words = TextElement.mergeWords(elements);
         List<TextChunk> words2 = TextElement.mergeWords(elements);
@@ -115,20 +118,21 @@ public void mergeElementsShouldBeIdempotent() {
     public void mergeElementsWithSkippingRules() {
 
         List<TextElement> elements = new ArrayList<>();
-        elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        elements.add(new TextElement(0f, 17f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
-        elements.add(new TextElement(0.001f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, " ", 1f, 6f));
-        elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 17f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(0.001f, 25f, 10f, 20f, font, 1f, " ", 1f, 6f));
+        elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
         PDFont TIMES_ROMAN = new PDType1Font(Standard14Fonts.FontName.TIMES_ROMAN);
         elements.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f));
 
         List<TextChunk> words = TextElement.mergeWords(elements);
 
         List<TextChunk> expectedWords = new ArrayList<>();
-        TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        textChunk.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
-        textChunk.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
         textChunk.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f));
         expectedWords.add(textChunk);
 
@@ -140,30 +144,31 @@ public void mergeElementsWithSkippingRules() {
     public void mergeTenElementsIntoTwoWords() {
 
         List<TextElement> elements = new ArrayList<>();
-        elements.add(new TextElement(0f, 0f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "H", 1f, 6f));
-        elements.add(new TextElement(0f, 10f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
-        elements.add(new TextElement(0f, 20f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "L", 1f, 6f));
-        elements.add(new TextElement(0f, 30f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        elements.add(new TextElement(0f, 60f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "M", 1f, 6f));
-        elements.add(new TextElement(0f, 70f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "U", 1f, 6f));
-        elements.add(new TextElement(0f, 80f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "N", 1f, 6f));
-        elements.add(new TextElement(0f, 90f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
-        elements.add(new TextElement(0f, 100f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+        elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+        elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+        elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f));
+        elements.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f));
+        elements.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f));
+        elements.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f));
+        elements.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f));
 
         List<TextChunk> words = TextElement.mergeWords(elements);
 
         List<TextChunk> expectedWords = new ArrayList<>();
-        TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "H", 1f, 6f));
-        textChunk.add(new TextElement(0f, 10f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
-        textChunk.add(new TextElement(0f, 20f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "L", 1f, 6f));
-        textChunk.add(new TextElement(0f, 30f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, " ", 1f)); //Check why width=10.5?
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+        textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+        textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+        textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, font, 1f, " ", 1f)); //Check why width=10.5?
         expectedWords.add(textChunk);
-        TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "M", 1f, 6f));
-        textChunk2.add(new TextElement(0f, 70f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "U", 1f, 6f));
-        textChunk2.add(new TextElement(0f, 80f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "N", 1f, 6f));
-        textChunk2.add(new TextElement(0f, 90f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
-        textChunk2.add(new TextElement(0f, 100f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
+        TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f));
         expectedWords.add(textChunk2);
 
         Assert.assertEquals(2, words.size());

From 6d59cddd5e4523d74aa03739be5992d35372fdd3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 27 Feb 2023 21:12:28 +0000
Subject: [PATCH 07/26] Bump maven-compiler-plugin from 3.8.1 to 3.11.0

Bumps [maven-compiler-plugin](https://github.com/apache/maven-compiler-plugin) from 3.8.1 to 3.11.0.
- [Release notes](https://github.com/apache/maven-compiler-plugin/releases)
- [Commits](https://github.com/apache/maven-compiler-plugin/compare/maven-compiler-plugin-3.8.1...maven-compiler-plugin-3.11.0)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-compiler-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 52943fbf..b3344e12 100644
--- a/pom.xml
+++ b/pom.xml
@@ -147,7 +147,7 @@
             </plugin>
             <plugin>
                 <artifactId>maven-compiler-plugin</artifactId>
-                <version>3.8.1</version>
+                <version>3.11.0</version>
                 <configuration>
                     <source>1.8</source>
                     <target>1.8</target>

From 2bdeb954675cb2ad05431210d3f06db74a490fe9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:29:09 +0000
Subject: [PATCH 08/26] Bump org.apache.maven.plugins:maven-gpg-plugin from 1.6
 to 3.2.4

Bumps [org.apache.maven.plugins:maven-gpg-plugin](https://github.com/apache/maven-gpg-plugin) from 1.6 to 3.2.4.
- [Release notes](https://github.com/apache/maven-gpg-plugin/releases)
- [Commits](https://github.com/apache/maven-gpg-plugin/compare/maven-gpg-plugin-1.6...maven-gpg-plugin-3.2.4)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-gpg-plugin
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pom.xml b/pom.xml
index b3344e12..7f30e7a4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -128,7 +128,7 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-gpg-plugin</artifactId>
-                <version>1.6</version>
+                <version>3.2.4</version>
                 <executions>
                     <execution>
                         <id>sign-artifacts</id>
@@ -225,7 +225,7 @@
                     <plugin>
                         <groupId>org.apache.maven.plugins</groupId>
                         <artifactId>maven-gpg-plugin</artifactId>
-                        <version>1.6</version>
+                        <version>3.2.4</version>
                         <executions>
                             <execution>
                                 <id>sign-artifacts</id>

From c831cf6ac36c5315b96ff6a49212bb67908ce48e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:28:58 +0000
Subject: [PATCH 09/26] Bump commons-cli:commons-cli from 1.4 to 1.8.0

Bumps commons-cli:commons-cli from 1.4 to 1.8.0.

---
updated-dependencies:
- dependency-name: commons-cli:commons-cli
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 7f30e7a4..eb362e89 100644
--- a/pom.xml
+++ b/pom.xml
@@ -288,7 +288,7 @@
         <dependency>
             <groupId>commons-cli</groupId>
             <artifactId>commons-cli</artifactId>
-            <version>1.4</version>
+            <version>1.8.0</version>
         </dependency>
 
         <dependency>

From 9dc64f867a01e69e6e929feaa5a909c02b9bd3e9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:28:56 +0000
Subject: [PATCH 10/26] Bump org.slf4j:slf4j-api from 1.7.35 to 2.0.13

Bumps org.slf4j:slf4j-api from 1.7.35 to 2.0.13.

---
updated-dependencies:
- dependency-name: org.slf4j:slf4j-api
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index eb362e89..adf29ce5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -251,7 +251,7 @@
         <dependency>
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-api</artifactId>
-            <version>1.7.35</version>
+            <version>2.0.13</version>
         </dependency>
 
         <dependency>

From 3f7445380ec4f48dfc545dd6d33e89d4c501af55 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:28:55 +0000
Subject: [PATCH 11/26] Bump org.slf4j:slf4j-simple from 1.7.32 to 2.0.13

Bumps org.slf4j:slf4j-simple from 1.7.32 to 2.0.13.

---
updated-dependencies:
- dependency-name: org.slf4j:slf4j-simple
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index adf29ce5..8e0736c4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -257,7 +257,7 @@
         <dependency>
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-simple</artifactId>
-            <version>1.7.32</version>
+            <version>2.0.13</version>
         </dependency>
 
         <dependency>

From 2ef079f2a14dc6d66c68c5ce8d03853eea7436f4 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 21 Jun 2022 21:32:54 +0000
Subject: [PATCH 12/26] Bump jts-core from 1.18.1 to 1.19.0

Bumps jts-core from 1.18.1 to 1.19.0.

---
updated-dependencies:
- dependency-name: org.locationtech.jts:jts-core
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 8e0736c4..19bf0133 100644
--- a/pom.xml
+++ b/pom.xml
@@ -245,7 +245,7 @@
         <dependency>
             <groupId>org.locationtech.jts</groupId>
             <artifactId>jts-core</artifactId>
-            <version>1.18.1</version>
+            <version>1.19.0</version>
         </dependency>
 
         <dependency>

From c1e4e326eddc1a2dfe59febf24a569d11bde5cfb Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 15 Feb 2023 21:59:29 +0000
Subject: [PATCH 13/26] Bump maven-javadoc-plugin from 3.3.1 to 3.5.0

Bumps [maven-javadoc-plugin](https://github.com/apache/maven-javadoc-plugin) from 3.3.1 to 3.5.0.
- [Release notes](https://github.com/apache/maven-javadoc-plugin/releases)
- [Commits](https://github.com/apache/maven-javadoc-plugin/compare/maven-javadoc-plugin-3.3.1...maven-javadoc-plugin-3.5.0)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-javadoc-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/pom.xml b/pom.xml
index 19bf0133..749fa0b7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -74,7 +74,7 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-javadoc-plugin</artifactId>
-                <version>3.3.1</version>
+                <version>3.5.0</version>
                 <configuration>
                     <skip>true</skip>
                 </configuration>
@@ -110,20 +110,20 @@
                 </executions>
             </plugin>
             <plugin>
-                <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-javadoc-plugin</artifactId>
-                <version>3.3.1</version>
-                <configuration>
-                    <source>8</source>
-                </configuration>
-                <executions>
-                    <execution>
-                        <id>attach-javadocs</id>
-                        <goals>
-                            <goal>jar</goal>
-                        </goals>
-                    </execution>
-                </executions>
+              <groupId>org.apache.maven.plugins</groupId>
+              <artifactId>maven-javadoc-plugin</artifactId>
+              <version>3.5.0</version>
+              <configuration>
+                  <source>8</source>
+              </configuration>
+              <executions>
+                <execution>
+                  <id>attach-javadocs</id>
+                  <goals>
+                    <goal>jar</goal>
+                  </goals>
+                </execution>
+              </executions>
             </plugin>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>

From 5761334b86f58723e761b4941f2950d7b6e53d82 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:29:03 +0000
Subject: [PATCH 14/26] Bump org.sonatype.plugins:nexus-staging-maven-plugin
 from 1.6.8 to 1.7.0

Bumps org.sonatype.plugins:nexus-staging-maven-plugin from 1.6.8 to 1.7.0.

---
updated-dependencies:
- dependency-name: org.sonatype.plugins:nexus-staging-maven-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 749fa0b7..6a66162f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -87,7 +87,7 @@
             <plugin>
                 <groupId>org.sonatype.plugins</groupId>
                 <artifactId>nexus-staging-maven-plugin</artifactId>
-                <version>1.6.8</version>
+                <version>1.7.0</version>
                 <extensions>true</extensions>
                 <configuration>
                     <serverId>ossrh</serverId>

From ab7c4bd54bd20ca03c2bfad400c5cc6c26e34d59 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:28:53 +0000
Subject: [PATCH 15/26] Bump org.apache.maven.plugins:maven-source-plugin from
 3.2.1 to 3.3.1

Bumps [org.apache.maven.plugins:maven-source-plugin](https://github.com/apache/maven-source-plugin) from 3.2.1 to 3.3.1.
- [Release notes](https://github.com/apache/maven-source-plugin/releases)
- [Commits](https://github.com/apache/maven-source-plugin/compare/maven-source-plugin-3.2.1...maven-source-plugin-3.3.1)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-source-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pom.xml b/pom.xml
index 6a66162f..77cda400 100644
--- a/pom.xml
+++ b/pom.xml
@@ -99,7 +99,7 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-source-plugin</artifactId>
-                <version>3.2.1</version>
+                <version>3.3.1</version>
                 <executions>
                     <execution>
                         <id>attach-sources</id>
@@ -212,7 +212,7 @@
                     <plugin>
                         <groupId>org.apache.maven.plugins</groupId>
                         <artifactId>maven-source-plugin</artifactId>
-                        <version>3.2.1</version>
+                        <version>3.3.1</version>
                         <executions>
                             <execution>
                                 <id>attach-sources</id>

From ebe8e30dedfd6f7553046bbe6bbd3640b121d3dd Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 17 Jul 2024 21:36:59 +0000
Subject: [PATCH 16/26] Bump org.apache.commons:commons-csv from 1.9.0 to
 1.11.0

Bumps [org.apache.commons:commons-csv](https://github.com/apache/commons-csv) from 1.9.0 to 1.11.0.
- [Changelog](https://github.com/apache/commons-csv/blob/master/RELEASE-NOTES.txt)
- [Commits](https://github.com/apache/commons-csv/compare/rel/commons-csv-1.9.0...rel/commons-csv-1.11.0)

---
updated-dependencies:
- dependency-name: org.apache.commons:commons-csv
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 77cda400..a45e6089 100644
--- a/pom.xml
+++ b/pom.xml
@@ -294,7 +294,7 @@
         <dependency>
             <groupId>org.apache.commons</groupId>
             <artifactId>commons-csv</artifactId>
-            <version>1.9.0</version>
+            <version>1.11.0</version>
         </dependency>
 
         <dependency>

From db3f6dfd74801c824efd2a25dc26b4a3cb8d7922 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 17 Jul 2024 21:36:45 +0000
Subject: [PATCH 17/26] Bump org.apache.maven.plugins:maven-compiler-plugin
 from 3.11.0 to 3.13.0

Bumps [org.apache.maven.plugins:maven-compiler-plugin](https://github.com/apache/maven-compiler-plugin) from 3.11.0 to 3.13.0.
- [Release notes](https://github.com/apache/maven-compiler-plugin/releases)
- [Commits](https://github.com/apache/maven-compiler-plugin/compare/maven-compiler-plugin-3.11.0...maven-compiler-plugin-3.13.0)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-compiler-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index a45e6089..b73d8b1e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -147,7 +147,7 @@
             </plugin>
             <plugin>
                 <artifactId>maven-compiler-plugin</artifactId>
-                <version>3.11.0</version>
+                <version>3.13.0</version>
                 <configuration>
                     <source>1.8</source>
                     <target>1.8</target>

From fd3a32c579f672ba17c5f1231985e980c4e3ec4e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 17 Jul 2024 21:36:48 +0000
Subject: [PATCH 18/26] Bump com.google.code.gson:gson from 2.9.0 to 2.11.0

Bumps [com.google.code.gson:gson](https://github.com/google/gson) from 2.9.0 to 2.11.0.
- [Release notes](https://github.com/google/gson/releases)
- [Changelog](https://github.com/google/gson/blob/main/CHANGELOG.md)
- [Commits](https://github.com/google/gson/compare/gson-parent-2.9.0...gson-parent-2.11.0)

---
updated-dependencies:
- dependency-name: com.google.code.gson:gson
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index b73d8b1e..394ea68f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -300,7 +300,7 @@
         <dependency>
             <groupId>com.google.code.gson</groupId>
             <artifactId>gson</artifactId>
-            <version>2.9.0</version>
+            <version>2.11.0</version>
         </dependency>
 
         <dependency>

From 097559d0a185ca1dda25d7b7ff103e884848c70c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 17 Jul 2024 21:36:52 +0000
Subject: [PATCH 19/26] Bump org.apache.maven.plugins:maven-javadoc-plugin from
 3.3.1 to 3.7.0

Bumps [org.apache.maven.plugins:maven-javadoc-plugin](https://github.com/apache/maven-javadoc-plugin) from 3.3.1 to 3.7.0.
- [Release notes](https://github.com/apache/maven-javadoc-plugin/releases)
- [Commits](https://github.com/apache/maven-javadoc-plugin/compare/maven-javadoc-plugin-3.3.1...maven-javadoc-plugin-3.7.0)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-javadoc-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pom.xml b/pom.xml
index 394ea68f..a4871012 100644
--- a/pom.xml
+++ b/pom.xml
@@ -74,7 +74,7 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-javadoc-plugin</artifactId>
-                <version>3.5.0</version>
+                <version>3.7.0</version>
                 <configuration>
                     <skip>true</skip>
                 </configuration>
@@ -112,7 +112,7 @@
             <plugin>
               <groupId>org.apache.maven.plugins</groupId>
               <artifactId>maven-javadoc-plugin</artifactId>
-              <version>3.5.0</version>
+              <version>3.7.0</version>
               <configuration>
                   <source>8</source>
               </configuration>
@@ -196,7 +196,7 @@
                     <plugin>
                         <groupId>org.apache.maven.plugins</groupId>
                         <artifactId>maven-javadoc-plugin</artifactId>
-                        <version>3.3.1</version>
+                        <version>3.7.0</version>
                         <configuration>
                             <source>8</source>
                         </configuration>

From bde6d765cfab25d53ff885de33a4556fc41bb9d7 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 17 Jul 2024 21:36:55 +0000
Subject: [PATCH 20/26] Bump org.apache.maven.plugins:maven-surefire-plugin
 from 2.22.2 to 3.3.1

Bumps [org.apache.maven.plugins:maven-surefire-plugin](https://github.com/apache/maven-surefire) from 2.22.2 to 3.3.1.
- [Release notes](https://github.com/apache/maven-surefire/releases)
- [Commits](https://github.com/apache/maven-surefire/compare/surefire-2.22.2...surefire-3.3.1)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-surefire-plugin
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index a4871012..8fd27509 100644
--- a/pom.xml
+++ b/pom.xml
@@ -169,7 +169,7 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-surefire-plugin</artifactId>
-                <version>2.22.2</version>
+                <version>3.3.1</version>
                 <configuration>
                     <!-- Travis build workaround -->
                     <argLine>-Xms1024m -Xmx2048m</argLine>

From 0c73e698b979a74cac0e917718b2c5dfd098dacc Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 22 Jul 2024 21:03:37 +0000
Subject: [PATCH 21/26] Bump org.apache.maven.plugins:maven-javadoc-plugin from
 3.7.0 to 3.8.0

Bumps [org.apache.maven.plugins:maven-javadoc-plugin](https://github.com/apache/maven-javadoc-plugin) from 3.7.0 to 3.8.0.
- [Release notes](https://github.com/apache/maven-javadoc-plugin/releases)
- [Commits](https://github.com/apache/maven-javadoc-plugin/compare/maven-javadoc-plugin-3.7.0...maven-javadoc-plugin-3.8.0)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-javadoc-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pom.xml b/pom.xml
index 8fd27509..d0b40101 100644
--- a/pom.xml
+++ b/pom.xml
@@ -74,7 +74,7 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-javadoc-plugin</artifactId>
-                <version>3.7.0</version>
+                <version>3.8.0</version>
                 <configuration>
                     <skip>true</skip>
                 </configuration>
@@ -112,7 +112,7 @@
             <plugin>
               <groupId>org.apache.maven.plugins</groupId>
               <artifactId>maven-javadoc-plugin</artifactId>
-              <version>3.7.0</version>
+              <version>3.8.0</version>
               <configuration>
                   <source>8</source>
               </configuration>
@@ -196,7 +196,7 @@
                     <plugin>
                         <groupId>org.apache.maven.plugins</groupId>
                         <artifactId>maven-javadoc-plugin</artifactId>
-                        <version>3.7.0</version>
+                        <version>3.8.0</version>
                         <configuration>
                             <source>8</source>
                         </configuration>

From 818c9a2f5a5ea8dc72d3efa775f192381e84b8c1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 9 Aug 2024 21:53:01 +0000
Subject: [PATCH 22/26] Bump org.apache.pdfbox:pdfbox from 3.0.2 to 3.0.3

Bumps org.apache.pdfbox:pdfbox from 3.0.2 to 3.0.3.

---
updated-dependencies:
- dependency-name: org.apache.pdfbox:pdfbox
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index d0b40101..a963e35a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -263,7 +263,7 @@
         <dependency>
             <groupId>org.apache.pdfbox</groupId>
             <artifactId>pdfbox</artifactId>
-            <version>3.0.2</version>
+            <version>3.0.3</version>
         </dependency>
 
         <dependency>

From 5d91f1d733c4895d31854a641c152220f8c5f341 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 30 Aug 2024 21:39:59 +0000
Subject: [PATCH 23/26] Bump org.locationtech.jts:jts-core from 1.19.0 to
 1.20.0

Bumps org.locationtech.jts:jts-core from 1.19.0 to 1.20.0.

---
updated-dependencies:
- dependency-name: org.locationtech.jts:jts-core
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index a963e35a..49057e90 100644
--- a/pom.xml
+++ b/pom.xml
@@ -245,7 +245,7 @@
         <dependency>
             <groupId>org.locationtech.jts</groupId>
             <artifactId>jts-core</artifactId>
-            <version>1.19.0</version>
+            <version>1.20.0</version>
         </dependency>
 
         <dependency>

From 971ae765e84f09ed83f5808b66f764590146e923 Mon Sep 17 00:00:00 2001
From: Kyle Lacy <kylelacy@kyle.space>
Date: Thu, 20 Feb 2025 15:29:09 -0800
Subject: [PATCH 24/26] Upgrade BouncyCastle dependencies

---
 pom.xml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pom.xml b/pom.xml
index 49057e90..8b7b3b2d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -268,14 +268,14 @@
 
         <dependency>
             <groupId>org.bouncycastle</groupId>
-            <artifactId>bcprov-jdk15on</artifactId>
-            <version>1.70</version>
+            <artifactId>bcprov-jdk18on</artifactId>
+            <version>1.80</version>
         </dependency>
 
         <dependency>
             <groupId>org.bouncycastle</groupId>
-            <artifactId>bcmail-jdk15on</artifactId>
-            <version>1.70</version>
+            <artifactId>bcmail-jdk18on</artifactId>
+            <version>1.80</version>
         </dependency>
 
         <dependency>

From 88154e2c15967cc4c2a2606a8da25d47b9b916c3 Mon Sep 17 00:00:00 2001
From: Tilman Hausherr <tilman@snafu.de>
Date: Wed, 19 Mar 2025 15:36:11 +0100
Subject: [PATCH 25/26] Update PDFBox

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 8b7b3b2d..211d0d4d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -263,7 +263,7 @@
         <dependency>
             <groupId>org.apache.pdfbox</groupId>
             <artifactId>pdfbox</artifactId>
-            <version>3.0.3</version>
+            <version>3.0.4</version>
         </dependency>
 
         <dependency>

From 2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f Mon Sep 17 00:00:00 2001
From: Tilman Hausherr <tilman@snafu.de>
Date: Wed, 19 Mar 2025 15:38:16 +0100
Subject: [PATCH 26/26] Adjust test

Test needs to be adjusted because PDFBox now supports the /ActualText feature of PDF.
---
 src/test/java/technology/tabula/TestBasicExtractor.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/test/java/technology/tabula/TestBasicExtractor.java b/src/test/java/technology/tabula/TestBasicExtractor.java
index d120546f..b56fd6ea 100644
--- a/src/test/java/technology/tabula/TestBasicExtractor.java
+++ b/src/test/java/technology/tabula/TestBasicExtractor.java
@@ -203,7 +203,7 @@ public void testCheckSqueezeDoesntBreak() throws IOException {
         List<List<RectangularTextContainer>> rows = table.getRows();
         List<RectangularTextContainer> firstRow = rows.get(0);
         List<RectangularTextContainer> lastRow = rows.get(rows.size() - 1);
-        assertTrue(firstRow.get(0).getText().equals("Violent crime  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  ."));
+        assertTrue(firstRow.get(0).getText().equals("Violent crime. .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  ."));
         assertTrue(lastRow.get(lastRow.size() - 1).getText().equals("(X)"));
         page.getPDDoc().close();
     }