From 84aef7f0c84f4ec0fdb5adee789c93c4a216073c Mon Sep 17 00:00:00 2001
From: Martin Skopp <skopp@riege.com>
Date: Wed, 7 Dec 2022 15:11:10 +0100
Subject: [PATCH 01/30] Add a simple API usage example applying
 SpreadsheetExtractionAlgorithm

---
 README.md | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 8d3c8210..c3a5f92f 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ tabula-java [![Build Status](https://travis-ci.org/tabulapdf/tabula-java.svg?bra
 
 Download a version of the tabula-java's jar, with all dependencies included, that works on Mac, Windows and Linux from our [releases page](../../releases).
 
-## Usage Examples
+## Commandline Usage Examples
 
 `tabula-java` provides a command line application:
 
@@ -81,6 +81,44 @@ JVM start-up time is a lot of the cost of the `tabula` command, so if you're try
  - writing your own program in any JVM language (Java, JRuby, Scala) that imports tabula-java.
  - waiting for us to implement an API/server-style system (it's on the [roadmap](https://github.com/tabulapdf/tabula-api))
 
+## API Usage Examples
+
+A simple Java code example which extracts all rows and cells from all tables of all pages of a PDF document:
+
+        InputStream in = this.getClass().getResourceAsStream("my.pdf");
+        try (PDDocument document = PDDocument.load(in)) {
+            SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
+            PageIterator pi = new ObjectExtractor(document).extract();
+            while (pi.hasNext()) {
+                // iterate over the pages of the document
+                Page page = pi.next();
+                List<Table> table = sea.extract(page);
+                // iterate over the tables of the page
+                for(Table tables: table) {
+                    List<List<RectangularTextContainer>> rows = tables.getRows();
+                    // iterate over the rows of the table
+                    for (List<RectangularTextContainer> cells : rows) {
+                        // print all column-cells of the row plus linefeed
+                        for (RectangularTextContainer content : cells) {
+                            // Note: Cell.getText() uses \r to concat text chunks
+                            String text = content.getText().replace("\r", " ");
+                            System.out.print(text + "|");
+                        }
+                        System.out.println();
+                    }
+                }
+            }
+        }
+
+For more detail information check the Javadoc. 
+The Javadoc API documentation can be generated (see also '_Building from Source_' section) via
+
+```
+mvn javadoc:javadoc
+```
+
+which generates the HTML files to directory ```target/site/apidocs/```
+
 ## Building from Source
 
 Clone this repo and run:

From 3c2af18f7c3daedafb6a4d33ab5f818cdc468d09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20Aristar=C3=A1n?= <jazzido@jazzido.com>
Date: Wed, 7 Dec 2022 12:04:34 -0300
Subject: [PATCH 02/30] Fix Markdown formatting for code example

---
 README.md | 45 ++++++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index c3a5f92f..db7b0023 100644
--- a/README.md
+++ b/README.md
@@ -85,30 +85,33 @@ JVM start-up time is a lot of the cost of the `tabula` command, so if you're try
 
 A simple Java code example which extracts all rows and cells from all tables of all pages of a PDF document:
 
-        InputStream in = this.getClass().getResourceAsStream("my.pdf");
-        try (PDDocument document = PDDocument.load(in)) {
-            SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
-            PageIterator pi = new ObjectExtractor(document).extract();
-            while (pi.hasNext()) {
-                // iterate over the pages of the document
-                Page page = pi.next();
-                List<Table> table = sea.extract(page);
-                // iterate over the tables of the page
-                for(Table tables: table) {
-                    List<List<RectangularTextContainer>> rows = tables.getRows();
-                    // iterate over the rows of the table
-                    for (List<RectangularTextContainer> cells : rows) {
-                        // print all column-cells of the row plus linefeed
-                        for (RectangularTextContainer content : cells) {
-                            // Note: Cell.getText() uses \r to concat text chunks
-                            String text = content.getText().replace("\r", " ");
-                            System.out.print(text + "|");
-                        }
-                        System.out.println();
-                    }
+```java
+InputStream in = this.getClass().getResourceAsStream("my.pdf");
+try (PDDocument document = PDDocument.load(in)) {
+    SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
+    PageIterator pi = new ObjectExtractor(document).extract();
+    while (pi.hasNext()) {
+        // iterate over the pages of the document
+        Page page = pi.next();
+        List<Table> table = sea.extract(page);
+        // iterate over the tables of the page
+        for(Table tables: table) {
+            List<List<RectangularTextContainer>> rows = tables.getRows();
+            // iterate over the rows of the table
+            for (List<RectangularTextContainer> cells : rows) {
+                // print all column-cells of the row plus linefeed
+                for (RectangularTextContainer content : cells) {
+                    // Note: Cell.getText() uses \r to concat text chunks
+                    String text = content.getText().replace("\r", " ");
+                    System.out.print(text + "|");
                 }
+                System.out.println();
             }
         }
+    }
+}
+```
+
 
 For more detail information check the Javadoc. 
 The Javadoc API documentation can be generated (see also '_Building from Source_' section) via

From b0fde49e6aa06593d16c8aa0b8da0e3172db1ec2 Mon Sep 17 00:00:00 2001
From: Aki Ariga <chezou+github@gmail.com>
Date: Mon, 20 Feb 2023 18:17:19 -0800
Subject: [PATCH 03/30] Enforce checkout with LF

Windows CI fails when parsing CSV with line breaks within a cell. This
is due to the difference in line endings (CRLF vs. LF) combined with how
the test CSV parser is implemented.

To mitigate this issue, tweak Windows CI to enforce git checkout with LF.
---
 .github/workflows/tests-windows.yml | 11 ++++++++---
 .github/workflows/tests.yml         |  4 ++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml
index 4ff7f542..5cc1031a 100644
--- a/.github/workflows/tests-windows.yml
+++ b/.github/workflows/tests-windows.yml
@@ -1,4 +1,4 @@
-name: Java CI
+name: Java CI (Windows)
 
 on: [push]
 
@@ -7,9 +7,14 @@ jobs:
     runs-on: windows-latest
 
     steps:
-      - uses: actions/checkout@v2
+      # https://github.com/actions/checkout/issues/135#issuecomment-602171132
+      - name: Set git to use LF
+        run: |
+          git config --global core.autocrlf false
+          git config --global core.eol lf
+      - uses: actions/checkout@v3
       - name: Set up JDK 11
-        uses: actions/setup-java@v2
+        uses: actions/setup-java@v3
         with:
           java-version: '11'
           distribution: 'adopt'
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index b8aa9c14..da2d019b 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -7,9 +7,9 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - name: Set up JDK 11
-        uses: actions/setup-java@v2
+        uses: actions/setup-java@v3
         with:
           java-version: '11'
           distribution: 'adopt'

From 8bfa3ad23af34f757f72fe46584a34abfc022ed3 Mon Sep 17 00:00:00 2001
From: Tilman Hausherr <tilman@snafu.de>
Date: Fri, 14 Apr 2023 17:46:46 +0200
Subject: [PATCH 04/30] update pdfbox to 2.0.28

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index fb1f7e08..27a03e73 100644
--- a/pom.xml
+++ b/pom.xml
@@ -262,7 +262,7 @@
     <dependency>
         <groupId>org.apache.pdfbox</groupId>
         <artifactId>pdfbox</artifactId>
-        <version>2.0.26</version>
+        <version>2.0.28</version>
     </dependency>
 
     <dependency>

From bc60be27d663f40e132a21407ec81790a8aee361 Mon Sep 17 00:00:00 2001
From: young <chen.yang@goupwith.com>
Date: Wed, 6 Mar 2024 14:53:55 +0800
Subject: [PATCH 05/30] update pdfbox to 3.0.1

---
 pom.xml                                       |   2 +-
 .../technology/tabula/CommandLineApp.java     |   3 +-
 .../java/technology/tabula/debug/Debug.java   |   5 +-
 .../detectors/NurminenDetectionAlgorithm.java |   6 +-
 src/test/java/technology/tabula/TestCell.java |   3 +-
 src/test/java/technology/tabula/TestLine.java |  11 +-
 .../tabula/TestObjectExtractor.java           |  21 +-
 .../tabula/TestProjectionProfile.java         |   8 +-
 .../technology/tabula/TestTableDetection.java |   3 +-
 .../technology/tabula/TestTextElement.java    | 393 +++++++++---------
 .../java/technology/tabula/TestUtils.java     |   3 +-
 .../technology/tabula/UtilsForTesting.java    |   7 +-
 12 files changed, 238 insertions(+), 227 deletions(-)

diff --git a/pom.xml b/pom.xml
index 27a03e73..6c71b426 100644
--- a/pom.xml
+++ b/pom.xml
@@ -262,7 +262,7 @@
     <dependency>
         <groupId>org.apache.pdfbox</groupId>
         <artifactId>pdfbox</artifactId>
-        <version>2.0.28</version>
+        <version>3.0.1</version>
     </dependency>
 
     <dependency>
diff --git a/src/main/java/technology/tabula/CommandLineApp.java b/src/main/java/technology/tabula/CommandLineApp.java
index 3a6773a9..1b422303 100644
--- a/src/main/java/technology/tabula/CommandLineApp.java
+++ b/src/main/java/technology/tabula/CommandLineApp.java
@@ -15,6 +15,7 @@
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.cli.DefaultParser;
+import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.pdmodel.PDDocument;
 
 import technology.tabula.detectors.DetectionAlgorithm;
@@ -158,7 +159,7 @@ public void extractFileInto(File pdfFile, File outputFile) throws ParseException
     private void extractFile(File pdfFile, Appendable outFile) throws ParseException {
         PDDocument pdfDocument = null;
         try {
-            pdfDocument = this.password == null ? PDDocument.load(pdfFile) : PDDocument.load(pdfFile, this.password);
+            pdfDocument = this.password == null ?  Loader.loadPDF(pdfFile) :  Loader.loadPDF(pdfFile,password);
             PageIterator pageIterator = getPageIterator(pdfDocument);
             List<Table> tables = new ArrayList<>();
 
diff --git a/src/main/java/technology/tabula/debug/Debug.java b/src/main/java/technology/tabula/debug/Debug.java
index 91609045..d6d257ce 100644
--- a/src/main/java/technology/tabula/debug/Debug.java
+++ b/src/main/java/technology/tabula/debug/Debug.java
@@ -16,6 +16,7 @@
 import java.util.List;
 
 import org.apache.commons.cli.*;
+import org.apache.pdfbox.Loader;
 import technology.tabula.Cell;
 import technology.tabula.CommandLineApp;
 import technology.tabula.Line;
@@ -215,7 +216,7 @@ public static void renderPage(String pdfPath, String outPath, int pageNumber, Re
                                   boolean drawColumns, boolean drawCharacters, boolean drawArea, boolean drawCells,
                                   boolean drawUnprocessedRulings, boolean drawProjectionProfile, boolean drawClippingPaths,
                                   boolean drawDetectedTables) throws IOException {
-        PDDocument document = PDDocument.load(new File(pdfPath));
+        PDDocument document = Loader.loadPDF(new File(pdfPath));
 
         ObjectExtractor oe = new ObjectExtractor(document);
 
@@ -349,7 +350,7 @@ public static void main(String[] args) throws IOException {
 
             if (pages == null) {
                 // user specified all pages
-                PDDocument document = PDDocument.load(pdfFile);
+                PDDocument document =  Loader.loadPDF(pdfFile);
 
                 int numPages = document.getNumberOfPages();
                 pages = new ArrayList<>(numPages);
diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
index fb43622a..9a377854 100644
--- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
+++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
@@ -805,12 +805,12 @@ private PDDocument removeText(PDPage page) throws IOException {
 
         PDFStreamParser parser = new PDFStreamParser(page);
         parser.parse();
-        List<Object> tokens = parser.getTokens();
         List<Object> newTokens = new ArrayList<>();
-        for (Object token : tokens) {
+        while (page.hasContents()) {
+            Object token = parser.parseNextToken();
             if (token instanceof Operator) {
                 Operator op = (Operator) token;
-                if (op.getName().equals("TJ") || op.getName().equals("Tj")) {
+                if ("TJ".equals(op.getName()) || "Tj".equals(op.getName())) {
                     //remove the one argument to this operator
                     newTokens.remove(newTokens.size() - 1);
                     continue;
diff --git a/src/test/java/technology/tabula/TestCell.java b/src/test/java/technology/tabula/TestCell.java
index de1b8cb8..2795565c 100644
--- a/src/test/java/technology/tabula/TestCell.java
+++ b/src/test/java/technology/tabula/TestCell.java
@@ -6,6 +6,7 @@
 import java.util.ArrayList;
 
 import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
 import org.junit.Test;
 
 public class TestCell {
@@ -31,7 +32,7 @@ public void testGetTextElements() {
 		Cell cell = new Cell(0, 0, 0, 0);
 		assertTrue(cell.getTextElements().isEmpty());
 		
-		TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+		TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
 		TextChunk tChunk = new TextChunk(tElement);
 		List<TextChunk> tList = new ArrayList<>();
 		tList.add(tChunk);
diff --git a/src/test/java/technology/tabula/TestLine.java b/src/test/java/technology/tabula/TestLine.java
index 90df0e31..f7a6a88d 100644
--- a/src/test/java/technology/tabula/TestLine.java
+++ b/src/test/java/technology/tabula/TestLine.java
@@ -6,6 +6,7 @@
 import java.util.List;
 
 import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
 import org.junit.Test;
 
 public class TestLine {
@@ -14,7 +15,7 @@ public class TestLine {
 	public void testSetTextElements() {
 		Line line = new Line();
 		
-		TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+		TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
 		TextChunk tChunk = new TextChunk(tElement);
 		List<TextChunk> tList = new ArrayList<>();
 		tList.add(tChunk);
@@ -28,7 +29,7 @@ public void testSetTextElements() {
 	public void testAddTextChunkIntTextChunk() {
 		Line line = new Line();
 		
-		TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+		TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
 		TextChunk tChunk = new TextChunk(tElement);
 		line.addTextChunk(3, tChunk);
 		
@@ -39,7 +40,7 @@ public void testAddTextChunkIntTextChunk() {
 	public void testLessThanAddTextChunkIntTextChunk() {
 		Line line = new Line();
 		
-		TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+		TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
 		TextChunk tChunk = new TextChunk(tElement);
 		line.addTextChunk(0, tChunk);
 		line.addTextChunk(0, tChunk);
@@ -51,7 +52,7 @@ public void testLessThanAddTextChunkIntTextChunk() {
 	public void testErrorAddTextChunkIntTextChunk() {
 		Line line = new Line();
 		
-		TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+		TextElement tElement = new TextElement(0, 0, 0, 0,new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
 		TextChunk tChunk = new TextChunk(tElement);
 		line.addTextChunk(-1, tChunk);
 		}
@@ -60,7 +61,7 @@ public void testErrorAddTextChunkIntTextChunk() {
 	public void testToString() {
 		Line line = new Line();
 		
-		TextElement tElement = new TextElement(0, 0, 0, 0, PDType1Font.HELVETICA_BOLD, 10, "test", 5);
+		TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5);
 		TextChunk tChunk = new TextChunk(tElement);
 		line.addTextChunk(0, tChunk);
 		line.addTextChunk(0, tChunk);
diff --git a/src/test/java/technology/tabula/TestObjectExtractor.java b/src/test/java/technology/tabula/TestObjectExtractor.java
index 9db7ad18..69864c61 100644
--- a/src/test/java/technology/tabula/TestObjectExtractor.java
+++ b/src/test/java/technology/tabula/TestObjectExtractor.java
@@ -7,6 +7,7 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.junit.Test;
 
@@ -21,7 +22,7 @@ public void testWrongPasswordRaisesException() throws IOException {
 
     @Test(expected = IOException.class)
     public void testEmptyOnEncryptedFileRaisesException() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf"));
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           oe.extract().next();
         }
@@ -29,7 +30,7 @@ public void testEmptyOnEncryptedFileRaisesException() throws IOException {
 
     @Test
     public void testCanReadPDFWithOwnerEncryption() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           PageIterator pi = oe.extract();
           int i = 0;
@@ -44,7 +45,7 @@ public void testCanReadPDFWithOwnerEncryption() throws IOException {
 
     @Test
     public void testGoodPassword() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword");
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/encrypted.pdf"), "userpassword");
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           List<Page> pages = new ArrayList<>();
           PageIterator pi = oe.extract();
@@ -58,7 +59,7 @@ public void testGoodPassword() throws IOException {
 
     @Test
     public void testTextExtractionDoesNotRaise() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/rotated_page.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/rotated_page.pdf"));
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           PageIterator pi = oe.extract();
 
@@ -70,7 +71,7 @@ public void testTextExtractionDoesNotRaise() throws IOException {
 
     @Test
     public void testShouldDetectRulings() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/should_detect_rulings.pdf"));
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           PageIterator pi = oe.extract();
 
@@ -85,7 +86,7 @@ public void testShouldDetectRulings() throws IOException {
 
     @Test
     public void testDontThrowNPEInShfill() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/labor.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/labor.pdf"));
 
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           PageIterator pi = oe.extract();
@@ -101,7 +102,7 @@ public void testDontThrowNPEInShfill() throws IOException {
 
     @Test
     public void testExtractOnePage() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
         assertEquals(2, pdf_document.getNumberOfPages());
 
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
@@ -114,7 +115,7 @@ public void testExtractOnePage() throws IOException {
 
     @Test(expected = IndexOutOfBoundsException.class)
     public void testExtractWrongPageNumber() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/S2MNCEbirdisland.pdf"));
         assertEquals(2, pdf_document.getNumberOfPages());
 
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
@@ -124,7 +125,7 @@ public void testExtractWrongPageNumber() throws IOException {
 
     @Test
     public void testTextElementsContainedInPage() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/cs-en-us-pbms.pdf"));
 
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
           Page page = oe.extractPage(1);
@@ -137,7 +138,7 @@ public void testTextElementsContainedInPage() throws IOException {
     }
 
     @Test public void testDoNotNPEInPointComparator() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/npe_issue_206.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/npe_issue_206.pdf"));
 
         try (ObjectExtractor oe = new ObjectExtractor(pdf_document)) {
             Page p = oe.extractPage(1);
diff --git a/src/test/java/technology/tabula/TestProjectionProfile.java b/src/test/java/technology/tabula/TestProjectionProfile.java
index e7af882f..e6d93b39 100644
--- a/src/test/java/technology/tabula/TestProjectionProfile.java
+++ b/src/test/java/technology/tabula/TestProjectionProfile.java
@@ -8,6 +8,7 @@
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -20,9 +21,10 @@ public class TestProjectionProfile {
 	public void setUpProjectionProfile() {
 		PDPage pdPage = new PDPage();
 		PDDocument pdDocument = new PDDocument();
-		
-		TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f);
-		TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "test", 1f);
+
+		PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+		TextElement textElement = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f);
+		TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f);
 		List<TextElement> textList = new ArrayList<>();
 		textList.add(textElement);
 		textList.add(textElement2);
diff --git a/src/test/java/technology/tabula/TestTableDetection.java b/src/test/java/technology/tabula/TestTableDetection.java
index 6e58f6a4..80d21350 100644
--- a/src/test/java/technology/tabula/TestTableDetection.java
+++ b/src/test/java/technology/tabula/TestTableDetection.java
@@ -11,6 +11,7 @@
 import static org.junit.Assert.*;
 
 import com.google.gson.Gson;
+import org.apache.pdfbox.Loader;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -162,7 +163,7 @@ public void testDetectionOfTables() throws Exception {
         NodeList tables = regionDocument.getElementsByTagName("table");
 
         // tabula extractors
-        PDDocument pdfDocument = PDDocument.load(this.pdf);
+        PDDocument pdfDocument = Loader.loadPDF(this.pdf);
         ObjectExtractor extractor = new ObjectExtractor(pdfDocument);
 
         // parse expected tables from the ground truth dataset
diff --git a/src/test/java/technology/tabula/TestTextElement.java b/src/test/java/technology/tabula/TestTextElement.java
index feaaa5e6..3db1ca31 100644
--- a/src/test/java/technology/tabula/TestTextElement.java
+++ b/src/test/java/technology/tabula/TestTextElement.java
@@ -3,205 +3,208 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.pdfbox.pdmodel.font.PDFont;
 import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
 import org.junit.Assert;
 import org.junit.Test;
 
 public class TestTextElement {
-	
-	
-	@Test
-	public void createTextElement() {
-		
-		TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f);
-		
-		Assert.assertNotNull(textElement);
-		Assert.assertEquals("A", textElement.getText());
-		Assert.assertEquals(1f, textElement.getFontSize(), 0);
-		Assert.assertEquals(15f, textElement.getLeft(), 0);
-		Assert.assertEquals(5f, textElement.getTop(), 0);
-		Assert.assertEquals(10f, textElement.getWidth(), 0);
-		Assert.assertEquals(20f, textElement.getHeight(), 0);
-		Assert.assertEquals(PDType1Font.HELVETICA, textElement.getFont());
-		Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
-		Assert.assertEquals(0f, textElement.getDirection(), 0);
-		
-		
-	}
-	
-	@Test
-	public void createTextElementWithDirection() {
-		
-		TextElement textElement = new TextElement(5f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f);
-		
-		Assert.assertNotNull(textElement);
-		Assert.assertEquals("A", textElement.getText());
-		Assert.assertEquals(1f, textElement.getFontSize(), 0);
-		Assert.assertEquals(15f, textElement.getLeft(), 0);
-		Assert.assertEquals(5f, textElement.getTop(), 0);
-		Assert.assertEquals(10f, textElement.getWidth(), 0);
-		Assert.assertEquals(20f, textElement.getHeight(), 0);
-		Assert.assertEquals(PDType1Font.HELVETICA, textElement.getFont());
-		Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
-		Assert.assertEquals(6f, textElement.getDirection(), 0);
-		
-		
-	}
-	
-	@Test
-	public void mergeFourElementsIntoFourWords() {
-		
-		List<TextElement> elements = new ArrayList<>();
-		elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
-		elements.add(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
-		elements.add(new TextElement(60f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		
-		List<TextChunk> words = TextElement.mergeWords(elements);
-		
-		List<TextChunk> expectedWords = new ArrayList<>();
-		expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f)));
-		expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f)));
-		expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f)));
-		expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f)));
-		
-		Assert.assertEquals(expectedWords, words);
-		
-	}
-	
-	@Test
-	public void mergeFourElementsIntoOneWord() {
-		
-		List<TextElement> elements = new ArrayList<>();
-		elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
-		elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
-		elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		
-		List<TextChunk> words = TextElement.mergeWords(elements);
-		
-		List<TextChunk> expectedWords = new ArrayList<>();
-		TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
-		textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
-		textChunk.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		expectedWords.add(textChunk);
-		
-		Assert.assertEquals(expectedWords, words);
-		
-	}
-	
-	@Test
-	public void mergeElementsShouldBeIdempotent() {
-		/*
-	   * a bug in TextElement.merge_words would delete the first TextElement in the array
-	   * it was called with. Discussion here: https://github.com/tabulapdf/tabula-java/issues/78
-	   */
-
-		List<TextElement> elements = new ArrayList<>();
-		elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
-		elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
-		elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		
-		List<TextChunk> words = TextElement.mergeWords(elements);
-		List<TextChunk> words2 = TextElement.mergeWords(elements);
-		Assert.assertEquals(words, words2);
-	}
-
-	@Test
-	public void mergeElementsWithSkippingRules() {
-		
-		List<TextElement> elements = new ArrayList<>();
-		elements.add(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(0f, 17f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
-		elements.add(new TextElement(0.001f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, " ", 1f, 6f));
-		elements.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
-		elements.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.TIMES_ROMAN, 10f, "D", 1f, 6f));
-		
-		List<TextChunk> words = TextElement.mergeWords(elements);
-		
-		List<TextChunk> expectedWords = new ArrayList<>();
-		TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		textChunk.add(new TextElement(0f, 25f, 10f, 20f, PDType1Font.HELVETICA, 1f, "B", 1f, 6f));
-		textChunk.add(new TextElement(0f, 35f, 10f, 20f, PDType1Font.HELVETICA, 1f, "C", 1f, 6f));
-		textChunk.add(new TextElement(0f, 45f, 10f, 20f, PDType1Font.TIMES_ROMAN, 10f, "D", 1f, 6f));
-		expectedWords.add(textChunk);
-		
-		Assert.assertEquals(expectedWords, words);
-		
-	}
-	
-	@Test
-	public void mergeTenElementsIntoTwoWords() {
-		
-		List<TextElement> elements = new ArrayList<>();
-		elements.add(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f));
-		elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f));
-		elements.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(0f, 60f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f));
-		elements.add(new TextElement(0f, 70f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f));
-		elements.add(new TextElement(0f, 80f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f));
-		elements.add(new TextElement(0f, 90f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		elements.add(new TextElement(0f, 100f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		
-		List<TextChunk> words = TextElement.mergeWords(elements);
-		
-		List<TextChunk> expectedWords = new ArrayList<>();
-		TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f));
-		textChunk.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f));
-		textChunk.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, PDType1Font.HELVETICA, 1f, " ", 1f)); //Check why width=10.5?
-		expectedWords.add(textChunk);
-		TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f));
-		textChunk2.add(new TextElement(0f, 70f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f));
-		textChunk2.add(new TextElement(0f, 80f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f));
-		textChunk2.add(new TextElement(0f, 90f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		textChunk2.add(new TextElement(0f, 100f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		expectedWords.add(textChunk2);
-		
-		Assert.assertEquals(2, words.size());
-		Assert.assertEquals(expectedWords, words);
-		
-	}
-	
-	@Test
-	public void mergeTenElementsIntoTwoLines() {
-		
-		List<TextElement> elements = new ArrayList<>();
-		elements.add(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f));
-		elements.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		elements.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f));
-		elements.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		elements.add(new TextElement(20f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f));
-		elements.add(new TextElement(20f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f));
-		elements.add(new TextElement(20f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f));
-		elements.add(new TextElement(20f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		elements.add(new TextElement(20f, 40f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		
-		List<TextChunk> words = TextElement.mergeWords(elements);
-		
-		List<TextChunk> expectedWords = new ArrayList<>();
-		TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "H", 1f, 6f));
-		textChunk.add(new TextElement(0f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		textChunk.add(new TextElement(0f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "L", 1f, 6f));
-		textChunk.add(new TextElement(0f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "A", 1f, 6f));
-		expectedWords.add(textChunk);
-		TextChunk textChunk2 = new TextChunk(new TextElement(20f, 0f, 10f, 20f, PDType1Font.HELVETICA, 1f, "M", 1f, 6f));
-		textChunk2.add(new TextElement(20f, 10f, 10f, 20f, PDType1Font.HELVETICA, 1f, "U", 1f, 6f));
-		textChunk2.add(new TextElement(20f, 20f, 10f, 20f, PDType1Font.HELVETICA, 1f, "N", 1f, 6f));
-		textChunk2.add(new TextElement(20f, 30f, 10f, 20f, PDType1Font.HELVETICA, 1f, "D", 1f, 6f));
-		textChunk2.add(new TextElement(20f, 40f, 10f, 20f, PDType1Font.HELVETICA, 1f, "O", 1f, 6f));
-		expectedWords.add(textChunk2);
-		
-		Assert.assertEquals(2, words.size());
-		Assert.assertEquals(expectedWords, words);
-		
-	}	
-	
-	
+
+
+    @Test
+    public void createTextElement() {
+        // Verifies that the constructor without a direction argument stores its arguments and leaves direction at 0.
+        // One font instance is shared so the font assertion does not rely on PDFont.equals across separate instances.
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        TextElement textElement = new TextElement(5f, 15f, 10f, 20f, font, 1f, "A", 1f);
+        Assert.assertNotNull(textElement);
+        Assert.assertEquals("A", textElement.getText());
+        Assert.assertEquals(1f, textElement.getFontSize(), 0);
+        Assert.assertEquals(15f, textElement.getLeft(), 0);
+        Assert.assertEquals(5f, textElement.getTop(), 0);
+        Assert.assertEquals(10f, textElement.getWidth(), 0);
+        Assert.assertEquals(20f, textElement.getHeight(), 0);
+        Assert.assertEquals(font, textElement.getFont());
+        Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
+        Assert.assertEquals(0f, textElement.getDirection(), 0);
+
+    }
+
+    @Test
+    public void createTextElementWithDirection() {
+        // Verifies that the constructor taking a direction stores every argument, including the direction (6).
+        // One font instance is shared so the font assertion does not rely on PDFont.equals across separate instances.
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        TextElement textElement = new TextElement(5f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f);
+        Assert.assertNotNull(textElement);
+        Assert.assertEquals("A", textElement.getText());
+        Assert.assertEquals(1f, textElement.getFontSize(), 0);
+        Assert.assertEquals(15f, textElement.getLeft(), 0);
+        Assert.assertEquals(5f, textElement.getTop(), 0);
+        Assert.assertEquals(10f, textElement.getWidth(), 0);
+        Assert.assertEquals(20f, textElement.getHeight(), 0);
+        Assert.assertEquals(font, textElement.getFont());
+        Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
+        Assert.assertEquals(6f, textElement.getDirection(), 0);
+
+    }
+
+    @Test
+    public void mergeFourElementsIntoFourWords() {
+        // Four elements at tops 0/20/40/60 must stay four separate chunks; actual and expected share one font instance.
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        List<TextElement> elements = new ArrayList<>();
+        elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        elements.add(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f));
+
+        List<TextChunk> words = TextElement.mergeWords(elements);
+
+        List<TextChunk> expectedWords = new ArrayList<>();
+        expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f)));
+
+        Assert.assertEquals(expectedWords, words);
+    }
+
+    @Test
+    public void mergeFourElementsIntoOneWord() {
+        // Four elements on one row (top=0, lefts 15/25/35/45) must merge into a single chunk; one font instance is shared.
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        List<TextElement> elements = new ArrayList<>();
+        elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
+
+        List<TextChunk> words = TextElement.mergeWords(elements);
+
+        List<TextChunk> expectedWords = new ArrayList<>();
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        textChunk.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
+        expectedWords.add(textChunk);
+
+        Assert.assertEquals(expectedWords, words);
+    }
+
+    @Test
+    public void mergeElementsShouldBeIdempotent() {
+        /*
+         * a bug in TextElement.mergeWords would delete the first TextElement in the list
+         * it was called with. Discussion here: https://github.com/tabulapdf/tabula-java/issues/78
+         */
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        List<TextElement> elements = new ArrayList<>();
+        elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
+
+        List<TextChunk> words = TextElement.mergeWords(elements);
+        List<TextChunk> words2 = TextElement.mergeWords(elements);
+        Assert.assertEquals(words, words2);
+    }
+
+    @Test
+    public void mergeElementsWithSkippingRules() {
+        // The second "A" (left=17) and the " " element are expected to be skipped: the expected chunk holds only A, B, C, D.
+        PDFont helvetica = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        PDFont timesRoman = new PDType1Font(Standard14Fonts.FontName.TIMES_ROMAN);
+        List<TextElement> elements = new ArrayList<>();
+        elements.add(new TextElement(0f, 15f, 10f, 20f, helvetica, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 17f, 10f, 20f, helvetica, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 25f, 10f, 20f, helvetica, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(0.001f, 25f, 10f, 20f, helvetica, 1f, " ", 1f, 6f));
+        elements.add(new TextElement(0f, 35f, 10f, 20f, helvetica, 1f, "C", 1f, 6f));
+        elements.add(new TextElement(0f, 45f, 10f, 20f, timesRoman, 10f, "D", 1f, 6f));
+
+        List<TextChunk> words = TextElement.mergeWords(elements);
+
+        List<TextChunk> expectedWords = new ArrayList<>();
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, helvetica, 1f, "A", 1f, 6f));
+        textChunk.add(new TextElement(0f, 25f, 10f, 20f, helvetica, 1f, "B", 1f, 6f));
+        textChunk.add(new TextElement(0f, 35f, 10f, 20f, helvetica, 1f, "C", 1f, 6f));
+        textChunk.add(new TextElement(0f, 45f, 10f, 20f, timesRoman, 10f, "D", 1f, 6f));
+        expectedWords.add(textChunk);
+
+        Assert.assertEquals(expectedWords, words);
+    }
+
+    @Test
+    public void mergeTenElementsIntoTwoWords() {
+        // Nine glyphs on one row with a gap between "A" (left=30) and "M" (left=60); the first expected chunk ends with a " " element.
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        List<TextElement> elements = new ArrayList<>();
+        elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+        elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+        elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+        elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f));
+        elements.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f));
+        elements.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f));
+        elements.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f));
+        elements.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f));
+
+        List<TextChunk> words = TextElement.mergeWords(elements);
+
+        List<TextChunk> expectedWords = new ArrayList<>();
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+        textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+        textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+        textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, font, 1f, " ", 1f)); //Check why width=10.5?
+        expectedWords.add(textChunk);
+        TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f));
+        expectedWords.add(textChunk2);
+
+        Assert.assertEquals(2, words.size());
+        Assert.assertEquals(expectedWords, words);
+    }
+
+    @Test
+    public void mergeTenElementsIntoTwoLines() {
+        // Nine elements on two rows (first argument 0 vs. 20) are expected to merge into exactly one chunk per row.
+        List<TextElement> elements = new ArrayList<>();
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+        elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+        elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+        elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(20f, 0f, 10f, 20f, font, 1f, "M", 1f, 6f));
+        elements.add(new TextElement(20f, 10f, 10f, 20f, font, 1f, "U", 1f, 6f));
+        elements.add(new TextElement(20f, 20f, 10f, 20f, font, 1f, "N", 1f, 6f));
+        elements.add(new TextElement(20f, 30f, 10f, 20f, font, 1f, "D", 1f, 6f));
+        elements.add(new TextElement(20f, 40f, 10f, 20f, font, 1f, "O", 1f, 6f));
+
+        List<TextChunk> words = TextElement.mergeWords(elements);
+
+        List<TextChunk> expectedWords = new ArrayList<>();
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+        textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+        textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+        textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        expectedWords.add(textChunk);
+        TextChunk textChunk2 = new TextChunk(new TextElement(20f, 0f, 10f, 20f, font, 1f, "M", 1f, 6f));
+        textChunk2.add(new TextElement(20f, 10f, 10f, 20f, font, 1f, "U", 1f, 6f));
+        textChunk2.add(new TextElement(20f, 20f, 10f, 20f, font, 1f, "N", 1f, 6f));
+        textChunk2.add(new TextElement(20f, 30f, 10f, 20f, font, 1f, "D", 1f, 6f));
+        textChunk2.add(new TextElement(20f, 40f, 10f, 20f, font, 1f, "O", 1f, 6f));
+        expectedWords.add(textChunk2);
+
+        Assert.assertEquals(2, words.size());
+        Assert.assertEquals(expectedWords, words);
+
+    }
+
 
 }
diff --git a/src/test/java/technology/tabula/TestUtils.java b/src/test/java/technology/tabula/TestUtils.java
index e68411df..cb85cb7b 100644
--- a/src/test/java/technology/tabula/TestUtils.java
+++ b/src/test/java/technology/tabula/TestUtils.java
@@ -12,6 +12,7 @@
 import java.util.Collections;
 import java.util.List;
 
+import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.rendering.ImageType;
 import org.apache.commons.cli.ParseException;
 import org.apache.pdfbox.pdmodel.PDDocument;
@@ -122,7 +123,7 @@ public void testQuickSortLongList() {
 
     @Test
     public void testJPEG2000DoesNotRaise() throws IOException {
-        PDDocument pdf_document = PDDocument.load(new File("src/test/resources/technology/tabula/jpeg2000.pdf"));
+        PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/jpeg2000.pdf"));
         PDPage page = pdf_document.getPage(0);
         Utils.pageConvertToImage(pdf_document, page, 360, ImageType.RGB);
     }
diff --git a/src/test/java/technology/tabula/UtilsForTesting.java b/src/test/java/technology/tabula/UtilsForTesting.java
index 3ee8efde..8d3c91cf 100644
--- a/src/test/java/technology/tabula/UtilsForTesting.java
+++ b/src/test/java/technology/tabula/UtilsForTesting.java
@@ -7,6 +7,7 @@
 import org.apache.commons.csv.CSVFormat;
 import org.apache.commons.csv.CSVParser;
 import org.apache.commons.csv.CSVPrinter;
+import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.junit.Assert;
 
@@ -23,11 +24,9 @@ public static Page getAreaFromPage(String path, int page, float top, float left,
     public static Page getPage(String path, int pageNumber) throws IOException {
         ObjectExtractor oe = null;
         try {
-            PDDocument document = PDDocument
-                    .load(new File(path));
+            PDDocument document = Loader.loadPDF(new File(path));
             oe = new ObjectExtractor(document);
-            Page page = oe.extract(pageNumber);
-            return page;
+            return oe.extract(pageNumber);
         } finally {
             if (oe != null)
                 oe.close();

From d0241fb5ff9182d7980c3ccd572cc8bb2dba9357 Mon Sep 17 00:00:00 2001
From: young <chen.yang@goupwith.com>
Date: Wed, 6 Mar 2024 14:59:35 +0800
Subject: [PATCH 06/30] Remove unused SpreadsheetExtractionAlgorithm instance in detect()

---
 .../tabula/detectors/SpreadsheetDetectionAlgorithm.java         | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java
index 243cc3bf..43136ba5 100644
--- a/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java
+++ b/src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java
@@ -20,8 +20,6 @@ public class SpreadsheetDetectionAlgorithm implements DetectionAlgorithm {
     public List<Rectangle> detect(Page page) {
         List<Cell> cells = SpreadsheetExtractionAlgorithm.findCells(page.getHorizontalRulings(), page.getVerticalRulings());
 
-        SpreadsheetExtractionAlgorithm sea = new SpreadsheetExtractionAlgorithm();
-
         List<Rectangle> tables = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells);
 
         // we want tables to be returned from top to bottom on the page

From 63de16a4e102b44ea370919625221561dc783e75 Mon Sep 17 00:00:00 2001
From: young <chen.yang@goupwith.com>
Date: Thu, 11 Apr 2024 14:09:16 +0800
Subject: [PATCH 07/30] exclude junit-jupiter from pdfbox

---
 pom.xml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pom.xml b/pom.xml
index 6c71b426..f60528d0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -263,6 +263,12 @@
         <groupId>org.apache.pdfbox</groupId>
         <artifactId>pdfbox</artifactId>
         <version>3.0.1</version>
+        <exclusions>
+            <exclusion>
+                <groupId>org.junit.jupiter</groupId>
+                <artifactId>junit-jupiter</artifactId>
+            </exclusion>
+        </exclusions>
     </dependency>
 
     <dependency>

From e0ee0728ca398023ab67f59626a55525de0355b0 Mon Sep 17 00:00:00 2001
From: young <chen.yang@goupwith.com>
Date: Thu, 11 Apr 2024 14:31:10 +0800
Subject: [PATCH 08/30] update pdfbox to 3.0.2

---
 pom.xml | 341 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 168 insertions(+), 173 deletions(-)

diff --git a/pom.xml b/pom.xml
index f60528d0..52943fbf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,4 +1,5 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
     <modelVersion>4.0.0</modelVersion>
     <groupId>technology.tabula</groupId>
     <artifactId>tabula</artifactId>
@@ -33,16 +34,16 @@
     </developers>
 
     <repositories>
-      <repository>
-        <id>snapshots</id>
-        <url>https://repository.apache.org/content/repositories/snapshots/</url>
-        <releases>
-          <enabled>false</enabled>
-        </releases>
-        <snapshots>
-          <enabled>true</enabled>
-        </snapshots>
-      </repository>
+        <repository>
+            <id>snapshots</id>
+            <url>https://repository.apache.org/content/repositories/snapshots/</url>
+            <releases>
+                <enabled>false</enabled>
+            </releases>
+            <snapshots>
+                <enabled>true</enabled>
+            </snapshots>
+        </repository>
     </repositories>
 
     <scm>
@@ -109,20 +110,20 @@
                 </executions>
             </plugin>
             <plugin>
-              <groupId>org.apache.maven.plugins</groupId>
-              <artifactId>maven-javadoc-plugin</artifactId>
-              <version>3.3.1</version>
-              <configuration>
-                  <source>8</source>
-              </configuration>
-              <executions>
-                <execution>
-                  <id>attach-javadocs</id>
-                  <goals>
-                    <goal>jar</goal>
-                  </goals>
-                </execution>
-              </executions>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-javadoc-plugin</artifactId>
+                <version>3.3.1</version>
+                <configuration>
+                    <source>8</source>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>attach-javadocs</id>
+                        <goals>
+                            <goal>jar</goal>
+                        </goals>
+                    </execution>
+                </executions>
             </plugin>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
@@ -159,172 +160,166 @@
                         <manifest>
                             <mainClass>technology.tabula.CommandLineApp</mainClass>
                         </manifest>
-                </archive>
-                <descriptorRefs>
-                    <descriptorRef>jar-with-dependencies</descriptorRef>
-                </descriptorRefs>
+                    </archive>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
                 </configuration>
-        </plugin>
-        <plugin>
-            <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-surefire-plugin</artifactId>
-            <version>2.22.2</version>
-            <configuration>
-                <!-- Travis build workaround -->
-                <argLine>-Xms1024m -Xmx2048m</argLine>
-            </configuration>
-        </plugin>
-        <!-- download source jars and link them when running eclipse:eclipse -->
-        <plugin>
-            <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-eclipse-plugin</artifactId>
-            <version>2.10</version>
-            <configuration>
-                <downloadSources>true</downloadSources>
-                <downloadJavadocs>true</downloadJavadocs>
-            </configuration>
-        </plugin>
-    </plugins>
-</build>
-
-<profiles>
-    <profile>
-        <id>release</id>
-        <build>
-          <plugins>
+            </plugin>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-javadoc-plugin</artifactId>
-                <version>3.3.1</version>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <version>2.22.2</version>
                 <configuration>
-                    <source>8</source>
+                    <!-- Travis build workaround -->
+                    <argLine>-Xms1024m -Xmx2048m</argLine>
                 </configuration>
-                <executions>
-                    <execution>
-                        <id>attach-javadocs</id>
-                        <goals>
-                            <goal>jar</goal>
-                        </goals>
-                    </execution>
-                </executions>
             </plugin>
-                <plugin>
-                    <groupId>org.apache.maven.plugins</groupId>
-                    <artifactId>maven-source-plugin</artifactId>
-                    <version>3.2.1</version>
-                    <executions>
-                        <execution>
-                            <id>attach-sources</id>
-                            <goals>
-                                <goal>jar-no-fork</goal>
-                            </goals>
-                        </execution>
-                    </executions>
-                </plugin>
-                <plugin>
-                    <groupId>org.apache.maven.plugins</groupId>
-                    <artifactId>maven-gpg-plugin</artifactId>
-                    <version>1.6</version>
-                    <executions>
-                        <execution>
-                            <id>sign-artifacts</id>
-                            <phase>verify</phase>
-                            <goals>
-                                <goal>sign</goal>
-                            </goals>
-                        </execution>
-                    </executions>
-                </plugin>
-            </plugins>
-        </build>
-    </profile>
-</profiles>
+            <!-- download source jars and link them when running eclipse:eclipse -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-eclipse-plugin</artifactId>
+                <version>2.10</version>
+                <configuration>
+                    <downloadSources>true</downloadSources>
+                    <downloadJavadocs>true</downloadJavadocs>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+    <profiles>
+        <profile>
+            <id>release</id>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-javadoc-plugin</artifactId>
+                        <version>3.3.1</version>
+                        <configuration>
+                            <source>8</source>
+                        </configuration>
+                        <executions>
+                            <execution>
+                                <id>attach-javadocs</id>
+                                <goals>
+                                    <goal>jar</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-source-plugin</artifactId>
+                        <version>3.2.1</version>
+                        <executions>
+                            <execution>
+                                <id>attach-sources</id>
+                                <goals>
+                                    <goal>jar-no-fork</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-gpg-plugin</artifactId>
+                        <version>1.6</version>
+                        <executions>
+                            <execution>
+                                <id>sign-artifacts</id>
+                                <phase>verify</phase>
+                                <goals>
+                                    <goal>sign</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
 
-<dependencies>
-    <dependency>
-        <groupId>org.locationtech.jts</groupId>
-        <artifactId>jts-core</artifactId>
-        <version>1.18.1</version>
-    </dependency>
+    <dependencies>
+        <dependency>
+            <groupId>org.locationtech.jts</groupId>
+            <artifactId>jts-core</artifactId>
+            <version>1.18.1</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.slf4j</groupId>
-        <artifactId>slf4j-api</artifactId>
-        <version>1.7.35</version>
-    </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>1.7.35</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.slf4j</groupId>
-        <artifactId>slf4j-simple</artifactId>
-        <version>1.7.32</version>
-    </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-simple</artifactId>
+            <version>1.7.32</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.apache.pdfbox</groupId>
-        <artifactId>pdfbox</artifactId>
-        <version>3.0.1</version>
-        <exclusions>
-            <exclusion>
-                <groupId>org.junit.jupiter</groupId>
-                <artifactId>junit-jupiter</artifactId>
-            </exclusion>
-        </exclusions>
-    </dependency>
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>pdfbox</artifactId>
+            <version>3.0.2</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.bouncycastle</groupId>
-        <artifactId>bcprov-jdk15on</artifactId>
-        <version>1.70</version>
-    </dependency>
+        <dependency>
+            <groupId>org.bouncycastle</groupId>
+            <artifactId>bcprov-jdk15on</artifactId>
+            <version>1.70</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.bouncycastle</groupId>
-        <artifactId>bcmail-jdk15on</artifactId>
-        <version>1.70</version>
-    </dependency>
+        <dependency>
+            <groupId>org.bouncycastle</groupId>
+            <artifactId>bcmail-jdk15on</artifactId>
+            <version>1.70</version>
+        </dependency>
 
-    <dependency>
-        <groupId>junit</groupId>
-        <artifactId>junit</artifactId>
-        <version>4.13.2</version>
-        <scope>test</scope>
-    </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.13.2</version>
+            <scope>test</scope>
+        </dependency>
 
-    <dependency>
-        <groupId>commons-cli</groupId>
-        <artifactId>commons-cli</artifactId>
-        <version>1.4</version>
-    </dependency>
+        <dependency>
+            <groupId>commons-cli</groupId>
+            <artifactId>commons-cli</artifactId>
+            <version>1.4</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.apache.commons</groupId>
-        <artifactId>commons-csv</artifactId>
-        <version>1.9.0</version>
-    </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-csv</artifactId>
+            <version>1.9.0</version>
+        </dependency>
 
-    <dependency>
-        <groupId>com.google.code.gson</groupId>
-        <artifactId>gson</artifactId>
-        <version>2.9.0</version>
-    </dependency>
+        <dependency>
+            <groupId>com.google.code.gson</groupId>
+            <artifactId>gson</artifactId>
+            <version>2.9.0</version>
+        </dependency>
 
-    <dependency>
-        <groupId>com.github.jai-imageio</groupId>
-        <artifactId>jai-imageio-core</artifactId>
-        <version>1.4.0</version>
-    </dependency>
+        <dependency>
+            <groupId>com.github.jai-imageio</groupId>
+            <artifactId>jai-imageio-core</artifactId>
+            <version>1.4.0</version>
+        </dependency>
 
-    <dependency>
-        <groupId>com.github.jai-imageio</groupId>
-        <artifactId>jai-imageio-jpeg2000</artifactId>
-        <version>1.4.0</version>
-    </dependency>
+        <dependency>
+            <groupId>com.github.jai-imageio</groupId>
+            <artifactId>jai-imageio-jpeg2000</artifactId>
+            <version>1.4.0</version>
+        </dependency>
 
-    <dependency>
-        <groupId>org.apache.pdfbox</groupId>
-        <artifactId>jbig2-imageio</artifactId>
-        <version>3.0.4</version>
-    </dependency>
-</dependencies>
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>jbig2-imageio</artifactId>
+            <version>3.0.4</version>
+        </dependency>
+    </dependencies>
 
 </project>

From 20b1053a24402a1e3a587ee90211661027d66484 Mon Sep 17 00:00:00 2001
From: young <chen.yang@goupwith.com>
Date: Mon, 29 Apr 2024 17:40:46 +0800
Subject: [PATCH 09/30] fix: oom for removeText

---
 .../detectors/NurminenDetectionAlgorithm.java | 86 ++++++++++++-------
 .../technology/tabula/TestTableDetection.java | 53 ++++++------
 2 files changed, 79 insertions(+), 60 deletions(-)

diff --git a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
index 9a377854..86639f66 100644
--- a/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
+++ b/src/main/java/technology/tabula/detectors/NurminenDetectionAlgorithm.java
@@ -1,14 +1,8 @@
 package technology.tabula.detectors;
 
-import java.awt.geom.Line2D;
-import java.awt.geom.Point2D;
-import java.awt.image.BufferedImage;
-import java.awt.image.Raster;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.*;
-
+import org.apache.pdfbox.contentstream.PDContentStream;
 import org.apache.pdfbox.contentstream.operator.Operator;
+import org.apache.pdfbox.contentstream.operator.OperatorName;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.pdfparser.PDFStreamParser;
 import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
@@ -16,16 +10,17 @@
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.common.PDStream;
 import org.apache.pdfbox.rendering.ImageType;
-
-import technology.tabula.Line;
-import technology.tabula.Page;
-import technology.tabula.Rectangle;
-import technology.tabula.Ruling;
-import technology.tabula.TextChunk;
-import technology.tabula.TextElement;
-import technology.tabula.Utils;
+import technology.tabula.*;
 import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
 
+import java.awt.geom.Line2D;
+import java.awt.geom.Point2D;
+import java.awt.image.BufferedImage;
+import java.awt.image.Raster;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.*;
+
 /**
  * Created by matt on 2015-12-17.
  * <p>
@@ -799,25 +794,10 @@ private List<Ruling> getVerticalRulings(BufferedImage image) {
         return verticalRulings;
     }
 
-
-    // taken from http://www.docjar.com/html/api/org/apache/pdfbox/examples/util/RemoveAllText.java.html
     private PDDocument removeText(PDPage page) throws IOException {
 
         PDFStreamParser parser = new PDFStreamParser(page);
         parser.parse();
-        List<Object> newTokens = new ArrayList<>();
-        while (page.hasContents()) {
-            Object token = parser.parseNextToken();
-            if (token instanceof Operator) {
-                Operator op = (Operator) token;
-                if ("TJ".equals(op.getName()) || "Tj".equals(op.getName())) {
-                    //remove the one argument to this operator
-                    newTokens.remove(newTokens.size() - 1);
-                    continue;
-                }
-            }
-            newTokens.add(token);
-        }
 
         PDDocument document = new PDDocument();
         PDPage newPage = document.importPage(page);
@@ -826,9 +806,51 @@ private PDDocument removeText(PDPage page) throws IOException {
         PDStream newContents = new PDStream(document);
         OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE);
         ContentStreamWriter writer = new ContentStreamWriter(out);
-        writer.writeTokens(newTokens);
+        List<Object> tokensWithoutText = createTokensWithoutText(page);
+        writer.writeTokens(tokensWithoutText);
         out.close();
         newPage.setContents(newContents);
         return document;
     }
+
+
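+    /**
+     * @param contentStream content stream whose tokens are parsed and filtered
+     * @return the parsed tokens, without the text-showing operators and their operands
+     * @throws IOException if the parser fails while reading the next token
+     * @see <a href="https://github.com/apache/pdfbox/blob/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java#L127">PDFBox RemoveAllText example</a>
+     */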
+    private static List<Object> createTokensWithoutText(PDContentStream contentStream) throws IOException {
+        PDFStreamParser parser = new PDFStreamParser(contentStream);
+        Object token = parser.parseNextToken();
+        List<Object> newTokens = new ArrayList<>();
+        while (token != null) {
+            if (token instanceof Operator) {
+                Operator op = (Operator) token;
+                String opName = op.getName();
+                if (OperatorName.SHOW_TEXT_ADJUSTED.equals(opName)
+                        || OperatorName.SHOW_TEXT.equals(opName)
+                        || OperatorName.SHOW_TEXT_LINE.equals(opName)) {
+                    // remove the argument to this operator
+                    newTokens.remove(newTokens.size() - 1);
+
+                    token = parser.parseNextToken();
+                    continue;
+                } else if (OperatorName.SHOW_TEXT_LINE_AND_SPACE.equals(opName)) {
+                    // remove the 3 arguments to this operator
+                    newTokens.remove(newTokens.size() - 1);
+                    newTokens.remove(newTokens.size() - 1);
+                    newTokens.remove(newTokens.size() - 1);
+
+                    token = parser.parseNextToken();
+                    continue;
+                }
+            }
+            newTokens.add(token);
+            token = parser.parseNextToken();
+        }
+        return newTokens;
+    }
+
+
 }
diff --git a/src/test/java/technology/tabula/TestTableDetection.java b/src/test/java/technology/tabula/TestTableDetection.java
index 80d21350..c13ff201 100644
--- a/src/test/java/technology/tabula/TestTableDetection.java
+++ b/src/test/java/technology/tabula/TestTableDetection.java
@@ -1,29 +1,29 @@
 package technology.tabula;
 
-import java.io.File;
-import java.io.FileWriter;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.util.*;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import static org.junit.Assert.*;
-
 import com.google.gson.Gson;
 import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
-import org.w3c.dom.*;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import technology.tabula.detectors.NurminenDetectionAlgorithm;
 
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.*;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 
-import org.apache.pdfbox.pdmodel.PDDocument;
-import technology.tabula.detectors.NurminenDetectionAlgorithm;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 
 /**
  * Created by matt on 2015-12-14.
@@ -111,15 +111,10 @@ public static Collection<Object[]> data() {
             String directoryName = "src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-" + regionCode + "/";
             File dir = new File(directoryName);
 
-            File[] pdfs = dir.listFiles(new FilenameFilter() {
-                @Override
-                public boolean accept(File dir, String name) {
-                    return name.toLowerCase().endsWith(".pdf");
-                }
-            });
+            File[] pdfs = dir.listFiles((dir1, name) -> name.toLowerCase().endsWith(".pdf"));
 
             for (File pdf : pdfs) {
-                data.add(new Object[] {pdf});
+                data.add(new Object[]{pdf});
             }
         }
 
@@ -163,6 +158,8 @@ public void testDetectionOfTables() throws Exception {
         NodeList tables = regionDocument.getElementsByTagName("table");
 
         // tabula extractors
+
+
         PDDocument pdfDocument = Loader.loadPDF(this.pdf);
         ObjectExtractor extractor = new ObjectExtractor(pdfDocument);
 
@@ -171,7 +168,7 @@ public void testDetectionOfTables() throws Exception {
 
         int numExpectedTables = 0;
 
-        for (int i=0; i<tables.getLength(); i++) {
+        for (int i = 0; i < tables.getLength(); i++) {
 
             Element table = (Element) tables.item(i);
             Element region = (Element) table.getElementsByTagName("region").item(0);
@@ -195,7 +192,7 @@ public void testDetectionOfTables() throws Exception {
             // do some extra work to extract the page with tabula and get the dimensions from there
             Page extractedPage = extractor.extractPage(page);
 
-            float top = (float)extractedPage.getHeight() - y2;
+            float top = (float) extractedPage.getHeight() - y2;
             float left = x1;
             float width = x2 - x1;
             float height = y2 - y1;
@@ -214,8 +211,8 @@ public void testDetectionOfTables() throws Exception {
         while (pages.hasNext()) {
             Page page = pages.next();
             List<Rectangle> tablesOnPage = detectionAlgorithm.detect(page);
-            if (tablesOnPage.size() > 0) {
-                detectedTables.put(new Integer(page.getPageNumber()), tablesOnPage);
+            if (!tablesOnPage.isEmpty()) {
+                detectedTables.put(page.getPageNumber(), tablesOnPage);
             }
         }
 
@@ -267,7 +264,7 @@ public void testDetectionOfTables() throws Exception {
         System.out.println(totalErroneouslyDetectedTables + " tables incorrectly detected");
 
 
-        if(this.status.isFirstRun()) {
+        if (this.status.isFirstRun()) {
             // make the baseline
             this.status.expectedFailure = failed;
             this.status.numCorrectlyDetectedTables = this.numCorrectlyDetectedTables;
@@ -293,14 +290,14 @@ private List<String> comparePages(Integer page, List<Rectangle> detected, List<R
         // from http://www.orsigiorgio.net/wp-content/papercite-data/pdf/gho*12.pdf (comparing regions):
         // for other (e.g.“black-box”) algorithms, bounding boxes and content are used. A region is correct if it
         // contains the minimal bounding box of the ground truth without intersecting additional content.
-        for (Iterator<Rectangle> detectedIterator = detected.iterator(); detectedIterator.hasNext();) {
+        for (Iterator<Rectangle> detectedIterator = detected.iterator(); detectedIterator.hasNext(); ) {
             Rectangle detectedTable = detectedIterator.next();
 
-            for (int i=0; i<expected.size(); i++) {
+            for (int i = 0; i < expected.size(); i++) {
                 if (detectedTable.contains(expected.get(i))) {
                     // we have a candidate for the detected table, make sure it doesn't intersect any others
                     boolean intersectsOthers = false;
-                    for (int j=0; j<expected.size(); j++) {
+                    for (int j = 0; j < expected.size(); j++) {
                         if (i == j) continue;
                         if (detectedTable.intersects(expected.get(j))) {
                             intersectsOthers = true;

From a01730669761b8a62444abc066c4f5b7fb8c5e1c Mon Sep 17 00:00:00 2001
From: young <chen.yang@goupwith.com>
Date: Mon, 29 Apr 2024 17:51:40 +0800
Subject: [PATCH 10/30] fix: unit test

---
 .../technology/tabula/TestTextElement.java    | 109 +++++++++---------
 1 file changed, 57 insertions(+), 52 deletions(-)

diff --git a/src/test/java/technology/tabula/TestTextElement.java b/src/test/java/technology/tabula/TestTextElement.java
index 3db1ca31..ee0fbf3d 100644
--- a/src/test/java/technology/tabula/TestTextElement.java
+++ b/src/test/java/technology/tabula/TestTextElement.java
@@ -1,14 +1,14 @@
 package technology.tabula;
 
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.pdfbox.pdmodel.font.PDFont;
 import org.apache.pdfbox.pdmodel.font.PDType1Font;
 import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
 import org.junit.Assert;
 import org.junit.Test;
 
+import java.util.ArrayList;
+import java.util.List;
+
 public class TestTextElement {
 
 
@@ -24,7 +24,7 @@ public void createTextElement() {
         Assert.assertEquals(5f, textElement.getTop(), 0);
         Assert.assertEquals(10f, textElement.getWidth(), 0);
         Assert.assertEquals(20f, textElement.getHeight(), 0);
-        Assert.assertEquals(new PDType1Font(Standard14Fonts.FontName.HELVETICA), textElement.getFont());
+        Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName());
         Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
         Assert.assertEquals(0f, textElement.getDirection(), 0);
 
@@ -43,7 +43,7 @@ public void createTextElementWithDirection() {
         Assert.assertEquals(5f, textElement.getTop(), 0);
         Assert.assertEquals(10f, textElement.getWidth(), 0);
         Assert.assertEquals(20f, textElement.getHeight(), 0);
-        Assert.assertEquals(new PDType1Font(Standard14Fonts.FontName.HELVETICA), textElement.getFont());
+        Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName());
         Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
         Assert.assertEquals(6f, textElement.getDirection(), 0);
 
@@ -54,18 +54,19 @@ public void createTextElementWithDirection() {
     public void mergeFourElementsIntoFourWords() {
 
         List<TextElement> elements = new ArrayList<>();
-        elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        elements.add(new TextElement(20f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
-        elements.add(new TextElement(40f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
-        elements.add(new TextElement(60f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        elements.add(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f));
 
         List<TextChunk> words = TextElement.mergeWords(elements);
 
         List<TextChunk> expectedWords = new ArrayList<>();
-        expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)));
-        expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)));
-        expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)));
-        expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f)));
+        expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f)));
 
         Assert.assertEquals(expectedWords, words);
 
@@ -75,18 +76,19 @@ public void mergeFourElementsIntoFourWords() {
     public void mergeFourElementsIntoOneWord() {
 
         List<TextElement> elements = new ArrayList<>();
-        elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
-        elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
-        elements.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
 
         List<TextChunk> words = TextElement.mergeWords(elements);
 
         List<TextChunk> expectedWords = new ArrayList<>();
-        TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        textChunk.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
-        textChunk.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
-        textChunk.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        textChunk.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
         expectedWords.add(textChunk);
 
         Assert.assertEquals(expectedWords, words);
@@ -101,10 +103,11 @@ public void mergeElementsShouldBeIdempotent() {
          */
 
         List<TextElement> elements = new ArrayList<>();
-        elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
-        elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
-        elements.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
+        elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
 
         List<TextChunk> words = TextElement.mergeWords(elements);
         List<TextChunk> words2 = TextElement.mergeWords(elements);
@@ -115,20 +118,21 @@ public void mergeElementsShouldBeIdempotent() {
     public void mergeElementsWithSkippingRules() {
 
         List<TextElement> elements = new ArrayList<>();
-        elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        elements.add(new TextElement(0f, 17f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
-        elements.add(new TextElement(0.001f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, " ", 1f, 6f));
-        elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 17f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        elements.add(new TextElement(0.001f, 25f, 10f, 20f, font, 1f, " ", 1f, 6f));
+        elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
         PDFont TIMES_ROMAN = new PDType1Font(Standard14Fonts.FontName.TIMES_ROMAN);
         elements.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f));
 
         List<TextChunk> words = TextElement.mergeWords(elements);
 
         List<TextChunk> expectedWords = new ArrayList<>();
-        TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        textChunk.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
-        textChunk.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
+        textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
         textChunk.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f));
         expectedWords.add(textChunk);
 
@@ -140,30 +144,31 @@ public void mergeElementsWithSkippingRules() {
     public void mergeTenElementsIntoTwoWords() {
 
         List<TextElement> elements = new ArrayList<>();
-        elements.add(new TextElement(0f, 0f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "H", 1f, 6f));
-        elements.add(new TextElement(0f, 10f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
-        elements.add(new TextElement(0f, 20f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "L", 1f, 6f));
-        elements.add(new TextElement(0f, 30f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        elements.add(new TextElement(0f, 60f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "M", 1f, 6f));
-        elements.add(new TextElement(0f, 70f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "U", 1f, 6f));
-        elements.add(new TextElement(0f, 80f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "N", 1f, 6f));
-        elements.add(new TextElement(0f, 90f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
-        elements.add(new TextElement(0f, 100f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
+        PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
+        elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+        elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+        elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+        elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        elements.add(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f));
+        elements.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f));
+        elements.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f));
+        elements.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f));
+        elements.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f));
 
         List<TextChunk> words = TextElement.mergeWords(elements);
 
         List<TextChunk> expectedWords = new ArrayList<>();
-        TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "H", 1f, 6f));
-        textChunk.add(new TextElement(0f, 10f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
-        textChunk.add(new TextElement(0f, 20f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "L", 1f, 6f));
-        textChunk.add(new TextElement(0f, 30f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
-        textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, " ", 1f)); //Check why width=10.5?
+        TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
+        textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
+        textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
+        textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
+        textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, font, 1f, " ", 1f)); //Check why width=10.5?
         expectedWords.add(textChunk);
-        TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "M", 1f, 6f));
-        textChunk2.add(new TextElement(0f, 70f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "U", 1f, 6f));
-        textChunk2.add(new TextElement(0f, 80f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "N", 1f, 6f));
-        textChunk2.add(new TextElement(0f, 90f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
-        textChunk2.add(new TextElement(0f, 100f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
+        TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f));
+        textChunk2.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f));
         expectedWords.add(textChunk2);
 
         Assert.assertEquals(2, words.size());

From 6d59cddd5e4523d74aa03739be5992d35372fdd3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 27 Feb 2023 21:12:28 +0000
Subject: [PATCH 11/30] Bump maven-compiler-plugin from 3.8.1 to 3.11.0

Bumps [maven-compiler-plugin](https://github.com/apache/maven-compiler-plugin) from 3.8.1 to 3.11.0.
- [Release notes](https://github.com/apache/maven-compiler-plugin/releases)
- [Commits](https://github.com/apache/maven-compiler-plugin/compare/maven-compiler-plugin-3.8.1...maven-compiler-plugin-3.11.0)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-compiler-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 52943fbf..b3344e12 100644
--- a/pom.xml
+++ b/pom.xml
@@ -147,7 +147,7 @@
             </plugin>
             <plugin>
                 <artifactId>maven-compiler-plugin</artifactId>
-                <version>3.8.1</version>
+                <version>3.11.0</version>
                 <configuration>
                     <source>1.8</source>
                     <target>1.8</target>

From 2bdeb954675cb2ad05431210d3f06db74a490fe9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:29:09 +0000
Subject: [PATCH 12/30] Bump org.apache.maven.plugins:maven-gpg-plugin from 1.6
 to 3.2.4

Bumps [org.apache.maven.plugins:maven-gpg-plugin](https://github.com/apache/maven-gpg-plugin) from 1.6 to 3.2.4.
- [Release notes](https://github.com/apache/maven-gpg-plugin/releases)
- [Commits](https://github.com/apache/maven-gpg-plugin/compare/maven-gpg-plugin-1.6...maven-gpg-plugin-3.2.4)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-gpg-plugin
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pom.xml b/pom.xml
index b3344e12..7f30e7a4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -128,7 +128,7 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-gpg-plugin</artifactId>
-                <version>1.6</version>
+                <version>3.2.4</version>
                 <executions>
                     <execution>
                         <id>sign-artifacts</id>
@@ -225,7 +225,7 @@
                     <plugin>
                         <groupId>org.apache.maven.plugins</groupId>
                         <artifactId>maven-gpg-plugin</artifactId>
-                        <version>1.6</version>
+                        <version>3.2.4</version>
                         <executions>
                             <execution>
                                 <id>sign-artifacts</id>

From c831cf6ac36c5315b96ff6a49212bb67908ce48e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:28:58 +0000
Subject: [PATCH 13/30] Bump commons-cli:commons-cli from 1.4 to 1.8.0

Bumps commons-cli:commons-cli from 1.4 to 1.8.0.

---
updated-dependencies:
- dependency-name: commons-cli:commons-cli
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 7f30e7a4..eb362e89 100644
--- a/pom.xml
+++ b/pom.xml
@@ -288,7 +288,7 @@
         <dependency>
             <groupId>commons-cli</groupId>
             <artifactId>commons-cli</artifactId>
-            <version>1.4</version>
+            <version>1.8.0</version>
         </dependency>
 
         <dependency>

From 9dc64f867a01e69e6e929feaa5a909c02b9bd3e9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:28:56 +0000
Subject: [PATCH 14/30] Bump org.slf4j:slf4j-api from 1.7.35 to 2.0.13

Bumps org.slf4j:slf4j-api from 1.7.35 to 2.0.13.

---
updated-dependencies:
- dependency-name: org.slf4j:slf4j-api
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index eb362e89..adf29ce5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -251,7 +251,7 @@
         <dependency>
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-api</artifactId>
-            <version>1.7.35</version>
+            <version>2.0.13</version>
         </dependency>
 
         <dependency>

From 3f7445380ec4f48dfc545dd6d33e89d4c501af55 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:28:55 +0000
Subject: [PATCH 15/30] Bump org.slf4j:slf4j-simple from 1.7.32 to 2.0.13

Bumps org.slf4j:slf4j-simple from 1.7.32 to 2.0.13.

---
updated-dependencies:
- dependency-name: org.slf4j:slf4j-simple
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index adf29ce5..8e0736c4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -257,7 +257,7 @@
         <dependency>
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-simple</artifactId>
-            <version>1.7.32</version>
+            <version>2.0.13</version>
         </dependency>
 
         <dependency>

From 2ef079f2a14dc6d66c68c5ce8d03853eea7436f4 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 21 Jun 2022 21:32:54 +0000
Subject: [PATCH 16/30] Bump jts-core from 1.18.1 to 1.19.0

Bumps jts-core from 1.18.1 to 1.19.0.

---
updated-dependencies:
- dependency-name: org.locationtech.jts:jts-core
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 8e0736c4..19bf0133 100644
--- a/pom.xml
+++ b/pom.xml
@@ -245,7 +245,7 @@
         <dependency>
             <groupId>org.locationtech.jts</groupId>
             <artifactId>jts-core</artifactId>
-            <version>1.18.1</version>
+            <version>1.19.0</version>
         </dependency>
 
         <dependency>

From c1e4e326eddc1a2dfe59febf24a569d11bde5cfb Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 15 Feb 2023 21:59:29 +0000
Subject: [PATCH 17/30] Bump maven-javadoc-plugin from 3.3.1 to 3.5.0

Bumps [maven-javadoc-plugin](https://github.com/apache/maven-javadoc-plugin) from 3.3.1 to 3.5.0.
- [Release notes](https://github.com/apache/maven-javadoc-plugin/releases)
- [Commits](https://github.com/apache/maven-javadoc-plugin/compare/maven-javadoc-plugin-3.3.1...maven-javadoc-plugin-3.5.0)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-javadoc-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/pom.xml b/pom.xml
index 19bf0133..749fa0b7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -74,7 +74,7 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-javadoc-plugin</artifactId>
-                <version>3.3.1</version>
+                <version>3.5.0</version>
                 <configuration>
                     <skip>true</skip>
                 </configuration>
@@ -110,20 +110,20 @@
                 </executions>
             </plugin>
             <plugin>
-                <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-javadoc-plugin</artifactId>
-                <version>3.3.1</version>
-                <configuration>
-                    <source>8</source>
-                </configuration>
-                <executions>
-                    <execution>
-                        <id>attach-javadocs</id>
-                        <goals>
-                            <goal>jar</goal>
-                        </goals>
-                    </execution>
-                </executions>
+              <groupId>org.apache.maven.plugins</groupId>
+              <artifactId>maven-javadoc-plugin</artifactId>
+              <version>3.5.0</version>
+              <configuration>
+                  <source>8</source>
+              </configuration>
+              <executions>
+                <execution>
+                  <id>attach-javadocs</id>
+                  <goals>
+                    <goal>jar</goal>
+                  </goals>
+                </execution>
+              </executions>
             </plugin>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>

From 5761334b86f58723e761b4941f2950d7b6e53d82 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:29:03 +0000
Subject: [PATCH 18/30] Bump org.sonatype.plugins:nexus-staging-maven-plugin
 from 1.6.8 to 1.7.0

Bumps org.sonatype.plugins:nexus-staging-maven-plugin from 1.6.8 to 1.7.0.

---
updated-dependencies:
- dependency-name: org.sonatype.plugins:nexus-staging-maven-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 749fa0b7..6a66162f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -87,7 +87,7 @@
             <plugin>
                 <groupId>org.sonatype.plugins</groupId>
                 <artifactId>nexus-staging-maven-plugin</artifactId>
-                <version>1.6.8</version>
+                <version>1.7.0</version>
                 <extensions>true</extensions>
                 <configuration>
                     <serverId>ossrh</serverId>

From ab7c4bd54bd20ca03c2bfad400c5cc6c26e34d59 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:28:53 +0000
Subject: [PATCH 19/30] Bump org.apache.maven.plugins:maven-source-plugin from
 3.2.1 to 3.3.1

Bumps [org.apache.maven.plugins:maven-source-plugin](https://github.com/apache/maven-source-plugin) from 3.2.1 to 3.3.1.
- [Release notes](https://github.com/apache/maven-source-plugin/releases)
- [Commits](https://github.com/apache/maven-source-plugin/compare/maven-source-plugin-3.2.1...maven-source-plugin-3.3.1)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-source-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pom.xml b/pom.xml
index 6a66162f..77cda400 100644
--- a/pom.xml
+++ b/pom.xml
@@ -99,7 +99,7 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-source-plugin</artifactId>
-                <version>3.2.1</version>
+                <version>3.3.1</version>
                 <executions>
                     <execution>
                         <id>attach-sources</id>
@@ -212,7 +212,7 @@
                     <plugin>
                         <groupId>org.apache.maven.plugins</groupId>
                         <artifactId>maven-source-plugin</artifactId>
-                        <version>3.2.1</version>
+                        <version>3.3.1</version>
                         <executions>
                             <execution>
                                 <id>attach-sources</id>

From ebe8e30dedfd6f7553046bbe6bbd3640b121d3dd Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 17 Jul 2024 21:36:59 +0000
Subject: [PATCH 20/30] Bump org.apache.commons:commons-csv from 1.9.0 to
 1.11.0

Bumps [org.apache.commons:commons-csv](https://github.com/apache/commons-csv) from 1.9.0 to 1.11.0.
- [Changelog](https://github.com/apache/commons-csv/blob/master/RELEASE-NOTES.txt)
- [Commits](https://github.com/apache/commons-csv/compare/rel/commons-csv-1.9.0...rel/commons-csv-1.11.0)

---
updated-dependencies:
- dependency-name: org.apache.commons:commons-csv
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 77cda400..a45e6089 100644
--- a/pom.xml
+++ b/pom.xml
@@ -294,7 +294,7 @@
         <dependency>
             <groupId>org.apache.commons</groupId>
             <artifactId>commons-csv</artifactId>
-            <version>1.9.0</version>
+            <version>1.11.0</version>
         </dependency>
 
         <dependency>

From db3f6dfd74801c824efd2a25dc26b4a3cb8d7922 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 17 Jul 2024 21:36:45 +0000
Subject: [PATCH 21/30] Bump org.apache.maven.plugins:maven-compiler-plugin
 from 3.11.0 to 3.13.0

Bumps [org.apache.maven.plugins:maven-compiler-plugin](https://github.com/apache/maven-compiler-plugin) from 3.11.0 to 3.13.0.
- [Release notes](https://github.com/apache/maven-compiler-plugin/releases)
- [Commits](https://github.com/apache/maven-compiler-plugin/compare/maven-compiler-plugin-3.11.0...maven-compiler-plugin-3.13.0)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-compiler-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index a45e6089..b73d8b1e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -147,7 +147,7 @@
             </plugin>
             <plugin>
                 <artifactId>maven-compiler-plugin</artifactId>
-                <version>3.11.0</version>
+                <version>3.13.0</version>
                 <configuration>
                     <source>1.8</source>
                     <target>1.8</target>

From fd3a32c579f672ba17c5f1231985e980c4e3ec4e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 17 Jul 2024 21:36:48 +0000
Subject: [PATCH 22/30] Bump com.google.code.gson:gson from 2.9.0 to 2.11.0

Bumps [com.google.code.gson:gson](https://github.com/google/gson) from 2.9.0 to 2.11.0.
- [Release notes](https://github.com/google/gson/releases)
- [Changelog](https://github.com/google/gson/blob/main/CHANGELOG.md)
- [Commits](https://github.com/google/gson/compare/gson-parent-2.9.0...gson-parent-2.11.0)

---
updated-dependencies:
- dependency-name: com.google.code.gson:gson
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index b73d8b1e..394ea68f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -300,7 +300,7 @@
         <dependency>
             <groupId>com.google.code.gson</groupId>
             <artifactId>gson</artifactId>
-            <version>2.9.0</version>
+            <version>2.11.0</version>
         </dependency>
 
         <dependency>

From 097559d0a185ca1dda25d7b7ff103e884848c70c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 17 Jul 2024 21:36:52 +0000
Subject: [PATCH 23/30] Bump org.apache.maven.plugins:maven-javadoc-plugin from
 3.3.1 to 3.7.0

Bumps [org.apache.maven.plugins:maven-javadoc-plugin](https://github.com/apache/maven-javadoc-plugin) from 3.3.1 to 3.7.0.
- [Release notes](https://github.com/apache/maven-javadoc-plugin/releases)
- [Commits](https://github.com/apache/maven-javadoc-plugin/compare/maven-javadoc-plugin-3.3.1...maven-javadoc-plugin-3.7.0)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-javadoc-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pom.xml b/pom.xml
index 394ea68f..a4871012 100644
--- a/pom.xml
+++ b/pom.xml
@@ -74,7 +74,7 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-javadoc-plugin</artifactId>
-                <version>3.5.0</version>
+                <version>3.7.0</version>
                 <configuration>
                     <skip>true</skip>
                 </configuration>
@@ -112,7 +112,7 @@
             <plugin>
               <groupId>org.apache.maven.plugins</groupId>
               <artifactId>maven-javadoc-plugin</artifactId>
-              <version>3.5.0</version>
+              <version>3.7.0</version>
               <configuration>
                   <source>8</source>
               </configuration>
@@ -196,7 +196,7 @@
                     <plugin>
                         <groupId>org.apache.maven.plugins</groupId>
                         <artifactId>maven-javadoc-plugin</artifactId>
-                        <version>3.3.1</version>
+                        <version>3.7.0</version>
                         <configuration>
                             <source>8</source>
                         </configuration>

From bde6d765cfab25d53ff885de33a4556fc41bb9d7 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 17 Jul 2024 21:36:55 +0000
Subject: [PATCH 24/30] Bump org.apache.maven.plugins:maven-surefire-plugin
 from 2.22.2 to 3.3.1

Bumps [org.apache.maven.plugins:maven-surefire-plugin](https://github.com/apache/maven-surefire) from 2.22.2 to 3.3.1.
- [Release notes](https://github.com/apache/maven-surefire/releases)
- [Commits](https://github.com/apache/maven-surefire/compare/surefire-2.22.2...surefire-3.3.1)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-surefire-plugin
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index a4871012..8fd27509 100644
--- a/pom.xml
+++ b/pom.xml
@@ -169,7 +169,7 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-surefire-plugin</artifactId>
-                <version>2.22.2</version>
+                <version>3.3.1</version>
                 <configuration>
                     <!-- Travis build workaround -->
                     <argLine>-Xms1024m -Xmx2048m</argLine>

From 0c73e698b979a74cac0e917718b2c5dfd098dacc Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 22 Jul 2024 21:03:37 +0000
Subject: [PATCH 25/30] Bump org.apache.maven.plugins:maven-javadoc-plugin from
 3.7.0 to 3.8.0

Bumps [org.apache.maven.plugins:maven-javadoc-plugin](https://github.com/apache/maven-javadoc-plugin) from 3.7.0 to 3.8.0.
- [Release notes](https://github.com/apache/maven-javadoc-plugin/releases)
- [Commits](https://github.com/apache/maven-javadoc-plugin/compare/maven-javadoc-plugin-3.7.0...maven-javadoc-plugin-3.8.0)

---
updated-dependencies:
- dependency-name: org.apache.maven.plugins:maven-javadoc-plugin
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pom.xml b/pom.xml
index 8fd27509..d0b40101 100644
--- a/pom.xml
+++ b/pom.xml
@@ -74,7 +74,7 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-javadoc-plugin</artifactId>
-                <version>3.7.0</version>
+                <version>3.8.0</version>
                 <configuration>
                     <skip>true</skip>
                 </configuration>
@@ -112,7 +112,7 @@
             <plugin>
               <groupId>org.apache.maven.plugins</groupId>
               <artifactId>maven-javadoc-plugin</artifactId>
-              <version>3.7.0</version>
+              <version>3.8.0</version>
               <configuration>
                   <source>8</source>
               </configuration>
@@ -196,7 +196,7 @@
                     <plugin>
                         <groupId>org.apache.maven.plugins</groupId>
                         <artifactId>maven-javadoc-plugin</artifactId>
-                        <version>3.7.0</version>
+                        <version>3.8.0</version>
                         <configuration>
                             <source>8</source>
                         </configuration>

From 818c9a2f5a5ea8dc72d3efa775f192381e84b8c1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 9 Aug 2024 21:53:01 +0000
Subject: [PATCH 26/30] Bump org.apache.pdfbox:pdfbox from 3.0.2 to 3.0.3

Bumps org.apache.pdfbox:pdfbox from 3.0.2 to 3.0.3.

---
updated-dependencies:
- dependency-name: org.apache.pdfbox:pdfbox
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index d0b40101..a963e35a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -263,7 +263,7 @@
         <dependency>
             <groupId>org.apache.pdfbox</groupId>
             <artifactId>pdfbox</artifactId>
-            <version>3.0.2</version>
+            <version>3.0.3</version>
         </dependency>
 
         <dependency>

From 5d91f1d733c4895d31854a641c152220f8c5f341 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 30 Aug 2024 21:39:59 +0000
Subject: [PATCH 27/30] Bump org.locationtech.jts:jts-core from 1.19.0 to
 1.20.0

Bumps org.locationtech.jts:jts-core from 1.19.0 to 1.20.0.

---
updated-dependencies:
- dependency-name: org.locationtech.jts:jts-core
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index a963e35a..49057e90 100644
--- a/pom.xml
+++ b/pom.xml
@@ -245,7 +245,7 @@
         <dependency>
             <groupId>org.locationtech.jts</groupId>
             <artifactId>jts-core</artifactId>
-            <version>1.19.0</version>
+            <version>1.20.0</version>
         </dependency>
 
         <dependency>

From 971ae765e84f09ed83f5808b66f764590146e923 Mon Sep 17 00:00:00 2001
From: Kyle Lacy <kylelacy@kyle.space>
Date: Thu, 20 Feb 2025 15:29:09 -0800
Subject: [PATCH 28/30] Upgrade BouncyCastle dependencies

---
 pom.xml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pom.xml b/pom.xml
index 49057e90..8b7b3b2d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -268,14 +268,14 @@
 
         <dependency>
             <groupId>org.bouncycastle</groupId>
-            <artifactId>bcprov-jdk15on</artifactId>
-            <version>1.70</version>
+            <artifactId>bcprov-jdk18on</artifactId>
+            <version>1.80</version>
         </dependency>
 
         <dependency>
             <groupId>org.bouncycastle</groupId>
-            <artifactId>bcmail-jdk15on</artifactId>
-            <version>1.70</version>
+            <artifactId>bcmail-jdk18on</artifactId>
+            <version>1.80</version>
         </dependency>
 
         <dependency>

From 88154e2c15967cc4c2a2606a8da25d47b9b916c3 Mon Sep 17 00:00:00 2001
From: Tilman Hausherr <tilman@snafu.de>
Date: Wed, 19 Mar 2025 15:36:11 +0100
Subject: [PATCH 29/30] Update PDFBox

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 8b7b3b2d..211d0d4d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -263,7 +263,7 @@
         <dependency>
             <groupId>org.apache.pdfbox</groupId>
             <artifactId>pdfbox</artifactId>
-            <version>3.0.3</version>
+            <version>3.0.4</version>
         </dependency>
 
         <dependency>

From 2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f Mon Sep 17 00:00:00 2001
From: Tilman Hausherr <tilman@snafu.de>
Date: Wed, 19 Mar 2025 15:38:16 +0100
Subject: [PATCH 30/30] Adjust test

Test needs to be adjusted because PDFBox now supports the /ActualText feature of PDF.
---
 src/test/java/technology/tabula/TestBasicExtractor.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/test/java/technology/tabula/TestBasicExtractor.java b/src/test/java/technology/tabula/TestBasicExtractor.java
index d120546f..b56fd6ea 100644
--- a/src/test/java/technology/tabula/TestBasicExtractor.java
+++ b/src/test/java/technology/tabula/TestBasicExtractor.java
@@ -203,7 +203,7 @@ public void testCheckSqueezeDoesntBreak() throws IOException {
         List<List<RectangularTextContainer>> rows = table.getRows();
         List<RectangularTextContainer> firstRow = rows.get(0);
         List<RectangularTextContainer> lastRow = rows.get(rows.size() - 1);
-        assertTrue(firstRow.get(0).getText().equals("Violent crime  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  ."));
+        assertTrue(firstRow.get(0).getText().equals("Violent crime. .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  ."));
         assertTrue(lastRow.get(lastRow.size() - 1).getText().equals("(X)"));
         page.getPDDoc().close();
     }