From 3c57b82dead2727d63e8473dbdc5fd555a8ee2af Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Tue, 6 Jul 2021 13:43:02 +0200 Subject: [PATCH 01/38] Rename the package, fix some obvious issues and failing tests --- composer.json | 7 ++++++- index.php | 16 ---------------- src/Html/Font.php | 2 +- src/Html/HtmlFormatter.php | 11 +++++++---- tests/BulletsTest.php | 2 +- tests/ExtraParagraphTest.php | 7 +++++-- tests/FontFamilyTest.php | 4 ++-- tests/ParseSimpleTest.php | 4 ++-- 8 files changed, 24 insertions(+), 29 deletions(-) delete mode 100644 index.php diff --git a/composer.json b/composer.json index f5248ad..eded4d0 100644 --- a/composer.json +++ b/composer.json @@ -1,5 +1,5 @@ { - "name": "henck/rtf-html-php", + "name": "roundcube/rtf-html-php", "description": "RTF to HTML converter in PHP", "keywords": ["rtf", "converter"], "type": "library", @@ -9,6 +9,10 @@ { "name": "Alexander van Oostenrijk", "email": "alex.vanoostenrijk@gmail.com" + }, + { + "name": "Aleksander Machniak", + "email": "alec@alec.pl" } ], "scripts": { @@ -17,6 +21,7 @@ ] }, "require": { + "php": ">=5.4", "ext-mbstring": "*" }, "autoload": { diff --git a/index.php b/index.php deleted file mode 100644 index ea87e25..0000000 --- a/index.php +++ /dev/null @@ -1,16 +0,0 @@ -Format($document); - file_put_contents('rtf.html', $r); - echo $r; -?> \ No newline at end of file diff --git a/src/Html/Font.php b/src/Html/Font.php index b27ead5..ce394c1 100644 --- a/src/Html/Font.php +++ b/src/Html/Font.php @@ -14,6 +14,6 @@ public function toStyle(): string { if($this->name) array_push($list, $this->name); if($this->family) array_push($list, $this->family); if(sizeof($list) == 0) return ""; - return "font-family:" . join($list, ',') . ";"; + return "font-family:" . implode(',', $list) . 
";"; } } \ No newline at end of file diff --git a/src/Html/HtmlFormatter.php b/src/Html/HtmlFormatter.php index f0e7023..cc9372b 100644 --- a/src/Html/HtmlFormatter.php +++ b/src/Html/HtmlFormatter.php @@ -48,11 +48,14 @@ public function Format(Document $document) // Begin format $this->ProcessGroup($document->root); // Instead of removing opened tags, we close them - $append = $this->openedTags['span'] ? '' : ''; - $append .= $this->openedTags['p'] ? '

' : ''; + $this->output .= $this->openedTags['span'] ? '' : ''; + $this->output .= $this->openedTags['p'] ? '

' : ''; - return $this->output . $append; + // Remove extra empty paragraph + // TODO: Find the real reason it's there and fix it + $this->output = preg_replace('|

$|', '', $this->output); + return $this->output; } protected function LoadFont(\RtfHtmlPhp\Group $fontGroup) { @@ -372,7 +375,7 @@ protected function DecodeUnicode($code, $srcEnc = 'UTF-8') $utf8 = ''; if ($srcEnc != 'UTF-8') { // convert character to Unicode - $utf8 = iconv($srcEnc, 'UTF-8', chr($code)); + $utf8 = mb_convert_encoding(chr($code), 'UTF-8', $srcEnc); } if ($this->encoding == 'HTML-ENTITIES') { diff --git a/tests/BulletsTest.php b/tests/BulletsTest.php index d3bc2c3..c1407f8 100644 --- a/tests/BulletsTest.php +++ b/tests/BulletsTest.php @@ -17,5 +17,5 @@ public function testBullets(): void '

· A

· B

· C

', $html ); - } + } } diff --git a/tests/ExtraParagraphTest.php b/tests/ExtraParagraphTest.php index 42c8983..d7a7900 100644 --- a/tests/ExtraParagraphTest.php +++ b/tests/ExtraParagraphTest.php @@ -14,8 +14,11 @@ public function testExtraParagraph(): void $html = $formatter->Format($document); $this->assertEquals( - '

Conditions
Delivery: FCA in our warehouse in Rotterdam
Lead Time: 25 working days after confirmation, subject to prior sale
Payment: 60 days after invoice date
Quote validity: 30 days', + '

Conditions
' + . 'Delivery: FCA in our warehouse in Rotterdam
' + . 'Lead Time: 25 working days after confirmation, subject to prior sale
Payment: 60 days after invoice date
' + . 'Quote validity: 30 days

', $html ); - } + } } diff --git a/tests/FontFamilyTest.php b/tests/FontFamilyTest.php index a5629b0..df8cb36 100644 --- a/tests/FontFamilyTest.php +++ b/tests/FontFamilyTest.php @@ -11,11 +11,11 @@ public function testParseFontFamilyHtml(): void $rtf = file_get_contents("tests/rtf/fonts.rtf"); $document = new Document($rtf); $formatter = new HtmlFormatter(); - $html = $formatter->Format($document); + $html = $formatter->Format($document); $this->assertEquals( '

Hello, world.

', $html ); - } + } } diff --git a/tests/ParseSimpleTest.php b/tests/ParseSimpleTest.php index bd57726..276bb78 100644 --- a/tests/ParseSimpleTest.php +++ b/tests/ParseSimpleTest.php @@ -18,11 +18,11 @@ public function testParseSimpleHtml(): void $rtf = file_get_contents("tests/rtf/hello-world.rtf"); $document = new Document($rtf); $formatter = new HtmlFormatter(); - $html = $formatter->Format($document); + $html = $formatter->Format($document); $this->assertEquals( '

Hello, world.

', $html ); - } + } } From ca10a3001b4d2226a7481e1a3221ad94315721b8 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Tue, 6 Jul 2021 13:44:53 +0200 Subject: [PATCH 02/38] Run tests on Github Actions --- .github/workflows/tests.yml | 42 +++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/tests.yml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..a4d24c0 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,42 @@ +name: tests + +on: + push: + pull_request: + +jobs: + linux_tests: + runs-on: ubuntu-18.04 + if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" + + strategy: + fail-fast: true + matrix: + php: [5.5, 5.6, 7.0, 7.1, 7.2, 7.3, 7.4, 8.0] + + name: PHP ${{ matrix.php }}/Linux + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Setup PHP + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php }} + extensions: mbstring + tools: composer:v2 + coverage: none + + - name: Setup problem matchers + run: echo "::add-matcher::${{ runner.tool_cache }}/phpunit.json" + + - name: Fix PHPUnit for PHP8 + run: composer config platform.php 7.4 + if: matrix.php >= 8 + + - name: Install dependencies + run: composer install --prefer-dist --no-interaction --no-progress + + - name: Execute tests + run: vendor/bin/phpunit tests From 1590f56c1910a7342ab4e3169d7aa06fc4b959d4 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Tue, 6 Jul 2021 13:46:42 +0200 Subject: [PATCH 03/38] Support more PHP versions --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index eded4d0..4fd71e5 100644 --- a/composer.json +++ b/composer.json @@ -30,6 +30,6 @@ } }, "require-dev": { - "phpunit/phpunit": "7" + "phpunit/phpunit": "^4.8.36 || ^5.7.21 || ^6 || ^7" } } From 
7df7c2f957c58eccb83a49290fce1f15452e3e45 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Tue, 6 Jul 2021 13:49:25 +0200 Subject: [PATCH 04/38] Remove composer.lock --- composer.lock | 1536 ------------------------------------------------- 1 file changed, 1536 deletions(-) delete mode 100644 composer.lock diff --git a/composer.lock b/composer.lock deleted file mode 100644 index 019f7bc..0000000 --- a/composer.lock +++ /dev/null @@ -1,1536 +0,0 @@ -{ - "_readme": [ - "This file locks the dependencies of your project to a known state", - "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", - "This file is @generated automatically" - ], - "content-hash": "131ac4f10f3865b1c94fb45597e9954b", - "packages": [], - "packages-dev": [ - { - "name": "doctrine/instantiator", - "version": "1.2.0", - "source": { - "type": "git", - "url": "https://github.com/doctrine/instantiator.git", - "reference": "a2c590166b2133a4633738648b6b064edae0814a" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/doctrine/instantiator/zipball/a2c590166b2133a4633738648b6b064edae0814a", - "reference": "a2c590166b2133a4633738648b6b064edae0814a", - "shasum": "" - }, - "require": { - "php": "^7.1" - }, - "require-dev": { - "doctrine/coding-standard": "^6.0", - "ext-pdo": "*", - "ext-phar": "*", - "phpbench/phpbench": "^0.13", - "phpstan/phpstan-phpunit": "^0.11", - "phpstan/phpstan-shim": "^0.11", - "phpunit/phpunit": "^7.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.2.x-dev" - } - }, - "autoload": { - "psr-4": { - "Doctrine\\Instantiator\\": "src/Doctrine/Instantiator/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Marco Pivetta", - "email": "ocramius@gmail.com", - "homepage": "http://ocramius.github.com/" - } - ], - "description": "A small, lightweight utility to instantiate objects in PHP without invoking their 
constructors", - "homepage": "https://www.doctrine-project.org/projects/instantiator.html", - "keywords": [ - "constructor", - "instantiate" - ], - "time": "2019-03-17T17:37:11+00:00" - }, - { - "name": "myclabs/deep-copy", - "version": "1.9.3", - "source": { - "type": "git", - "url": "https://github.com/myclabs/DeepCopy.git", - "reference": "007c053ae6f31bba39dfa19a7726f56e9763bbea" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/myclabs/DeepCopy/zipball/007c053ae6f31bba39dfa19a7726f56e9763bbea", - "reference": "007c053ae6f31bba39dfa19a7726f56e9763bbea", - "shasum": "" - }, - "require": { - "php": "^7.1" - }, - "replace": { - "myclabs/deep-copy": "self.version" - }, - "require-dev": { - "doctrine/collections": "^1.0", - "doctrine/common": "^2.6", - "phpunit/phpunit": "^7.1" - }, - "type": "library", - "autoload": { - "psr-4": { - "DeepCopy\\": "src/DeepCopy/" - }, - "files": [ - "src/DeepCopy/deep_copy.php" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "description": "Create deep copies (clones) of your objects", - "keywords": [ - "clone", - "copy", - "duplicate", - "object", - "object graph" - ], - "time": "2019-08-09T12:45:53+00:00" - }, - { - "name": "phar-io/manifest", - "version": "1.0.1", - "source": { - "type": "git", - "url": "https://github.com/phar-io/manifest.git", - "reference": "2df402786ab5368a0169091f61a7c1e0eb6852d0" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/phar-io/manifest/zipball/2df402786ab5368a0169091f61a7c1e0eb6852d0", - "reference": "2df402786ab5368a0169091f61a7c1e0eb6852d0", - "shasum": "" - }, - "require": { - "ext-dom": "*", - "ext-phar": "*", - "phar-io/version": "^1.0.1", - "php": "^5.6 || ^7.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], 
- "authors": [ - { - "name": "Arne Blankerts", - "email": "arne@blankerts.de", - "role": "Developer" - }, - { - "name": "Sebastian Heuer", - "email": "sebastian@phpeople.de", - "role": "Developer" - }, - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de", - "role": "Developer" - } - ], - "description": "Component for reading phar.io manifest information from a PHP Archive (PHAR)", - "time": "2017-03-05T18:14:27+00:00" - }, - { - "name": "phar-io/version", - "version": "1.0.1", - "source": { - "type": "git", - "url": "https://github.com/phar-io/version.git", - "reference": "a70c0ced4be299a63d32fa96d9281d03e94041df" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/phar-io/version/zipball/a70c0ced4be299a63d32fa96d9281d03e94041df", - "reference": "a70c0ced4be299a63d32fa96d9281d03e94041df", - "shasum": "" - }, - "require": { - "php": "^5.6 || ^7.0" - }, - "type": "library", - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Arne Blankerts", - "email": "arne@blankerts.de", - "role": "Developer" - }, - { - "name": "Sebastian Heuer", - "email": "sebastian@phpeople.de", - "role": "Developer" - }, - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de", - "role": "Developer" - } - ], - "description": "Library for handling version information and constraints", - "time": "2017-03-05T17:38:23+00:00" - }, - { - "name": "phpdocumentor/reflection-common", - "version": "1.0.1", - "source": { - "type": "git", - "url": "https://github.com/phpDocumentor/ReflectionCommon.git", - "reference": "21bdeb5f65d7ebf9f43b1b25d404f87deab5bfb6" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/phpDocumentor/ReflectionCommon/zipball/21bdeb5f65d7ebf9f43b1b25d404f87deab5bfb6", - "reference": "21bdeb5f65d7ebf9f43b1b25d404f87deab5bfb6", - "shasum": "" - }, - "require": { - "php": ">=5.5" - }, - 
"require-dev": { - "phpunit/phpunit": "^4.6" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.0.x-dev" - } - }, - "autoload": { - "psr-4": { - "phpDocumentor\\Reflection\\": [ - "src" - ] - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Jaap van Otterdijk", - "email": "opensource@ijaap.nl" - } - ], - "description": "Common reflection classes used by phpdocumentor to reflect the code structure", - "homepage": "http://www.phpdoc.org", - "keywords": [ - "FQSEN", - "phpDocumentor", - "phpdoc", - "reflection", - "static analysis" - ], - "time": "2017-09-11T18:02:19+00:00" - }, - { - "name": "phpdocumentor/reflection-docblock", - "version": "4.3.1", - "source": { - "type": "git", - "url": "https://github.com/phpDocumentor/ReflectionDocBlock.git", - "reference": "bdd9f737ebc2a01c06ea7ff4308ec6697db9b53c" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/phpDocumentor/ReflectionDocBlock/zipball/bdd9f737ebc2a01c06ea7ff4308ec6697db9b53c", - "reference": "bdd9f737ebc2a01c06ea7ff4308ec6697db9b53c", - "shasum": "" - }, - "require": { - "php": "^7.0", - "phpdocumentor/reflection-common": "^1.0.0", - "phpdocumentor/type-resolver": "^0.4.0", - "webmozart/assert": "^1.0" - }, - "require-dev": { - "doctrine/instantiator": "~1.0.5", - "mockery/mockery": "^1.0", - "phpunit/phpunit": "^6.4" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "4.x-dev" - } - }, - "autoload": { - "psr-4": { - "phpDocumentor\\Reflection\\": [ - "src/" - ] - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Mike van Riel", - "email": "me@mikevanriel.com" - } - ], - "description": "With this component, a library can provide support for annotations via DocBlocks or otherwise retrieve information that is embedded in a DocBlock.", - "time": "2019-04-30T17:48:53+00:00" - }, - { - "name": 
"phpdocumentor/type-resolver", - "version": "0.4.0", - "source": { - "type": "git", - "url": "https://github.com/phpDocumentor/TypeResolver.git", - "reference": "9c977708995954784726e25d0cd1dddf4e65b0f7" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/phpDocumentor/TypeResolver/zipball/9c977708995954784726e25d0cd1dddf4e65b0f7", - "reference": "9c977708995954784726e25d0cd1dddf4e65b0f7", - "shasum": "" - }, - "require": { - "php": "^5.5 || ^7.0", - "phpdocumentor/reflection-common": "^1.0" - }, - "require-dev": { - "mockery/mockery": "^0.9.4", - "phpunit/phpunit": "^5.2||^4.8.24" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.0.x-dev" - } - }, - "autoload": { - "psr-4": { - "phpDocumentor\\Reflection\\": [ - "src/" - ] - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Mike van Riel", - "email": "me@mikevanriel.com" - } - ], - "time": "2017-07-14T14:27:02+00:00" - }, - { - "name": "phpspec/prophecy", - "version": "1.8.1", - "source": { - "type": "git", - "url": "https://github.com/phpspec/prophecy.git", - "reference": "1927e75f4ed19131ec9bcc3b002e07fb1173ee76" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/phpspec/prophecy/zipball/1927e75f4ed19131ec9bcc3b002e07fb1173ee76", - "reference": "1927e75f4ed19131ec9bcc3b002e07fb1173ee76", - "shasum": "" - }, - "require": { - "doctrine/instantiator": "^1.0.2", - "php": "^5.3|^7.0", - "phpdocumentor/reflection-docblock": "^2.0|^3.0.2|^4.0", - "sebastian/comparator": "^1.1|^2.0|^3.0", - "sebastian/recursion-context": "^1.0|^2.0|^3.0" - }, - "require-dev": { - "phpspec/phpspec": "^2.5|^3.2", - "phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.5 || ^7.1" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.8.x-dev" - } - }, - "autoload": { - "psr-4": { - "Prophecy\\": "src/Prophecy" - } - }, - "notification-url": "https://packagist.org/downloads/", - 
"license": [ - "MIT" - ], - "authors": [ - { - "name": "Konstantin Kudryashov", - "email": "ever.zet@gmail.com", - "homepage": "http://everzet.com" - }, - { - "name": "Marcello Duarte", - "email": "marcello.duarte@gmail.com" - } - ], - "description": "Highly opinionated mocking framework for PHP 5.3+", - "homepage": "https://github.com/phpspec/prophecy", - "keywords": [ - "Double", - "Dummy", - "fake", - "mock", - "spy", - "stub" - ], - "time": "2019-06-13T12:50:23+00:00" - }, - { - "name": "phpunit/php-code-coverage", - "version": "6.0.5", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/php-code-coverage.git", - "reference": "4cab20a326d14de7575a8e235c70d879b569a57a" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/4cab20a326d14de7575a8e235c70d879b569a57a", - "reference": "4cab20a326d14de7575a8e235c70d879b569a57a", - "shasum": "" - }, - "require": { - "ext-dom": "*", - "ext-xmlwriter": "*", - "php": "^7.1", - "phpunit/php-file-iterator": "^1.4.2", - "phpunit/php-text-template": "^1.2.1", - "phpunit/php-token-stream": "^3.0", - "sebastian/code-unit-reverse-lookup": "^1.0.1", - "sebastian/environment": "^3.1", - "sebastian/version": "^2.0.1", - "theseer/tokenizer": "^1.1" - }, - "require-dev": { - "phpunit/phpunit": "^7.0" - }, - "suggest": { - "ext-xdebug": "^2.6.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "6.0-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de", - "role": "lead" - } - ], - "description": "Library that provides collection, processing, and rendering functionality for PHP code coverage information.", - "homepage": "https://github.com/sebastianbergmann/php-code-coverage", - "keywords": [ - "coverage", - "testing", - "xunit" - ], - 
"time": "2018-05-28T11:49:20+00:00" - }, - { - "name": "phpunit/php-file-iterator", - "version": "1.4.5", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/php-file-iterator.git", - "reference": "730b01bc3e867237eaac355e06a36b85dd93a8b4" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-file-iterator/zipball/730b01bc3e867237eaac355e06a36b85dd93a8b4", - "reference": "730b01bc3e867237eaac355e06a36b85dd93a8b4", - "shasum": "" - }, - "require": { - "php": ">=5.3.3" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.4.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sb@sebastian-bergmann.de", - "role": "lead" - } - ], - "description": "FilterIterator implementation that filters files based on a list of suffixes.", - "homepage": "https://github.com/sebastianbergmann/php-file-iterator/", - "keywords": [ - "filesystem", - "iterator" - ], - "time": "2017-11-27T13:52:08+00:00" - }, - { - "name": "phpunit/php-text-template", - "version": "1.2.1", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/php-text-template.git", - "reference": "31f8b717e51d9a2afca6c9f046f5d69fc27c8686" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-text-template/zipball/31f8b717e51d9a2afca6c9f046f5d69fc27c8686", - "reference": "31f8b717e51d9a2afca6c9f046f5d69fc27c8686", - "shasum": "" - }, - "require": { - "php": ">=5.3.3" - }, - "type": "library", - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de", - "role": "lead" - } - ], - "description": "Simple template engine.", - "homepage": 
"https://github.com/sebastianbergmann/php-text-template/", - "keywords": [ - "template" - ], - "time": "2015-06-21T13:50:34+00:00" - }, - { - "name": "phpunit/php-timer", - "version": "2.1.2", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/php-timer.git", - "reference": "1038454804406b0b5f5f520358e78c1c2f71501e" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-timer/zipball/1038454804406b0b5f5f520358e78c1c2f71501e", - "reference": "1038454804406b0b5f5f520358e78c1c2f71501e", - "shasum": "" - }, - "require": { - "php": "^7.1" - }, - "require-dev": { - "phpunit/phpunit": "^7.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "2.1-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de", - "role": "lead" - } - ], - "description": "Utility class for timing", - "homepage": "https://github.com/sebastianbergmann/php-timer/", - "keywords": [ - "timer" - ], - "time": "2019-06-07T04:22:29+00:00" - }, - { - "name": "phpunit/php-token-stream", - "version": "3.1.0", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/php-token-stream.git", - "reference": "e899757bb3df5ff6e95089132f32cd59aac2220a" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-token-stream/zipball/e899757bb3df5ff6e95089132f32cd59aac2220a", - "reference": "e899757bb3df5ff6e95089132f32cd59aac2220a", - "shasum": "" - }, - "require": { - "ext-tokenizer": "*", - "php": "^7.1" - }, - "require-dev": { - "phpunit/phpunit": "^7.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "3.1-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - 
], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Wrapper around PHP's tokenizer extension.", - "homepage": "https://github.com/sebastianbergmann/php-token-stream/", - "keywords": [ - "tokenizer" - ], - "time": "2019-07-25T05:29:42+00:00" - }, - { - "name": "phpunit/phpunit", - "version": "7.0.0", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/phpunit.git", - "reference": "9b3373439fdf2f3e9d1578f5e408a3a0d161c3bc" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/9b3373439fdf2f3e9d1578f5e408a3a0d161c3bc", - "reference": "9b3373439fdf2f3e9d1578f5e408a3a0d161c3bc", - "shasum": "" - }, - "require": { - "ext-dom": "*", - "ext-json": "*", - "ext-libxml": "*", - "ext-mbstring": "*", - "ext-xml": "*", - "myclabs/deep-copy": "^1.6.1", - "phar-io/manifest": "^1.0.1", - "phar-io/version": "^1.0", - "php": "^7.1", - "phpspec/prophecy": "^1.7", - "phpunit/php-code-coverage": "^6.0", - "phpunit/php-file-iterator": "^1.4.3", - "phpunit/php-text-template": "^1.2.1", - "phpunit/php-timer": "^2.0", - "phpunit/phpunit-mock-objects": "^6.0", - "sebastian/comparator": "^2.1", - "sebastian/diff": "^3.0", - "sebastian/environment": "^3.1", - "sebastian/exporter": "^3.1", - "sebastian/global-state": "^2.0", - "sebastian/object-enumerator": "^3.0.3", - "sebastian/resource-operations": "^1.0", - "sebastian/version": "^2.0.1" - }, - "require-dev": { - "ext-pdo": "*" - }, - "suggest": { - "ext-xdebug": "*", - "phpunit/php-invoker": "^2.0" - }, - "bin": [ - "phpunit" - ], - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "7.0-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de", - "role": "lead" - } - ], - "description": "The PHP Unit 
Testing framework.", - "homepage": "https://phpunit.de/", - "keywords": [ - "phpunit", - "testing", - "xunit" - ], - "time": "2018-02-02T05:04:08+00:00" - }, - { - "name": "phpunit/phpunit-mock-objects", - "version": "6.1.2", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/phpunit-mock-objects.git", - "reference": "f9756fd4f43f014cb2dca98deeaaa8ce5500a36e" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/phpunit-mock-objects/zipball/f9756fd4f43f014cb2dca98deeaaa8ce5500a36e", - "reference": "f9756fd4f43f014cb2dca98deeaaa8ce5500a36e", - "shasum": "" - }, - "require": { - "doctrine/instantiator": "^1.0.5", - "php": "^7.1", - "phpunit/php-text-template": "^1.2.1", - "sebastian/exporter": "^3.1" - }, - "require-dev": { - "phpunit/phpunit": "^7.0" - }, - "suggest": { - "ext-soap": "*" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "6.1-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de", - "role": "lead" - } - ], - "description": "Mock Object library for PHPUnit", - "homepage": "https://github.com/sebastianbergmann/phpunit-mock-objects/", - "keywords": [ - "mock", - "xunit" - ], - "abandoned": true, - "time": "2018-05-29T13:54:20+00:00" - }, - { - "name": "sebastian/code-unit-reverse-lookup", - "version": "1.0.1", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/code-unit-reverse-lookup.git", - "reference": "4419fcdb5eabb9caa61a27c7a1db532a6b55dd18" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/code-unit-reverse-lookup/zipball/4419fcdb5eabb9caa61a27c7a1db532a6b55dd18", - "reference": "4419fcdb5eabb9caa61a27c7a1db532a6b55dd18", - "shasum": "" - }, - "require": { - "php": "^5.6 || ^7.0" - }, - "require-dev": { - 
"phpunit/phpunit": "^5.7 || ^6.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Looks up which function or method a line of code belongs to", - "homepage": "https://github.com/sebastianbergmann/code-unit-reverse-lookup/", - "time": "2017-03-04T06:30:41+00:00" - }, - { - "name": "sebastian/comparator", - "version": "2.1.3", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/comparator.git", - "reference": "34369daee48eafb2651bea869b4b15d75ccc35f9" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/comparator/zipball/34369daee48eafb2651bea869b4b15d75ccc35f9", - "reference": "34369daee48eafb2651bea869b4b15d75ccc35f9", - "shasum": "" - }, - "require": { - "php": "^7.0", - "sebastian/diff": "^2.0 || ^3.0", - "sebastian/exporter": "^3.1" - }, - "require-dev": { - "phpunit/phpunit": "^6.4" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "2.1.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Jeff Welch", - "email": "whatthejeff@gmail.com" - }, - { - "name": "Volker Dusch", - "email": "github@wallbash.com" - }, - { - "name": "Bernhard Schussek", - "email": "bschussek@2bepublished.at" - }, - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Provides the functionality to compare PHP values for equality", - "homepage": "https://github.com/sebastianbergmann/comparator", - "keywords": [ - "comparator", - "compare", - "equality" - ], - "time": "2018-02-01T13:46:46+00:00" - }, - { - "name": 
"sebastian/diff", - "version": "3.0.2", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/diff.git", - "reference": "720fcc7e9b5cf384ea68d9d930d480907a0c1a29" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/diff/zipball/720fcc7e9b5cf384ea68d9d930d480907a0c1a29", - "reference": "720fcc7e9b5cf384ea68d9d930d480907a0c1a29", - "shasum": "" - }, - "require": { - "php": "^7.1" - }, - "require-dev": { - "phpunit/phpunit": "^7.5 || ^8.0", - "symfony/process": "^2 || ^3.3 || ^4" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "3.0-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Kore Nordmann", - "email": "mail@kore-nordmann.de" - }, - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Diff implementation", - "homepage": "https://github.com/sebastianbergmann/diff", - "keywords": [ - "diff", - "udiff", - "unidiff", - "unified diff" - ], - "time": "2019-02-04T06:01:07+00:00" - }, - { - "name": "sebastian/environment", - "version": "3.1.0", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/environment.git", - "reference": "cd0871b3975fb7fc44d11314fd1ee20925fce4f5" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/cd0871b3975fb7fc44d11314fd1ee20925fce4f5", - "reference": "cd0871b3975fb7fc44d11314fd1ee20925fce4f5", - "shasum": "" - }, - "require": { - "php": "^7.0" - }, - "require-dev": { - "phpunit/phpunit": "^6.1" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "3.1.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": 
"sebastian@phpunit.de" - } - ], - "description": "Provides functionality to handle HHVM/PHP environments", - "homepage": "http://www.github.com/sebastianbergmann/environment", - "keywords": [ - "Xdebug", - "environment", - "hhvm" - ], - "time": "2017-07-01T08:51:00+00:00" - }, - { - "name": "sebastian/exporter", - "version": "3.1.1", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/exporter.git", - "reference": "06a9a5947f47b3029d76118eb5c22802e5869687" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/exporter/zipball/06a9a5947f47b3029d76118eb5c22802e5869687", - "reference": "06a9a5947f47b3029d76118eb5c22802e5869687", - "shasum": "" - }, - "require": { - "php": "^7.0", - "sebastian/recursion-context": "^3.0" - }, - "require-dev": { - "ext-mbstring": "*", - "phpunit/phpunit": "^6.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "3.1.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - }, - { - "name": "Jeff Welch", - "email": "whatthejeff@gmail.com" - }, - { - "name": "Volker Dusch", - "email": "github@wallbash.com" - }, - { - "name": "Adam Harvey", - "email": "aharvey@php.net" - }, - { - "name": "Bernhard Schussek", - "email": "bschussek@gmail.com" - } - ], - "description": "Provides the functionality to export PHP variables for visualization", - "homepage": "http://www.github.com/sebastianbergmann/exporter", - "keywords": [ - "export", - "exporter" - ], - "time": "2019-08-11T12:43:14+00:00" - }, - { - "name": "sebastian/global-state", - "version": "2.0.0", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/global-state.git", - "reference": "e8ba02eed7bbbb9e59e43dedd3dddeff4a56b0c4" - }, - "dist": { - "type": "zip", - "url": 
"https://api.github.com/repos/sebastianbergmann/global-state/zipball/e8ba02eed7bbbb9e59e43dedd3dddeff4a56b0c4", - "reference": "e8ba02eed7bbbb9e59e43dedd3dddeff4a56b0c4", - "shasum": "" - }, - "require": { - "php": "^7.0" - }, - "require-dev": { - "phpunit/phpunit": "^6.0" - }, - "suggest": { - "ext-uopz": "*" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "2.0-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Snapshotting of global state", - "homepage": "http://www.github.com/sebastianbergmann/global-state", - "keywords": [ - "global state" - ], - "time": "2017-04-27T15:39:26+00:00" - }, - { - "name": "sebastian/object-enumerator", - "version": "3.0.3", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/object-enumerator.git", - "reference": "7cfd9e65d11ffb5af41198476395774d4c8a84c5" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/object-enumerator/zipball/7cfd9e65d11ffb5af41198476395774d4c8a84c5", - "reference": "7cfd9e65d11ffb5af41198476395774d4c8a84c5", - "shasum": "" - }, - "require": { - "php": "^7.0", - "sebastian/object-reflector": "^1.1.1", - "sebastian/recursion-context": "^3.0" - }, - "require-dev": { - "phpunit/phpunit": "^6.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "3.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Traverses array structures and object graphs to enumerate all referenced objects", - "homepage": "https://github.com/sebastianbergmann/object-enumerator/", - "time": 
"2017-08-03T12:35:26+00:00" - }, - { - "name": "sebastian/object-reflector", - "version": "1.1.1", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/object-reflector.git", - "reference": "773f97c67f28de00d397be301821b06708fca0be" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/object-reflector/zipball/773f97c67f28de00d397be301821b06708fca0be", - "reference": "773f97c67f28de00d397be301821b06708fca0be", - "shasum": "" - }, - "require": { - "php": "^7.0" - }, - "require-dev": { - "phpunit/phpunit": "^6.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.1-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Allows reflection of object attributes, including inherited and non-public ones", - "homepage": "https://github.com/sebastianbergmann/object-reflector/", - "time": "2017-03-29T09:07:27+00:00" - }, - { - "name": "sebastian/recursion-context", - "version": "3.0.0", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/recursion-context.git", - "reference": "5b0cd723502bac3b006cbf3dbf7a1e3fcefe4fa8" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/recursion-context/zipball/5b0cd723502bac3b006cbf3dbf7a1e3fcefe4fa8", - "reference": "5b0cd723502bac3b006cbf3dbf7a1e3fcefe4fa8", - "shasum": "" - }, - "require": { - "php": "^7.0" - }, - "require-dev": { - "phpunit/phpunit": "^6.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "3.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Jeff Welch", - "email": "whatthejeff@gmail.com" - }, 
- { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - }, - { - "name": "Adam Harvey", - "email": "aharvey@php.net" - } - ], - "description": "Provides functionality to recursively process PHP variables", - "homepage": "http://www.github.com/sebastianbergmann/recursion-context", - "time": "2017-03-03T06:23:57+00:00" - }, - { - "name": "sebastian/resource-operations", - "version": "1.0.0", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/resource-operations.git", - "reference": "ce990bb21759f94aeafd30209e8cfcdfa8bc3f52" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/resource-operations/zipball/ce990bb21759f94aeafd30209e8cfcdfa8bc3f52", - "reference": "ce990bb21759f94aeafd30209e8cfcdfa8bc3f52", - "shasum": "" - }, - "require": { - "php": ">=5.6.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - } - ], - "description": "Provides a list of PHP built-in functions that operate on resources", - "homepage": "https://www.github.com/sebastianbergmann/resource-operations", - "time": "2015-07-28T20:34:47+00:00" - }, - { - "name": "sebastian/version", - "version": "2.0.1", - "source": { - "type": "git", - "url": "https://github.com/sebastianbergmann/version.git", - "reference": "99732be0ddb3361e16ad77b68ba41efc8e979019" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/version/zipball/99732be0ddb3361e16ad77b68ba41efc8e979019", - "reference": "99732be0ddb3361e16ad77b68ba41efc8e979019", - "shasum": "" - }, - "require": { - "php": ">=5.6" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "2.0.x-dev" - } - }, - "autoload": { - "classmap": [ - "src/" - ] 
- }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de", - "role": "lead" - } - ], - "description": "Library that helps with managing the version number of Git-hosted PHP projects", - "homepage": "https://github.com/sebastianbergmann/version", - "time": "2016-10-03T07:35:21+00:00" - }, - { - "name": "symfony/polyfill-ctype", - "version": "v1.12.0", - "source": { - "type": "git", - "url": "https://github.com/symfony/polyfill-ctype.git", - "reference": "550ebaac289296ce228a706d0867afc34687e3f4" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/symfony/polyfill-ctype/zipball/550ebaac289296ce228a706d0867afc34687e3f4", - "reference": "550ebaac289296ce228a706d0867afc34687e3f4", - "shasum": "" - }, - "require": { - "php": ">=5.3.3" - }, - "suggest": { - "ext-ctype": "For best performance" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.12-dev" - } - }, - "autoload": { - "psr-4": { - "Symfony\\Polyfill\\Ctype\\": "" - }, - "files": [ - "bootstrap.php" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Gert de Pagter", - "email": "BackEndTea@gmail.com" - }, - { - "name": "Symfony Community", - "homepage": "https://symfony.com/contributors" - } - ], - "description": "Symfony polyfill for ctype functions", - "homepage": "https://symfony.com", - "keywords": [ - "compatibility", - "ctype", - "polyfill", - "portable" - ], - "time": "2019-08-06T08:03:45+00:00" - }, - { - "name": "theseer/tokenizer", - "version": "1.1.3", - "source": { - "type": "git", - "url": "https://github.com/theseer/tokenizer.git", - "reference": "11336f6f84e16a720dae9d8e6ed5019efa85a0f9" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/theseer/tokenizer/zipball/11336f6f84e16a720dae9d8e6ed5019efa85a0f9", - "reference": 
"11336f6f84e16a720dae9d8e6ed5019efa85a0f9", - "shasum": "" - }, - "require": { - "ext-dom": "*", - "ext-tokenizer": "*", - "ext-xmlwriter": "*", - "php": "^7.0" - }, - "type": "library", - "autoload": { - "classmap": [ - "src/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "BSD-3-Clause" - ], - "authors": [ - { - "name": "Arne Blankerts", - "role": "Developer", - "email": "arne@blankerts.de" - } - ], - "description": "A small library for converting tokenized PHP source code into XML and potentially other formats", - "time": "2019-06-13T22:48:21+00:00" - }, - { - "name": "webmozart/assert", - "version": "1.5.0", - "source": { - "type": "git", - "url": "https://github.com/webmozart/assert.git", - "reference": "88e6d84706d09a236046d686bbea96f07b3a34f4" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/webmozart/assert/zipball/88e6d84706d09a236046d686bbea96f07b3a34f4", - "reference": "88e6d84706d09a236046d686bbea96f07b3a34f4", - "shasum": "" - }, - "require": { - "php": "^5.3.3 || ^7.0", - "symfony/polyfill-ctype": "^1.8" - }, - "require-dev": { - "phpunit/phpunit": "^4.8.36 || ^7.5.13" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.3-dev" - } - }, - "autoload": { - "psr-4": { - "Webmozart\\Assert\\": "src/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Bernhard Schussek", - "email": "bschussek@gmail.com" - } - ], - "description": "Assertions to validate method input/output with nice error messages.", - "keywords": [ - "assert", - "check", - "validate" - ], - "time": "2019-08-24T08:43:50+00:00" - } - ], - "aliases": [], - "minimum-stability": "stable", - "stability-flags": [], - "prefer-stable": false, - "prefer-lowest": false, - "platform": [], - "platform-dev": [] -} From 6c403dfa65d992ae4860f403d1669646ebbf4efa Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Tue, 6 Jul 2021 13:52:51 +0200 
Subject: [PATCH 05/38] Let's see we can support PHP5 --- tests/BulletsTest.php | 2 +- tests/EmptyStringTest.php | 2 +- tests/ExtraParagraphTest.php | 2 +- tests/FontFamilyTest.php | 2 +- tests/ParseSimpleTest.php | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/BulletsTest.php b/tests/BulletsTest.php index c1407f8..82beae2 100644 --- a/tests/BulletsTest.php +++ b/tests/BulletsTest.php @@ -6,7 +6,7 @@ final class BulletsTest extends TestCase { - public function testBullets(): void + public function testBullets() { $rtf = file_get_contents("tests/rtf/bullets.rtf"); $document = new Document($rtf); diff --git a/tests/EmptyStringTest.php b/tests/EmptyStringTest.php index b74b376..151f1d1 100644 --- a/tests/EmptyStringTest.php +++ b/tests/EmptyStringTest.php @@ -5,7 +5,7 @@ final class EmptyStringTest extends TestCase { - public function testParseEmptyString(): void + public function testParseEmptyString() { $document = new Document(""); $this->assertTrue(true); diff --git a/tests/ExtraParagraphTest.php b/tests/ExtraParagraphTest.php index d7a7900..1c5a688 100644 --- a/tests/ExtraParagraphTest.php +++ b/tests/ExtraParagraphTest.php @@ -6,7 +6,7 @@ final class ExtraParagraphTest extends TestCase { - public function testExtraParagraph(): void + public function testExtraParagraph() { $rtf = file_get_contents("tests/rtf/extra-closing-paragraph.rtf"); $document = new Document($rtf); diff --git a/tests/FontFamilyTest.php b/tests/FontFamilyTest.php index df8cb36..f98c4fe 100644 --- a/tests/FontFamilyTest.php +++ b/tests/FontFamilyTest.php @@ -6,7 +6,7 @@ final class FontFamilyTestTest extends TestCase { - public function testParseFontFamilyHtml(): void + public function testParseFontFamilyHtml() { $rtf = file_get_contents("tests/rtf/fonts.rtf"); $document = new Document($rtf); diff --git a/tests/ParseSimpleTest.php b/tests/ParseSimpleTest.php index 276bb78..089990d 100644 --- a/tests/ParseSimpleTest.php +++ b/tests/ParseSimpleTest.php @@ -6,14 +6,14 
@@ final class ParseSimpleTest extends TestCase { - public function testParseSimple(): void + public function testParseSimple() { $rtf = file_get_contents("tests/rtf/hello-world.rtf"); $document = new Document($rtf); $this->assertTrue(true); } - public function testParseSimpleHtml(): void + public function testParseSimpleHtml() { $rtf = file_get_contents("tests/rtf/hello-world.rtf"); $document = new Document($rtf); From e03bf8a4e329509f307942bdf464aec59ee9a53b Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Tue, 6 Jul 2021 13:58:47 +0200 Subject: [PATCH 06/38] PHP5 support --- src/ControlSymbol.php | 2 +- src/Document.php | 4 ++-- src/Group.php | 6 +++--- src/Html/Font.php | 4 ++-- src/Html/Image.php | 8 ++++---- src/Text.php | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/ControlSymbol.php b/src/ControlSymbol.php index 6c50381..53aec86 100644 --- a/src/ControlSymbol.php +++ b/src/ControlSymbol.php @@ -10,5 +10,5 @@ class ControlSymbol extends Element public function toString(int $level) { return str_repeat(" ", $level) . "SYMBOL {$this->symbol} ({$this->parameter})\n"; - } + } } \ No newline at end of file diff --git a/src/Document.php b/src/Document.php index 84e9840..03fa960 100644 --- a/src/Document.php +++ b/src/Document.php @@ -35,7 +35,7 @@ protected function GetChar() * (Helper method) * Is the current character a letter? */ - protected function is_letter(): bool + protected function is_letter() { if(ord($this->char) >= 65 && ord($this->char) <= 90) return true; if(ord($this->char) >= 97 && ord($this->char) <= 122) return true; @@ -46,7 +46,7 @@ protected function is_letter(): bool * (Helper method) * Is the current character a digit? 
*/ - protected function is_digit(): bool + protected function is_digit() { return (ord($this->char) >= 48 && ord($this->char) <= 57); } diff --git a/src/Group.php b/src/Group.php index 1aa0ad7..d97ed2b 100644 --- a/src/Group.php +++ b/src/Group.php @@ -20,7 +20,7 @@ public function GetType() { // No children? Then the group type is null. if(sizeof($this->children) == 0) return null; - + // If the first child is a control word, then // the group type is the word. $child = $this->children[0]; @@ -31,11 +31,11 @@ public function GetType() elseif ($child instanceof ControlSymbol) { return ($child->symbol == '*') ? '*' : null; } - + // If first child is neither word nor symbol, then // group type is null. return null; - } + } // If a group contains a * symbol as its first child, // then it is a destination group. diff --git a/src/Html/Font.php b/src/Html/Font.php index ce394c1..1719854 100644 --- a/src/Html/Font.php +++ b/src/Html/Font.php @@ -8,8 +8,8 @@ class Font public $name; public $charset; public $codepage; - - public function toStyle(): string { + + public function toStyle() { $list = array(); if($this->name) array_push($list, $this->name); if($this->family) array_push($list, $this->family); diff --git a/src/Html/Image.php b/src/Html/Image.php index f0f9b9d..ae85b9f 100644 --- a/src/Html/Image.php +++ b/src/Html/Image.php @@ -8,7 +8,7 @@ public function __construct() { $this->Reset(); } - + public function Reset() { $this->format = 'bmp'; @@ -21,18 +21,18 @@ public function Reset() $this->binarySize = null; // Number of bytes of the binary data $this->ImageData = null; // Binary or Hexadecimal Data } - + public function PrintImage() { // $output = "format};base64,"; - + if (isset($this->binarySize)) { // process binary data return; } else { // process hexadecimal data $output .= base64_encode(pack('H*',$this->ImageData)); } - + $output .= "\" />"; return $output; } diff --git a/src/Text.php b/src/Text.php index 745916d..05e971d 100644 --- a/src/Text.php +++ 
b/src/Text.php @@ -17,5 +17,5 @@ public function __construct(string $text) public function toString(int $level) { return str_repeat(" ", $level) . "TEXT {$this->text}\n"; - } + } } \ No newline at end of file From 9765aabcb33ddc6ca71d950375f30fdf94863e76 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Tue, 6 Jul 2021 14:01:25 +0200 Subject: [PATCH 07/38] Get rid of strict_types=1 --- src/Document.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Document.php b/src/Document.php index 03fa960..ec171bd 100644 --- a/src/Document.php +++ b/src/Document.php @@ -1,7 +1,5 @@ Date: Tue, 6 Jul 2021 14:32:19 +0200 Subject: [PATCH 08/38] PHP5 fix --- src/Document.php | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Document.php b/src/Document.php index ec171bd..f2adce9 100644 --- a/src/Document.php +++ b/src/Document.php @@ -13,7 +13,7 @@ class Document public function __construct($rtf) { $this->Parse($rtf); - } + } // Get the next character from the RTF stream. // Parsing is aborted when reading beyond end of input string. @@ -133,38 +133,38 @@ protected function ParseControlWord() // If no parameter present, assume control word's default (usually 1) // If no default then assign 0 to the parameter if($parameter === null) $parameter = 1; - + // Convert parameter to a negative number when applicable if($negative) $parameter = -$parameter; - + // Update uc value if ($word == "uc") { array_pop($this->uc); $this->uc[] = $parameter; } - + // Skip space delimiter if(!$this->is_space_delimiter()) $this->pos--; - + // If this is \u, then the parameter will be followed // by {$this->uc} characters. 
if($word == "u") { // Convert parameter to unsigned decimal unicode if($negative) $parameter = 65536 + $parameter; - + // Will ignore replacement characters $uc times $uc = end($this->uc); while ($uc > 0) { - $this->GetChar(); + $this->GetChar(); // If the replacement character is encoded as // hexadecimal value \'hh then jump over it if($this->char == '\\' && $this->rtf[$this->pos]=='\'') $this->pos = $this->pos + 3; - + // Break if it's an RTF scope delimiter elseif ($this->char == '{' || $this->char == '{') break; - + // - To include an RTF delimiter in skippable data, it must be // represented using the appropriate control symbol (that is, // escaped with a backslash,) as in plain text. @@ -176,7 +176,7 @@ protected function ParseControlWord() $uc--; } } - + // Add new RTF word as a child to the current group. $rtfword = new ControlWord(); $rtfword->word = $word; @@ -198,7 +198,7 @@ protected function ParseControlSymbol() $rtfword->parameter = 0; array_push($this->group->children, $rtfword); return; - } + } // Symbols ordinarily have no parameter. However, // if this is \' (a single quote), then it is @@ -292,7 +292,7 @@ protected function ParseText() /* * Attempt to parse an RTF string. */ - protected function Parse(string $rtf) + protected function Parse(\string $rtf) { $this->rtf = $rtf; $this->pos = 0; From da920dd0a61f534d52959ff90863f4dfc5071e4e Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Tue, 6 Jul 2021 14:33:29 +0200 Subject: [PATCH 09/38] Revert "PHP5 fix" --- src/Document.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Document.php b/src/Document.php index f2adce9..5415795 100644 --- a/src/Document.php +++ b/src/Document.php @@ -292,7 +292,7 @@ protected function ParseText() /* * Attempt to parse an RTF string. 
*/ - protected function Parse(\string $rtf) + protected function Parse(string $rtf) { $this->rtf = $rtf; $this->pos = 0; From f8dc958e521dc8efbcf667c9a109719696322b92 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Tue, 6 Jul 2021 14:55:30 +0200 Subject: [PATCH 10/38] CS fixes --- src/Document.php | 2 +- src/Html/State.php | 20 ++++++++++---------- src/Text.php | 2 +- tests/BulletsTest.php | 2 +- tests/EmptyStringTest.php | 2 +- tests/ExtraParagraphTest.php | 2 +- tests/FontFamilyTest.php | 2 +- tests/ParseSimpleTest.php | 2 +- 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/Document.php b/src/Document.php index 5415795..1e53634 100644 --- a/src/Document.php +++ b/src/Document.php @@ -292,7 +292,7 @@ protected function ParseText() /* * Attempt to parse an RTF string. */ - protected function Parse(string $rtf) + protected function Parse($rtf) { $this->rtf = $rtf; $this->pos = 0; diff --git a/src/Html/State.php b/src/Html/State.php index ab234f6..6725403 100644 --- a/src/Html/State.php +++ b/src/Html/State.php @@ -24,7 +24,7 @@ class State 15 => 'DarkGray', 16 => 'LightGray' ); - + public function __construct() { $this->Reset(); @@ -50,11 +50,11 @@ public function Reset($defaultFont = null) $this->hcolor = null; $this->font = isset($defaultFont) ? $defaultFont : null; } - + public function PrintStyle() { $style = ""; - + if($this->bold) $style .= "font-weight:bold;"; if($this->italic) $style .= "font-style:italic;"; if($this->underline) $style .= "text-decoration:underline;"; @@ -79,13 +79,13 @@ public function PrintStyle() // Check if color is set. in particular when it's the 'auto' color if (self::$colortbl[$this->background]) $style .= "background-color:" . self::$colortbl[$this->background] . ";"; - + // Highlight color: - } elseif (isset($this->hcolor)) { + } elseif (isset($this->hcolor)) { if (isset(self::$highlight[$this->hcolor])) $style .= "background-color:" . self::$highlight[$this->hcolor] . 
";"; } - + return $style; } @@ -102,15 +102,15 @@ public function equals($state) if ($this->strike != $state->strike) return false; if ($this->hidden != $state->hidden) return false; if ($this->fontsize != $state->fontsize) return false; - + // Compare colors if ($this->fontcolor != $state->fontcolor) return false; if ($this->background != $state->background) return false; if ($this->hcolor != $state->hcolor) return false; - + // Compare fonts if ($this->font != $state->font) return false; - + return true; } -} \ No newline at end of file +} diff --git a/src/Text.php b/src/Text.php index 05e971d..64514f2 100644 --- a/src/Text.php +++ b/src/Text.php @@ -18,4 +18,4 @@ public function toString(int $level) { return str_repeat(" ", $level) . "TEXT {$this->text}\n"; } -} \ No newline at end of file +} diff --git a/tests/BulletsTest.php b/tests/BulletsTest.php index 82beae2..786abd8 100644 --- a/tests/BulletsTest.php +++ b/tests/BulletsTest.php @@ -4,7 +4,7 @@ use RtfHtmlPhp\Document; use RtfHtmlPhp\Html\HtmlFormatter; -final class BulletsTest extends TestCase +class BulletsTest extends TestCase { public function testBullets() { diff --git a/tests/EmptyStringTest.php b/tests/EmptyStringTest.php index 151f1d1..58a0f6e 100644 --- a/tests/EmptyStringTest.php +++ b/tests/EmptyStringTest.php @@ -3,7 +3,7 @@ use PHPUnit\Framework\TestCase; use RtfHtmlPhp\Document; -final class EmptyStringTest extends TestCase +class EmptyStringTest extends TestCase { public function testParseEmptyString() { diff --git a/tests/ExtraParagraphTest.php b/tests/ExtraParagraphTest.php index 1c5a688..18fede5 100644 --- a/tests/ExtraParagraphTest.php +++ b/tests/ExtraParagraphTest.php @@ -4,7 +4,7 @@ use RtfHtmlPhp\Document; use RtfHtmlPhp\Html\HtmlFormatter; -final class ExtraParagraphTest extends TestCase +class ExtraParagraphTest extends TestCase { public function testExtraParagraph() { diff --git a/tests/FontFamilyTest.php b/tests/FontFamilyTest.php index f98c4fe..e24b7ec 100644 --- 
a/tests/FontFamilyTest.php +++ b/tests/FontFamilyTest.php @@ -4,7 +4,7 @@ use RtfHtmlPhp\Document; use RtfHtmlPhp\Html\HtmlFormatter; -final class FontFamilyTestTest extends TestCase +class FontFamilyTestTest extends TestCase { public function testParseFontFamilyHtml() { diff --git a/tests/ParseSimpleTest.php b/tests/ParseSimpleTest.php index 089990d..7fdcd7a 100644 --- a/tests/ParseSimpleTest.php +++ b/tests/ParseSimpleTest.php @@ -4,7 +4,7 @@ use RtfHtmlPhp\Document; use RtfHtmlPhp\Html\HtmlFormatter; -final class ParseSimpleTest extends TestCase +class ParseSimpleTest extends TestCase { public function testParseSimple() { From 1c00fb4c2379a532e6b041fe95cf4d53d71c1bb1 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Tue, 6 Jul 2021 14:56:58 +0200 Subject: [PATCH 11/38] PHP5 fix --- src/Group.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Group.php b/src/Group.php index d97ed2b..2656423 100644 --- a/src/Group.php +++ b/src/Group.php @@ -52,7 +52,7 @@ public function IsDestination() // // Convert Group to string for debugging purposes. // - public function toString(int $level = 0) + public function toString(int $level) { $str = str_repeat(" ", $level) . "{\n"; From 3fe4ba4a911d2f11ca3add7f75db6ab0ed95be76 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Tue, 6 Jul 2021 15:01:14 +0200 Subject: [PATCH 12/38] PHP5: Get rid of scalar type hints --- src/ControlSymbol.php | 2 +- src/ControlWord.php | 2 +- src/Group.php | 2 +- src/Html/State.php | 2 +- src/Text.php | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ControlSymbol.php b/src/ControlSymbol.php index 53aec86..d8b0995 100644 --- a/src/ControlSymbol.php +++ b/src/ControlSymbol.php @@ -7,7 +7,7 @@ class ControlSymbol extends Element public $symbol; public $parameter = 0; - public function toString(int $level) + public function toString($level) { return str_repeat(" ", $level) . 
"SYMBOL {$this->symbol} ({$this->parameter})\n"; } diff --git a/src/ControlWord.php b/src/ControlWord.php index 919cd5c..5d602f8 100644 --- a/src/ControlWord.php +++ b/src/ControlWord.php @@ -7,7 +7,7 @@ class ControlWord extends Element public $word; public $parameter; - public function toString(int $level) + public function toString($level) { return str_repeat(" ", $level) . "WORD {$this->word} ({$this->parameter})\n"; } diff --git a/src/Group.php b/src/Group.php index 2656423..ab17d5a 100644 --- a/src/Group.php +++ b/src/Group.php @@ -52,7 +52,7 @@ public function IsDestination() // // Convert Group to string for debugging purposes. // - public function toString(int $level) + public function toString($level = 0) { $str = str_repeat(" ", $level) . "{\n"; diff --git a/src/Html/State.php b/src/Html/State.php index 6725403..749a875 100644 --- a/src/Html/State.php +++ b/src/Html/State.php @@ -33,7 +33,7 @@ public function __construct() /* * Store a font in the font table at the specified index. */ - public static function SetFont(int $index, Font $font) { + public static function SetFont($index, Font $font) { State::$fonttbl[$index] = $font; } diff --git a/src/Text.php b/src/Text.php index 64514f2..b4bef4b 100644 --- a/src/Text.php +++ b/src/Text.php @@ -9,12 +9,12 @@ class Text extends Element /* * Create a new Text instance with string content. */ - public function __construct(string $text) + public function __construct($text) { $this->text = $text; } - public function toString(int $level) + public function toString($level) { return str_repeat(" ", $level) . 
"TEXT {$this->text}\n"; } From fef9f076609afcca0a0d59d17a348536680f7749 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Tue, 6 Jul 2021 15:02:45 +0200 Subject: [PATCH 13/38] GA: Add PHP 5.4 to the matrix --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a4d24c0..162b299 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: true matrix: - php: [5.5, 5.6, 7.0, 7.1, 7.2, 7.3, 7.4, 8.0] + php: [5.4, 5.5, 5.6, 7.0, 7.1, 7.2, 7.3, 7.4, 8.0] name: PHP ${{ matrix.php }}/Linux From 9113c31e69f3781c681a0dd092ab2e310a36a3c5 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Tue, 6 Jul 2021 15:03:55 +0200 Subject: [PATCH 14/38] GA: Add PHP 8.1 to the matrix --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 162b299..d4749e0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: true matrix: - php: [5.4, 5.5, 5.6, 7.0, 7.1, 7.2, 7.3, 7.4, 8.0] + php: [5.4, 5.5, 5.6, 7.0, 7.1, 7.2, 7.3, 7.4, 8.0, 8.1] name: PHP ${{ matrix.php }}/Linux From ca083b1c39dde37c76986755506c11662dfdaefc Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Thu, 8 Jul 2021 13:44:54 +0200 Subject: [PATCH 15/38] Add support for encapsulated HTML --- src/Document.php | 14 +++--- src/Group.php | 2 +- src/Html/HtmlFormatter.php | 90 +++++++++++++++++++++++++++++++------- src/Html/State.php | 1 + tests/HtmlTest.php | 89 +++++++++++++++++++++++++++++++++++++ tests/rtf/html1.rtf | 7 +++ tests/rtf/html2.rtf | 33 ++++++++++++++ tests/rtf/html3.rtf | 39 +++++++++++++++++ 8 files changed, 252 insertions(+), 23 deletions(-) create mode 100644 tests/HtmlTest.php create mode 100644 tests/rtf/html1.rtf create mode 100644 tests/rtf/html2.rtf create mode 
100644 tests/rtf/html3.rtf diff --git a/src/Document.php b/src/Document.php index 1e53634..4521b56 100644 --- a/src/Document.php +++ b/src/Document.php @@ -200,14 +200,14 @@ protected function ParseControlSymbol() return; } - // Symbols ordinarily have no parameter. However, - // if this is \' (a single quote), then it is + // Symbols ordinarily have no parameter. However, + // if this is \' (a single quote), then it is // followed by a 2-digit hex-code: $parameter = 0; if ($symbol == '\'') { - $this->GetChar(); + $this->GetChar(); $parameter = $this->char; - $this->GetChar(); + $this->GetChar(); $parameter = hexdec($parameter . $this->char); } @@ -246,13 +246,13 @@ protected function ParseText() continue; } // Is this an escape? - if($this->char == '\\') { + if($this->char == "\\") { // Perform lookahead to see if this // is really an escape sequence. $this->GetChar(); switch($this->char) { - case '\\': break; + case "\\": break; case '{': break; case '}': break; default: @@ -317,7 +317,7 @@ protected function Parse($rtf) case '}': $this->ParseEndGroup(); break; - case '\\': + case "\\": $this->ParseControl(); break; default: diff --git a/src/Group.php b/src/Group.php index ab17d5a..93f5c09 100644 --- a/src/Group.php +++ b/src/Group.php @@ -58,7 +58,7 @@ public function toString($level = 0) foreach($this->children as $child) { - /* + /* // Skip some group types: if($child instanceof Group) { if ($child->GetType() == "fonttbl") continue; diff --git a/src/Html/HtmlFormatter.php b/src/Html/HtmlFormatter.php index cc9372b..a20a520 100644 --- a/src/Html/HtmlFormatter.php +++ b/src/Html/HtmlFormatter.php @@ -9,6 +9,7 @@ class HtmlFormatter private $output = ''; private $encoding; private $defaultFont; + private $fromhtml = false; // By default, HtmlFormatter uses HTML_ENTITIES for code conversion. 
// You can optionally support a different endoing when creating @@ -33,25 +34,28 @@ public function Format(Document $document) { // Clear current output $this->output = ''; + // Keep track of style modifications $this->previousState = null; + // and create a stack of states $this->states = array(); + // Put an initial standard state onto the stack $this->state = new State(); array_push($this->states, $this->state); // Keep track of opened html tags - $this->openedTags = array('span' => false, 'p' => false); - // Create the first paragraph - $this->OpenTag('p'); + $this->openedTags = array('span' => false, 'p' => null); + // Begin format $this->ProcessGroup($document->root); + // Instead of removing opened tags, we close them $this->output .= $this->openedTags['span'] ? '' : ''; $this->output .= $this->openedTags['p'] ? '

' : ''; - // Remove extra empty paragraph + // Remove extra empty paragraph at the end // TODO: Find the real reason it's there and fix it $this->output = preg_replace('|

$|', '', $this->output); @@ -252,7 +256,23 @@ protected function ProcessDestination($dest) $c = count($dest); for ($i=2;$i<$c;$i++) $this->FormatEntry($dest[$i]); + } elseif ($dest[1]->word == "htmltag") { + for ($i = 2; $i < count($dest); $i++) { + if (isset($dest[$i])) { + $entry = $dest[$i]; + + if ($entry instanceof \RtfHtmlPhp\Text) { + $this->output .= $entry->text; + } elseif($entry instanceof \RtfHtmlPhp\Group) { + $this->ProcessGroup($entry); + } elseif($entry instanceof \RtfHtmlPhp\ControlSymbol) { + $this->FormatControlSymbol($entry); + } elseif($entry instanceof \RtfHtmlPhp\ControlWord) { + $this->FormatControlWord($entry, true); + } + } } + } } protected function FormatEntry($entry) @@ -323,17 +343,17 @@ protected function FormatControlWord($word) * Special characters */ - case 'lquote': $this->Write("‘"); break; // ‘ ‘ - case 'rquote': $this->Write("’"); break; // ’ ’ - case 'ldblquote': $this->Write("“"); break; // “ “ - case 'rdblquote': $this->Write("”"); break; // ” ” - case 'bullet': $this->Write("•"); break; // • • - case 'endash': $this->Write("–"); break; // – – - case 'emdash': $this->Write("—"); break; // — — - case 'enspace': $this->Write(" "); break; //   - case 'emspace': $this->Write(" "); break; //   - case 'tab': $this->Write(" "); break; // Character value 9 - case 'line': $this->output .= "
"; break; // character value (line feed = ) (carriage return = ) + case 'lquote': $this->Write($this->fromhtml ? "‘" : "‘"); break; // ‘ ‘ + case 'rquote': $this->Write($this->fromhtml ? "’" : "’"); break; // ’ ’ + case 'ldblquote': $this->Write($this->fromhtml ? "“" : "“"); break; // “ “ + case 'rdblquote': $this->Write($this->fromhtml ? "”" : "”"); break; // ” ” + case 'bullet': $this->Write($this->fromhtml ? "•" : "•"); break; // • • + case 'endash': $this->Write($this->fromhtml ? "–" : "–"); break; // – – + case 'emdash': $this->Write($this->fromhtml ? "—" : "—"); break; // — — + case 'enspace': $this->Write($this->fromhtml ? " " : " "); break; //   + case 'emspace': $this->Write($this->fromhtml ? " " : " "); break; //   + case 'tab': $this->Write($this->fromhtml ? "\t" : " "); break; // Character value 9 + case 'line': $this->output .= $this->fromhtml ? "\n" : "
"; break; // character value (line feed = ) (carriage return = ) /* * Unicode characters @@ -349,6 +369,10 @@ protected function FormatControlWord($word) */ case 'par': case 'row': + if ($this->fromhtml) { + $this->output .= "\n"; + break; + } // Close previously opened tags $this->CloseTags(); // Begin a new paragraph @@ -367,6 +391,14 @@ protected function FormatControlWord($word) $this->RTFencoding = $this->GetEncodingFromCodepage($word->parameter); } break; + + case 'fromhtml': + $this->fromhtml = $word->parameter > 0; + break; + + case 'htmlrtf': + $this->state->htmlrtf = $word->parameter > 0; + break; } } @@ -392,6 +424,21 @@ protected function DecodeUnicode($code, $srcEnc = 'UTF-8') protected function Write($txt) { + // Ignore regions that are not part of the original (encapsulated) HTML content + if ($this->state->htmlrtf) { + return; + } + + if ($this->fromhtml) { + $this->output .= $txt; + return; + } + + if ($this->openedTags['p'] === null) { + // Create the first paragraph + $this->OpenTag('p'); + } + // Create a new 'span' element only when a style change occurs. // 1st case: style change occured // 2nd case: there is no change in style but the already created 'span' @@ -411,17 +458,27 @@ protected function Write($txt) $attr = $style ? "style=\"{$style}\"" : ""; $this->OpenTag('span', $attr); } + $this->output .= $txt; } protected function OpenTag($tag, $attr = '') { + // Ignore regions that are not part of the original (encapsulated) HTML content + if ($this->fromhtml) { + return; + } + $this->output .= $attr ? 
"<{$tag} {$attr}>" : "<{$tag}>"; $this->openedTags[$tag] = true; } protected function CloseTag($tag) { + if ($this->fromhtml) { + return; + } + if ($this->openedTags[$tag]) { // Check for empty html elements if (substr($this->output ,-strlen("<{$tag}>")) == "<{$tag}>"){ @@ -437,6 +494,7 @@ protected function CloseTag($tag) } else { $this->output .= ""; } + $this->openedTags[$tag] = false; } } @@ -460,6 +518,8 @@ protected function FormatControlSymbol($symbol) $this->Write("­"); // Optional hyphen }elseif ($symbol->symbol == '_') { $this->Write("‑"); // Non breaking hyphen + }elseif ($symbol->symbol == '{') { + $this->Write("{"); // Non breaking hyphen } } diff --git a/src/Html/State.php b/src/Html/State.php index 749a875..acc3ff5 100644 --- a/src/Html/State.php +++ b/src/Html/State.php @@ -49,6 +49,7 @@ public function Reset($defaultFont = null) $this->background = null; $this->hcolor = null; $this->font = isset($defaultFont) ? $defaultFont : null; + $this->htmlrtf = false; } public function PrintStyle() diff --git a/tests/HtmlTest.php b/tests/HtmlTest.php new file mode 100644 index 0000000..0a8cb74 --- /dev/null +++ b/tests/HtmlTest.php @@ -0,0 +1,89 @@ +Format($document); + + $this->assertEquals( + "" + . " ", + $html + ); + } + + public function testHtml2() + { + $rtf = file_get_contents("tests/rtf/html2.rtf"); + $document = new Document($rtf); + $formatter = new HtmlFormatter(); + $html = $formatter->Format($document); + + $expected = << + + + + +

Note the line break inside a P tag. This is a bold text + +

+

+This is a normal text with a character references:  < ¨
+ +characters that have special meaning in RTF: {}\
+ + +

+
    +
  1. This is a list item + +
+ + + +EOT; + $this->assertEquals($expected, $html); + } + + public function testHtml3() + { + $rtf = file_get_contents("tests/rtf/html3.rtf"); + $document = new Document($rtf); + $formatter = new HtmlFormatter('UTF-8'); + $html = $formatter->Format($document); + + $expected = <<這是一個文本字符串
+זהו מחרוזת טקסט.

+ +
This is your third encoding.... maybe? +
+
+
+
+
+
+
+
+ +EOT; + $this->assertEquals($expected, $html); + } +} diff --git a/tests/rtf/html1.rtf b/tests/rtf/html1.rtf new file mode 100644 index 0000000..ecafa46 --- /dev/null +++ b/tests/rtf/html1.rtf @@ -0,0 +1,7 @@ +{\rtf1\ansi\ansicpg1252\fromhtml1 \fbidis \deff0{\fonttbl{\f0\fswiss\fcharset0 Arial;}} +{\*\htmltag19 } +{\*\htmltag241 } +{\htmlrtf0 {\*\htmltag72} +{\*\htmltag148 }\htmlrtf {\f0 \htmlrtf0 +{\*\htmltag84  }\htmlrtf \'a0\htmlrtf0 +{\*\htmltag156 }\htmlrtf }\htmlrtf0 \htmlrtf } diff --git a/tests/rtf/html2.rtf b/tests/rtf/html2.rtf new file mode 100644 index 0000000..e8dcd7c --- /dev/null +++ b/tests/rtf/html2.rtf @@ -0,0 +1,33 @@ +{\rtf1\ANSI\ansicpg1251\fromhtml1 \deff0 +{\fonttbl {\f0\fmodern Courier New;}{\f1\fswiss Arial;}{\f2\fswiss\fcharset0 Arial;}} +{\colortbl\red0\green0\blue0;\red0\green0\blue255;} +{\*\htmltag64} +\uc1\pard\plain\deftab360 \f0\fs24 +{\*\htmltag \par +\par +\tab \par +\par +\par +} +{\htmlrtf \f1 \htmlrtf0 Note the line break inside a P tag. {\*\htmltag }{\htmlrtf \b \htmlrtf0 This is a bold text{\*\htmltag }} \htmlrtf\par\htmlrtf0} +\htmlrtf \par \htmlrtf0 +{\*\htmltag

\par +

\par} +{\htmlrtf \f1 \htmlrtf0 This is a normal text with a character references: +{\*\htmltag  }\htmlrtf \'a0\htmlrtf0 {\*\htmltag <}\htmlrtf <\htmlrtf0 {\*\htmltag ¨}\htmlrtf {\f2\'a8}\htmlrtf0{\*\htmltag
\par}\htmlrtf\line\htmlrtf0 +characters that have special meaning in RTF: \{\}\\{\*\htmltag
\par}\htmlrtf\line\htmlrtf0\htmlrtf\par\htmlrtf0} +{\*\htmltag

\par +
    \par +
  1. }{\htmlrtf {{\*\pn\pnlvlbody\pndec\pnstart1\pnindent360{\pntxta.}}\li360\fi-360{\pntext 1.\tab} \f1 \htmlrtf0 This is a list item}\htmlrtf\par\htmlrtf0} +{\*\htmltag \par +
\par +\par +\par }} \ No newline at end of file diff --git a/tests/rtf/html3.rtf b/tests/rtf/html3.rtf new file mode 100644 index 0000000..d7d773a --- /dev/null +++ b/tests/rtf/html3.rtf @@ -0,0 +1,39 @@ +{\rtf1\ansi\ansicpg1251\fromhtml1 \fbidis \deff0{\fonttbl +{\f0\fswiss Arial;}} +{\colortbl\red0\green0\blue0;} +\uc1\pard\plain\deftab360 \f0\fs24 +{\*\htmltag96
}\htmlrtf {\htmlrtf0 {\*\htmltag64}\htmlrtf {\htmlrtf0\u36889 ?\u26159 ?\u19968 ?\u20491 ?\u25991 ?\u26412 ?\u23383 ?\u31526 ?\u20018 ? +{\*\htmltag116
}\htmlrtf \line +\htmlrtf0 \u1494 ?\u1492 ?\u1493 ? \u32 ? \u1502 ?\u1495 ?\u1512 ?\u1493 ?\u1494 ?\u1514 ?\u32 ? \u1496 ?\u1511 ?\u1505 ?\u1496 ?. +{\*\htmltag240
} +{\*\htmltag96
}\htmlrtf {\htmlrtf0 {\*\htmltag64}\htmlrtf {\htmlrtf0 +{\*\htmltag116
}\htmlrtf \line +\htmlrtf0 {\*\htmltag72}\htmlrtf\par}\htmlrtf0 + +{\*\htmltag104
}\htmlrtf }\htmlrtf0 +{\*\htmltag96
}\htmlrtf {\htmlrtf0 {\*\htmltag64}\htmlrtf {\htmlrtf0 +{\*\htmltag96
}\htmlrtf {\htmlrtf0 {\*\htmltag64}\htmlrtf {\htmlrtf0 +{\*\htmltag96
}\htmlrtf {\htmlrtf0 {\*\htmltag64}\htmlrtf {\htmlrtf0 +{\*\htmltag96
}\htmlrtf {\htmlrtf0 {\*\htmltag64}\htmlrtf {\htmlrtf0 +{\*\htmltag96
}\htmlrtf {\htmlrtf0 {\*\htmltag64}\htmlrtf {\htmlrtf0 +{\*\htmltag96
}\htmlrtf {\htmlrtf0 {\*\htmltag64}\htmlrtf {\htmlrtf0 +{\*\htmltag96
}\htmlrtf {\htmlrtf0 {\*\htmltag64}\htmlrtf {\htmlrtf0 +{\*\htmltag96
}\htmlrtf {\htmlrtf0 {\*\htmltag64}\htmlrtf {\htmlrtf0This is your third encoding.... maybe?{\*\htmltag72}\htmlrtf\par}\htmlrtf0 + +{\*\htmltag104
}\htmlrtf }\htmlrtf0 {\*\htmltag72}\htmlrtf\par}\htmlrtf0 + +{\*\htmltag104
}\htmlrtf }\htmlrtf0 {\*\htmltag72}\htmlrtf\par}\htmlrtf0 + +{\*\htmltag104
}\htmlrtf }\htmlrtf0 {\*\htmltag72}\htmlrtf\par}\htmlrtf0 + +{\*\htmltag104
}\htmlrtf }\htmlrtf0 {\*\htmltag72}\htmlrtf\par}\htmlrtf0 + +{\*\htmltag104
}\htmlrtf }\htmlrtf0 {\*\htmltag72}\htmlrtf\par}\htmlrtf0 + +{\*\htmltag104
}\htmlrtf }\htmlrtf0 {\*\htmltag72}\htmlrtf\par}\htmlrtf0 + +{\*\htmltag104
}\htmlrtf }\htmlrtf0 {\*\htmltag72}\htmlrtf\par}\htmlrtf0 + +{\*\htmltag104
}\htmlrtf }\htmlrtf0 +{\*\htmltag104
} +{\*\htmltag0 \par }} \ No newline at end of file From 9e960069a67b822870dd4caaf02ff9280c84cbfa Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Thu, 8 Jul 2021 13:58:36 +0200 Subject: [PATCH 16/38] Don't trigger errors, only exceptions --- src/Document.php | 2 -- src/Html/HtmlFormatter.php | 22 ++++++++-------------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/Document.php b/src/Document.php index 4521b56..41360a3 100644 --- a/src/Document.php +++ b/src/Document.php @@ -24,7 +24,6 @@ protected function GetChar() $this->char = $this->rtf[$this->pos++]; } else { $err = "Parse error: Tried to read past end of input; RTF is probably truncated."; - trigger_error($err); throw new \Exception($err); } } @@ -281,7 +280,6 @@ protected function ParseText() // Throw an exception. if($this->group == null) { $err = "Parse error: RTF text outside of group."; - trigger_error($err); throw new \Exception($err); } diff --git a/src/Html/HtmlFormatter.php b/src/Html/HtmlFormatter.php index a20a520..0e4eca7 100644 --- a/src/Html/HtmlFormatter.php +++ b/src/Html/HtmlFormatter.php @@ -16,17 +16,17 @@ class HtmlFormatter // the HtmlFormatter instance. 
public function __construct($encoding = 'HTML-ENTITIES') { + if (!extension_loaded('mbstring')) { + throw new \Exception("PHP mbstring extension not enabled"); + } + if ($encoding != 'HTML-ENTITIES') { - // Check if mbstring extension is loaded - if (!extension_loaded('mbstring')) { - trigger_error("PHP mbstring extension not enabled, reverting back to HTML-ENTITIES"); - $encoding = 'HTML-ENTITIES'; // Check if the encoding is reconized by mbstring extension - } elseif (!in_array($encoding, mb_list_encodings())){ - trigger_error("Unrecognized Encoding, reverting back to HTML-ENTITIES"); - $encoding = 'HTML-ENTITIES'; + if (!in_array($encoding, mb_list_encodings())) { + throw new \Exception("Unsupported encoding: $encoding"); } } + $this->encoding = $encoding; } @@ -577,8 +577,6 @@ protected function GetEncodingFromCharset($fcharset) if (isset($charset[$fcharset])) return $charset[$fcharset]; - else { - trigger_error("Unknown charset: {$fcharset}"); } } @@ -624,9 +622,6 @@ protected function GetEncodingFromCodepage($cpg) if (isset($codePage[$cpg])) return $codePage[$cpg]; - else { - // Debug Error - trigger_error("Unknown codepage: {$cpg}"); } } @@ -655,7 +650,6 @@ protected function ord_utf8($chr) if ($ord0 >= 252 && $ord0 <= 253) return ($ord0 - 252) * 1073741824 + ($ord1 - 128) * 16777216 + ($ord2 - 128) * 262144 + ($ord3 - 128) * 4096 + ($ord4 - 128) * 64 + (ord($chr[5]) - 128); - trigger_error("Invalid Unicode character: {$chr}"); + // trigger_error("Invalid Unicode character: {$chr}"); } } - From c566e2a7fbed35873b2bf49070a1c1de911f686d Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Thu, 8 Jul 2021 14:00:37 +0200 Subject: [PATCH 17/38] Fix parse errors --- src/Html/HtmlFormatter.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Html/HtmlFormatter.php b/src/Html/HtmlFormatter.php index 0e4eca7..9549fc0 100644 --- a/src/Html/HtmlFormatter.php +++ b/src/Html/HtmlFormatter.php @@ -577,7 +577,6 @@ protected function 
GetEncodingFromCharset($fcharset) if (isset($charset[$fcharset])) return $charset[$fcharset]; - } } protected function GetEncodingFromCodepage($cpg) @@ -622,7 +621,6 @@ protected function GetEncodingFromCodepage($cpg) if (isset($codePage[$cpg])) return $codePage[$cpg]; - } } protected function ord_utf8($chr) From 7c057db1fc95eddbcd7c614f1ff53f54db2f111a Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Thu, 8 Jul 2021 14:04:17 +0200 Subject: [PATCH 18/38] Add some more tests --- tests/ParseTest.php | 28 ++++++++++++++++++++++++++++ tests/rtf/test1.rtf | 7 +++++++ 2 files changed, 35 insertions(+) create mode 100644 tests/ParseTest.php create mode 100644 tests/rtf/test1.rtf diff --git a/tests/ParseTest.php b/tests/ParseTest.php new file mode 100644 index 0000000..3d9047f --- /dev/null +++ b/tests/ParseTest.php @@ -0,0 +1,28 @@ +Format($document); + + // We only test that it does not throw an exception + $this->assertTrue(true); + } + + public function testParseException1() + { + $this->expectException(\Exception::class); + + $document = new Document('{\rtf1\ansi\ansicpg1252\deff0\deflang1046'); + $formatter = new HtmlFormatter(); + $html = $formatter->Format($document); + } +} diff --git a/tests/rtf/test1.rtf b/tests/rtf/test1.rtf new file mode 100644 index 0000000..697410a --- /dev/null +++ b/tests/rtf/test1.rtf @@ -0,0 +1,7 @@ +{\rtf1\ansi\ansicpg1252\deff0\deflang1046{\fonttbl{\f0\fnil\fcharset0 Microsoft Sans Serif;}} +{\colortbl ;\red0\green0\blue0;} +{\*\generator Msftedit 5.41.21.2510;}\viewkind4\uc1\pard\cf1\f0\fs16 Blusa Alongada decote V, manga longa.\par +Shape contornando o corpo, aplica\'e7\'e3o do s\'edmbolo ROLAMO\'c7A na lateral.\par +Tecido de viscose e elastano com amaciamento ecol\'f3gico que utiliza o \'f3leo extra\'eddo da casca de arroz.\par +Possui \'f3timo caimento, \'e9 agrad\'e1vel ao toque e resistente ao pilling.\par +} \ No newline at end of file From bec62dd8a22cb4d5e4000db3d11d445d117cc3a9 Mon Sep 17 00:00:00 2001 From: 
Aleksander Machniak Date: Thu, 8 Jul 2021 15:13:11 +0200 Subject: [PATCH 19/38] More sane code style --- src/ControlSymbol.php | 14 +- src/ControlWord.php | 12 +- src/Document.php | 635 +++++++++--------- src/Element.php | 2 +- src/Group.php | 132 ++-- src/Html/Font.php | 35 +- src/Html/HtmlFormatter.php | 1188 +++++++++++++++++----------------- src/Html/Image.php | 57 +- src/Html/State.php | 233 ++++--- src/Text.php | 24 +- tests/BulletsTest.php | 24 +- tests/EmptyStringTest.php | 10 +- tests/ExtraParagraphTest.php | 28 +- tests/FontFamilyTest.php | 22 +- tests/HtmlTest.php | 62 +- tests/ParseSimpleTest.php | 34 +- tests/ParseTest.php | 32 +- 17 files changed, 1331 insertions(+), 1213 deletions(-) diff --git a/src/ControlSymbol.php b/src/ControlSymbol.php index d8b0995..15c51d9 100644 --- a/src/ControlSymbol.php +++ b/src/ControlSymbol.php @@ -4,11 +4,11 @@ class ControlSymbol extends Element { - public $symbol; - public $parameter = 0; + public $symbol; + public $parameter = 0; - public function toString($level) - { - return str_repeat(" ", $level) . "SYMBOL {$this->symbol} ({$this->parameter})\n"; - } -} \ No newline at end of file + public function toString($level) + { + return str_repeat(" ", $level) . "SYMBOL {$this->symbol} ({$this->parameter})\n"; + } +} diff --git a/src/ControlWord.php b/src/ControlWord.php index 5d602f8..da25b74 100644 --- a/src/ControlWord.php +++ b/src/ControlWord.php @@ -4,11 +4,11 @@ class ControlWord extends Element { - public $word; - public $parameter; + public $word; + public $parameter; - public function toString($level) - { - return str_repeat(" ", $level) . "WORD {$this->word} ({$this->parameter})\n"; - } + public function toString($level) + { + return str_repeat(" ", $level) . 
"WORD {$this->word} ({$this->parameter})\n"; + } } diff --git a/src/Document.php b/src/Document.php index 41360a3..128155c 100644 --- a/src/Document.php +++ b/src/Document.php @@ -4,329 +4,372 @@ class Document { - private $rtf; // RTF string being parsed - private $pos; // Current position in RTF string - private $len; // Length of RTF string - public $root = null; // Root group - private $group; // Current RTF group - - public function __construct($rtf) - { - $this->Parse($rtf); - } - - // Get the next character from the RTF stream. - // Parsing is aborted when reading beyond end of input string. - protected function GetChar() - { - $this->char = null; - if ($this->pos < strlen($this->rtf)) { - $this->char = $this->rtf[$this->pos++]; - } else { - $err = "Parse error: Tried to read past end of input; RTF is probably truncated."; - throw new \Exception($err); - } - } - - /* - * (Helper method) - * Is the current character a letter? - */ - protected function is_letter() - { - if(ord($this->char) >= 65 && ord($this->char) <= 90) return true; - if(ord($this->char) >= 97 && ord($this->char) <= 122) return true; - return false; - } - - /* - * (Helper method) - * Is the current character a digit? - */ - protected function is_digit() - { - return (ord($this->char) >= 48 && ord($this->char) <= 57); - } - - /* - * (Helper method) - * Is the current character end-of-line (EOL)? - */ - protected function is_endofline() - { - if ($this->char == "\r" || $this->char == "\n") { - // Checks for a Windows/Acron type EOL - if( $this->rtf[$this->pos] == "\n" || $this->rtf[$this->pos] == "\r" ) { - $this->GetChar(); - } - return true; - } - return false; - } - - /* - * (Helper method) - * Is the current character for a space delimiter? - */ - protected function is_space_delimiter() - { - return ($this->char == " " || $this->is_endofline()); - } - - // Store state of document on stack. - protected function ParseStartGroup() - { - $group = new Group(); - - // Is there a current group? 
Then make the new group its child: - if($this->group != null) { - $group->parent = $this->group; - array_push($this->group->children, $group); - array_push($this->uc, end($this->uc)); - } - // If there is no parent group, then set this group - // as the root group. - else { - $this->root = $group; - // Create uc stack and insert the first default value - $this->uc = array(1); + /** @var string RTF string being parsed */ + private $rtf; + /** @var int Current position in RTF string */ + private $pos; + /** @var int Length of RTF string */ + private $len; + /** @var Group Current RTF group */ + private $group; + + /** @var Group Root group */ + public $root = null; + + /** + * Object contructor + * + * @param string The RTF content + */ + public function __construct($rtf) + { + $this->Parse($rtf); } - // Set the new group as the current group: - $this->group = $group; - } - - // Retrieve state of document from stack. - protected function ParseEndGroup() - { - $this->group = $this->group->parent; - // Retrieve last uc value from stack - array_pop($this->uc); - } - - protected function ParseControlWord() - { - // Read letters until a non-letter is reached. - $word = ""; - $this->GetChar(); - while($this->is_letter()) + /** + * Get the next character from the RTF stream. + * Parsing is aborted when reading beyond end of input string. + * + * @return string + */ + protected function GetChar() { - $word .= $this->char; - $this->GetChar(); - } + $this->char = null; - // Read parameter (if any) consisting of digits. 
- // Parameter may be negative, i.e., starting with a '-' - $parameter = null; - $negative = false; - if($this->char == '-') { - $this->GetChar(); - $negative = true; + if ($this->pos < strlen($this->rtf)) { + $this->char = $this->rtf[$this->pos++]; + } else { + $err = "Parse error: Tried to read past end of input; RTF is probably truncated."; + throw new \Exception($err); + } } - while($this->is_digit()) + + /** + * (Helper method) + * Is the current character a letter? + */ + protected function is_letter() { - if($parameter == null) $parameter = 0; - $parameter = $parameter * 10 + $this->char; - $this->GetChar(); - } - // If no parameter present, assume control word's default (usually 1) - // If no default then assign 0 to the parameter - if($parameter === null) $parameter = 1; + if (ord($this->char) >= 65 && ord($this->char) <= 90) { + return true; + } + + if (ord($this->char) >= 97 && ord($this->char) <= 122) { + return true; + } - // Convert parameter to a negative number when applicable - if($negative) $parameter = -$parameter; + return false; + } - // Update uc value - if ($word == "uc") { - array_pop($this->uc); - $this->uc[] = $parameter; + /** + * (Helper method) + * Is the current character a digit? + */ + protected function is_digit() + { + return (ord($this->char) >= 48 && ord($this->char) <= 57); } - // Skip space delimiter - if(!$this->is_space_delimiter()) $this->pos--; + /** + * (Helper method) + * Is the current character end-of-line (EOL)? + */ + protected function is_endofline() + { + if ($this->char == "\r" || $this->char == "\n") { + // Checks for a Windows/Acron type EOL + if ($this->rtf[$this->pos] == "\n" || $this->rtf[$this->pos] == "\r") { + $this->GetChar(); + } - // If this is \u, then the parameter will be followed - // by {$this->uc} characters. 
- if($word == "u") { - // Convert parameter to unsigned decimal unicode - if($negative) $parameter = 65536 + $parameter; + return true; + } - // Will ignore replacement characters $uc times - $uc = end($this->uc); - while ($uc > 0) { - $this->GetChar(); - // If the replacement character is encoded as - // hexadecimal value \'hh then jump over it - if($this->char == '\\' && $this->rtf[$this->pos]=='\'') - $this->pos = $this->pos + 3; - - // Break if it's an RTF scope delimiter - elseif ($this->char == '{' || $this->char == '{') - break; - - // - To include an RTF delimiter in skippable data, it must be - // represented using the appropriate control symbol (that is, - // escaped with a backslash,) as in plain text. - // - // - Any RTF control word or symbol is considered a single character - // for the purposes of counting skippable characters. For this reason - // it's more appropriate to create a $skip flag and let the Parse() - // function take care of the skippable characters. - $uc--; - } + return false; } - // Add new RTF word as a child to the current group. - $rtfword = new ControlWord(); - $rtfword->word = $word; - $rtfword->parameter = $parameter; - array_push($this->group->children, $rtfword); - } - - protected function ParseControlSymbol() - { - // Read symbol (one character only). - $this->GetChar(); - $symbol = $this->char; - - // Exceptional case: - // Treat EOL symbols as \par control word - if ($this->is_endofline()) { - $rtfword = new ControlWord(); - $rtfword->word = 'par'; - $rtfword->parameter = 0; - array_push($this->group->children, $rtfword); - return; + /** + * (Helper method) + * Is the current character for a space delimiter? + */ + protected function is_space_delimiter() + { + return ($this->char == " " || $this->is_endofline()); } - // Symbols ordinarily have no parameter. 
However, - // if this is \' (a single quote), then it is - // followed by a 2-digit hex-code: - $parameter = 0; - if ($symbol == '\'') { - $this->GetChar(); - $parameter = $this->char; - $this->GetChar(); - $parameter = hexdec($parameter . $this->char); + /** + * Store state of document on stack. + */ + protected function ParseStartGroup() + { + $group = new Group(); + + if ($this->group != null) { + // Make the new group a child of the current group + $group->parent = $this->group; + + array_push($this->group->children, $group); + array_push($this->uc, end($this->uc)); + } else { + // If there is no parent group, then set this group + // as the root group. + $this->root = $group; + // Create uc stack and insert the first default value + $this->uc = array(1); + } + + // Set the new group as the current group: + $this->group = $group; } - // Add new control symbol as a child to the current group: - $rtfsymbol = new ControlSymbol(); - $rtfsymbol->symbol = $symbol; - $rtfsymbol->parameter = $parameter; - array_push($this->group->children, $rtfsymbol); - } - - protected function ParseControl() - { - // Beginning of an RTF control word or control symbol. - // Look ahead by one character to see if it starts with - // a letter (control world) or another symbol (control symbol): - $this->GetChar(); - $this->pos--; // (go back after look-ahead) - if($this->is_letter()) { - $this->ParseControlWord(); - } else { - $this->ParseControlSymbol(); + /** + * Retrieve state of document from stack. + */ + protected function ParseEndGroup() + { + $this->group = $this->group->parent; + // Retrieve last uc value from stack + array_pop($this->uc); } - } - - protected function ParseText() - { - // Parse plain text up to backslash or brace, - // unless escaped. - $text = ""; - $terminate = false; - do + + protected function ParseControlWord() { - // Ignore EOL characters - if($this->char == "\r" || $this->char == "\n") { + // Read letters until a non-letter is reached. 
+ $word = ''; $this->GetChar(); - continue; - } - // Is this an escape? - if($this->char == "\\") { - // Perform lookahead to see if this - // is really an escape sequence. + + while ($this->is_letter()) { + $word .= $this->char; + $this->GetChar(); + } + + // Read parameter (if any) consisting of digits. + // Parameter may be negative, i.e., starting with a '-' + $parameter = null; + $negative = false; + + if ($this->char == '-') { + $this->GetChar(); + $negative = true; + } + + while ($this->is_digit()) { + if ($parameter === null) { + $parameter = 0; + } + $parameter = $parameter * 10 + $this->char; + $this->GetChar(); + } + + // If no parameter present, assume control word's default (usually 1) + // If no default then assign 0 to the parameter + if ($parameter === null) { + $parameter = 1; + } + + // Convert parameter to a negative number when applicable + if ($negative) { + $parameter = -$parameter; + } + + // Update uc value + if ($word == "uc") { + array_pop($this->uc); + $this->uc[] = $parameter; + } + + // Skip space delimiter + if (!$this->is_space_delimiter()) { + $this->pos--; + } + + // If this is \u, then the parameter will be followed + // by {$this->uc} characters. + if ($word == "u") { + // Convert parameter to unsigned decimal unicode + if ($negative) { + $parameter = 65536 + $parameter; + } + + // Will ignore replacement characters $uc times + $uc = end($this->uc); + + while ($uc > 0) { + $this->GetChar(); + // If the replacement character is encoded as + // hexadecimal value \'hh then jump over it + if ($this->char == "\\" && $this->rtf[$this->pos] == '\'') { + $this->pos = $this->pos + 3; + } elseif ($this->char == '{' || $this->char == '{') { + // Break if it's an RTF scope delimiter + break; + } + + // - To include an RTF delimiter in skippable data, it must be + // represented using the appropriate control symbol (that is, + // escaped with a backslash,) as in plain text. 
+ // + // - Any RTF control word or symbol is considered a single character + // for the purposes of counting skippable characters. For this reason + // it's more appropriate to create a $skip flag and let the Parse() + // function take care of the skippable characters. + $uc--; + } + } + + // Add new RTF word as a child to the current group. + $rtfword = new ControlWord(); + $rtfword->word = $word; + $rtfword->parameter = $parameter; + array_push($this->group->children, $rtfword); + } + + protected function ParseControlSymbol() + { + // Read symbol (one character only). $this->GetChar(); - switch($this->char) - { - case "\\": break; - case '{': break; - case '}': break; - default: - // Not an escape. Roll back. - $this->pos = $this->pos - 2; - $terminate = true; - break; + $symbol = $this->char; + + // Exceptional case: + // Treat EOL symbols as \par control word + if ($this->is_endofline()) { + $rtfword = new ControlWord(); + $rtfword->word = 'par'; + $rtfword->parameter = 0; + array_push($this->group->children, $rtfword); + return; } - } elseif ($this->char == '{' || $this->char == '}') { - $this->pos--; - $terminate = true; - } - - if(!$terminate) { - // Save plain text - $text .= $this->char; + + // Symbols ordinarily have no parameter. However, + // if this is \' (a single quote), then it is + // followed by a 2-digit hex-code: + $parameter = 0; + if ($symbol == '\'') { + $this->GetChar(); + $parameter = $this->char; + $this->GetChar(); + $parameter = hexdec($parameter . $this->char); + } + + // Add new control symbol as a child to the current group: + $rtfsymbol = new ControlSymbol(); + $rtfsymbol->symbol = $symbol; + $rtfsymbol->parameter = $parameter; + array_push($this->group->children, $rtfsymbol); + } + + protected function ParseControl() + { + // Beginning of an RTF control word or control symbol. 
+ // Look ahead by one character to see if it starts with + // a letter (control world) or another symbol (control symbol): $this->GetChar(); - } - } - while(!$terminate && $this->pos < $this->len); - - // Create new Text element: - $text = new Text($text); - - // If there is no current group, then this is not a valid RTF file. - // Throw an exception. - if($this->group == null) { - $err = "Parse error: RTF text outside of group."; - throw new \Exception($err); + $this->pos--; // (go back after look-ahead) + + if ($this->is_letter()) { + $this->ParseControlWord(); + } else { + $this->ParseControlSymbol(); + } } - // Add text as a child to the current group: - array_push($this->group->children, $text); - } - - /* - * Attempt to parse an RTF string. - */ - protected function Parse($rtf) - { - $this->rtf = $rtf; - $this->pos = 0; - $this->len = strlen($this->rtf); - $this->group = null; - $this->root = null; - - while($this->pos < $this->len-1) + protected function ParseText() { - // Read next character: - $this->GetChar(); - - // Ignore \r and \n - if($this->char == "\n" || $this->char == "\r") continue; - - // What type of character is this? - switch($this->char) - { - case '{': - $this->ParseStartGroup(); - break; - case '}': - $this->ParseEndGroup(); - break; - case "\\": - $this->ParseControl(); - break; - default: - $this->ParseText(); - break; - } + // Parse plain text up to backslash or brace, + // unless escaped. + $text = ''; + $terminate = false; + + do { + // Ignore EOL characters + if ($this->char == "\r" || $this->char == "\n") { + $this->GetChar(); + continue; + } + // Is this an escape? + if ($this->char == "\\") { + // Perform lookahead to see if this + // is really an escape sequence. + $this->GetChar(); + switch ($this->char) { + case "\\": break; + case '{': break; + case '}': break; + default: + // Not an escape. Roll back. 
+ $this->pos = $this->pos - 2; + $terminate = true; + break; + } + } elseif ($this->char == '{' || $this->char == '}') { + $this->pos--; + $terminate = true; + } + + if (!$terminate) { + // Save plain text + $text .= $this->char; + $this->GetChar(); + } + } while (!$terminate && $this->pos < $this->len); + + // Create new Text element: + $text = new Text($text); + + // If there is no current group, then this is not a valid RTF file. + // Throw an exception. + if ($this->group == null) { + throw new \Exception("Parse error: RTF text outside of group."); + } + + // Add text as a child to the current group: + array_push($this->group->children, $text); } - } - public function __toString() { - if(!$this->root) return "No root group"; - return $this->root->toString(); - } + /** + * Attempt to parse an RTF string. + */ + protected function Parse($rtf) + { + $this->rtf = $rtf; + $this->pos = 0; + $this->len = strlen($this->rtf); + $this->group = null; + $this->root = null; + + while ($this->pos < $this->len-1) { + // Read next character: + $this->GetChar(); + + // Ignore \r and \n + if ($this->char == "\n" || $this->char == "\r") { + continue; + } + + // What type of character is this? + switch ($this->char) { + case '{': + $this->ParseStartGroup(); + break; + case '}': + $this->ParseEndGroup(); + break; + case "\\": + $this->ParseControl(); + break; + default: + $this->ParseText(); + break; + } + } + } + + public function __toString() + { + if (!$this->root) { + return "No root group"; + } + + return $this->root->toString(); + } } diff --git a/src/Element.php b/src/Element.php index 6bce034..137b09b 100644 --- a/src/Element.php +++ b/src/Element.php @@ -2,7 +2,7 @@ namespace RtfHtmlPhp; -/* +/** * Element is the parent class of all RTF elements, * like Group, ControlWord and ControlSymbol. 
*/ diff --git a/src/Group.php b/src/Group.php index 93f5c09..469b2e1 100644 --- a/src/Group.php +++ b/src/Group.php @@ -4,74 +4,88 @@ class Group extends Element { - public $parent; - public $children; + public $parent; + public $children; - /* - * Create a new Group, with no parent and no children. - */ - public function __construct() - { - $this->parent = null; - $this->children = array(); - } + /** + * Create a new Group, with no parent and no children. + */ + public function __construct() + { + $this->parent = null; + $this->children = array(); + } + + public function GetType() + { + // No children? Then the group type is null. + if (sizeof($this->children) == 0) { + return null; + } + + $child = $this->children[0]; - public function GetType() - { - // No children? Then the group type is null. - if(sizeof($this->children) == 0) return null; + // If the first child is a control word, then + // the group type is the word. + if ($child instanceof ControlWord) { + return $child->word; + } - // If the first child is a control word, then - // the group type is the word. - $child = $this->children[0]; - if($child instanceof ControlWord) return $child->word; + // If the first child is a control symbol, then + // the group type is * for a special symbol, or null. + if ($child instanceof ControlSymbol) { + return ($child->symbol == '*') ? '*' : null; + } - // If the first child is a control symbol, then - // the group type is * for a special symbol, or null. - elseif ($child instanceof ControlSymbol) { - return ($child->symbol == '*') ? '*' : null; + // If first child is neither word nor symbol, then + // group type is null. + return null; } - // If first child is neither word nor symbol, then - // group type is null. - return null; - } + /** + * If a group contains a '*' symbol as its first child, + * then it is a destination group. + */ + public function IsDestination() + { + // If group has no children, then destination is null. 
+ if (sizeof($this->children) == 0) { + return null; + } - // If a group contains a * symbol as its first child, - // then it is a destination group. - public function IsDestination() - { - // If group has no children, then destination is null. - if(sizeof($this->children) == 0) return null; - // First child not a control symbol? - $child = $this->children[0]; - if(!$child instanceof ControlSymbol) return null; - return $child->symbol == '*'; - } + $child = $this->children[0]; - // - // Convert Group to string for debugging purposes. - // - public function toString($level = 0) - { - $str = str_repeat(" ", $level) . "{\n"; + // First child not a control symbol? + if (!$child instanceof ControlSymbol) { + return null; + } - foreach($this->children as $child) - { - /* - // Skip some group types: - if($child instanceof Group) { - if ($child->GetType() == "fonttbl") continue; - if ($child->GetType() == "colortbl") continue; - if ($child->GetType() == "stylesheet") continue; - if ($child->GetType() == "info") continue; - // Skip any pictures: - if (substr($child->GetType(), 0, 4) == "pict") continue; - if ($child->IsDestination()) continue; - } */ - $str .= $child->toString($level + 1); + return $child->symbol == '*'; } - return $str . str_repeat(" ", $level) . "}\n"; - } + /** + * Convert Group to string for debugging purposes. + */ + public function toString($level = 0) + { + $str = str_repeat(" ", $level) . "{\n"; + + foreach ($this->children as $child) { + /* + // Skip some group types: + if ($child instanceof Group) { + if ($child->GetType() == "fonttbl") continue; + if ($child->GetType() == "colortbl") continue; + if ($child->GetType() == "stylesheet") continue; + if ($child->GetType() == "info") continue; + // Skip any pictures: + if (substr($child->GetType(), 0, 4) == "pict") continue; + if ($child->IsDestination()) continue; + } + */ + $str .= $child->toString($level + 1); + } + + return $str . str_repeat(" ", $level) . 
"}\n"; + } } diff --git a/src/Html/Font.php b/src/Html/Font.php index 1719854..bd93bfc 100644 --- a/src/Html/Font.php +++ b/src/Html/Font.php @@ -4,16 +4,27 @@ class Font { - public $family; - public $name; - public $charset; - public $codepage; + public $family; + public $name; + public $charset; + public $codepage; - public function toStyle() { - $list = array(); - if($this->name) array_push($list, $this->name); - if($this->family) array_push($list, $this->family); - if(sizeof($list) == 0) return ""; - return "font-family:" . implode(',', $list) . ";"; - } -} \ No newline at end of file + public function toStyle() + { + $list = array(); + + if ($this->name) { + $list[] = $this->name; + } + + if ($this->family) { + $list[] = $this->family; + } + + if (sizeof($list) == 0) { + return ''; + } + + return "font-family:" . implode(',', $list); + } +} diff --git a/src/Html/HtmlFormatter.php b/src/Html/HtmlFormatter.php index 9549fc0..bdd957f 100644 --- a/src/Html/HtmlFormatter.php +++ b/src/Html/HtmlFormatter.php @@ -6,648 +6,672 @@ class HtmlFormatter { - private $output = ''; - private $encoding; - private $defaultFont; - private $fromhtml = false; - - // By default, HtmlFormatter uses HTML_ENTITIES for code conversion. - // You can optionally support a different endoing when creating - // the HtmlFormatter instance. - public function __construct($encoding = 'HTML-ENTITIES') - { - if (!extension_loaded('mbstring')) { - throw new \Exception("PHP mbstring extension not enabled"); - } + private $output = ''; + private $encoding; + private $defaultFont; + private $fromhtml = false; + + /** + * By default, HtmlFormatter uses HTML_ENTITIES for code conversion. + * You can optionally support a different endoing when creating + * the HtmlFormatter instance. 
+ */ + public function __construct($encoding = 'HTML-ENTITIES') + { + if (!extension_loaded('mbstring')) { + throw new \Exception("PHP mbstring extension not enabled"); + } - if ($encoding != 'HTML-ENTITIES') { - // Check if the encoding is reconized by mbstring extension - if (!in_array($encoding, mb_list_encodings())) { - throw new \Exception("Unsupported encoding: $encoding"); - } - } + if ($encoding != 'HTML-ENTITIES') { + // Check if the encoding is reconized by mbstring extension + if (!in_array($encoding, mb_list_encodings())) { + throw new \Exception("Unsupported encoding: $encoding"); + } + } - $this->encoding = $encoding; - } + $this->encoding = $encoding; + } - public function Format(Document $document) - { - // Clear current output - $this->output = ''; + public function Format(Document $document) + { + // Clear current output + $this->output = ''; - // Keep track of style modifications - $this->previousState = null; + // Keep track of style modifications + $this->previousState = null; - // and create a stack of states - $this->states = array(); + // and create a stack of states + $this->states = array(); - // Put an initial standard state onto the stack - $this->state = new State(); - array_push($this->states, $this->state); + // Put an initial standard state onto the stack + $this->state = new State(); + array_push($this->states, $this->state); - // Keep track of opened html tags - $this->openedTags = array('span' => false, 'p' => null); + // Keep track of opened html tags + $this->openedTags = array('span' => false, 'p' => null); - // Begin format - $this->ProcessGroup($document->root); + // Begin format + $this->ProcessGroup($document->root); - // Instead of removing opened tags, we close them - $this->output .= $this->openedTags['span'] ? '' : ''; - $this->output .= $this->openedTags['p'] ? '

' : ''; + // Instead of removing opened tags, we close them + $this->output .= $this->openedTags['span'] ? '' : ''; + $this->output .= $this->openedTags['p'] ? '

' : ''; - // Remove extra empty paragraph at the end - // TODO: Find the real reason it's there and fix it - $this->output = preg_replace('|

$|', '', $this->output); + // Remove extra empty paragraph at the end + // TODO: Find the real reason it's there and fix it + $this->output = preg_replace('|

$|', '', $this->output); - return $this->output; - } + return $this->output; + } - protected function LoadFont(\RtfHtmlPhp\Group $fontGroup) { - $fontNumber = 0; - $font = new Font(); + protected function LoadFont(\RtfHtmlPhp\Group $fontGroup) + { + $fontNumber = 0; + $font = new Font(); + + // Loop through children of the font group. The font group + // contains control words with the font number and charset, + // and a control text with the font name. + foreach($fontGroup->children as $child) { + // Control word + if ($child instanceof \RtfHtmlPhp\ControlWord) { + switch ($child->word) { + case 'f': + $fontNumber = $child->parameter; + break; + + // Font family names + case 'froman': $font->family = "serif"; break; + case 'fswiss': $font->family = "sans-serif"; break; + case 'fmodern': $font->family = "monospace"; break; + case 'fscript': $font->family = "cursive"; break; + case 'fdecor': $font->family = "fantasy"; break; + + // case 'fnil': break; // default font + // case 'ftech': break; // symbol + // case 'fbidi': break; // bidirectional font + + case 'fcharset': // charset + $font->charset = $this->GetEncodingFromCharset($child->parameter); + break; + case 'cpg': // code page + $font->codepage = $this->GetEncodingFromCodepage($child->parameter); + break; + case 'fprq': // Font pitch + $font->fprq = $child->parameter; + break; + } + } - // Loop through children of the font group. The font group - // contains control words with the font number and charset, - // and a control text with the font name. 
- foreach($fontGroup->children as $child) { + // Control text contains the font name, if any: + if ($child instanceof \RtfHtmlPhp\Text) { + // Store font name (except ; delimiter at end) + $font->name = substr($child->text, 0, -1); + } - // Control word - if ($child instanceof \RtfHtmlPhp\ControlWord){ - switch ($child->word) { - case 'f': - $fontNumber = $child->parameter; - break; + /* + elseif ($child instanceof \RtfHtmlPhp\Group) { + // possible subgroups: + // '{\*' \falt #PCDATA '}' = alternate font name + // '{\*' \fontemb ? ? '}' + // '{\*' \fontfile ? #PCDATA '}' + // '{\*' \panose '}' + continue; + } elseif ($child instanceof \RtfHtmlPhp\ControlSymbol) { + // the only authorized symbol here is '*': + // \*\fname = non tagged file name (only WordPad uses it) + continue; + } + */ + } - // Font family names - case 'froman': $font->family = "serif"; break; - case 'fswiss': $font->family = "sans-serif"; break; - case 'fmodern': $font->family = "monospace"; break; - case 'fscript': $font->family = "cursive"; break; - case 'fdecor': $font->family = "fantasy"; break; + State::SetFont($fontNumber, $font); + } - // case 'fnil': break; // default font - // case 'ftech': break; // symbol - // case 'fbidi': break; // bidirectional font + protected function ExtractFontTable($fontTblGrp) + { + // {' \fonttbl ( | ('{' '}'))+ '}' + // ??? + // ??? ? ';' + + // The Font Table group contains the control word "fonttbl" and some + // subgroups. Go through the subgroups, ignoring the "fonttbl" + // identifier. + foreach ($fontTblGrp->children as $child) { + // Ignore non-group, which should be the fonttbl identified word. 
+ if (!($child instanceof \RtfHtmlPhp\Group)) { + continue; + } - case 'fcharset': // charset - $font->charset = $this->GetEncodingFromCharset($child->parameter); - break; - case 'cpg': // code page - $font->codepage = $this->GetEncodingFromCodepage($child->parameter); - break; - case 'fprq': // Font pitch - $font->fprq = $child->parameter; - break; + // Load the font specification in the subgroup: + $this->LoadFont($child); } - } - - // Control text contains the font name, if any: - if ($child instanceof \RtfHtmlPhp\Text) { - // Store font name (except ; delimiter at end) - $font->name = substr($child->text, 0, -1); - } - - /* - elseif ($child instanceof \RtfHtmlPhp\Group) { - // possible subgroups: - // '{\*' \falt #PCDATA '}' = alternate font name - // '{\*' \fontemb ? ? '}' - // '{\*' \fontfile ? #PCDATA '}' - // '{\*' \panose '}' - continue; - } elseif ($child instanceof \RtfHtmlPhp\ControlSymbol) { - // the only authorized symbol here is '*': - // \*\fname = non tagged file name (only WordPad uses it) - continue; + } + + protected function ExtractColorTable($colorTblGrp) + { + // {\colortbl;\red0\green0\blue0;} + // Index 0 of the RTF color table is the 'auto' color + $colortbl = array(); + $c = count($colorTblGrp); + $color = ''; + + for ($i=1; $i<$c; $i++) { // Iterate through colors + if ($colorTblGrp[$i] instanceof \RtfHtmlPhp\ControlWord) { + // Extract RGB color and convert it to hex string + $color = sprintf( + '#%02x%02x%02x', // hex string format + $colorTblGrp[$i]->parameter, // red + $colorTblGrp[$i+1]->parameter, // green + $colorTblGrp[$i+2]->parameter // blue + ); + $i+=2; + } elseif ($colorTblGrp[$i] instanceof \RtfHtmlPhp\Text) { + // This is a delimiter ';' so + if ($i != 1) { // Store the already extracted color + $colortbl[] = $color; + } else { // This is the 'auto' color + $colortbl[] = 0; + } } - */ + } + + State::$colortbl = $colortbl; } - State::SetFont($fontNumber, $font); - } - - protected function ExtractFontTable($fontTblGrp) - { - 
// {' \fonttbl ( | ('{' '}'))+ '}' - // ??? - // ??? ? ';' - - // The Font Table group contains the control word "fonttbl" and some - // subgroups. Go through the subgroups, ignoring the "fonttbl" - // identifier. - foreach($fontTblGrp->children as $child) { - // Ignore non-group, which should be the fonttbl identified word. - if(!($child instanceof \RtfHtmlPhp\Group)) continue; - // Load the font specification in the subgroup: - $this->LoadFont($child); + protected function ExtractImage($pictGrp) + { + $Image = new Image(); + foreach ($pictGrp as $child) { + if ($child instanceof \RtfHtmlPhp\ControlWord) { + switch ($child->word) { + // Picture Format + case "emfblip": $Image->format = 'emf'; break; + case "pngblip": $Image->format = 'png'; break; + case "jpegblip": $Image->format = 'jpeg'; break; + case "macpict": $Image->format = 'pict'; break; + // case "wmetafile": $Image->format = 'bmp'; break; + + // Picture size and scaling + case "picw": $Image->width = $child->parameter; break; + case "pich": $Image->height = $child->parameter; break; + case "picwgoal": $Image->goalWidth = $child->parameter; break; + case "pichgoal": $Image->goalHeight = $child->parameter; break; + case "picscalex": $Image->pcScaleX = $child->parameter; break; + case "picscaley": $Image->pcScaleY = $child->parameter; break; + + // Binary or Hexadecimal Data ? 
+ case "bin": $Image->binarySize = $child->parameter; break; + + default: break; + } + } elseif ($child instanceof \RtfHtmlPhp\Text) { + // store Data + $Image->ImageData = $child->text; + } + } + + // output Image + $this->output .= $Image->PrintImage(); + unset($Image); } - } - - protected function ExtractColorTable($colorTblGrp) { - // {\colortbl;\red0\green0\blue0;} - // Index 0 of the RTF color table is the 'auto' color - $colortbl = array(); - $c = count($colorTblGrp); - $color = ''; - for ($i=1; $i<$c; $i++) { // Iterate through colors - if($colorTblGrp[$i] instanceof \RtfHtmlPhp\ControlWord) { - // Extract RGB color and convert it to hex string - $color = sprintf('#%02x%02x%02x', // hex string format - $colorTblGrp[$i]->parameter, // red - $colorTblGrp[$i+1]->parameter, // green - $colorTblGrp[$i+2]->parameter); // blue - $i+=2; - } elseif($colorTblGrp[$i] instanceof \RtfHtmlPhp\Text) { - // This is a delimiter ';' so - if ($i != 1) { // Store the already extracted color - $colortbl[] = $color; - } else { // This is the 'auto' color - $colortbl[] = 0; - } - } + + protected function ProcessGroup($group) + { + // Special group processing: + switch ($group->GetType()) { + case "fonttbl": // Extract font table + $this->ExtractFontTable($group); + return; + case "colortbl": // Extract color table + $this->ExtractColorTable($group->children); + return; + case "stylesheet": + // Stylesheet extraction not yet supported + return; + case "info": + // Ignore Document information + return; + case "pict": + $this->ExtractImage($group->children); + return; + case "nonshppict": + // Ignore alternative images + return; + case "*": // Process destination + $this->ProcessDestination($group->children); + return; + } + + // Pictures extraction not yet supported + // if (substr($group->GetType(), 0, 4) == "pict") { return; } + + // Push a new state onto the stack: + $this->state = clone $this->state; + array_push($this->states, $this->state); + + foreach ($group->children as 
$child) { + $this->FormatEntry($child); + } + + // Pop state from stack + array_pop($this->states); + $this->state = $this->states[sizeof($this->states) - 1]; } - State::$colortbl = $colortbl; - } - - protected function ExtractImage($pictGrp) - { - $Image = new Image(); - foreach ($pictGrp as $child) { - if ($child instanceof \RtfHtmlPhp\ControlWord) { - switch ($child->word) { - // Picture Format - case "emfblip": $Image->format = 'emf'; break; - case "pngblip": $Image->format = 'png'; break; - case "jpegblip": $Image->format = 'jpeg'; break; - case "macpict": $Image->format = 'pict'; break; - // case "wmetafile": $Image->format = 'bmp'; break; - - // Picture size and scaling - case "picw": $Image->width = $child->parameter; break; - case "pich": $Image->height = $child->parameter; break; - case "picwgoal": $Image->goalWidth = $child->parameter; break; - case "pichgoal": $Image->goalHeight = $child->parameter; break; - case "picscalex": $Image->pcScaleX = $child->parameter; break; - case "picscaley": $Image->pcScaleY = $child->parameter; break; - - // Binary or Hexadecimal Data ? 
- case "bin": $Image->binarySize = $child->parameter; break; - default: break; - } - - } elseif ($child instanceof \RtfHtmlPhp\Text) { // store Data - $Image->ImageData = $child->text; - } + + protected function ProcessDestination($dest) + { + if (!$dest[1] instanceof \RtfHtmlPhp\ControlWord) { + return; + } + + // Check if this is a Word 97 picture + if ($dest[1]->word == "shppict") { + $c = count($dest); + for ($i = 2; $i < $c; $i++) { + $this->FormatEntry($dest[$i]); + } + } elseif ($dest[1]->word == "htmltag") { + $c = count($dest); + for ($i = 2; $i < $c; $i++) { + $entry = $dest[$i]; + + if ($entry instanceof \RtfHtmlPhp\Text) { + $this->output .= $entry->text; + } elseif ($entry instanceof \RtfHtmlPhp\Group) { + $this->ProcessGroup($entry); + } elseif ($entry instanceof \RtfHtmlPhp\ControlSymbol) { + $this->FormatControlSymbol($entry); + } elseif ($entry instanceof \RtfHtmlPhp\ControlWord) { + $this->FormatControlWord($entry); + } + } + } } - // output Image - $this->output .= $Image->PrintImage(); - unset($Image); - } - - protected function ProcessGroup($group) - { - // Special group processing: - switch ($group->GetType()) + + protected function FormatEntry($entry) { - case "fonttbl": // Extract font table - $this->ExtractFontTable($group); - return; - case "colortbl": // Extract color table - $this->ExtractColorTable($group->children); - return; - case "stylesheet": - // Stylesheet extraction not yet supported - return; - case "info": - // Ignore Document information - return; - case "pict": - $this->ExtractImage($group->children); - return; - case "nonshppict": - // Ignore alternative images - return; - case "*": // Process destination - $this->ProcessDestination($group->children); - return; + if ($entry instanceof \RtfHtmlPhp\Group) { + $this->ProcessGroup($entry); + } elseif ($entry instanceof \RtfHtmlPhp\ControlWord) { + $this->FormatControlWord($entry); + } elseif ($entry instanceof \RtfHtmlPhp\ControlSymbol) { + $this->FormatControlSymbol($entry); + 
} elseif ($entry instanceof \RtfHtmlPhp\Text) { + $this->FormatText($entry); + } } - // Pictures extraction not yet supported - //if(substr($group->GetType(), 0, 4) == "pict") return; + protected function FormatControlWord($word) + { + switch($word->word) { + + case 'fromhtml': + $this->fromhtml = $word->parameter > 0; + break; + + case 'htmlrtf': + $this->state->htmlrtf = $word->parameter > 0; + break; + + case 'plain': // Reset font formatting properties to default. + case 'pard': // Reset to default paragraph properties. + $this->state->Reset($this->defaultFont); + break; + + // Font formatting properties: + + case 'b': // bold + $this->state->bold = $word->parameter; + break; + case 'i': // italic + $this->state->italic = $word->parameter; + break; + case 'ul': // underline + $this->state->underline = $word->parameter; + break; + case 'ulnone': // no underline + $this->state->underline = false; + break; + case 'strike': // strike-through + $this->state->strike = $word->parameter; + break; + case 'v': // hidden + $this->state->hidden = $word->parameter; + break; + case 'fs': // Font size + $this->state->fontsize = ceil(($word->parameter / 24) * 16); + break; + case 'f': // Font + $this->state->font = $word->parameter; + break; + case 'deff': // Store default font + $this->defaultFont = $word->parameter; + break; + + // Colors + + case 'cf': + case 'chcfpat': + $this->state->fontcolor = $word->parameter; + break; + case 'cb': + case 'chcbpat': + $this->state->background = $word->parameter; + break; + case 'highlight': + $this->state->hcolor = $word->parameter; + break; + + // Special characters + + case 'lquote': $this->Write($this->fromhtml ? "‘" : "‘"); break; // ‘ ‘ + case 'rquote': $this->Write($this->fromhtml ? "’" : "’"); break; // ’ ’ + case 'ldblquote': $this->Write($this->fromhtml ? "“" : "“"); break; // “ “ + case 'rdblquote': $this->Write($this->fromhtml ? "”" : "”"); break; // ” ” + case 'bullet': $this->Write($this->fromhtml ? 
"•" : "•"); break; // • • + case 'endash': $this->Write($this->fromhtml ? "–" : "–"); break; // – – + case 'emdash': $this->Write($this->fromhtml ? "—" : "—"); break; // — — + case 'enspace': $this->Write($this->fromhtml ? " " : " "); break; //   + case 'emspace': $this->Write($this->fromhtml ? " " : " "); break; //   + case 'tab': $this->Write($this->fromhtml ? "\t" : " "); break; // Character value 9 + case 'line': $this->output .= $this->fromhtml ? "\n" : "
"; break; // character value (line feed = ) (carriage return = ) + + // Unicode characters + + case 'u': + $uchar = $this->DecodeUnicode($word->parameter); + $this->Write($uchar); + break; + + // Paragraphs + + case 'par': + case 'row': + if ($this->fromhtml) { + $this->output .= "\n"; + break; + } + // Close previously opened tags + $this->CloseTags(); + // Begin a new paragraph + $this->OpenTag('p'); + break; - // Push a new state onto the stack: - $this->state = clone $this->state; - array_push($this->states, $this->state); + // Code pages - foreach($group->children as $child) { - $this->FormatEntry($child); + case 'ansi': + case 'mac': + case 'pc': + case 'pca': + $this->RTFencoding = $this->GetEncodingFromCodepage($word->word); + break; + case 'ansicpg': + if ($word->parameter) { + $this->RTFencoding = $this->GetEncodingFromCodepage($word->parameter); + } + break; + } } - // Pop state from stack - array_pop($this->states); - $this->state = $this->states[sizeof($this->states)-1]; - } - - protected function ProcessDestination($dest) - { - if (!$dest[1] instanceof \RtfHtmlPhp\ControlWord) return; - // Check if this is a Word 97 picture - if ($dest[1]->word == "shppict") { - $c = count($dest); - for ($i=2;$i<$c;$i++) - $this->FormatEntry($dest[$i]); - } elseif ($dest[1]->word == "htmltag") { - for ($i = 2; $i < count($dest); $i++) { - if (isset($dest[$i])) { - $entry = $dest[$i]; - - if ($entry instanceof \RtfHtmlPhp\Text) { - $this->output .= $entry->text; - } elseif($entry instanceof \RtfHtmlPhp\Group) { - $this->ProcessGroup($entry); - } elseif($entry instanceof \RtfHtmlPhp\ControlSymbol) { - $this->FormatControlSymbol($entry); - } elseif($entry instanceof \RtfHtmlPhp\ControlWord) { - $this->FormatControlWord($entry, true); - } + protected function DecodeUnicode($code, $srcEnc = 'UTF-8') + { + $utf8 = ''; + + if ($srcEnc != 'UTF-8') { // convert character to Unicode + $utf8 = mb_convert_encoding(chr($code), 'UTF-8', $srcEnc); + } + + if ($this->encoding == 
'HTML-ENTITIES') { + return $utf8 ? "&#{$this->ord_utf8($utf8)};" : "&#{$code};"; + } + + if ($this->encoding == 'UTF-8') { + return $utf8 ? $utf8 : mb_convert_encoding("&#{$code};", $this->encoding, 'HTML-ENTITIES'); } - } + + return $utf8 ? mb_convert_encoding($utf8, $this->encoding, 'UTF-8') : + mb_convert_encoding("&#{$code};", $this->encoding, 'HTML-ENTITIES'); } - } - - protected function FormatEntry($entry) - { - if($entry instanceof \RtfHtmlPhp\Group) $this->ProcessGroup($entry); - elseif($entry instanceof \RtfHtmlPhp\ControlWord) $this->FormatControlWord($entry); - elseif($entry instanceof \RtfHtmlPhp\ControlSymbol) $this->FormatControlSymbol($entry); - elseif($entry instanceof \RtfHtmlPhp\Text) $this->FormatText($entry); - } - - protected function FormatControlWord($word) - { - switch($word->word) { - case 'plain': // Reset font formatting properties to default. - case 'pard': // Reset to default paragraph properties. - $this->state->Reset($this->defaultFont); - break; - - /* - * Font formatting properties: - */ - - case 'b': // bold - $this->state->bold = $word->parameter; - break; - case 'i': // italic - $this->state->italic = $word->parameter; - break; - case 'ul': // underline - $this->state->underline = $word->parameter; - break; - case 'ulnone': // no underline - $this->state->underline = false; - break; - case 'strike': // strike-through - $this->state->strike = $word->parameter; - break; - case 'v': // hidden - $this->state->hidden = $word->parameter; - break; - case 'fs': // Font size - $this->state->fontsize = ceil(($word->parameter / 24) * 16); - break; - case 'f': // Font - $this->state->font = $word->parameter; - break; - case 'deff': // Store default font - $this->defaultFont = $word->parameter; - break; - - /* - * Colors - */ - - case 'cf': - case 'chcfpat': - $this->state->fontcolor = $word->parameter; - break; - case 'cb': - case 'chcbpat': - $this->state->background = $word->parameter; - break; - case 'highlight': - $this->state->hcolor 
= $word->parameter; - break; - - /* - * Special characters - */ - - case 'lquote': $this->Write($this->fromhtml ? "‘" : "‘"); break; // ‘ ‘ - case 'rquote': $this->Write($this->fromhtml ? "’" : "’"); break; // ’ ’ - case 'ldblquote': $this->Write($this->fromhtml ? "“" : "“"); break; // “ “ - case 'rdblquote': $this->Write($this->fromhtml ? "”" : "”"); break; // ” ” - case 'bullet': $this->Write($this->fromhtml ? "•" : "•"); break; // • • - case 'endash': $this->Write($this->fromhtml ? "–" : "–"); break; // – – - case 'emdash': $this->Write($this->fromhtml ? "—" : "—"); break; // — — - case 'enspace': $this->Write($this->fromhtml ? " " : " "); break; //   - case 'emspace': $this->Write($this->fromhtml ? " " : " "); break; //   - case 'tab': $this->Write($this->fromhtml ? "\t" : " "); break; // Character value 9 - case 'line': $this->output .= $this->fromhtml ? "\n" : "
"; break; // character value (line feed = ) (carriage return = ) - - /* - * Unicode characters - */ - - case 'u': - $uchar = $this->DecodeUnicode($word->parameter); - $this->Write($uchar); - break; - - /* - * Paragraphs - */ - case 'par': - case 'row': + + protected function Write($txt) + { + // Ignore regions that are not part of the original (encapsulated) HTML content + if ($this->state->htmlrtf) { + return; + } + if ($this->fromhtml) { - $this->output .= "\n"; - break; - } - // Close previously opened tags - $this->CloseTags(); - // Begin a new paragraph - $this->OpenTag('p'); - break; - - /* Code pages */ - case 'ansi': - case 'mac': - case 'pc': - case 'pca': - $this->RTFencoding = $this->GetEncodingFromCodepage($word->word); - break; - case 'ansicpg': - if($word->parameter) { - $this->RTFencoding = $this->GetEncodingFromCodepage($word->parameter); - } - break; - - case 'fromhtml': - $this->fromhtml = $word->parameter > 0; - break; - - case 'htmlrtf': - $this->state->htmlrtf = $word->parameter > 0; - break; + $this->output .= $txt; + return; + } + + if ($this->openedTags['p'] === null) { + // Create the first paragraph + $this->OpenTag('p'); + } + + // Create a new 'span' element only when a style change occurs. + // 1st case: style change occured + // 2nd case: there is no change in style but the already created 'span' + // element is somehow closed (ex. because of an end of paragraph) + if (!$this->state->equals($this->previousState) + || ($this->state->equals($this->previousState) && !$this->openedTags['span']) + ) { + // If applicable close previously opened 'span' tag + $this->CloseTag('span'); + + $style = $this->state->PrintStyle(); + + // Keep track of preceding style + $this->previousState = clone $this->state; + + // Create style attribute and open span + $attr = $style ? 
"style=\"{$style}\"" : ""; + $this->OpenTag('span', $attr); + } + + $this->output .= $txt; } - } - protected function DecodeUnicode($code, $srcEnc = 'UTF-8') - { - $utf8 = ''; + protected function OpenTag($tag, $attr = '') + { + // Ignore regions that are not part of the original (encapsulated) HTML content + if ($this->fromhtml) { + return; + } - if ($srcEnc != 'UTF-8') { // convert character to Unicode - $utf8 = mb_convert_encoding(chr($code), 'UTF-8', $srcEnc); + $this->output .= $attr ? "<{$tag} {$attr}>" : "<{$tag}>"; + $this->openedTags[$tag] = true; } - if ($this->encoding == 'HTML-ENTITIES') { - return $utf8 ? "&#{$this->ord_utf8($utf8)};" : "&#{$code};"; + protected function CloseTag($tag) + { + if ($this->fromhtml) { + return; + } - } elseif ($this->encoding == 'UTF-8') { - return $utf8 ? $utf8 : mb_convert_encoding("&#{$code};", $this->encoding, 'HTML-ENTITIES'); + if ($this->openedTags[$tag]) { + // Check for empty html elements + if (substr($this->output, -strlen("<{$tag}>")) == "<{$tag}>") { + switch ($tag) { + case 'p': // Replace empty 'p' element with a line break + $this->output = substr($this->output, 0, -3) . "
"; + break; + default: // Delete empty elements + $this->output = substr($this->output, 0, -strlen("<{$tag}>")); + break; + } + } else { + $this->output .= ""; + } - } else { - return $utf8 ? mb_convert_encoding($utf8, $this->encoding, 'UTF-8') : - mb_convert_encoding("&#{$code};", $this->encoding, 'HTML-ENTITIES'); + $this->openedTags[$tag] = false; + } } - } - protected function Write($txt) - { - // Ignore regions that are not part of the original (encapsulated) HTML content - if ($this->state->htmlrtf) { - return; + protected function CloseTags() + { + // Close all opened tags + foreach ($this->openedTags as $tag => $b) { + $this->CloseTag($tag); + } } - if ($this->fromhtml) { - $this->output .= $txt; - return; + protected function FormatControlSymbol($symbol) + { + if ($symbol->symbol == '\'') { + $enc = $this->GetSourceEncoding(); + $uchar = $this->DecodeUnicode($symbol->parameter, $enc); + $this->Write($uchar); + } elseif ($symbol->symbol == '~') { + $this->Write(" "); // Non breaking space + } elseif ($symbol->symbol == '-') { + $this->Write("­"); // Optional hyphen + } elseif ($symbol->symbol == '_') { + $this->Write("‑"); // Non breaking hyphen + } elseif ($symbol->symbol == '{') { + $this->Write("{"); // Non breaking hyphen + } } - if ($this->openedTags['p'] === null) { - // Create the first paragraph - $this->OpenTag('p'); + protected function FormatText($text) + { + // Convert special characters to HTML entities + $txt = htmlspecialchars($text->text, ENT_NOQUOTES, 'UTF-8'); + if ($this->encoding == 'HTML-ENTITIES') { + $this->Write($txt); + } else { + $this->Write(mb_convert_encoding($txt, $this->encoding, 'UTF-8')); + } } - // Create a new 'span' element only when a style change occurs. - // 1st case: style change occured - // 2nd case: there is no change in style but the already created 'span' - // element is somehow closed (ex. 
because of an end of paragraph) - if (!$this->state->equals($this->previousState) || - ($this->state->equals($this->previousState) && !$this->openedTags['span'])) + protected function GetSourceEncoding() { - // If applicable close previously opened 'span' tag - $this->CloseTag('span'); + if (isset($this->state->font)) { + if (isset(State::$fonttbl[$this->state->font]->codepage)) { + return State::$fonttbl[$this->state->font]->codepage; + } + if (isset(State::$fonttbl[$this->state->font]->charset)) { + return State::$fonttbl[$this->state->font]->charset; + } + } - $style = $this->state->PrintStyle(); + return $this->RTFencoding; + } - // Keep track of preceding style - $this->previousState = clone $this->state; + protected function GetEncodingFromCharset($fcharset) + { + // maps windows character sets to iconv encoding names + $charset = array ( + 0 => 'CP1252', // ANSI: Western Europe + 1 => 'CP1252', //*Default + 2 => 'CP1252', //*Symbol + 3 => null, // Invalid + 77 => 'MAC', //*also [MacRoman]: Macintosh + 128 => 'CP932', //*or [Shift_JIS]?: Japanese + 129 => 'CP949', //*also [UHC]: Korean (Hangul) + 130 => 'CP1361', //*also [JOHAB]: Korean (Johab) + 134 => 'CP936', //*or [GB2312]?: Simplified Chinese + 136 => 'CP950', //*or [BIG5]?: Traditional Chinese + 161 => 'CP1253', // Greek + 162 => 'CP1254', // Turkish (latin 5) + 163 => 'CP1258', // Vietnamese + 177 => 'CP1255', // Hebrew + 178 => 'CP1256', // Simplified Arabic + 179 => 'CP1256', //*Traditional Arabic + 180 => 'CP1256', //*Arabic User + 181 => 'CP1255', //*Hebrew User + 186 => 'CP1257', // Baltic + 204 => 'CP1251', // Russian (Cyrillic) + 222 => 'CP874', // Thai + 238 => 'CP1250', // Eastern European (latin 2) + 254 => 'CP437', //*also [IBM437][437]: PC437 + 255 => 'CP437', //*OEM still PC437 + ); + + if (isset($charset[$fcharset])) { + return $charset[$fcharset]; + } + } - // Create style attribute and open span - $attr = $style ? 
"style=\"{$style}\"" : ""; - $this->OpenTag('span', $attr); + protected function GetEncodingFromCodepage($cpg) + { + $codePage = array ( + 'ansi' => 'CP1252', + 'mac' => 'MAC', + 'pc' => 'CP437', + 'pca' => 'CP850', + 437 => 'CP437', // United States IBM + 708 => 'ASMO-708', // also [ISO-8859-6][ARABIC] Arabic + /* Not supported by iconv + 709, => '' // Arabic (ASMO 449+, BCON V4) + 710, => '' // Arabic (transparent Arabic) + 711, => '' // Arabic (Nafitha Enhanced) + 720, => '' // Arabic (transparent ASMO) + */ + 819 => 'CP819', // Windows 3.1 (US and Western Europe) + 850 => 'CP850', // IBM multilingual + 852 => 'CP852', // Eastern European + 860 => 'CP860', // Portuguese + 862 => 'CP862', // Hebrew + 863 => 'CP863', // French Canadian + 864 => 'CP864', // Arabic + 865 => 'CP865', // Norwegian + 866 => 'CP866', // Soviet Union + 874 => 'CP874', // Thai + 932 => 'CP932', // Japanese + 936 => 'CP936', // Simplified Chinese + 949 => 'CP949', // Korean + 950 => 'CP950', // Traditional Chinese + 1250 => 'CP1250', // Windows 3.1 (Eastern European) + 1251 => 'CP1251', // Windows 3.1 (Cyrillic) + 1252 => 'CP1252', // Western European + 1253 => 'CP1253', // Greek + 1254 => 'CP1254', // Turkish + 1255 => 'CP1255', // Hebrew + 1256 => 'CP1256', // Arabic + 1257 => 'CP1257', // Baltic + 1258 => 'CP1258', // Vietnamese + 1361 => 'CP1361', // Johab + ); + + if (isset($codePage[$cpg])) { + return $codePage[$cpg]; + } } - $this->output .= $txt; - } + protected function ord_utf8($chr) + { + $ord0 = ord($chr); + if ($ord0 >= 0 && $ord0 <= 127) { + return $ord0; + } - protected function OpenTag($tag, $attr = '') - { - // Ignore regions that are not part of the original (encapsulated) HTML content - if ($this->fromhtml) { - return; - } + $ord1 = ord($chr[1]); + if ($ord0 >= 192 && $ord0 <= 223) { + return ($ord0 - 192) * 64 + ($ord1 - 128); + } - $this->output .= $attr ? 
"<{$tag} {$attr}>" : "<{$tag}>"; - $this->openedTags[$tag] = true; - } + $ord2 = ord($chr[2]); + if ($ord0 >= 224 && $ord0 <= 239) { + return ($ord0 - 224) * 4096 + ($ord1 - 128) * 64 + ($ord2 - 128); + } - protected function CloseTag($tag) - { - if ($this->fromhtml) { - return; - } + $ord3 = ord($chr[3]); + if ($ord0 >= 240 && $ord0 <= 247) { + return ($ord0 - 240) * 262144 + ($ord1 - 128) * 4096 + ($ord2 - 128) * 64 + ($ord3 - 128); + } - if ($this->openedTags[$tag]) { - // Check for empty html elements - if (substr($this->output ,-strlen("<{$tag}>")) == "<{$tag}>"){ - switch ($tag) - { - case 'p': // Replace empty 'p' element with a line break - $this->output = substr($this->output ,0, -3) . "
"; - break; - default: // Delete empty elements - $this->output = substr($this->output ,0, -strlen("<{$tag}>")); - break; + $ord4 = ord($chr[4]); + if ($ord0 >= 248 && $ord0 <= 251) { + return ($ord0 - 248) * 16777216 + ($ord1 - 128) * 262144 + ($ord2 - 128) * 4096 + ($ord3 - 128) * 64 + ($ord4 - 128); } - } else { - $this->output .= ""; - } - $this->openedTags[$tag] = false; - } - } - - protected function CloseTags() - { - // Close all opened tags - foreach ($this->openedTags as $tag => $b) - $this->CloseTag($tag); - } - - protected function FormatControlSymbol($symbol) - { - if($symbol->symbol == '\'') { - $enc = $this->GetSourceEncoding(); - $uchar = $this->DecodeUnicode($symbol->parameter, $enc); - $this->Write($uchar); - }elseif ($symbol->symbol == '~') { - $this->Write(" "); // Non breaking space - }elseif ($symbol->symbol == '-') { - $this->Write("­"); // Optional hyphen - }elseif ($symbol->symbol == '_') { - $this->Write("‑"); // Non breaking hyphen - }elseif ($symbol->symbol == '{') { - $this->Write("{"); // Non breaking hyphen - } - } - - protected function FormatText($text) - { - // Convert special characters to HTML entities - $txt = htmlspecialchars($text->text, ENT_NOQUOTES, 'UTF-8'); - if($this->encoding == 'HTML-ENTITIES') - $this->Write($txt); - else - $this->Write(mb_convert_encoding($txt, $this->encoding, 'UTF-8')); - } - - protected function GetSourceEncoding() - { - if (isset($this->state->font)) { - if (isset(State::$fonttbl[$this->state->font]->codepage)) { - return State::$fonttbl[$this->state->font]->codepage; - - } elseif (isset(State::$fonttbl[$this->state->font]->charset)) { - return State::$fonttbl[$this->state->font]->charset; - } + if ($ord0 >= 252 && $ord0 <= 253) { + return ($ord0 - 252) * 1073741824 + ($ord1 - 128) * 16777216 + ($ord2 - 128) * 262144 + ($ord3 - 128) * 4096 + ($ord4 - 128) * 64 + (ord($chr[5]) - 128); + } + + // trigger_error("Invalid Unicode character: {$chr}"); } - return $this->RTFencoding; - } - - protected 
function GetEncodingFromCharset($fcharset) - { - /* maps windows character sets to iconv encoding names */ - $charset = array ( - 0 => 'CP1252', // ANSI: Western Europe - 1 => 'CP1252', //*Default - 2 => 'CP1252', //*Symbol - 3 => null, // Invalid - 77 => 'MAC', //*also [MacRoman]: Macintosh - 128 => 'CP932', //*or [Shift_JIS]?: Japanese - 129 => 'CP949', //*also [UHC]: Korean (Hangul) - 130 => 'CP1361', //*also [JOHAB]: Korean (Johab) - 134 => 'CP936', //*or [GB2312]?: Simplified Chinese - 136 => 'CP950', //*or [BIG5]?: Traditional Chinese - 161 => 'CP1253', // Greek - 162 => 'CP1254', // Turkish (latin 5) - 163 => 'CP1258', // Vietnamese - 177 => 'CP1255', // Hebrew - 178 => 'CP1256', // Simplified Arabic - 179 => 'CP1256', //*Traditional Arabic - 180 => 'CP1256', //*Arabic User - 181 => 'CP1255', //*Hebrew User - 186 => 'CP1257', // Baltic - 204 => 'CP1251', // Russian (Cyrillic) - 222 => 'CP874', // Thai - 238 => 'CP1250', // Eastern European (latin 2) - 254 => 'CP437', //*also [IBM437][437]: PC437 - 255 => 'CP437'); //*OEM still PC437 - - if (isset($charset[$fcharset])) - return $charset[$fcharset]; - } - - protected function GetEncodingFromCodepage($cpg) - { - $codePage = array ( - 'ansi' => 'CP1252', - 'mac' => 'MAC', - 'pc' => 'CP437', - 'pca' => 'CP850', - 437 => 'CP437', // United States IBM - 708 => 'ASMO-708', // also [ISO-8859-6][ARABIC] Arabic - /* Not supported by iconv - 709, => '' // Arabic (ASMO 449+, BCON V4) - 710, => '' // Arabic (transparent Arabic) - 711, => '' // Arabic (Nafitha Enhanced) - 720, => '' // Arabic (transparent ASMO) - */ - 819 => 'CP819', // Windows 3.1 (US and Western Europe) - 850 => 'CP850', // IBM multilingual - 852 => 'CP852', // Eastern European - 860 => 'CP860', // Portuguese - 862 => 'CP862', // Hebrew - 863 => 'CP863', // French Canadian - 864 => 'CP864', // Arabic - 865 => 'CP865', // Norwegian - 866 => 'CP866', // Soviet Union - 874 => 'CP874', // Thai - 932 => 'CP932', // Japanese - 936 => 'CP936', // Simplified 
Chinese - 949 => 'CP949', // Korean - 950 => 'CP950', // Traditional Chinese - 1250 => 'CP1250', // Windows 3.1 (Eastern European) - 1251 => 'CP1251', // Windows 3.1 (Cyrillic) - 1252 => 'CP1252', // Western European - 1253 => 'CP1253', // Greek - 1254 => 'CP1254', // Turkish - 1255 => 'CP1255', // Hebrew - 1256 => 'CP1256', // Arabic - 1257 => 'CP1257', // Baltic - 1258 => 'CP1258', // Vietnamese - 1361 => 'CP1361'); // Johab - - if (isset($codePage[$cpg])) - return $codePage[$cpg]; - } - - protected function ord_utf8($chr) - { - $ord0 = ord($chr); - if ($ord0 >= 0 && $ord0 <= 127) - return $ord0; - - $ord1 = ord($chr[1]); - if ($ord0 >= 192 && $ord0 <= 223) - return ($ord0 - 192) * 64 + ($ord1 - 128); - - $ord2 = ord($chr[2]); - if ($ord0 >= 224 && $ord0 <= 239) - return ($ord0 - 224) * 4096 + ($ord1 - 128) * 64 + ($ord2 - 128); - - $ord3 = ord($chr[3]); - if ($ord0 >= 240 && $ord0 <= 247) - return ($ord0 - 240) * 262144 + ($ord1 - 128) * 4096 + ($ord2 - 128) * 64 + ($ord3 - 128); - - $ord4 = ord($chr[4]); - if ($ord0 >= 248 && $ord0 <= 251) - return ($ord0 - 248) * 16777216 + ($ord1 - 128) * 262144 + ($ord2 - 128) * 4096 + ($ord3 - 128) * 64 + ($ord4 - 128); - - if ($ord0 >= 252 && $ord0 <= 253) - return ($ord0 - 252) * 1073741824 + ($ord1 - 128) * 16777216 + ($ord2 - 128) * 262144 + ($ord3 - 128) * 4096 + ($ord4 - 128) * 64 + (ord($chr[5]) - 128); - - // trigger_error("Invalid Unicode character: {$chr}"); - } } diff --git a/src/Html/Image.php b/src/Html/Image.php index ae85b9f..daa734c 100644 --- a/src/Html/Image.php +++ b/src/Html/Image.php @@ -4,36 +4,37 @@ class Image { - public function __construct() - { - $this->Reset(); - } + public function __construct() + { + $this->Reset(); + } - public function Reset() - { - $this->format = 'bmp'; - $this->width = 0; // in xExt if wmetafile otherwise in px - $this->height = 0; // in yExt if wmetafile otherwise in px - $this->goalWidth = 0; // in twips - $this->goalHeight = 0; // in twips - $this->pcScaleX = 100; // 
100% - $this->pcScaleY = 100; // 100% - $this->binarySize = null; // Number of bytes of the binary data - $this->ImageData = null; // Binary or Hexadecimal Data - } + public function Reset() + { + $this->format = 'bmp'; + $this->width = 0; // in xExt if wmetafile otherwise in px + $this->height = 0; // in yExt if wmetafile otherwise in px + $this->goalWidth = 0; // in twips + $this->goalHeight = 0; // in twips + $this->pcScaleX = 100; // 100% + $this->pcScaleY = 100; // 100% + $this->binarySize = null; // Number of bytes of the binary data + $this->ImageData = null; // Binary or Hexadecimal Data + } - public function PrintImage() - { - // - $output = "format};base64,"; + public function PrintImage() + { + // + $output = "format};base64,"; - if (isset($this->binarySize)) { // process binary data - return; - } else { // process hexadecimal data - $output .= base64_encode(pack('H*',$this->ImageData)); - } + if (isset($this->binarySize)) { // process binary data + return; + } else { // process hexadecimal data + $output .= base64_encode(pack('H*', $this->ImageData)); + } - $output .= "\" />"; - return $output; - } + $output .= "\" />"; + + return $output; + } } diff --git a/src/Html/State.php b/src/Html/State.php index acc3ff5..6657461 100644 --- a/src/Html/State.php +++ b/src/Html/State.php @@ -4,114 +4,135 @@ class State { - public static $fonttbl = array(); - public static $colortbl = array(); - private static $highlight = array( - 1 => 'Black', - 2 => 'Blue', - 3 => 'Cyan', - 4 => 'Green', - 5 => 'Magenta', - 6 => 'Red', - 7 => 'Yellow', - 8 => 'Unused', - 9 => 'DarkBlue', - 10 => 'DarkCyan', - 11 => 'DarkGreen', - 12 => 'DarkMagenta', - 13 => 'DarkRed', - 14 => 'DarkYellow', - 15 => 'DarkGray', - 16 => 'LightGray' - ); - - public function __construct() - { - $this->Reset(); - } - - /* - * Store a font in the font table at the specified index. 
- */ - public static function SetFont($index, Font $font) { - State::$fonttbl[$index] = $font; - } - - public function Reset($defaultFont = null) - { - $this->bold = false; - $this->italic = false; - $this->underline = false; - $this->strike = false; - $this->hidden = false; - $this->fontsize = 0; - $this->fontcolor = null; - $this->background = null; - $this->hcolor = null; - $this->font = isset($defaultFont) ? $defaultFont : null; - $this->htmlrtf = false; - } - - public function PrintStyle() - { - $style = ""; - - if($this->bold) $style .= "font-weight:bold;"; - if($this->italic) $style .= "font-style:italic;"; - if($this->underline) $style .= "text-decoration:underline;"; - // state->underline is a toggle switch variable so no need for - // a dedicated state->end_underline variable - // if($this->state->end_underline) {$span .= "text-decoration:none;";} - if($this->strike) $style .= "text-decoration:line-through;"; - if($this->hidden) $style .= "display:none;"; - if(isset($this->font)) { - $font = self::$fonttbl[$this->font]; - $style .= $font->toStyle(); + public static $fonttbl = array(); + public static $colortbl = array(); + private static $highlight = array( + 1 => 'Black', + 2 => 'Blue', + 3 => 'Cyan', + 4 => 'Green', + 5 => 'Magenta', + 6 => 'Red', + 7 => 'Yellow', + 8 => 'Unused', + 9 => 'DarkBlue', + 10 => 'DarkCyan', + 11 => 'DarkGreen', + 12 => 'DarkMagenta', + 13 => 'DarkRed', + 14 => 'DarkYellow', + 15 => 'DarkGray', + 16 => 'LightGray' + ); + + public function __construct() + { + $this->Reset(); } - if($this->fontsize != 0) $style .= "font-size:{$this->fontsize}px;"; - // Font color: - if(isset($this->fontcolor)) { - // Check if color is set. in particular when it's the 'auto' color - if (array_key_exists($this->fontcolor, self::$colortbl) && self::$colortbl[$this->fontcolor]) - $style .= "color:" . self::$colortbl[$this->fontcolor] . ";"; + + /* + * Store a font in the font table at the specified index. 
+ */ + public static function SetFont($index, Font $font) + { + State::$fonttbl[$index] = $font; } - // Background color: - if (isset($this->background)) { - // Check if color is set. in particular when it's the 'auto' color - if (self::$colortbl[$this->background]) - $style .= "background-color:" . self::$colortbl[$this->background] . ";"; - - // Highlight color: - } elseif (isset($this->hcolor)) { - if (isset(self::$highlight[$this->hcolor])) - $style .= "background-color:" . self::$highlight[$this->hcolor] . ";"; + + public function Reset($defaultFont = null) + { + $this->bold = false; + $this->italic = false; + $this->underline = false; + $this->strike = false; + $this->hidden = false; + $this->fontsize = 0; + $this->fontcolor = null; + $this->background = null; + $this->hcolor = null; + $this->font = isset($defaultFont) ? $defaultFont : null; + $this->htmlrtf = false; } - return $style; - } - - /* - * Check whether this State is equal to another State. - */ - public function equals($state) - { - if (!($state instanceof State)) return false; - - if ($this->bold != $state->bold) return false; - if ($this->italic != $state->italic) return false; - if ($this->underline != $state->underline) return false; - if ($this->strike != $state->strike) return false; - if ($this->hidden != $state->hidden) return false; - if ($this->fontsize != $state->fontsize) return false; - - // Compare colors - if ($this->fontcolor != $state->fontcolor) return false; - if ($this->background != $state->background) return false; - if ($this->hcolor != $state->hcolor) return false; - - // Compare fonts - if ($this->font != $state->font) return false; - - return true; - } + public function PrintStyle() + { + $style = array(); + + if ($this->bold) { + $style[] = "font-weight:bold"; + } + + if ($this->italic) { + $style[] = "font-style:italic"; + } + + if ($this->underline) { + $style[] = "text-decoration:underline"; + } + + // state->underline is a toggle switch variable so no need for + // a 
dedicated state->end_underline variable + // if($this->state->end_underline) {$span .= "text-decoration:none";} + if ($this->strike) { + $style .= "text-decoration:line-through"; + } + + if ($this->hidden) { + $style .= "display:none"; + } + + if (isset($this->font)) { + $font = self::$fonttbl[$this->font]; + $style[] = $font->toStyle(); + } + + if ($this->fontsize != 0) { + $style[] = "font-size:{$this->fontsize}px"; + } + + // Font color: + if (isset($this->fontcolor)) { + // Check if color is set. in particular when it's the 'auto' color + if (array_key_exists($this->fontcolor, self::$colortbl) && self::$colortbl[$this->fontcolor]) { + $style[] = "color:" . self::$colortbl[$this->fontcolor]; + } + } + + // Background color: + if (isset($this->background)) { + // Check if color is set. in particular when it's the 'auto' color + if (array_key_exists($this->background, self::$colortbl) && self::$colortbl[$this->background]) { + $style[] = "background-color:" . self::$colortbl[$this->background]; + } + } elseif (isset($this->hcolor)) { + // Highlight color: + if (array_key_exists($this->hcolor, self::$highlight) && self::$highlight[$this->hcolor]) { + $style[] = "background-color:" . self::$highlight[$this->hcolor]; + } + } + + return empty($style) ? '' : implode(';', $style) . ';'; + } + + /** + * Check whether this State is equal to another State. 
+ */ + public function equals($state) + { + if (!($state instanceof State)) { + return false; + } + + return $this->bold == $state->bold + && $this->italic == $state->italic + && $this->underline == $state->underline + && $this->strike == $state->strike + && $this->hidden == $state->hidden + && $this->fontsize == $state->fontsize + // Compare colors + && $this->fontcolor == $state->fontcolor + && $this->background == $state->background + && $this->hcolor == $state->hcolor + // Compare fonts + && $this->font == $state->font; + } } diff --git a/src/Text.php b/src/Text.php index b4bef4b..54ff16d 100644 --- a/src/Text.php +++ b/src/Text.php @@ -4,18 +4,18 @@ class Text extends Element { - public $text; + public $text; - /* - * Create a new Text instance with string content. - */ - public function __construct($text) - { - $this->text = $text; - } + /** + * Create a new Text instance with string content. + */ + public function __construct($text) + { + $this->text = $text; + } - public function toString($level) - { - return str_repeat(" ", $level) . "TEXT {$this->text}\n"; - } + public function toString($level) + { + return str_repeat(" ", $level) . "TEXT {$this->text}\n"; + } } diff --git a/tests/BulletsTest.php b/tests/BulletsTest.php index 786abd8..04a0409 100644 --- a/tests/BulletsTest.php +++ b/tests/BulletsTest.php @@ -6,16 +6,18 @@ class BulletsTest extends TestCase { - public function testBullets() - { - $rtf = file_get_contents("tests/rtf/bullets.rtf"); - $document = new Document($rtf); - $formatter = new HtmlFormatter(); - $html = $formatter->Format($document); + public function testBullets() + { + $rtf = file_get_contents("tests/rtf/bullets.rtf"); + $document = new Document($rtf); + $formatter = new HtmlFormatter(); + $html = $formatter->Format($document); - $this->assertEquals( - '

· A

· B

· C

', - $html - ); - } + $this->assertEquals( + '

· A

' + .'

· B

' + . '

· C

', + $html + ); + } } diff --git a/tests/EmptyStringTest.php b/tests/EmptyStringTest.php index 58a0f6e..55029bb 100644 --- a/tests/EmptyStringTest.php +++ b/tests/EmptyStringTest.php @@ -5,9 +5,9 @@ class EmptyStringTest extends TestCase { - public function testParseEmptyString() - { - $document = new Document(""); - $this->assertTrue(true); - } + public function testParseEmptyString() + { + $document = new Document(""); + $this->assertTrue(true); + } } diff --git a/tests/ExtraParagraphTest.php b/tests/ExtraParagraphTest.php index 18fede5..ce2a985 100644 --- a/tests/ExtraParagraphTest.php +++ b/tests/ExtraParagraphTest.php @@ -6,19 +6,19 @@ class ExtraParagraphTest extends TestCase { - public function testExtraParagraph() - { - $rtf = file_get_contents("tests/rtf/extra-closing-paragraph.rtf"); - $document = new Document($rtf); - $formatter = new HtmlFormatter(); - $html = $formatter->Format($document); + public function testExtraParagraph() + { + $rtf = file_get_contents("tests/rtf/extra-closing-paragraph.rtf"); + $document = new Document($rtf); + $formatter = new HtmlFormatter(); + $html = $formatter->Format($document); - $this->assertEquals( - '

Conditions
' - . 'Delivery: FCA in our warehouse in Rotterdam
' - . 'Lead Time: 25 working days after confirmation, subject to prior sale
Payment: 60 days after invoice date
' - . 'Quote validity: 30 days

', - $html - ); - } + $this->assertEquals( + '

Conditions
' + . 'Delivery: FCA in our warehouse in Rotterdam
' + . 'Lead Time: 25 working days after confirmation, subject to prior sale
Payment: 60 days after invoice date
' + . 'Quote validity: 30 days

', + $html + ); + } } diff --git a/tests/FontFamilyTest.php b/tests/FontFamilyTest.php index e24b7ec..98e1e68 100644 --- a/tests/FontFamilyTest.php +++ b/tests/FontFamilyTest.php @@ -6,16 +6,16 @@ class FontFamilyTestTest extends TestCase { - public function testParseFontFamilyHtml() - { - $rtf = file_get_contents("tests/rtf/fonts.rtf"); - $document = new Document($rtf); - $formatter = new HtmlFormatter(); - $html = $formatter->Format($document); + public function testParseFontFamilyHtml() + { + $rtf = file_get_contents("tests/rtf/fonts.rtf"); + $document = new Document($rtf); + $formatter = new HtmlFormatter(); + $html = $formatter->Format($document); - $this->assertEquals( - '

Hello, world.

', - $html - ); - } + $this->assertEquals( + '

Hello, world.

', + $html + ); + } } diff --git a/tests/HtmlTest.php b/tests/HtmlTest.php index 0a8cb74..3adab23 100644 --- a/tests/HtmlTest.php +++ b/tests/HtmlTest.php @@ -6,28 +6,28 @@ class HtmlTest extends TestCase { - public function testHtml1() - { - $rtf = file_get_contents("tests/rtf/html1.rtf"); - $document = new Document($rtf); - $formatter = new HtmlFormatter(); - $html = $formatter->Format($document); - - $this->assertEquals( - "" - . " ", - $html - ); - } + public function testHtml1() + { + $rtf = file_get_contents("tests/rtf/html1.rtf"); + $document = new Document($rtf); + $formatter = new HtmlFormatter(); + $html = $formatter->Format($document); + + $this->assertEquals( + "" + . " ", + $html + ); + } - public function testHtml2() - { - $rtf = file_get_contents("tests/rtf/html2.rtf"); - $document = new Document($rtf); - $formatter = new HtmlFormatter(); - $html = $formatter->Format($document); + public function testHtml2() + { + $rtf = file_get_contents("tests/rtf/html2.rtf"); + $document = new Document($rtf); + $formatter = new HtmlFormatter(); + $html = $formatter->Format($document); - $expected = << " @@ -25,7 +25,7 @@ public function testHtml2() $rtf = file_get_contents("tests/rtf/html2.rtf"); $document = new Document($rtf); $formatter = new HtmlFormatter(); - $html = $formatter->Format($document); + $html = $formatter->format($document); $expected = << @@ -68,7 +68,7 @@ public function testHtml3() $rtf = file_get_contents("tests/rtf/html3.rtf"); $document = new Document($rtf); $formatter = new HtmlFormatter('UTF-8'); - $html = $formatter->Format($document); + $html = $formatter->format($document); $expected = <<這是一個文本字符串
diff --git a/tests/ImagesTest.php b/tests/ImagesTest.php new file mode 100644 index 0000000..7e56df8 --- /dev/null +++ b/tests/ImagesTest.php @@ -0,0 +1,52 @@ +format($document); + + $this->assertEquals( + '', + $html + ); + } +} diff --git a/tests/ParseSimpleTest.php b/tests/ParseSimpleTest.php index 0e712ce..53a72a0 100644 --- a/tests/ParseSimpleTest.php +++ b/tests/ParseSimpleTest.php @@ -18,7 +18,7 @@ public function testParseSimpleHtml() $rtf = file_get_contents("tests/rtf/hello-world.rtf"); $document = new Document($rtf); $formatter = new HtmlFormatter(); - $html = $formatter->Format($document); + $html = $formatter->format($document); $this->assertEquals( '

Hello, world.

', diff --git a/tests/ParseTest.php b/tests/ParseTest.php index 16f5bad..117734c 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -11,7 +11,7 @@ public function testParse1() $rtf = file_get_contents("tests/rtf/test1.rtf"); $document = new Document($rtf); $formatter = new HtmlFormatter(); - $html = $formatter->Format($document); + $html = $formatter->format($document); // We only test that it does not throw an exception $this->assertTrue(true); @@ -28,6 +28,6 @@ public function testParseException1() $document = new Document('{\rtf1\ansi\ansicpg1252\deff0\deflang1046'); $formatter = new HtmlFormatter(); - $html = $formatter->Format($document); + $html = $formatter->format($document); } } diff --git a/tests/rtf/image1.rtf b/tests/rtf/image1.rtf new file mode 100644 index 0000000..4d16af2 --- /dev/null +++ b/tests/rtf/image1.rtf @@ -0,0 +1,25 @@ +{\rtf1{\pict\jpegblip +FFD8FFE000104A46494600010101004800480000FFDB004300191113161310191614161C1B191E253E29252222254C373A2D3E5A505F5E595057566470907A64 +6A886C56577DAA7E889499A1A2A16178B0BDAF9CBB909EA19AFFDB0043011B1C1C252125492929499A6757679A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A +9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9A9AFFC0001108004D006403011100021101031101FFC400190000020301000000000000 +000000000000010200030405FFC4002E100002010203060601050100000000000001020003111221310413415152912261627181A132234243C1D1E1FFC40018 +01010101010100000000000000000000000001020304FFC4002511010100020202030000070000000000000001021103211231415171041322324261D1FFDA00 +0C03010002110311003F007C0BD23B4E8D0E05E91DA412C9D23B428597A07685D54183A476841C2BD23B42A605E91DA0428BD23B4214A2F48ED014AAF48ED2A0 +155E91DA002ABC8768145655C5A0D394237C2949BC8BA1442EE141009E2663933F0C6E5A6F19B5AA4A1349C5984CE39CCE794769D7447B13E35F91373FD3396B +E61701D50DFCB8CBB62E1F401F8112B037BC0060298429940308A2B7E43DA11B8E92351152E2F26DD3C562D30C0F2109F88D4CD501598EF13F16E63919E5CB1B 
+C596F1F55DF1CA59D96E540DE0057830D3FE4EB8F24CBF56C029C54CE9B73B8FD21B1C9C7CC7E25D5F6464619A9B8976CDC3E898F9CAC0DE0298429845158F88 +7B40DCDA42883600904807300DAE263396CB31F6ED8D93DAE0DB3B3D92A321E96245E793CB9F1EB2EDD27E19EE84781B2D2EF94D4B739AF26A485BD32AD53030 +2D93A83600CE731CE65E16FAF54EFD11569630AA2AA13A78C58CE995E5C26ED9AFC37BEC30A1254AD4C40E777B4D4BC966E5859019513334EA8F3DE4BAE4FB86 +A42D5A48956D76B150733720CD71679653759CFC6CED8F6AAC367650016BEA4709D76E171ED13684A9C73E52CBB66CB0F79514562310F684746D710A08D66CF3 +1C4495B941D72B300441DC0467A7953720749CC4E79716397B8E9392FCF6BA9D65B115295AE332998ED3CF9F1673BC6EDD265E5EA81A2C56F4996A21E5FE4D63 +FC44F59CD3371EFE8BBC23C3555881A752FF00B2F87F971D6B7AF6BE9AE3CB1074199B6BF22633E5B26ACD55B74CF54A956AACD9EB6FEA77C278C92267239C7F +326A1272CEDA09BDCDF6F3D975B667C02A1C02E38CAE76D45DA2A02421CBCE368956B96605945EDAF38DABB62694196F98D614A1AD91EC645946C0E86C79185E +AA7894F10617B87425C6EC1B31CD0F9F29E6E5C7C2F9FC7CBB619EFD8AD5A9FC803A8EA1FDC5E1C6CDE1746F57562C0D4B740A3EE9EAD88C47503CE79F7979FF +0054F291AB2ED936B77A7501AF4FC00101D742C74BF30277C2E37FB2A5F4CB4D9372EAC7F50F3E53B6BB72B6698F6820B5C5BDE571AA16A14BF9F38D22EC4A40 +26C2F1A5D5AED89D01810807585294B7E26410315C8C69A99582703712A79C9675A6B1B37B58D48ED02E1954E8E2FF00627926578B78DEFEBFE3B7D33ED15519 +99FF008D4587B09DB8B1F1C7BF6C677BD3254DBAA50744501D0E669B66233E397BF54FE6DDB1D427166709D6DC26E7A71CBBA537FDCBF2B2EE33655245DAC339 +596B6A192D9869DA65DEE36751D713A390C037854810D8EB2042838650A4653E7F12592B78F2658FA67DAEA108A29D2B906E40E4234972DB0229AB5D98E5737B +7299612A8235B91E70955A54606DCE34B2ADA7858E3B69136D492F65A8016B9835B76C4E8C0C030242A4012006148CA0E708C9B56CED52CC96B8F8264B12ED81 +83AB61A80AF999349B406CEA105C716B6A64D2AD202F846435875935D2B739C8B5D7DF7A7EE75711DF7A7EE04DF7A7EE04DF7A7EE04DF7A7EE00DF7A7EE400D6 +F4FDC281ADE9FB8086AFA7EE023D456521D030E46118F1056200384682F335AC6765C77CCF192B78DF923367A48B6BFFD9 +}} \ No newline at end of file From 8ecac1706914c9c64d4e03b08539654377b8c1f1 
Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Sat, 10 Jul 2021 10:16:03 +0200 Subject: [PATCH 26/38] Various CS fixes and code documentation --- src/ControlSymbol.php | 7 +++ src/ControlWord.php | 7 +++ src/Document.php | 74 ++++++++++++++++++++------ src/Group.php | 13 ++++- src/Html/HtmlFormatter.php | 104 +++++++++++++++++++++++++++---------- src/Html/Image.php | 4 +- src/Html/State.php | 29 ++++++++++- src/Text.php | 9 ++++ 8 files changed, 199 insertions(+), 48 deletions(-) diff --git a/src/ControlSymbol.php b/src/ControlSymbol.php index 15c51d9..2cd8411 100644 --- a/src/ControlSymbol.php +++ b/src/ControlSymbol.php @@ -7,6 +7,13 @@ class ControlSymbol extends Element public $symbol; public $parameter = 0; + /** + * Returns string representation of the object for debug purposes + * + * @param int $level Indentation level + * + * @return string + */ public function toString($level) { return str_repeat(" ", $level) . "SYMBOL {$this->symbol} ({$this->parameter})\n"; diff --git a/src/ControlWord.php b/src/ControlWord.php index da25b74..6c636c6 100644 --- a/src/ControlWord.php +++ b/src/ControlWord.php @@ -7,6 +7,13 @@ class ControlWord extends Element public $word; public $parameter; + /** + * Returns string representation of the object for debug purposes + * + * @param int $level Indentation level + * + * @return string + */ public function toString($level) { return str_repeat(" ", $level) . 
"WORD {$this->word} ({$this->parameter})\n"; diff --git a/src/Document.php b/src/Document.php index 5b208d2..3a47423 100644 --- a/src/Document.php +++ b/src/Document.php @@ -5,13 +5,13 @@ class Document { /** @var string RTF string being parsed */ - private $rtf; + protected $rtf; /** @var int Current position in RTF string */ - private $pos; + protected $pos; /** @var int Length of RTF string */ - private $len; + protected $len; /** @var Group Current RTF group */ - private $group; + protected $group; /** @var Group Root group */ public $root = null; @@ -19,7 +19,7 @@ class Document /** * Object contructor * - * @param string The RTF content + * @param string $rtf The RTF content */ public function __construct($rtf) { @@ -45,8 +45,9 @@ protected function getChar() } /** - * (Helper method) - * Is the current character a letter? + * (Helper method) Is the current character a letter? + * + * @return bool */ protected function isLetter() { @@ -62,8 +63,9 @@ protected function isLetter() } /** - * (Helper method) - * Is the current character a digit? + * (Helper method) Is the current character a digit? + * + * @return bool */ protected function isDigit() { @@ -71,8 +73,9 @@ protected function isDigit() } /** - * (Helper method) - * Is the current character end-of-line (EOL)? + * (Helper method) Is the current character end-of-line (EOL)? + * + * @return bool */ protected function isEndOfLine() { @@ -89,8 +92,9 @@ protected function isEndOfLine() } /** - * (Helper method) - * Is the current character for a space delimiter? + * (Helper method) Is the current character for a space delimiter? + * + * @return bool */ protected function isSpaceDelimiter() { @@ -99,6 +103,8 @@ protected function isSpaceDelimiter() /** * Store state of document on stack. + * + * @return void */ protected function parseStartGroup() { @@ -124,6 +130,8 @@ protected function parseStartGroup() /** * Retrieve state of document from stack. 
+ * + * @return void */ protected function parseEndGroup() { @@ -132,6 +140,11 @@ protected function parseEndGroup() array_pop($this->uc); } + /** + * Parse ControlWord element + * + * @return void + */ protected function parseControlWord() { // Read letters until a non-letter is reached. @@ -224,6 +237,11 @@ protected function parseControlWord() array_push($this->group->children, $rtfword); } + /** + * Parse ControlSymbol element + * + * @return void + */ protected function parseControlSymbol() { // Read symbol (one character only). @@ -258,6 +276,11 @@ protected function parseControlSymbol() array_push($this->group->children, $rtfsymbol); } + /** + * Parse Control element + * + * @return void + */ protected function parseControl() { // Beginning of an RTF control word or control symbol. @@ -273,7 +296,12 @@ protected function parseControl() } } - protected function ParseText() + /** + * Parse Text element + * + * @return void + */ + protected function parseText() { // Parse plain text up to backslash or brace, // unless escaped. @@ -292,9 +320,12 @@ protected function ParseText() // is really an escape sequence. $this->getChar(); switch ($this->char) { - case "\\": break; - case '{': break; - case '}': break; + case "\\": + break; + case '{': + break; + case '}': + break; default: // Not an escape. Roll back. $this->pos = $this->pos - 2; @@ -328,6 +359,10 @@ protected function ParseText() /** * Attempt to parse an RTF string. + * + * @param string $rtf RTF content + * + * @return void */ protected function parse($rtf) { @@ -364,6 +399,11 @@ protected function parse($rtf) } } + /** + * Returns string representation of the document for debug purposes. 
+ * + * @return string + */ public function __toString() { if (!$this->root) { diff --git a/src/Group.php b/src/Group.php index b6740e4..31f8c2a 100644 --- a/src/Group.php +++ b/src/Group.php @@ -16,6 +16,11 @@ public function __construct() $this->children = []; } + /** + * Returns group type + * + * @return string|null + */ public function getType() { // No children? Then the group type is null. @@ -45,6 +50,8 @@ public function getType() /** * If a group contains a '*' symbol as its first child, * then it is a destination group. + * + * @return bool|null Group destination */ public function isDestination() { @@ -64,7 +71,11 @@ public function isDestination() } /** - * Convert Group to string for debugging purposes. + * Returns string representation of the object for debug purposes + * + * @param int $level Indentation level + * + * @return string */ public function toString($level = 0) { diff --git a/src/Html/HtmlFormatter.php b/src/Html/HtmlFormatter.php index ea33dd2..f5cd294 100644 --- a/src/Html/HtmlFormatter.php +++ b/src/Html/HtmlFormatter.php @@ -6,15 +6,19 @@ class HtmlFormatter { - private $output = ''; - private $encoding; - private $defaultFont; - private $fromhtml = false; + protected $output = ''; + protected $encoding; + protected $defaultFont; + protected $fromhtml = false; /** + * Object constructor. + * * By default, HtmlFormatter uses HTML_ENTITIES for code conversion. * You can optionally support a different endoing when creating * the HtmlFormatter instance. 
+ * + * @param string $encoding Output encoding */ public function __construct($encoding = 'HTML-ENTITIES') { @@ -32,6 +36,13 @@ public function __construct($encoding = 'HTML-ENTITIES') $this->encoding = $encoding; } + /** + * Generates HTML output for the document + * + * @param Document $document The document + * + * @return string HTML content + */ public function format(Document $document) { // Clear current output @@ -64,6 +75,13 @@ public function format(Document $document) return $this->output; } + /** + * Registers a font definition. + * + * @param \RtfHtmlPhp\Group $fontGroup A group element with a font definition + * + * @return void + */ protected function loadFont(\RtfHtmlPhp\Group $fontGroup) { $fontNumber = 0; @@ -72,7 +90,7 @@ protected function loadFont(\RtfHtmlPhp\Group $fontGroup) // Loop through children of the font group. The font group // contains control words with the font number and charset, // and a control text with the font name. - foreach($fontGroup->children as $child) { + foreach ($fontGroup->children as $child) { // Control word if ($child instanceof \RtfHtmlPhp\ControlWord) { switch ($child->word) { @@ -81,11 +99,21 @@ protected function loadFont(\RtfHtmlPhp\Group $fontGroup) break; // Font family names - case 'froman': $font->family = "serif"; break; - case 'fswiss': $font->family = "sans-serif"; break; - case 'fmodern': $font->family = "monospace"; break; - case 'fscript': $font->family = "cursive"; break; - case 'fdecor': $font->family = "fantasy"; break; + case 'froman': + $font->family = "serif"; + break; + case 'fswiss': + $font->family = "sans-serif"; + break; + case 'fmodern': + $font->family = "monospace"; + break; + case 'fscript': + $font->family = "cursive"; + break; + case 'fdecor': + $font->family = "fantasy"; + break; // case 'fnil': break; // default font // case 'ftech': break; // symbol @@ -144,7 +172,7 @@ protected function extractFontTable($fontTblGrp) } // Load the font specification in the subgroup: - 
$this->LoadFont($child); + $this->loadFont($child); } } @@ -181,39 +209,59 @@ protected function extractColorTable($colorTblGrp) protected function extractImage($pictGrp) { - $Image = new Image(); + $image = new Image(); foreach ($pictGrp as $child) { if ($child instanceof \RtfHtmlPhp\ControlWord) { switch ($child->word) { // Picture Format - case "emfblip": $Image->format = 'emf'; break; - case "pngblip": $Image->format = 'png'; break; - case "jpegblip": $Image->format = 'jpeg'; break; - case "macpict": $Image->format = 'pict'; break; + case "emfblip": + $image->format = 'emf'; + break; + case "pngblip": + $image->format = 'png'; + break; + case "jpegblip": + $image->format = 'jpeg'; + break; + case "macpict": + $image->format = 'pict'; + break; // case "wmetafile": $Image->format = 'bmp'; break; // Picture size and scaling - case "picw": $Image->width = $child->parameter; break; - case "pich": $Image->height = $child->parameter; break; - case "picwgoal": $Image->goalWidth = $child->parameter; break; - case "pichgoal": $Image->goalHeight = $child->parameter; break; - case "picscalex": $Image->pcScaleX = $child->parameter; break; - case "picscaley": $Image->pcScaleY = $child->parameter; break; + case "picw": + $image->width = $child->parameter; + break; + case "pich": + $image->height = $child->parameter; + break; + case "picwgoal": + $image->goalWidth = $child->parameter; + break; + case "pichgoal": + $image->goalHeight = $child->parameter; + break; + case "picscalex": + $image->pcScaleX = $child->parameter; + break; + case "picscaley": + $image->pcScaleY = $child->parameter; + break; // Binary or Hexadecimal Data ? 
- case "bin": $Image->binarySize = $child->parameter; break; - - default: break; + case "bin": + $image->binarySize = $child->parameter; + break; } } elseif ($child instanceof \RtfHtmlPhp\Text) { // store Data - $Image->imageData = $child->text; + $image->imageData = $child->text; } } // output Image - $this->output .= $Image->printImage(); + $this->output .= $image->printImage(); } protected function processGroup($group) @@ -508,6 +556,8 @@ protected function closeTag($tag) /** * Closes all opened tags + * + * @return void */ protected function closeTags() { diff --git a/src/Html/Image.php b/src/Html/Image.php index 41dac0b..93b4c11 100644 --- a/src/Html/Image.php +++ b/src/Html/Image.php @@ -13,7 +13,9 @@ public function __construct() } /** - * Resets the object to the default state + * Resets the object to the initial state + * + * @return void */ public function reset() { diff --git a/src/Html/State.php b/src/Html/State.php index 8e6cdf7..068d5c5 100644 --- a/src/Html/State.php +++ b/src/Html/State.php @@ -6,7 +6,8 @@ class State { public static $fonttbl = []; public static $colortbl = []; - private static $highlight = [ + + protected static $highlight = [ 1 => 'Black', 2 => 'Blue', 3 => 'Cyan', @@ -25,19 +26,34 @@ class State 16 => 'LightGray' ]; + /** + * Object constructor + */ public function __construct() { $this->reset(); } - /* + /** * Store a font in the font table at the specified index. + * + * @param int $index Font number + * @param Font $font Font object + * + * @return void */ public static function setFont($index, Font $font) { State::$fonttbl[$index] = $font; } + /** + * Resets the object to the initial state + * + * @param string|null $defaultFont Font name + * + * @return void + */ public function reset($defaultFont = null) { $this->bold = false; @@ -53,6 +69,11 @@ public function reset($defaultFont = null) $this->htmlrtf = false; } + /** + * Generates css style for the state. 
+ * + * @return string The css string + */ public function printStyle() { $style = []; @@ -115,6 +136,10 @@ public function printStyle() /** * Check whether this State is equal to another State. + * + * @param State $state A state to compare with + * + * @return bool True if the state is identical, False otherwise */ public function equals($state) { diff --git a/src/Text.php b/src/Text.php index 54ff16d..66f6073 100644 --- a/src/Text.php +++ b/src/Text.php @@ -8,12 +8,21 @@ class Text extends Element /** * Create a new Text instance with string content. + * + * @param string $text The content */ public function __construct($text) { $this->text = $text; } + /** + * Returns string representation of the object for debug purposes + * + * @param int $level Indentation level + * + * @return string + */ public function toString($level) { return str_repeat(" ", $level) . "TEXT {$this->text}\n"; From 9f832d040ac055cbbdb5809f814dc405b4beac3a Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Sat, 10 Jul 2021 10:24:11 +0200 Subject: [PATCH 27/38] Update changelog and readme --- CHANGELOG.md | 8 ++++++++ README.md | 9 ++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d135c81..62c9d6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 2.0 + +- Fork, various code style fixes, camel-case use +- Added some more tests +- Removed trigger_error() use +- Added support for encapsulated HTML +- Support PHP >= 5.5, up to 8.1 + ## 1.1 ### Update 11 Sep '19: diff --git a/README.md b/README.md index f37b8ce..78cff1d 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ $rtf = file_get_contents("test.rtf"); $document = new Document($rtf); // or use a string directly ``` -`Document` will raise an exception if the RTF document could not be parsed. Parse errors will generate PHP notices. +`Document` will raise an exception if the RTF document could not be parsed. 
If you’d like to see what the parser read (for debug purposes), then call this: @@ -42,10 +42,5 @@ $formatter = new HtmlFormatter('UTF-8'); ## Install via Composer ```shell -composer require henck/rtf-to-html +composer require roundcube/rtf-to-html ``` - -## Caveats - -* Please note that rtf-html-php requires your PHP installation to support the `mb_convert_encoding` function. Therefore you must have the `php-mbstring` module installed. For fresh PHP installations, it will usually be there. - From f3c23de7e5b3e3ca5862e0dfb9b7286c3fa4a670 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Sat, 10 Jul 2021 10:27:50 +0200 Subject: [PATCH 28/38] Add test for debug mode --- tests/FontFamilyTest.php | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/FontFamilyTest.php b/tests/FontFamilyTest.php index 0f6b60e..c1be796 100644 --- a/tests/FontFamilyTest.php +++ b/tests/FontFamilyTest.php @@ -17,5 +17,43 @@ public function testParseFontFamilyHtml() '

Hello, world.

', $html ); + + $expected = <<assertSame($expected, (string) $document); } } From 21607784896ab0504ca0cf26aa27217121ff9072 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Sat, 10 Jul 2021 17:43:31 +0200 Subject: [PATCH 29/38] Some more code documentation --- src/Html/HtmlFormatter.php | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/Html/HtmlFormatter.php b/src/Html/HtmlFormatter.php index f5cd294..b6bde1c 100644 --- a/src/Html/HtmlFormatter.php +++ b/src/Html/HtmlFormatter.php @@ -609,10 +609,17 @@ protected function getSourceEncoding() return $this->rtfEncoding; } - protected function getEncodingFromCharset($fcharset) + /** + * Convert RTF charset identifier into an encoding name (for iconv) + * + * @param int $charset Charset identifier + * + * @return string|null Encoding name or NULL on unknown CodePage + */ + protected function getEncodingFromCharset($charset) { // maps windows character sets to iconv encoding names - $charset = array ( + $map = array ( 0 => 'CP1252', // ANSI: Western Europe 1 => 'CP1252', //*Default 2 => 'CP1252', //*Symbol @@ -639,14 +646,21 @@ protected function getEncodingFromCharset($fcharset) 255 => 'CP437', //*OEM still PC437 ); - if (isset($charset[$fcharset])) { - return $charset[$fcharset]; + if (isset($map[$charset])) { + return $map[$charset]; } } + /** + * Convert RTF CodePage identifier into an encoding name (for iconv) + * + * @param string $cpg CodePage identifier + * + * @return string|null Encoding name or NULL on unknown CodePage + */ protected function getEncodingFromCodepage($cpg) { - $codePage = array ( + $map = array ( 'ansi' => 'CP1252', 'mac' => 'MAC', 'pc' => 'CP437', @@ -685,8 +699,8 @@ protected function getEncodingFromCodepage($cpg) 1361 => 'CP1361', // Johab ); - if (isset($codePage[$cpg])) { - return $codePage[$cpg]; + if (isset($map[$cpg])) { + return $map[$cpg]; } } From c5b86bbb352ddf225b3ae7235873f8da87436a2e Mon Sep 17 00:00:00 2001 From: 
Aleksander Machniak Date: Sat, 10 Jul 2021 17:45:31 +0200 Subject: [PATCH 30/38] Add .gitattributes with some export-ignores --- .gitattributes | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..180ab2c --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +.gitignore export-ignore +.gitattributes export-ignore +.github/ export-ignore +tests/ export-ignore From d7bc176bc608cb92df57fc89b6b1eb5a1eda4bb0 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Sat, 10 Jul 2021 17:50:52 +0200 Subject: [PATCH 31/38] Typos --- CHANGELOG.md | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62c9d6b..c533d97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ - Added some more tests - Removed trigger_error() use - Added support for encapsulated HTML -- Support PHP >= 5.5, up to 8.1 +- Support PHP >= 5.4, up to 8.1 ## 1.1 diff --git a/README.md b/README.md index 78cff1d..ecaf9d4 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ To convert the parser’s parse tree to HTML, call this (but only if the RTF was ```php use RtfHtmlPhp\Html\HtmlFormatter; $formatter = new HtmlFormatter(); -echo $formatter->Format($document); +echo $formatter->format($document); ``` For enhanced compatibility the default character encoding of the converted RTF unicode characters is set to `HTML-ENTITIES`. To change the default encoding, you can initialize the `Html` object with the desired encoding supported by `mb_list_encodings()`: ex. 
`UTF-8` From 7b488113e6fe2fccd84643e5121070fe7f335cb2 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Sun, 11 Jul 2021 08:43:46 +0200 Subject: [PATCH 32/38] Get rid of version from composer.json --- composer.json | 1 - 1 file changed, 1 deletion(-) diff --git a/composer.json b/composer.json index 49bad89..c19ed18 100644 --- a/composer.json +++ b/composer.json @@ -3,7 +3,6 @@ "description": "RTF to HTML converter in PHP", "keywords": ["rtf", "converter"], "type": "library", - "version": "1.1", "license": "GPL-2.0", "authors": [ { From 3f46a8a292d5b64e3d9c1a139adbd239ebcb29dd Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Sun, 31 Dec 2023 12:19:13 +0100 Subject: [PATCH 33/38] Various code improvements/fixes - Fix "Creation of dynamic property" warnings - Fix a couple of code errors found in static code analysis - Enable testing on PHP 8.2 and 8.3 --- .gitattributes | 1 + .github/workflows/tests.yml | 2 +- CHANGELOG.md | 6 ++++++ composer.json | 3 ++- phpstan.neon | 5 +++++ src/Document.php | 18 +++++++++++------- src/Html/Font.php | 1 + src/Html/HtmlFormatter.php | 25 ++++++++++++++++--------- src/Html/Image.php | 13 ++++++++++++- src/Html/State.php | 16 ++++++++++++++-- tests/FontFamilyTest.php | 2 +- 11 files changed, 70 insertions(+), 22 deletions(-) create mode 100644 phpstan.neon diff --git a/.gitattributes b/.gitattributes index 180ab2c..3c9c42f 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,4 @@ .gitattributes export-ignore .github/ export-ignore tests/ export-ignore +phpstan.neon export-ignore diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d4749e0..1cc308f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: true matrix: - php: [5.4, 5.5, 5.6, 7.0, 7.1, 7.2, 7.3, 7.4, 8.0, 8.1] + php: [5.4, 5.5, 5.6, 7.0, 7.1, 7.2, 7.3, 7.4, 8.0, 8.1, 8.2, 8.3] name: PHP ${{ matrix.php }}/Linux diff --git a/CHANGELOG.md b/CHANGELOG.md index
c533d97..bf5812c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 2.1 + +- Fix "Creation of dynamic property" warnings +- Fix a couple of code errors found in static code analisys +- Enable testing on PHP 8.2 and 8.3 + ## 2.0 - Fork, various code style fixes, camel-case use diff --git a/composer.json b/composer.json index c19ed18..7409da1 100644 --- a/composer.json +++ b/composer.json @@ -30,6 +30,7 @@ } }, "require-dev": { - "phpunit/phpunit": "^4.8.36 || ^5.7.21 || ^6 || ^7" + "phpunit/phpunit": "^4.8.36 || ^5.7.21 || ^6 || ^7 | ^9.6", + "phpstan/phpstan": "^1.2" } } diff --git a/phpstan.neon b/phpstan.neon new file mode 100644 index 0000000..46b0f47 --- /dev/null +++ b/phpstan.neon @@ -0,0 +1,5 @@ +parameters: + level: 5 + paths: + - src + - tests diff --git a/src/Document.php b/src/Document.php index 3a47423..75f1f45 100644 --- a/src/Document.php +++ b/src/Document.php @@ -4,16 +4,20 @@ class Document { + /** @var ?string Current character in an RTF stream */ + protected $char; /** @var string RTF string being parsed */ protected $rtf; /** @var int Current position in RTF string */ protected $pos; /** @var int Length of RTF string */ protected $len; - /** @var Group Current RTF group */ + /** @var ?Group Current RTF group */ protected $group; + /** @var array */ + protected $uc = []; - /** @var Group Root group */ + /** @var ?Group Root group */ public $root = null; /** @@ -27,10 +31,10 @@ public function __construct($rtf) } /** - * Get the next character from the RTF stream. + * Position on the next character from the RTF stream. * Parsing is aborted when reading beyond end of input string. 
* - * @return string + * @return void */ protected function getChar() { @@ -110,7 +114,7 @@ protected function parseStartGroup() { $group = new Group(); - if ($this->group != null) { + if ($this->group) { // Make the new group a child of the current group $group->parent = $this->group; @@ -170,7 +174,7 @@ protected function parseControlWord() if ($parameter === null) { $parameter = 0; } - $parameter = $parameter * 10 + $this->char; + $parameter = $parameter * 10 + (int) $this->char; $this->getChar(); } @@ -349,7 +353,7 @@ protected function parseText() // If there is no current group, then this is not a valid RTF file. // Throw an exception. - if ($this->group == null) { + if (!$this->group) { throw new \Exception("Parse error: RTF text outside of group."); } diff --git a/src/Html/Font.php b/src/Html/Font.php index 73e6cc6..2d1baa6 100644 --- a/src/Html/Font.php +++ b/src/Html/Font.php @@ -5,6 +5,7 @@ class Font { public $family; + public $fprq; public $name; public $charset; public $codepage; diff --git a/src/Html/HtmlFormatter.php b/src/Html/HtmlFormatter.php index b6bde1c..30567fb 100644 --- a/src/Html/HtmlFormatter.php +++ b/src/Html/HtmlFormatter.php @@ -6,10 +6,15 @@ class HtmlFormatter { - protected $output = ''; protected $encoding; protected $defaultFont; protected $fromhtml = false; + protected $openedTags = []; + protected $output = ''; + protected $previousState; + protected $rtfEncoding; + protected $state; + protected $states = []; /** * Object constructor. @@ -65,8 +70,8 @@ public function format(Document $document) $this->processGroup($document->root); // Instead of removing opened tags, we close them - $this->output .= $this->openedTags['span'] ? '' : ''; - $this->output .= $this->openedTags['p'] ? '

' : ''; + $this->output .= $this->openedTags['span'] ? '' : ''; // @phpstan-ignore-line + $this->output .= $this->openedTags['p'] ? '

' : ''; // @phpstan-ignore-line // Remove extra empty paragraph at the end // TODO: Find the real reason it's there and fix it @@ -490,7 +495,7 @@ protected function write($txt) return; } - if ($this->openedTags['p'] === null) { + if (!isset($this->openedTags['p'])) { // Create the first paragraph $this->openTag('p'); } @@ -499,9 +504,7 @@ protected function write($txt) // 1st case: style change occured // 2nd case: there is no change in style but the already created 'span' // element is somehow closed (ex. because of an end of paragraph) - if (!$this->state->equals($this->previousState) - || ($this->state->equals($this->previousState) && !$this->openedTags['span']) - ) { + if (!$this->state->equals($this->previousState) || empty($this->openedTags['span'])) { // If applicable close previously opened 'span' tag $this->closeTag('span'); @@ -535,7 +538,7 @@ protected function closeTag($tag) return; } - if ($this->openedTags[$tag]) { + if (!empty($this->openedTags[$tag])) { // Check for empty html elements if (substr($this->output, -strlen("<{$tag}>")) == "<{$tag}>") { switch ($tag) { @@ -649,6 +652,8 @@ protected function getEncodingFromCharset($charset) if (isset($map[$charset])) { return $map[$charset]; } + + return null; } /** @@ -702,12 +707,14 @@ protected function getEncodingFromCodepage($cpg) if (isset($map[$cpg])) { return $map[$cpg]; } + + return null; } protected function ordUtf8($chr) { $ord0 = ord($chr); - if ($ord0 >= 0 && $ord0 <= 127) { + if ($ord0 <= 127) { return $ord0; } diff --git a/src/Html/Image.php b/src/Html/Image.php index 93b4c11..9e912b1 100644 --- a/src/Html/Image.php +++ b/src/Html/Image.php @@ -4,6 +4,17 @@ class Image { + public $format; + public $width; + public $height; + public $goalWidth; + public $goalHeight; + public $pcScaleX; + public $pcScaleY; + public $binarySize; + public $imageData; + + /** * Object constructor. 
*/ @@ -33,7 +44,7 @@ public function reset() /** * Generate a HTML content for the image * - * @return string tag content, An empty string for unsupported/empty image + * @return string Image tag content, An empty string for unsupported/empty image */ public function printImage() { diff --git a/src/Html/State.php b/src/Html/State.php index 068d5c5..5b41d0f 100644 --- a/src/Html/State.php +++ b/src/Html/State.php @@ -6,6 +6,17 @@ class State { public static $fonttbl = []; public static $colortbl = []; + public $bold; + public $italic; + public $underline; + public $strike; + public $hidden; + public $fontsize; + public $fontcolor; + public $background; + public $hcolor; + public $font; + public $htmlrtf; protected static $highlight = [ 1 => 'Black', @@ -26,6 +37,7 @@ class State 16 => 'LightGray' ]; + /** * Object constructor */ @@ -94,11 +106,11 @@ public function printStyle() // a dedicated state->end_underline variable // if($this->state->end_underline) {$span .= "text-decoration:none";} if ($this->strike) { - $style .= "text-decoration:line-through"; + $style[] = "text-decoration:line-through"; } if ($this->hidden) { - $style .= "display:none"; + $style[] = "display:none"; } if (isset($this->font)) { diff --git a/tests/FontFamilyTest.php b/tests/FontFamilyTest.php index c1be796..4f72afe 100644 --- a/tests/FontFamilyTest.php +++ b/tests/FontFamilyTest.php @@ -4,7 +4,7 @@ use RtfHtmlPhp\Document; use RtfHtmlPhp\Html\HtmlFormatter; -class FontFamilyTestTest extends TestCase +class FontFamilyTest extends TestCase { public function testParseFontFamilyHtml() { From c05a1d56c3f3bc7376f0e5c5f9d0cfb8b006ee04 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Sun, 31 Dec 2023 14:50:44 +0100 Subject: [PATCH 34/38] CI: use ubuntu-latest --- .github/workflows/tests.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1cc308f..25bb886 100644 --- a/.github/workflows/tests.yml +++ 
b/.github/workflows/tests.yml @@ -6,19 +6,19 @@ on: jobs: linux_tests: - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest if: "!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, '[ci skip]')" strategy: fail-fast: true matrix: - php: [5.4, 5.5, 5.6, 7.0, 7.1, 7.2, 7.3, 7.4, 8.0, 8.1, 8.2, 8.3] + php: ['5.4', '5.5', '5.6', '7.0', '7.1', '7.2', '7.3', '7.4', '8.0', '8.1', '8.2', '8.3'] name: PHP ${{ matrix.php }}/Linux steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 @@ -31,10 +31,6 @@ jobs: - name: Setup problem matchers run: echo "::add-matcher::${{ runner.tool_cache }}/phpunit.json" - - name: Fix PHPUnit for PHP8 - run: composer config platform.php 7.4 - if: matrix.php >= 8 - - name: Install dependencies run: composer install --prefer-dist --no-interaction --no-progress From 428238bf54db2ca65300418384abed0c58898302 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Sun, 31 Dec 2023 14:53:47 +0100 Subject: [PATCH 35/38] CI: remove phpstan on PHP < 7.1 --- .github/workflows/tests.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 25bb886..6c88102 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -31,6 +31,10 @@ jobs: - name: Setup problem matchers run: echo "::add-matcher::${{ runner.tool_cache }}/phpunit.json" + - name: Fix PHPUnit for PHP8 + run: composer remove phpstan/phpstan + if: matrix.php < 7.1 + - name: Install dependencies run: composer install --prefer-dist --no-interaction --no-progress From 3e0f201a7fee3b098aa8786c16159709bedb4e17 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Sun, 31 Dec 2023 14:59:23 +0100 Subject: [PATCH 36/38] CI: Fix phpstan removal --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml 
index 6c88102..eae8651 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -32,7 +32,7 @@ jobs: run: echo "::add-matcher::${{ runner.tool_cache }}/phpunit.json" - name: Fix PHPUnit for PHP8 - run: composer remove phpstan/phpstan + run: composer remove phpstan/phpstan --dev --no-update if: matrix.php < 7.1 - name: Install dependencies From a3432ca249b73bf24fec50114191a63ad8b1478c Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Sun, 31 Dec 2023 15:01:02 +0100 Subject: [PATCH 37/38] Fix version number --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf5812c..4dc3982 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## 2.1 +## 2.2 - Fix "Creation of dynamic property" warnings - Fix a couple of code errors found in static code analisys From 307e6cb2d937a809bd6099d37ececefe7c27f14b Mon Sep 17 00:00:00 2001 From: Aleksander Machniak Date: Sat, 4 Jan 2025 13:47:21 +0100 Subject: [PATCH 38/38] CI: Add PHP 8.4 to the matrix --- .github/workflows/tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index eae8651..c69594a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: true matrix: - php: ['5.4', '5.5', '5.6', '7.0', '7.1', '7.2', '7.3', '7.4', '8.0', '8.1', '8.2', '8.3'] + php: ['5.4', '5.5', '5.6', '7.0', '7.1', '7.2', '7.3', '7.4', '8.0', '8.1', '8.2', '8.3', '8.4'] name: PHP ${{ matrix.php }}/Linux @@ -31,7 +31,7 @@ jobs: - name: Setup problem matchers run: echo "::add-matcher::${{ runner.tool_cache }}/phpunit.json" - - name: Fix PHPUnit for PHP8 + - name: Fix for PHP < 7.1 run: composer remove phpstan/phpstan --dev --no-update if: matrix.php < 7.1