|
| 1 | +<?php |
| 2 | + |
| 3 | +declare(strict_types=1); |
| 4 | + |
| 5 | +namespace PhpOffice\PhpSpreadsheetBenchmarks; |
| 6 | + |
| 7 | +use PhpOffice\PhpSpreadsheet\Calculation\Calculation; |
| 8 | +use PhpOffice\PhpSpreadsheet\Spreadsheet; |
| 9 | +use PHPUnit\Framework\TestCase; |
| 10 | + |
/**
 * Benchmark tests for the formula token cache in the Calculation engine.
 *
 * These tests demonstrate the performance benefit of caching parsed formula
 * tokens so that identical formula strings are not re-parsed on every evaluation.
 *
 * Each timing is a single pass measured with hrtime() and reported on STDERR,
 * so the timing assertions are sensitive to the load of the machine running them.
 *
 * Run with: vendor/bin/phpunit --testsuite Benchmark --filter FormulaTokenCacheBenchmark --stderr
 */
#[\PHPUnit\Framework\Attributes\Group('benchmark')]
class FormulaTokenCacheBenchmarkTest extends TestCase
{
    private const CACHE_SIZE = 1000;

    /** Formula patterns used across benchmarks. */
    private const FORMULA_PATTERNS = [
        '=A%d+B%d',
        '=SUM(A%d:B%d)',
        '=IF(A%d>0,B%d,C%d)',
        '=AVERAGE(A%d:D%d)',
        '=A%d*B%d+C%d',
        '=MAX(A%d,B%d,C%d)',
        '=MIN(A%d:C%d)/D%d',
        '=CONCATENATE(A%d,"-",B%d)',
        '=ROUND(A%d/B%d,2)',
        '=IFERROR(A%d/B%d,0)',
    ];

    /**
     * Benchmark: parse 1000 spreadsheet-like formulas with cache enabled vs disabled.
     *
     * Three passes over the same formula list are timed: cache size 0, cache
     * enabled starting from an empty cache, and cache enabled after the
     * previous pass has populated it.
     */
    public function testParseFormulaCacheEnabledVsDisabled(): void
    {
        $spreadsheet = new Spreadsheet();

        // try/finally so the workbook is released even when a timing assertion fails.
        try {
            $calculation = Calculation::getInstance($spreadsheet);
            $cellCount = 1000;

            // Build a realistic set of formulas that a spreadsheet might contain.
            // Reference rows cycle through 1..100, so formula strings repeat.
            $patternCount = count(self::FORMULA_PATTERNS);
            $formulas = [];
            for ($row = 1; $row <= $cellCount; ++$row) {
                $pattern = self::FORMULA_PATTERNS[$row % $patternCount];
                $refRow = (($row - 1) % 100) + 1;
                $formulas[] = $this->buildFormula($pattern, $refRow);
            }

            // --- Run 1: Cache disabled ---
            $calculation->setFormulaTokenCacheMaxSize(0);
            $noCacheMs = $this->timeParseMs($calculation, $formulas);

            // --- Run 2: Cache enabled, cold ---
            $calculation->setFormulaTokenCacheMaxSize(self::CACHE_SIZE);
            // Explicit clear so this pass starts from an empty cache regardless of
            // how the size setter treats existing entries.
            $calculation->clearFormulaTokenCache();
            $coldMs = $this->timeParseMs($calculation, $formulas);
            $cacheSize = $calculation->getFormulaTokenCacheSize();

            // --- Run 3: Cache enabled, warm ---
            $warmMs = $this->timeParseMs($calculation, $formulas);

            fwrite(STDERR, "\n");
            fwrite(STDERR, "=== parseFormula() Enabled vs Disabled ({$cellCount} formulas) ===\n");
            fwrite(STDERR, sprintf(" PHP version: %s (%s)\n", PHP_VERSION, PHP_OS));
            fwrite(STDERR, sprintf(" Cache disabled: %8.2f ms\n", $noCacheMs));
            fwrite(STDERR, sprintf(" Cache enabled (cold): %8.2f ms\n", $coldMs));
            fwrite(STDERR, sprintf(" Cache enabled (warm): %8.2f ms\n", $warmMs));
            fwrite(STDERR, sprintf(" Cache entries: %d\n", $cacheSize));
            fwrite(STDERR, "\n");

            self::assertGreaterThan(0, $cacheSize);
            self::assertLessThan($noCacheMs, $warmMs, 'Warm cache should be faster than no cache');
        } finally {
            $spreadsheet->disconnectWorksheets();
        }
    }

    /**
     * Benchmark: issue 10,000 parseFormula() calls drawn round-robin from a pool
     * of 200 distinct formulas, with the cache enabled, comparing a populated
     * cache against passes that start from a cleared cache.
     *
     * The formula set is designed so the total distinct formulas stay well
     * under the cache limit, ensuring cache hits are reliable.
     */
    public function testParseFormulaCacheVsNocache(): void
    {
        $spreadsheet = new Spreadsheet();

        // try/finally so the workbook is released even when a timing assertion fails.
        try {
            $calculation = Calculation::getInstance($spreadsheet);
            $calculation->setFormulaTokenCacheMaxSize(self::CACHE_SIZE);
            $totalFormulas = 10_000;

            // Build a pool of 200 distinct formulas (well under the cache limit).
            // Every entry uses its own row number, so no two strings are equal.
            $distinctPool = [];
            $patternCount = count(self::FORMULA_PATTERNS);
            for ($i = 1; $i <= 200; ++$i) {
                $pattern = self::FORMULA_PATTERNS[$i % $patternCount];
                $distinctPool[] = $this->buildFormula($pattern, $i);
            }
            $distinctCount = count($distinctPool);

            // Round-robin over the pool: each formula appears exactly 50 times.
            $formulas = [];
            for ($i = 0; $i < $totalFormulas; ++$i) {
                $formulas[] = $distinctPool[$i % $distinctCount];
            }

            // --- Run 1: Cold cache ---
            // The cache starts empty, so the first occurrence of each distinct
            // formula has to be parsed; later repeats in this same pass can
            // already be served from the cache.
            $calculation->clearFormulaTokenCache();
            $coldMs = $this->timeParseMs($calculation, $formulas);

            // --- Run 2: Warm cache (cache already populated by run 1) ---
            $cacheSize = $calculation->getFormulaTokenCacheSize();
            $warmMs = $this->timeParseMs($calculation, $formulas);

            // --- Run 3: Cleared cache (same starting conditions as run 1) ---
            $calculation->clearFormulaTokenCache();
            $clearedMs = $this->timeParseMs($calculation, $formulas);

            $warmVsColdPct = $coldMs > 0 ? (($coldMs - $warmMs) / $coldMs) * 100 : 0.0;
            $warmVsClearedPct = $clearedMs > 0 ? (($clearedMs - $warmMs) / $clearedMs) * 100 : 0.0;

            fwrite(STDERR, "\n");
            fwrite(STDERR, "=== parseFormula() Benchmark ({$totalFormulas} calls, {$distinctCount} distinct) ===\n");
            fwrite(STDERR, sprintf(" PHP version: %s (%s)\n", PHP_VERSION, PHP_OS));
            fwrite(STDERR, sprintf(" Cold cache (first parse): %8.2f ms\n", $coldMs));
            fwrite(STDERR, sprintf(" Warm cache (all cached): %8.2f ms\n", $warmMs));
            fwrite(STDERR, sprintf(" Cleared cache (re-parse): %8.2f ms\n", $clearedMs));
            fwrite(STDERR, sprintf(" Warm vs cold improvement: %8.2f %%\n", $warmVsColdPct));
            fwrite(STDERR, sprintf(" Warm vs cleared improvement:%7.2f %%\n", $warmVsClearedPct));
            fwrite(STDERR, sprintf(" Cache entries after cold: %d\n", $cacheSize));
            // Read after run 3 has parsed the formulas again, hence "after re-parse".
            fwrite(STDERR, sprintf(" Cache entries after re-parse: %d\n", $calculation->getFormulaTokenCacheSize()));
            fwrite(STDERR, "\n");

            // Warm cache should be faster than cold cache for repeated formulas
            self::assertLessThan($coldMs, $warmMs, 'Warm cache should be faster than cold cache');
            self::assertLessThan($clearedMs, $warmMs, 'Warm cache should be faster than cleared cache');
        } finally {
            $spreadsheet->disconnectWorksheets();
        }
    }

    /**
     * Parse every formula in the list once, in order, and time the whole pass.
     *
     * The clock is started and stopped inside this method so the measured
     * interval contains only the parse loop.
     *
     * @param list<string> $formulas formula strings passed to parseFormula()
     *
     * @return float elapsed time in milliseconds
     */
    private function timeParseMs(Calculation $calculation, array $formulas): float
    {
        $start = hrtime(true);
        foreach ($formulas as $formula) {
            $calculation->parseFormula($formula);
        }

        // hrtime(true) reports nanoseconds; convert to milliseconds.
        return (hrtime(true) - $start) / 1_000_000;
    }

    /**
     * Build a concrete formula from a pattern and row number.
     *
     * Patterns use %d placeholders; all are replaced with the row number.
     *
     * @param string $pattern sprintf() format string containing only %d placeholders
     * @param int $row row number substituted for every placeholder
     */
    private function buildFormula(string $pattern, int $row): string
    {
        // sprintf() needs one argument per placeholder, so repeat the row that many times.
        return sprintf(
            $pattern,
            ...array_fill(0, substr_count($pattern, '%d'), $row)
        );
    }
}
0 commit comments