<?php

require_once 'vendor/autoload.php';

// Debug script to examine each page
/**
 * Debug helper that runs Tabula over a fixed PDF and prints a short summary
 * of every extracted entry that carries a `data` array.
 *
 * All results are written to standard output with `echo`; nothing is returned.
 */
class PageDebugger
{
    /** Maximum number of characters printed in the "Sample text" preview. */
    private const SAMPLE_LENGTH = 200;

    /** Maximum number of 6-digit codes listed per entry. */
    private const MAX_CODES_SHOWN = 5;

    /**
     * Extracts `colourways.pdf` (next to this file) to JSON with the bundled
     * Tabula jar and prints a summary of each extracted entry.
     *
     * Failures (missing files, non-zero exit code from the command, missing,
     * unreadable or unparsable output) are reported on standard output. The
     * temporary JSON file is removed whether or not extraction succeeded.
     *
     * @return void
     */
    public function debugPages()
    {
        $pdfPath = __DIR__ . '/colourways.pdf';
        $tabulaJar = __DIR__ . '/tabula/tabula-1.0.5-jar-with-dependencies.jar';

        if (!file_exists($pdfPath) || !file_exists($tabulaJar)) {
            echo "Required files not found\n";
            return;
        }

        echo "Debugging PDF pages...\n\n";

        $outputJson = __DIR__ . '/debug_output.json';
        $cmd = sprintf(
            'java -jar %s -p all -f JSON -o %s %s',
            escapeshellarg($tabulaJar),
            escapeshellarg($outputJson),
            escapeshellarg($pdfPath)
        );

        // stderr is merged into stdout so the command's diagnostics end up in
        // $toolOutput and can be shown when the run fails.
        $toolOutput = [];
        exec($cmd . ' 2>&1', $toolOutput, $exitCode);

        try {
            if ($exitCode !== 0) {
                echo "Tabula failed with exit code {$exitCode}\n";
                $this->printToolOutput($toolOutput);
                return;
            }

            if (!file_exists($outputJson)) {
                echo "Tabula did not produce an output file\n";
                $this->printToolOutput($toolOutput);
                return;
            }

            $rawJson = file_get_contents($outputJson);
            if ($rawJson === false) {
                echo "Could not read Tabula output file\n";
                return;
            }

            $json = json_decode($rawJson, true);
            if ($json === null && json_last_error() !== JSON_ERROR_NONE) {
                echo 'Could not parse Tabula output: ' . json_last_error_msg() . "\n";
                return;
            }

            $this->examineEachPage($json);
        } finally {
            // Runs on every exit path above so a failed run does not leave the
            // temporary file behind.
            if (file_exists($outputJson)) {
                unlink($outputJson);
            }
        }
    }

    /**
     * Prints a "=== PAGE n ===" section for every entry that has a `data` array.
     *
     * The number shown is the entry's position in the decoded array plus one.
     * NOTE(review): Tabula may emit one entry per detected table rather than
     * one per PDF page, in which case "PAGE" is really a table index — confirm.
     *
     * @param mixed $jsonData Decoded JSON; anything that is not an array is
     *                        reported as invalid.
     */
    private function examineEachPage($jsonData): void
    {
        if (!is_array($jsonData)) {
            echo "Invalid JSON data\n";
            return;
        }

        foreach ($jsonData as $pageIndex => $page) {
            if (!isset($page['data']) || !is_array($page['data'])) {
                continue;
            }

            $pageNum = $pageIndex + 1;
            echo "=== PAGE {$pageNum} ===\n";

            $this->examinePageContent($page['data'], $pageNum);
            echo "\n";
        }
    }

    /**
     * Prints heading, length, 6-digit-code and sample-text details for one entry.
     *
     * Only the first cell (`$row[0]['text']`) of each row is inspected; other
     * cells are ignored.
     *
     * @param array $pageData Rows of the entry's `data` array.
     * @param int   $pageNum  1-based entry number (currently not used in the output).
     */
    private function examinePageContent(array $pageData, int $pageNum): void
    {
        $allText = '';
        $firstRowText = '';
        $hasNumberedHeading = false;

        foreach ($pageData as $rowIndex => $row) {
            if (!isset($row[0]['text'])) {
                continue;
            }

            $text = trim($row[0]['text']);
            $allText .= $text . ' ';

            if ($rowIndex === 0) {
                $firstRowText = $text;
                // Exactly one leading digit followed by whitespace, '.' or '-'.
                // Multi-digit prefixes (including 6-digit codes) do not match.
                $hasNumberedHeading = preg_match('/^\d[\s\.\-]/', $text) === 1;
            }
        }

        echo "First row text: '{$firstRowText}'\n";
        echo "Has numbered heading: " . ($hasNumberedHeading ? 'YES' : 'NO') . "\n";
        echo "Total text length: " . $this->textLength($allText) . " characters\n";

        // Standalone runs of exactly six digits.
        preg_match_all('/\b\d{6}\b/', $allText, $matches);
        $codeCount = count($matches[0]);
        echo "6-digit numbers found: {$codeCount}\n";

        if ($codeCount > 0) {
            $firstCodes = array_slice($matches[0], 0, self::MAX_CODES_SHOWN);
            echo "First few codes: " . implode(', ', $firstCodes) . "\n";
        }

        $sampleText = $this->textPrefix($allText, self::SAMPLE_LENGTH);
        echo "Sample text: '{$sampleText}...'\n";
    }

    /**
     * Echoes the captured command output, one line per entry, if there is any.
     *
     * @param array $lines Output lines collected by exec().
     */
    private function printToolOutput(array $lines): void
    {
        if ($lines !== []) {
            echo implode("\n", $lines) . "\n";
        }
    }

    /**
     * Length of $text in UTF-8 characters; falls back to the byte length when
     * the mbstring extension is not loaded.
     */
    private function textLength(string $text): int
    {
        return function_exists('mb_strlen') ? mb_strlen($text, 'UTF-8') : strlen($text);
    }

    /**
     * First $limit UTF-8 characters of $text, so a multibyte character is never
     * cut in half; falls back to a byte-wise cut when mbstring is not loaded.
     */
    private function textPrefix(string $text, int $limit): string
    {
        return function_exists('mb_substr')
            ? mb_substr($text, 0, $limit, 'UTF-8')
            : substr($text, 0, $limit);
    }
}

// Script entry point: create the debugger and print the page report as soon
// as this file is executed.
$debugger = new PageDebugger();
$debugger->debugPages();
