<?php

require_once 'vendor/autoload.php';

use Illuminate\Support\Facades\Storage;

/**
 * Command-line harness for exercising the colourway extraction logic.
 *
 * Runs Tabula over colourways.pdf (expected next to this script), dumps the
 * shape of the resulting JSON, and reports which six-digit numbers the
 * extraction and filtering steps pick up. Everything is echoed to standard
 * output; nothing is returned or asserted.
 */
class ColourwayTester
{
    /**
     * Entry point: check that the sample PDF exists, print its name and size,
     * then run the Tabula extraction against it.
     */
    public function testExtraction()
    {
        $pdfPath = __DIR__ . '/colourways.pdf';

        if (!file_exists($pdfPath)) {
            echo "PDF file not found: $pdfPath\n";
            return;
        }

        echo "Testing PDF: " . basename($pdfPath) . "\n";
        echo "File size: " . number_format(filesize($pdfPath) / 1024 / 1024, 2) . " MB\n\n";

        // Test Tabula extraction
        $this->testTabulaExtraction($pdfPath);
    }

    /**
     * Runs the bundled Tabula JAR over the PDF, asking for JSON output for all
     * pages, echoes the command, exit code and console output, and hands the
     * JSON file to analyzeTabulaOutput() when the run succeeded.
     *
     * @param string $pdfPath Path of the PDF to extract tables from.
     */
    private function testTabulaExtraction(string $pdfPath): void
    {
        $tabulaJar = __DIR__ . '/tabula/tabula-1.0.5-jar-with-dependencies.jar';

        if (!file_exists($tabulaJar)) {
            echo "Tabula JAR not found: $tabulaJar\n";
            return;
        }

        echo "Testing Tabula extraction...\n";

        $outputJson = __DIR__ . '/test_output.json';
        // Every path is shell-escaped individually before being interpolated.
        $cmd = sprintf(
            'java -jar %s -p all -f JSON -o %s %s',
            escapeshellarg($tabulaJar),
            escapeshellarg($outputJson),
            escapeshellarg($pdfPath)
        );

        echo "Command: $cmd\n\n";

        $code = null;
        $out = [];
        // Fold stderr into stdout so Java errors show up in the echoed output.
        exec($cmd . ' 2>&1', $out, $code);

        echo "Exit code: $code\n";
        echo "Output:\n" . implode("\n", $out) . "\n\n";

        if ($code !== 0 || !file_exists($outputJson)) {
            echo "Tabula failed or output file not created\n";
            return;
        }

        echo "JSON output file created successfully\n";
        $this->analyzeTabulaOutput($outputJson);
    }

    /**
     * Decodes the Tabula JSON file, prints its structure, runs the extraction
     * logic over it, and always removes the file afterwards.
     *
     * An empty but valid result (for example `[]`) counts as parsed; only a
     * read failure or a JSON syntax error is reported as a parse failure.
     *
     * @param string $jsonFile Path of the JSON file written by Tabula.
     */
    private function analyzeTabulaOutput(string $jsonFile): void
    {
        try {
            $raw = file_get_contents($jsonFile);
            $json = $raw === false ? null : json_decode($raw, true);

            // Ask the decoder whether it failed instead of testing truthiness,
            // which would also reject valid empty documents.
            if ($raw === false || json_last_error() !== JSON_ERROR_NONE) {
                echo "Failed to parse JSON output\n";
                return;
            }

            echo "JSON parsed successfully\n";
            echo "Structure:\n";
            $this->printStructure($json, 0);

            // Test the extraction logic
            $this->testExtractionLogic($json);
        } finally {
            // Clean up on every path, including the parse-failure return above.
            if (file_exists($jsonFile)) {
                unlink($jsonFile);
            }
        }
    }

    /**
     * Echoes an indented outline of a decoded JSON value.
     *
     * Arrays are listed as "[array]" and expanded only while the depth is
     * below 2; scalar values are cast to string and cut to 100 bytes.
     *
     * @param mixed $data  Decoded JSON value; non-arrays print nothing.
     * @param int   $depth Current nesting level, used for indentation.
     */
    private function printStructure($data, int $depth): void
    {
        if (!is_array($data)) {
            return;
        }

        $indent = str_repeat('  ', $depth);

        foreach ($data as $key => $value) {
            if (!is_array($value)) {
                echo "{$indent}{$key}: " . substr((string)$value, 0, 100) . "\n";
                continue;
            }

            echo "{$indent}{$key}: [array]\n";
            if ($depth < 2) { // Limit depth for readability
                $this->printStructure($value, $depth + 1);
            }
        }
    }

    /**
     * Collects every six-digit number from the decoded JSON, then reports the
     * totals and a short preview before and after filtering.
     *
     * @param mixed $jsonData Decoded Tabula output.
     */
    private function testExtractionLogic($jsonData): void
    {
        echo "\n=== Testing Extraction Logic ===\n";

        $colourCodesWithContext = [];
        $this->extractAllColourCodes($jsonData, $colourCodesWithContext);

        echo "Total 6-digit numbers found: " . count($colourCodesWithContext) . "\n";

        // Show first few
        echo "First 10 codes:\n";
        $this->printPreview($colourCodesWithContext);

        // Filter and show results
        $filteredCodes = array_filter($colourCodesWithContext, function ($item) {
            return $this->isLikelyColourCode($item['code'], $item['text']);
        });

        echo "\nAfter filtering: " . count($filteredCodes) . " codes\n";
        echo "First 10 filtered codes:\n";
        // array_filter() preserves keys, so reindex before taking the head.
        $this->printPreview(array_values($filteredCodes));
    }

    /**
     * Echoes up to the first ten entries as "code - text", with the text cut
     * to 50 bytes.
     *
     * @param array $items List of entries with 'code' and 'text' keys.
     */
    private function printPreview(array $items): void
    {
        foreach (array_slice($items, 0, 10) as $item) {
            echo "  {$item['code']} - " . substr($item['text'], 0, 50) . "\n";
        }
    }

    /**
     * Walks the decoded JSON, harvests six-digit numbers from every page that
     * has a numbered heading, and echoes a single summary line when at least
     * one page was seen.
     *
     * Counters are local to each call, so repeated calls always start
     * numbering pages from 1.
     *
     * @param mixed $data                   Decoded Tabula output.
     * @param array $colourCodesWithContext Accumulator; matches are appended.
     */
    private function extractAllColourCodes($data, array &$colourCodesWithContext): void
    {
        $stats = ['total' => 0, 'processed' => 0, 'skipped' => 0];
        $this->walkForPages($data, $colourCodesWithContext, $stats);

        if ($stats['total'] > 0) {
            echo "Page processing complete: {$stats['total']} total pages, {$stats['processed']} processed, {$stats['skipped']} skipped\n";
        }
    }

    /**
     * Recursive worker for extractAllColourCodes(): treats any array carrying
     * an array under 'data' as one page and recurses into everything else.
     *
     * @param mixed $node                   Current node of the decoded JSON.
     * @param array $colourCodesWithContext Accumulator; matches are appended.
     * @param array $stats                  Running 'total', 'processed' and 'skipped' counts.
     */
    private function walkForPages($node, array &$colourCodesWithContext, array &$stats): void
    {
        if (!is_array($node)) {
            return;
        }

        if (isset($node['data']) && is_array($node['data'])) {
            $stats['total']++;

            if ($this->hasNumberedHeading($node['data'])) {
                $stats['processed']++;
                echo "Processing page {$stats['total']} (numbered heading)\n";
                $this->processPageForColourCodes($node['data'], $colourCodesWithContext);
            } else {
                $stats['skipped']++;
                echo "Skipping page {$stats['total']} (no numbered heading)\n";
            }

            // A page node is a leaf: its other keys are not searched.
            return;
        }

        foreach ($node as $child) {
            if (is_array($child)) {
                $this->walkForPages($child, $colourCodesWithContext, $stats);
            }
        }
    }

    /**
     * Tells whether any row's first cell starts with a digit that is directly
     * followed by whitespace, a dot or a hyphen.
     *
     * NOTE(review): only one leading digit is accepted, so text such as
     * "10. Heading" does not count as a numbered heading — confirm intended.
     *
     * @param array $pageData Rows of one page, each a list of cells.
     */
    private function hasNumberedHeading(array $pageData): bool
    {
        foreach ($pageData as $row) {
            if (!isset($row[0]['text']) || !is_string($row[0]['text'])) {
                continue;
            }

            if (preg_match('/^\d[\s\.\-]/', trim($row[0]['text']))) {
                return true;
            }
        }

        return false;
    }

    /**
     * Appends one entry per standalone six-digit number found in any cell
     * text of the page. Each entry records the number, the full cell text and
     * the raw cell.
     *
     * @param array $pageData               Rows of one page, each a list of cells.
     * @param array $colourCodesWithContext Accumulator; matches are appended.
     */
    private function processPageForColourCodes(array $pageData, &$colourCodesWithContext): void
    {
        foreach ($pageData as $row) {
            if (!is_array($row)) {
                continue;
            }

            foreach ($row as $cell) {
                if (!isset($cell['text']) || !is_string($cell['text'])) {
                    continue;
                }

                // Covers both "no match" (0) and a PCRE failure (false).
                if (!preg_match_all('/\b\d{6}\b/', $cell['text'], $matches)) {
                    continue;
                }

                foreach ($matches[0] as $code) {
                    $colourCodesWithContext[] = [
                        'code' => $code,
                        'text' => $cell['text'],
                        'raw_data' => $cell
                    ];
                }
            }
        }
    }

    /**
     * Rejects entries whose surrounding text contains "id nr" or "4s-"
     * (case-insensitive), or a six-digit run joined to a letter by a hyphen
     * on either side. Everything else is accepted.
     *
     * NOTE(review): the decision is made on the whole cell text; $code itself
     * is not consulted, so one rejected number rejects every number found in
     * the same cell — confirm this is intended.
     *
     * @param string $code    The six-digit number under test (currently unused).
     * @param string $context Full text of the cell the number was found in.
     */
    private function isLikelyColourCode(string $code, string $context): bool
    {
        $context = strtolower($context);

        if (strpos($context, 'id nr') !== false || strpos($context, '4s-') !== false) {
            return false;
        }

        if (preg_match('/[a-z]-\d{6}/i', $context) || preg_match('/\d{6}-[a-z]/i', $context)) {
            return false;
        }

        return true;
    }
}

// Kick off the diagnostic run as soon as the script is executed.
(new ColourwayTester())->testExtraction();
