<?php

namespace App\Services\CommissionImports;

/**
 * Wrapper around the tabula-java command-line JAR for pulling table rows out of PDFs.
 *
 * The JAR is looked up (and downloaded when missing) in storage_path('app/tabula'),
 * which is also used as the scratch directory for Tabula's JSON output.
 */
class TabulaService
{
    /** Maximum number of bytes of each Tabula run's console output kept for error messages. */
    private const DEBUG_OUTPUT_LIMIT = 500;

    /** Absolute path of the Tabula JAR resolved by the constructor. */
    private string $jarPath;

    /**
     * Resolve the Tabula JAR, downloading it on first use.
     *
     * @throws \RuntimeException when the working directory or the JAR cannot be provisioned
     */
    public function __construct()
    {
        $this->jarPath = $this->ensureTabulaJarPath();
    }

    /**
     * Extract tables from PDF using Tabula.
     *
     * Runs Tabula once in the requested mode; when that yields no rows and the
     * mode was 'lattice', a second run in 'stream' mode is attempted.
     *
     * @param string $pdfPath Path of the PDF handed to Tabula
     * @param array  $pages   Page selectors joined with commas for --pages; empty means "all"
     * @param string $mode    Tabula extraction flag name without the leading dashes (e.g. 'lattice', 'stream')
     * @param array  $area    Exactly four values joined with commas for --area; anything else falls back to --guess
     *
     * @return array List of rows, each row a list of trimmed cell strings
     *
     * @throws \RuntimeException when Java is unavailable, the work directory cannot be created,
     *                           or no rows could be extracted
     */
    public function extractTables(
        string $pdfPath,
        array $pages = [],
        string $mode = 'lattice',
        array $area = []
    ): array {
        if (!$this->hasJavaInstalled()) {
            throw new \RuntimeException('Java is not installed or not on PATH. Install OpenJDK and retry.');
        }

        $workDir = $this->ensureWorkDir();

        // Random suffix instead of uniqid(): two concurrent requests must never share an output file.
        $outFile = $workDir . DIRECTORY_SEPARATOR . 'extract_' . bin2hex(random_bytes(8)) . '.json';

        $rows = [];
        $debug = [];

        try {
            $output = $this->runCommand($this->buildTabulaCommand($pdfPath, $outFile, $pages, $mode, $area));
            $debug[] = '[' . $mode . '] ' . substr($output, 0, self::DEBUG_OUTPUT_LIMIT);
            $rows = $this->parseTabulaJsonToRows($outFile);

            // Try fallback mode if no results
            if ($rows === [] && $mode === 'lattice') {
                @unlink($outFile);
                $fallbackOutput = $this->runCommand(
                    $this->buildTabulaCommand($pdfPath, $outFile, $pages, 'stream', $area)
                );
                $debug[] = '[stream] ' . substr($fallbackOutput, 0, self::DEBUG_OUTPUT_LIMIT);
                $rows = $this->parseTabulaJsonToRows($outFile);
            }
        } finally {
            // Best-effort cleanup that also runs when a step above throws.
            @unlink($outFile);
        }

        if ($rows === []) {
            // Report every attempt, not just the first one, so a failing fallback is visible too.
            throw new \RuntimeException('No table rows detected. Debug output: ' . implode(' | ', $debug));
        }

        return $rows;
    }

    /**
     * Build the shell command line for one Tabula run.
     *
     * Every caller-supplied value is passed through escapeshellarg() so it
     * reaches Tabula as a single argument and can never be interpreted by the shell.
     *
     * @param string $pdfPath Input PDF path
     * @param string $outFile Path Tabula should write its JSON to
     * @param array  $pages   Page selectors; empty means "all"
     * @param string $mode    Extraction flag name without leading dashes
     * @param array  $area    Four area values, or anything else to let Tabula guess
     *
     * @return string Complete command with stderr redirected to stdout
     */
    private function buildTabulaCommand(
        string $pdfPath,
        string $outFile,
        array $pages,
        string $mode,
        array $area
    ): string {
        $parts = [
            'java -Dfile.encoding=UTF8 -jar',
            escapeshellarg($this->jarPath),
        ];

        // Default to all pages when no pages specified
        $parts[] = '--pages';
        $parts[] = $pages === [] ? 'all' : escapeshellarg(implode(',', $pages));

        // Extraction mode is escaped together with its dashes so it stays one argument.
        $parts[] = escapeshellarg('--' . $mode);

        // An explicit area needs exactly four values; otherwise let Tabula guess.
        if (count($area) === 4) {
            $parts[] = '--area';
            $parts[] = escapeshellarg(implode(',', $area));
        } else {
            $parts[] = '--guess';
        }

        $parts[] = '-f JSON -o';
        $parts[] = escapeshellarg($outFile);
        $parts[] = escapeshellarg($pdfPath);
        $parts[] = '2>&1';

        return implode(' ', $parts);
    }

    /**
     * Parse Tabula JSON output file into array of rows.
     *
     * Accepts a top-level list of tables, an object with a 'tables' key, or a
     * single table object. Missing, unreadable, empty or undecodable files and
     * malformed tables/rows are skipped rather than treated as errors.
     *
     * @param string $jsonPath Path of the JSON file written by Tabula
     *
     * @return array List of non-empty rows, each a list of trimmed cell strings
     */
    private function parseTabulaJsonToRows(string $jsonPath): array
    {
        if (!file_exists($jsonPath)) {
            return [];
        }

        $json = @file_get_contents($jsonPath);
        if ($json === false || $json === '') {
            return [];
        }

        $decoded = json_decode($json, true);
        if (!is_array($decoded)) {
            return [];
        }

        // Handle different Tabula JSON formats
        if (array_is_list($decoded)) {
            $tables = $decoded;
        } elseif (isset($decoded['tables']) && is_array($decoded['tables'])) {
            $tables = $decoded['tables'];
        } else {
            $tables = [$decoded];
        }

        // Extract rows from ALL tables (not just the first one)
        $allRows = [];
        foreach ($tables as $table) {
            if (!is_array($table) || !isset($table['data']) || !is_array($table['data'])) {
                continue;
            }

            foreach ($table['data'] as $row) {
                // A non-array row cannot hold cells; skip it instead of iterating a scalar.
                if (!is_array($row)) {
                    continue;
                }

                $cells = [];
                foreach ($row as $cell) {
                    $cells[] = is_array($cell) && isset($cell['text']) ? trim((string) $cell['text']) : '';
                }

                // Skip completely empty rows
                if (implode('', $cells) !== '') {
                    $allRows[] = $cells;
                }
            }
        }

        return $allRows;
    }

    /**
     * Ensure Tabula JAR exists locally; download if missing.
     *
     * The download is written to a '.part' file and renamed into place only
     * after a complete write, so a failed download never leaves a truncated JAR
     * at the final path.
     *
     * @return string Path of the JAR
     *
     * @throws \RuntimeException when the directory cannot be created or the JAR cannot be downloaded or stored
     */
    private function ensureTabulaJarPath(): string
    {
        $jarPath = $this->ensureWorkDir() . DIRECTORY_SEPARATOR . 'tabula-1.0.5-jar-with-dependencies.jar';

        if (file_exists($jarPath)) {
            return $jarPath;
        }

        $url = 'https://github.com/tabulapdf/tabula-java/releases/download/v1.0.5/tabula-1.0.5-jar-with-dependencies.jar';
        $tmp = $jarPath . '.part';

        // https:// streams read their options from the 'http' context key.
        $ctx = stream_context_create([
            'http' => ['timeout' => 120],
        ]);

        $data = @file_get_contents($url, false, $ctx);
        if ($data === false || $data === '') {
            throw new \RuntimeException('Failed to download Tabula JAR. Ensure internet access or pre-provision the JAR at ' . $jarPath);
        }

        // A short or failed write must not be promoted to the final JAR path.
        if (@file_put_contents($tmp, $data) !== strlen($data)) {
            @unlink($tmp);
            throw new \RuntimeException('Failed to write Tabula JAR to ' . $tmp);
        }

        if (!@rename($tmp, $jarPath)) {
            @unlink($tmp);
            throw new \RuntimeException('Failed to move Tabula JAR into place at ' . $jarPath);
        }

        return $jarPath;
    }

    /**
     * Return the Tabula working directory, creating it when necessary.
     *
     * @return string Directory path
     *
     * @throws \RuntimeException when the directory does not exist and cannot be created
     */
    private function ensureWorkDir(): string
    {
        $dir = storage_path('app/tabula');

        // is_dir() is re-checked after a failed mkdir() because another process may have created it meanwhile.
        if (!is_dir($dir) && !@mkdir($dir, 0775, true) && !is_dir($dir)) {
            throw new \RuntimeException('Unable to create Tabula working directory: ' . $dir);
        }

        return $dir;
    }

    /**
     * Run a shell command and return its captured output as a string.
     *
     * shell_exec() yields null or false when there is no output or the command
     * could not be started; both are normalised to an empty string.
     *
     * @param string $command Fully escaped command line
     *
     * @return string Captured output, possibly empty
     */
    private function runCommand(string $command): string
    {
        return (string) shell_exec($command);
    }

    /**
     * Check if Java is installed.
     *
     * @return bool True when the shell's `command -v java` prints anything
     */
    private function hasJavaInstalled(): bool
    {
        return trim($this->runCommand('command -v java 2>&1')) !== '';
    }
}


