<?php

require_once 'vendor/autoload.php';

use Smalot\PdfParser\Parser;

// Boot the Laravel application so framework services are available to the code below.
$app = require_once 'bootstrap/app.php';
$kernel = $app->make('Illuminate\Contracts\Console\Kernel');
$kernel->bootstrap();

echo "Testing PDF parsing...\n";

try {
    // Invoice to inspect; the relative path is resolved against the current working directory.
    $pdfPath = 'file.pdf';

    if (!file_exists($pdfPath)) {
        exit("PDF file not found: $pdfPath\n");
    }

    printf("PDF file found: %s bytes\n", filesize($pdfPath));

    // Step 1: pull the raw text out of the PDF.
    $text = extractTextFromPdf($pdfPath);
    $lineCount = count(explode("\n", $text));
    echo "Text extracted successfully. Total lines: {$lineCount}\n";

    // Step 2: turn the text into shipment records.
    $shipments = parseInvoiceData($text);
    $shipmentCount = count($shipments);
    echo "Shipments found: {$shipmentCount}\n";

    // Step 3: preview the first few records, numbered from 1.
    echo "\nFirst 5 shipments:\n";
    $position = 0;
    foreach (array_slice($shipments, 0, 5) as $shipment) {
        $position++;
        printf("%d. %s - %s\n", $position, $shipment['courier'], $shipment['parcel_number']);
    }

    // Step 4: list each courier name that occurs at least once.
    $couriers = array_unique(array_column($shipments, 'courier'));
    echo "\nUnique couriers found: " . implode(', ', $couriers) . "\n";

    // Step 5: print line-level statistics to help tune the matching patterns.
    analyzePatterns($text);
} catch (Exception $e) {
    // Report the failure and its stack trace instead of letting the exception escape.
    echo "Error: {$e->getMessage()}\n";
    echo "Trace: {$e->getTraceAsString()}\n";
}

/**
 * Extract the text content of a PDF file.
 *
 * The external `pdftotext` command is tried first. When it cannot be run,
 * exits with a non-zero status, or prints no text, the PHP PDF parser
 * (the imported Parser class) is used instead. The returned text is prefixed
 * with a banner line naming the method that produced it.
 *
 * @param string $filePath Path to the PDF file.
 * @return string Banner line, a blank line, then the extracted text.
 * @throws Exception If the file is missing or unreadable, the PHP parser
 *                   fails, or neither method yields any text.
 */
function extractTextFromPdf($filePath): string {
    if (!file_exists($filePath) || !is_readable($filePath)) {
        throw new Exception("PDF file not found or not readable: $filePath");
    }

    if (function_exists('proc_open')) {
        // Quote the path so spaces or shell metacharacters in it cannot alter the command.
        $command = 'pdftotext -q ' . escapeshellarg($filePath) . ' -';

        // Capture stdout and stderr separately: error text such as
        // "command not found" must never be mistaken for extracted PDF text.
        $descriptors = [
            1 => ['pipe', 'w'],
            2 => ['pipe', 'w'],
        ];
        $process = proc_open($command, $descriptors, $pipes);

        if (is_resource($process)) {
            $output = stream_get_contents($pipes[1]);
            fclose($pipes[1]);
            // Drain stderr so the child is never left blocked on a full pipe.
            stream_get_contents($pipes[2]);
            fclose($pipes[2]);
            $exitCode = proc_close($process);

            // Only trust the output when pdftotext itself reported success.
            if ($exitCode === 0 && is_string($output) && trim($output) !== '') {
                return "=== PDF TEXT EXTRACTED (pdftotext) ===\n\n" . $output;
            }
        }
    }

    // Fallback: pure-PHP parsing.
    try {
        $parser = new Parser();
        $pdf = $parser->parseFile($filePath);
        $text = $pdf->getText();
    } catch (Exception $e) {
        // Keep the original exception attached so its trace is not lost.
        throw new Exception("PHP PDF parser failed: " . $e->getMessage(), 0, $e);
    }

    if (trim($text) !== '') {
        return "=== PDF TEXT EXTRACTED (PHP Parser) ===\n\n" . $text;
    }

    throw new Exception("PDF text extraction failed. The PDF might be empty or corrupted.");
}

/**
 * Parse shipment records out of extracted invoice text.
 *
 * The text is scanned line by line. A line that starts with a courier name
 * (DHL, TNT or FedEx) and can be paired with a parcel number opens a new
 * shipment. Every later line that ends in a "£<amount>" overwrites the open
 * shipment's total, so the last such line before the next shipment wins.
 * Each parcel number produces at most one shipment; repeated occurrences are
 * ignored and do not close the shipment currently being built.
 *
 * @param string $text Extracted invoice text with "\n" line separators.
 * @return array List of shipments. Each entry is an array with the keys
 *               parcel_number, courier, department, weight, no_parcels,
 *               destination_name, sent_from_name, sent_from_address,
 *               destination_address, country, vat_amount and total. Only
 *               parcel_number, courier and total are filled in here; the
 *               remaining keys are left as null.
 */
function parseInvoiceData($text): array {
    $lines = explode("\n", $text);
    $lineCount = count($lines);

    $shipments = [];
    $currentShipment = null;
    // Parcel numbers already claimed, INCLUDING the shipment still being
    // built, so a carrier line repeated back-to-back is not counted twice.
    $seenParcelNumbers = [];

    foreach ($lines as $lineIndex => $line) {
        $line = trim($line);
        $nextLine = ($lineIndex + 1 < $lineCount) ? trim($lines[$lineIndex + 1]) : null;

        // Both stay null unless one of the patterns below pairs a courier with a parcel number.
        $courier = null;
        $parcelNumber = null;

        // Pattern 1: Standard format (DHL - 1234567890). The en dash is an
        // alternation branch rather than a character-class member: without
        // the "u" modifier a class would match only one byte of it.
        if (preg_match('/^(DHL|TNT|FedEx)\s*(?:-|–)\s*(\d+)/', $line, $matches)) {
            $courier = $matches[1];
            $parcelNumber = $matches[2];
        }
        // Pattern 2: Domestic/Express format (DHL Domestic 1234567890)
        elseif (preg_match('/^(DHL|TNT|FedEx)\s+(?:Domestic|Express|Parcel|Worldwide)\s+(\d+)/', $line, $matches)) {
            $courier = $matches[1];
            $parcelNumber = $matches[2];
        }
        // Pattern 3: Any line starting with a courier and containing a run of 8+ digits.
        // (A bare "courier + service" line has no digits, so it can never match here.)
        elseif (preg_match('/^(DHL|TNT|FedEx).*?(\d{8,})/', $line, $matches)) {
            $courier = $matches[1];
            $parcelNumber = $matches[2];
        }
        // Pattern 4: Two-line format - courier and service on this line, parcel number on the next.
        elseif (preg_match('/^(DHL|TNT|FedEx)\s+(?:Parcel|Express|Domestic|Worldwide)/', $line, $matches)) {
            if ($nextLine !== null
                && preg_match('/(?:Domestic|Express|Parcel|Worldwide)\s+(\d{8,})/', $nextLine, $nextMatches)) {
                $courier = $matches[1];
                $parcelNumber = $nextMatches[1];
            }
        }
        // Pattern 5: Bare courier line that follows a £ amount (the end of the
        // previous shipment) within the last 20 lines, with the parcel number
        // on the next line.
        elseif (preg_match('/^(DHL|TNT|FedEx)/', $line, $matches)) {
            $followsAmount = false;
            for ($i = $lineIndex - 1; $i >= max(0, $lineIndex - 20); $i--) {
                if (preg_match('/£\s*\d+\.?\d*/', $lines[$i])) {
                    $followsAmount = true;
                    break;
                }
            }

            if ($followsAmount
                && $nextLine !== null
                && preg_match('/(\d{8,})/', $nextLine, $nextMatches)) {
                $courier = $matches[1];
                $parcelNumber = $nextMatches[1];
            }
        }

        // Open a new shipment only for a parcel number not seen before.
        if ($parcelNumber !== null && !isset($seenParcelNumbers[$parcelNumber])) {
            if ($currentShipment !== null) {
                $shipments[] = $currentShipment;
            }

            $seenParcelNumbers[$parcelNumber] = true;
            $currentShipment = [
                'parcel_number' => $parcelNumber,
                'courier' => $courier,
                'department' => null,
                'weight' => null,
                'no_parcels' => null,
                'destination_name' => null,
                'sent_from_name' => null,
                'sent_from_address' => null,
                'destination_address' => null,
                'country' => null,
                'vat_amount' => null,
                'total' => null
            ];
        }

        // Extract total amount (look for £ amount at end of line)
        if ($currentShipment !== null && preg_match('/£\s*(\d+\.?\d*)\s*$/', $line, $matches)) {
            $currentShipment['total'] = $matches[1];
        }
    }

    // Add the last shipment
    if ($currentShipment !== null) {
        $shipments[] = $currentShipment;
    }

    return $shipments;
}

/**
 * Print a diagnostic summary of the courier and total lines found in the text.
 *
 * Writes to standard output: the number of lines, how many start with a
 * courier name (DHL, TNT or FedEx), how many of those do or do not contain a
 * run of 8+ digits, how many lines end in a "£<amount>", and up to ten sample
 * courier lines. Nothing is returned.
 *
 * @param string $text Extracted invoice text with "\n" line separators.
 */
function analyzePatterns($text) {
    $rows = explode("\n", $text);

    echo "\n=== PATTERN ANALYSIS ===\n";

    $startsWithCourier = 0;
    $withParcelNumber = 0;
    $endsWithAmount = 0;
    $samples = [];

    // One pass gathers both the counters and the sample lines.
    foreach ($rows as $row) {
        $row = trim($row);

        if (preg_match('/^(DHL|TNT|FedEx)/', $row)) {
            $startsWithCourier++;

            if (preg_match('/^(DHL|TNT|FedEx).*\d{8,}/', $row)) {
                $withParcelNumber++;
            }

            // Keep only the first ten courier lines as examples.
            if (count($samples) < 10) {
                $samples[] = $row;
            }
        }

        if (preg_match('/£\s*\d+\.?\d*\s*$/', $row)) {
            $endsWithAmount++;
        }
    }

    // Every courier line either has a parcel number or it does not.
    $withoutParcelNumber = $startsWithCourier - $withParcelNumber;

    printf("Total lines: %d\n", count($rows));
    printf("Lines starting with courier: %d\n", $startsWithCourier);
    printf("Courier lines with parcel number: %d\n", $withParcelNumber);
    printf("Courier lines without parcel: %d\n", $withoutParcelNumber);
    printf("Total lines (ending with £): %d\n", $endsWithAmount);

    // Show some examples of courier lines
    echo "\nExamples of courier lines:\n";
    foreach ($samples as $sample) {
        printf("  '%s'\n", $sample);
    }
}
