<?php

namespace App\Services\CommissionImports\Extractors;

use App\Services\CommissionImports\BaseExtractor;
use App\Services\VertexAiService;
use Illuminate\Support\Facades\Log;
use Livewire\Features\SupportFileUploads\TemporaryUploadedFile;

class EwmExtractor extends BaseExtractor
{
    /**
     * Vertex AI client used to run the extraction prompt against the file.
     */
    protected VertexAiService $vertexAi;

    /**
     * Create the extractor with its own Vertex AI client instance.
     */
    public function __construct()
    {
        $client = new VertexAiService();

        $this->vertexAi = $client;
    }

    /**
     * Short display name of the customer this extractor handles.
     *
     * @return string Always 'EWM'.
     */
    public function getCustomerName(): string
    {
        $customerName = 'EWM';

        return $customerName;
    }

    /**
     * Identifier of the EWM (Edinburgh Woollen Mill) customer record.
     *
     * @return int Always 66.
     */
    public function getCustomerId(): int
    {
        $ewmCustomerId = 66;

        return $ewmCustomerId;
    }

    /**
     * Lower-case file extensions this extractor accepts.
     *
     * @return string[] PDF plus both Excel formats.
     */
    public function getSupportedExtensions(): array
    {
        $extensions = ['pdf', 'xlsx', 'xls'];

        return $extensions;
    }

    /**
     * Extract purchase-order data from an uploaded EWM file using Vertex AI.
     *
     * PDFs are first converted to CSV (the raw PDF is used when conversion
     * fails). The unit total summed from the AI output is compared with the
     * total printed in the file itself, and the outcome of that comparison is
     * reported under the returned array's 'metadata' key.
     *
     * @param TemporaryUploadedFile $file Uploaded PDF/XLSX/XLS order file.
     * @return array Normalized order data plus a 'metadata' entry.
     *
     * @throws \InvalidArgumentException When the file extension is not supported.
     * @throws \RuntimeException When extraction fails; the underlying error is
     *                           attached as the previous exception.
     */
    public function extractData(TemporaryUploadedFile $file): array
    {
        $extension = strtolower($file->getClientOriginalExtension());

        if (!in_array($extension, $this->getSupportedExtensions(), true)) {
            throw new \InvalidArgumentException("Unsupported file type for EWM: {$extension}");
        }

        Log::info('EWM - Starting Vertex AI extraction', [
            'file' => $file->getClientOriginalName(),
            'extension' => $extension,
            'size' => $file->getSize(),
        ]);

        // Get the file path
        $filePath = $file->getRealPath();

        // Total printed in the raw file; null when it could not be determined
        $traditionalTotal = $this->calculateTraditionalTotal($file, $extension);

        // Convert PDF to CSV for better table structure recognition
        $fileToProcess = $filePath;
        $mimeType = VertexAiService::getMimeType($extension);
        $convertedFile = null;

        if ($extension === 'pdf') {
            Log::info('EWM - Converting PDF to CSV for better extraction');
            $convertedFile = $this->convertPdfToCsv($file);
            if ($convertedFile) {
                $fileToProcess = $convertedFile;
                $mimeType = 'text/csv';
                Log::info('EWM - Successfully converted PDF to CSV');
            } else {
                Log::warning('EWM - PDF conversion failed, using original PDF');
            }
        }

        // Create extraction prompt
        $prompt = $this->buildExtractionPrompt();

        try {
            // Extract data using Vertex AI
            $extractedData = $this->vertexAi->extractFromFile($fileToProcess, $prompt, $mimeType);

            if (!$extractedData) {
                throw new \RuntimeException('Vertex AI returned empty response');
            }

            // Validate and normalize the extracted data
            $normalizedData = $this->normalizeExtractedData($extractedData);

            // Calculate actual total units from extracted quantities
            $geminiTotal = $this->sumEwmExtractedUnits($normalizedData);

            // A mismatch is only reported when the file's own total was found
            $totalsMismatch = ($traditionalTotal !== null && $geminiTotal !== $traditionalTotal);
            $warningMessage = null;

            if ($totalsMismatch) {
                $warningMessage = "Total quantity mismatch! Gemini extracted: {$geminiTotal} units, but file shows: {$traditionalTotal} units. Please verify the data.";
                Log::warning('EWM - Total mismatch detected', [
                    'gemini_total' => $geminiTotal,
                    'traditional_total' => $traditionalTotal,
                    'difference' => abs($geminiTotal - $traditionalTotal),
                ]);
            } else {
                Log::info('EWM - Totals match or validation skipped', [
                    'gemini_total' => $geminiTotal,
                    'traditional_total' => $traditionalTotal,
                    'validation_status' => $traditionalTotal === null ? 'skipped (no total found)' : 'passed',
                ]);
            }

            Log::info('EWM - Successfully extracted data', [
                'orders_count' => count($normalizedData['orders'] ?? []),
                'total_lines' => array_sum(array_map(fn($order) => count($order['lines'] ?? []), $normalizedData['orders'] ?? [])),
                'detected_season' => $normalizedData['season'] ?? null,
                'gemini_total' => $geminiTotal,
                'traditional_total' => $traditionalTotal,
                'totals_match' => !$totalsMismatch,
            ]);

            // Add metadata for season detection and total validation
            $normalizedData['metadata'] = [
                'detected_season' => $normalizedData['season'] ?? null,
                'extraction_method' => 'vertex_ai_gemini',
                'gemini_total' => $geminiTotal,
                'traditional_total' => $traditionalTotal,
                'totals_match' => !$totalsMismatch,
                'total_warning' => $warningMessage,
                'filename' => $file->getClientOriginalName(),
            ];

            return $normalizedData;
        } catch (\Throwable $e) {
            Log::error('EWM - Vertex AI extraction failed', [
                'error' => $e->getMessage(),
                'file' => $file->getClientOriginalName(),
                'memory_usage_mb' => round(memory_get_usage(true) / 1048576, 2),
            ]);

            // Chain the original exception so its type and stack trace survive the rethrow
            throw new \RuntimeException("Failed to extract EWM data: {$e->getMessage()}", 0, $e);
        } finally {
            // Clean up converted file if it exists
            if ($convertedFile && file_exists($convertedFile)) {
                @unlink($convertedFile);
                Log::debug('EWM - Cleaned up converted file');
            }
        }
    }

    /**
     * Sum every 'qty' value across all orders and lines of normalized data.
     *
     * @param array $normalizedData Output of normalizeExtractedData().
     * @return int Total number of units.
     */
    private function sumEwmExtractedUnits(array $normalizedData): int
    {
        $total = 0;

        foreach ($normalizedData['orders'] ?? [] as $order) {
            foreach ($order['lines'] ?? [] as $line) {
                foreach ($line['quantities'] ?? [] as $qty) {
                    $total += (int)($qty['qty'] ?? 0);
                }
            }
        }

        return $total;
    }

    /**
     * Run EWM-specific format checks over extracted order data.
     *
     * Checks that each non-empty PO number is purely numeric and that each
     * non-empty style number looks like an EWM style code
     * (e.g. LMK245CM2NS, LWL64GY1NS).
     *
     * @param array $data Extracted data with an optional 'orders' list.
     * @return string[] Messages for every failed check; empty when all pass.
     */
    public function validateCustomerData(array $data): array
    {
        $messages = [];
        $orders = $data['orders'] ?? [];

        foreach ($orders as $orderKey => $orderData) {
            // EWM PO numbers are typically numeric (e.g., 103467)
            $poNumber = $orderData['purchase_order_number'] ?? '';
            if (!empty($poNumber) && !preg_match('/^\d+$/', $poNumber)) {
                $messages[] = "Order {$orderKey}: EWM PO numbers should be numeric.";
            }

            $lines = $orderData['lines'] ?? [];
            foreach ($lines as $lineKey => $lineData) {
                $styleCode = $lineData['colourway']['style_number'] ?? '';
                if (empty($styleCode)) {
                    continue;
                }

                // Letters, then digits, then an alphanumeric tail
                if (!preg_match('/^[A-Z]{2,4}\d+[A-Z0-9]+$/i', $styleCode)) {
                    $messages[] = "Order {$orderKey}, Line {$lineKey}: Style number format may be incorrect.";
                }
            }
        }

        return $messages;
    }

    /**
     * Read the total unit count printed in the file itself, without AI.
     *
     * This is a best-effort cross-check: any failure while reading the file is
     * logged and reported as "no total" so it never aborts the import.
     *
     * @param TemporaryUploadedFile $file Uploaded order file.
     * @param string $extension Lower-case file extension ('pdf', 'xlsx' or 'xls').
     * @return int|null The total found in the file, or null when it is
     *                  unavailable or the extension is not handled.
     */
    protected function calculateTraditionalTotal(TemporaryUploadedFile $file, string $extension): ?int
    {
        try {
            return match ($extension) {
                'xlsx', 'xls' => $this->calculateTotalFromExcel($file),
                'pdf' => $this->calculateTotalFromPdf($file),
                default => null,
            };
        } catch (\Throwable $e) {
            // \Throwable rather than \Exception: an \Error raised by a reader
            // (TypeError, missing class) must not abort the import either.
            Log::warning('EWM - Could not calculate traditional total', [
                'error' => $e->getMessage(),
            ]);
        }

        return null;
    }

    /**
     * Find the total unit count in the first sheet of an Excel file.
     *
     * Rows whose first cell contains "total units" are candidates. A row whose
     * label contains "grand total units" wins outright; otherwise the first
     * generic "total units" row is used, so an earlier subtotal row cannot
     * shadow the grand total.
     *
     * @param TemporaryUploadedFile $file Uploaded XLSX/XLS file.
     * @return int|null The total, or null when no candidate row holds a
     *                  positive number.
     */
    protected function calculateTotalFromExcel(TemporaryUploadedFile $file): ?int
    {
        $sheets = \Maatwebsite\Excel\Facades\Excel::toArray(null, $file);
        $mainSheet = $sheets[0] ?? [];

        $fallbackTotal = null;

        foreach ($mainSheet as $row) {
            $label = strtolower(trim((string)($row[0] ?? '')));
            if (!str_contains($label, 'total units')) {
                continue;
            }

            // Total is usually in the next cell or a few cells over (cells 1-9)
            $rowTotal = null;
            for ($i = 1; $i < min(count($row), 10); $i++) {
                $value = $row[$i] ?? null;
                if (is_numeric($value) && $value > 0) {
                    $rowTotal = (int)$value;
                    break;
                }
            }

            if ($rowTotal === null) {
                continue;
            }

            if (str_contains($label, 'grand total units')) {
                return $rowTotal;
            }

            // Remember only the first generic match; keep scanning for a grand total
            $fallbackTotal ??= $rowTotal;
        }

        return $fallbackTotal;
    }

    /**
     * Find the total unit count in a PDF by searching its extracted text.
     *
     * Patterns are tried from most to least specific, and the first one that
     * matches wins. Totals written with thousands separators ("10,713") are
     * read in full rather than being cut off at the first comma.
     *
     * @param TemporaryUploadedFile $file Uploaded PDF file.
     * @return int|null The total, or null when the text could not be read or
     *                  contains no recognisable total.
     */
    protected function calculateTotalFromPdf(TemporaryUploadedFile $file): ?int
    {
        $pdfPath = $file->getRealPath();
        if ($pdfPath === false || $pdfPath === '') {
            Log::debug('Could not extract text from PDF', ['error' => 'Uploaded file has no readable path']);
            return null;
        }

        try {
            $text = \Spatie\PdfToText\Pdf::getText($pdfPath);

            // Either comma-grouped digits ("10,713") or a plain run of digits ("10713")
            $number = '(\d{1,3}(?:,\d{3})+|\d+)';

            // Try multiple patterns for total units - EWM uses "Grand Total Units"
            $patterns = [
                '/Grand\s+Total\s+Units\s*[:\s]*' . $number . '/i',    // "Grand Total Units 10713"
                '/Grand\s+Total\s*[:\s]*' . $number . '\s+units?/i',   // "Grand Total: 123 units"
                '/Total\s+Units\s*[:\s]*' . $number . '/i',            // "Total Units: 123"
                '/Total\s*[:\s]*' . $number . '\s+units?/i',           // "Total: 123 units"
            ];

            foreach ($patterns as $pattern) {
                if (preg_match($pattern, $text, $matches)) {
                    Log::info('EWM - Found total in PDF using pattern', [
                        'pattern' => $pattern,
                        'total' => $matches[1],
                    ]);

                    // Drop thousands separators before the integer cast
                    return (int)str_replace(',', '', $matches[1]);
                }
            }

            Log::warning('EWM - Could not find total units in PDF text', [
                'text_sample' => substr($text, 0, 500),
            ]);
        } catch (\Exception $e) {
            Log::debug('Could not extract text from PDF', ['error' => $e->getMessage()]);
        }

        return null;
    }

    /**
     * Return the fixed instruction text sent to Vertex AI with the file.
     *
     * The prompt describes the EWM header fields and line-item columns, the
     * rules for reading them, and the JSON shape the model must answer with.
     *
     * @return string The prompt text.
     */
    protected function buildExtractionPrompt(): string
    {
        $prompt = <<<'PROMPT'
Extract purchase order data from this EWM (Edinburgh Woollen Mill) / Pure Pay Retail file.

HEADER FIELDS TO FIND:
- p.o. number: Numeric PO number (e.g., 103467, 103446)
- p.o. date: Order date in DD/MM/YYYY format
- Ex Factory Date: Ship date in DD/MM/YYYY format
- currency: Usually "United States Dollar" (USD)
- FOB Port: Port name for shipping

LINE ITEM COLUMNS:
- Line No.: Line number
- Style: Their style number (e.g., 2155515, 2138278) - numeric
- Colour/Dimension: Colour code like CM2, GY1, BLK, NV3, PK3, RD3, DN3, GR2, GR3
- Description: Product description (e.g., "JMPR RN CASH FAIRISLE YOKE CAMEL", "CARDI HB CASH BLACK")
- Colour name: Full colour description with code prefix (e.g., "0528 NEW CAMEL", "1077 DOVE", "019 BLACK", "8253 MIDNIGHT NAVY")
- Style code: Our internal style code like LMK245CM2NS, LWL64GY1NS, LMK189NV3NS - USE THIS AS THE STYLE NUMBER
- Size columns: S, M, L, XL (or numeric sizes)
- Quantities: Numbers under each size column
- unit cost: Price per unit in USD

CRITICAL RULES:
- The style code (like LMK245CM2NS) is what we need as "style_number" - NOT the numeric "Style" column
- Each row represents one style+colour combination
- Size quantities are in columns labeled S, M, L, XL
- Read each size/quantity pair independently
- Price is per unit
- Convert dates from DD/MM/YYYY to YYYY-MM-DD format in output
- "Ex Factory Date" is the customer_exfty_date

OUTPUT (JSON only, no markdown):
{
  "purchase_order_number": "103467",
  "order_date": "YYYY-MM-DD",
  "incoterms": "FOB",
  "orders": [{
    "order_date": "YYYY-MM-DD",
    "lines": [{
      "colourway": {
        "style_number": "LMK245CM2NS",
        "description": "JMPR RN CASH FAIRISLE YOKE CAMEL",
        "colour_name": "New Camel",
        "colour_code": "CM2"
      },
      "customer_exfty_date": "YYYY-MM-DD",
      "quantities": [
        {"size": "S", "qty": 55, "price": 47.00},
        {"size": "M", "qty": 71, "price": 47.00},
        {"size": "L", "qty": 53, "price": 47.00},
        {"size": "XL", "qty": 21, "price": 47.00}
      ]
    }]
  }]
}

Extract all products from the document:
PROMPT;

        return $prompt;
    }

    /**
     * Reshape raw Vertex AI output into the structure the importer expects.
     *
     * Quantity entries without a size label or with a non-positive quantity
     * are dropped, as are lines left with no quantities and orders left with
     * no lines. Every order receives the top-level purchase order number.
     *
     * Dates go through the inherited parseDate() helper, which is defined
     * outside this file; the `??` fallbacks below assume it returns null for
     * values it cannot parse (TODO confirm against BaseExtractor).
     *
     * @param array $data Decoded JSON returned by the model.
     * @return array Keys: 'purchase_order_number', 'season', 'incoterms', 'orders'.
     */
    protected function normalizeExtractedData(array $data): array
    {
        // Get order date from top level or use current date as fallback
        $orderDate = $this->parseDate($data['order_date'] ?? '') ?? date('Y-m-d');

        $normalized = [
            'purchase_order_number' => $data['purchase_order_number'] ?? '',
            'season' => $data['season'] ?? $this->extractSeasonFromText($data['purchase_order_number'] ?? ''),
            'incoterms' => $data['incoterms'] ?? 'FOB',
            'orders' => [],
        ];

        // Normalize orders
        foreach (($data['orders'] ?? []) as $order) {
            $normalizedOrder = [
                'order_date' => $this->parseDate($order['order_date'] ?? '') ?? $orderDate,
                'purchase_order_number' => $data['purchase_order_number'] ?? '',
                'lines' => [],
            ];

            foreach (($order['lines'] ?? []) as $line) {
                // Use customer_exfty_date if available, otherwise fall back to ship_date
                $exftyDate = $this->parseDate($line['customer_exfty_date'] ?? $line['ship_date'] ?? '');

                $normalizedLine = [
                    'colourway' => [
                        'style_number' => $this->normalizeStyleNumber($line['colourway']['style_number'] ?? ''),
                        'description' => trim($line['colourway']['description'] ?? ''),
                        'colour_name' => $this->normalizeColourName($line['colourway']['colour_name'] ?? ''),
                        'colour_code' => strtoupper(trim($line['colourway']['colour_code'] ?? '')),
                    ],
                    'customer_exfty_date' => $exftyDate,
                    'quantities' => [],
                ];

                // Normalize quantities and ensure numeric values
                foreach (($line['quantities'] ?? []) as $qty) {
                    $size = trim(strtoupper((string)($qty['size'] ?? '')));
                    $quantity = (int)($qty['qty'] ?? 0);
                    $price = (float)($qty['price'] ?? 0);

                    // Compare against '' explicitly: a truthiness test would
                    // also discard the valid numeric size label "0".
                    if ($size === '' || $quantity <= 0) {
                        continue;
                    }

                    $normalizedLine['quantities'][] = [
                        'size' => $size,
                        'qty' => $quantity,
                        'price' => $price,
                    ];
                }

                // Only add lines with quantities
                if (!empty($normalizedLine['quantities'])) {
                    $normalizedOrder['lines'][] = $normalizedLine;
                }
            }

            if (!empty($normalizedOrder['lines'])) {
                $normalized['orders'][] = $normalizedOrder;
            }
        }

        return $normalized;
    }

    /**
     * Normalize an EWM style code for matching.
     *
     * EWM style codes look like LMK245CM2NS or LWL64GY1NS. The code is kept in
     * full, including any trailing "NS"; only surrounding whitespace is
     * removed and letters are upper-cased.
     *
     * @param string $styleNumber Style code as extracted.
     * @return string Trimmed, upper-cased style code.
     */
    protected function normalizeStyleNumber(string $styleNumber): string
    {
        $upperCased = strtoupper($styleNumber);

        return trim($upperCased);
    }

    /**
     * Normalize an EWM colour name into capitalised words.
     *
     * EWM colour names often carry a leading numeric code, e.g.
     * "0528 NEW CAMEL" or "1077 DOVE". That code is removed when present,
     * then the rest is lower-cased with each word capitalised.
     *
     * @param string $colourName Colour name as extracted.
     * @return string Cleaned colour name, e.g. "New Camel".
     */
    protected function normalizeColourName(string $colourName): string
    {
        $cleaned = trim($colourName);

        // Digits followed by whitespace mark a colour-code prefix
        $hasCodePrefix = preg_match('/^\d+\s+(.+)$/', $cleaned, $parts);
        $nameOnly = $hasCodePrefix ? $parts[1] : $cleaned;

        return ucwords(strtolower($nameOnly));
    }

    /**
     * Convert a PDF to CSV with tabula-py for better table extraction.
     *
     * Stream mode (better for text-based PDFs) is tried first, then lattice
     * mode (better for bordered tables). Each attempt runs under a 60-second
     * `timeout` so a stuck conversion cannot hang the worker.
     *
     * Both file paths reach Python through sys.argv instead of being pasted
     * into the `-c` source, so quotes or shell metacharacters in a path can
     * neither break the Python literal nor be interpreted by the shell.
     *
     * @param TemporaryUploadedFile $file Uploaded PDF file.
     * @return string|null Path of the generated CSV (the caller deletes it),
     *                     or null when both modes fail.
     */
    protected function convertPdfToCsv(TemporaryUploadedFile $file): ?string
    {
        $pdfPath = $file->getRealPath();
        if ($pdfPath === false || $pdfPath === '') {
            Log::warning('EWM - PDF to CSV conversion skipped, uploaded file has no readable path');
            return null;
        }

        // Create temporary CSV file path (extra entropy avoids clashes between workers)
        $tempCsvPath = sys_get_temp_dir() . '/' . uniqid('ewm_', true) . '.csv';

        // Mode name => extra keyword arguments for tabula.convert_into()
        $modes = [
            'stream' => 'stream=True, guess=False',
            'lattice' => 'lattice=True',
        ];
        $timeoutSeconds = 60;
        $attempts = [];

        try {
            foreach ($modes as $mode => $tabulaOptions) {
                $script = sprintf(
                    "import sys, tabula; tabula.convert_into(sys.argv[1], sys.argv[2], output_format='csv', pages='all', %s)",
                    $tabulaOptions
                );
                $command = sprintf(
                    'timeout %ds python3 -c %s %s %s 2>&1',
                    $timeoutSeconds,
                    escapeshellarg($script),
                    escapeshellarg($pdfPath),
                    escapeshellarg($tempCsvPath)
                );

                $output = [];
                $returnCode = 0;
                exec($command, $output, $returnCode);

                // The external process wrote the file; drop PHP's cached stat
                // data so file_exists()/filesize() see the current state.
                clearstatcache(true, $tempCsvPath);

                if ($returnCode === 0 && file_exists($tempCsvPath) && filesize($tempCsvPath) > 0) {
                    $sampleLines = array_slice(file($tempCsvPath, FILE_IGNORE_NEW_LINES) ?: [], 0, 10);
                    Log::info("EWM - PDF converted to CSV successfully ({$mode} mode)", [
                        'output_file' => $tempCsvPath,
                        'size' => filesize($tempCsvPath),
                        'csv_sample' => implode("\n", $sampleLines),
                    ]);
                    return $tempCsvPath;
                }

                $attempts[$mode] = [
                    'code' => $returnCode,
                    'output' => implode("\n", $output),
                ];

                if ($mode === 'stream') {
                    Log::warning('EWM - PDF to CSV (stream mode) failed, trying lattice', [
                        'return_code' => $attempts[$mode]['code'],
                        'output' => $attempts[$mode]['output'],
                    ]);
                }
            }

            Log::warning('EWM - PDF to CSV conversion failed (both modes)', [
                'stream_code' => $attempts['stream']['code'] ?? null,
                'stream_output' => $attempts['stream']['output'] ?? '',
                'lattice_code' => $attempts['lattice']['code'] ?? null,
                'lattice_output' => $attempts['lattice']['output'] ?? '',
            ]);
        } catch (\Throwable $e) {
            Log::error('EWM - PDF conversion error', [
                'error' => $e->getMessage(),
            ]);
        }

        // Reached only on failure: remove any partial output left behind
        if (file_exists($tempCsvPath)) {
            @unlink($tempCsvPath);
        }

        return null;
    }
}


































