<?php

namespace App\Services\OCR;

use App\Models\Receipt;
use App\Models\ReceiptLine;
use App\Services\AI\VertexClient;
use Illuminate\Support\Facades\Storage;
use Carbon\Carbon;

/**
 * Extracts structured data (merchant, date, totals, VAT details, line items)
 * from an uploaded receipt file by sending it to a Vertex AI model, then
 * persists the result on the Receipt and its ReceiptLine rows.
 *
 * JPEG/PNG files are downscaled in memory and sent base64-encoded; PDF files
 * have their text objects extracted locally and sent as text.
 */
class ReceiptOCRService
{
    /** Images wider than this many pixels are downscaled before being sent for OCR. */
    private const MAX_OCR_WIDTH = 1000;

    /** Model used for image receipts when `vertex.models.ocr` is not configured. */
    private const DEFAULT_IMAGE_MODEL = 'gemini-2.5-flash-lite';

    /** Model used for PDF receipts when `vertex.models.ocr` is not configured. */
    private const DEFAULT_PDF_MODEL = 'gemini-2.5-flash';

    /**
     * VAT status labels and single-letter codes mapped to a percentage.
     * Keys are upper-case with whitespace/hyphen/underscore runs collapsed to one space.
     */
    private const VAT_STATUS_RATES = [
        'STANDARD' => 20.0,
        'STANDARD RATE' => 20.0,
        'STANDARD RATED' => 20.0,
        'S' => 20.0,
        'REDUCED' => 5.0,
        'REDUCED RATE' => 5.0,
        'REDUCED RATED' => 5.0,
        'R' => 5.0,
        'ZERO' => 0.0,
        'ZERO RATE' => 0.0,
        'ZERO RATED' => 0.0,
        'Z' => 0.0,
        'VAT FREE' => 0.0,
        'EXEMPT' => 0.0,
        'EXEMPTION' => 0.0,
        'E' => 0.0,
    ];

    /**
     * System prompt shared by the image and PDF flows. `{{source}}` and
     * `{{location}}` are substituted by buildSystemPrompt().
     */
    private const SYSTEM_PROMPT_TEMPLATE = <<<'PROMPT'
    You are an expert at extracting comprehensive data from {{source}}. Extract ALL available information to help with future analysis and matching.

    REQUIRED FIELDS:
    - merchant_name: Business name
    - receipt_date: Date in YYYY-MM-DD format
    - total_amount: Total amount as decimal (e.g., 12.50)
    - currency: Currency code (e.g., GBP, USD, EUR)
    - lines: Array of line items with description, quantity, unit_price, line_total

    VAT/TAX FIELDS (CRITICAL FOR UK BUSINESSES):
    - vat_number: VAT registration number (if visible)
    - vat_rate: Standard VAT rate (e.g., 20% for UK standard rate)
    - subtotal: Amount before VAT/tax
    - vat_amount: Total VAT amount
    - net_amount: Amount excluding VAT
    - vat_breakdown: Object with different VAT rates and amounts (e.g., {"20%": 10.00, "5%": 2.50})
    - is_vat_inclusive: Boolean - whether prices include VAT
    - vat_exemption_notes: Any notes about VAT exemptions or zero-rated items

    OPTIONAL FIELDS (extract if visible):
    - receipt_number: Receipt/invoice number
    - cashier_name: Cashier or staff member name
    - store_location: Store address or location
    - phone_number: Business phone number
    - email: Business email
    - website: Business website
    - payment_method: Cash, card, etc.
    - discount_amount: Any discounts applied
    - tip_amount: Tip or service charge
    - transaction_id: Transaction reference number
    - loyalty_points: Loyalty points earned
    - opening_hours: Store hours if visible
    - return_policy: Return policy text if visible
    - additional_notes: Any other relevant text

    For line items, extract:
    - description: Item description
    - quantity: Number of items
    - unit_price: Price per unit
    - line_total: Total for this line
    - category: Product category if identifiable (food, fuel, clothing, etc.)
    - brand: Brand name if visible
    - barcode: Barcode number if visible
    - vat_rate: VAT rate for this line (e.g., 20%, 5%, 0%, "VAT FREE", "EXEMPT")
    - vat_amount: VAT amount for this line
    - net_amount: Amount excluding VAT for this line
    - is_vat_inclusive: Whether this line price includes VAT
    - vat_code: VAT code if shown (e.g., "S", "R", "Z", "E")
    - vat_status: Status like "STANDARD", "REDUCED", "ZERO-RATED", "EXEMPT"

    VAT EXTRACTION PRIORITY:
    - Look for VAT registration numbers (usually 9 or 12 digits starting with GB)
    - Extract any VAT breakdowns showing different rates (20%, 5%, 0%, etc.)
    - Identify VAT-inclusive vs VAT-exclusive pricing
    - Look for VAT codes (S=Standard, R=Reduced, Z=Zero-rated, E=Exempt)
    - Extract subtotals, VAT amounts, and net amounts
    - Pay attention to VAT-free or exempt items

    Output strict JSON with all available fields. Be thorough - extract every piece of text and data visible {{location}}, with special attention to VAT/tax information.
    PROMPT;

    public function __construct(private VertexClient $vertex) {}

    /**
     * Run OCR on the receipt's stored file and persist the extracted data.
     *
     * Returns without touching the receipt when the file is missing, has an
     * unsupported type, or (for PDFs) no data could be extracted. For images,
     * an unusable model response falls back to placeholder data, as before.
     * Exceptions thrown by the Vertex client in the image flow propagate to
     * the caller; the PDF flow logs them and returns.
     *
     * @param Receipt $receipt Receipt whose `image_disk` / `image_path` locate the file.
     */
    public function extractReceiptData(Receipt $receipt): void
    {
        $filePath = Storage::disk($receipt->image_disk)->path($receipt->image_path);

        if (!file_exists($filePath)) {
            \Log::warning('Receipt file not found for OCR', [
                'receipt_id' => $receipt->id,
                'path' => $filePath,
            ]);
            return;
        }

        // PDFs yield receipt data directly from their text; everything else is
        // treated as an image. The two flows must not be chained: the PDF result
        // is already structured data, not something to OCR again.
        $extension = strtolower(pathinfo($filePath, PATHINFO_EXTENSION));
        $data = $extension === 'pdf'
            ? $this->extractDataFromPdf($filePath)
            : $this->extractDataFromImage($receipt, $filePath);

        if ($data === null) {
            return;
        }

        $this->applyExtractedData($receipt, $data);
        $receipt->save();

        $this->replaceReceiptLines($receipt, $data['lines'] ?? null);
    }

    /**
     * Send a JPEG/PNG receipt to the model and return the decoded JSON payload.
     *
     * @return array|null Extracted data, placeholder data when the model response
     *                    is unusable, or null when the image itself cannot be prepared.
     */
    private function extractDataFromImage(Receipt $receipt, string $imagePath): ?array
    {
        $imageData = $this->encodeImageForOcr($imagePath);
        if ($imageData === null) {
            \Log::warning('Could not process image file for OCR', ['path' => $imagePath]);
            return null;
        }

        // NOTE(review): the image travels as base64 inside the JSON text prompt.
        // Whether VertexClient forwards it to the model as inline image data
        // cannot be determined from this file - confirm against VertexClient.
        $response = $this->vertex->generate(
            config('vertex.models.ocr', self::DEFAULT_IMAGE_MODEL),
            $this->encodePrompt([
                'image' => $imageData,
                'instructions' => 'Extract all receipt data from this image',
            ]),
            $this->buildSystemPrompt('receipt images', 'on the receipt'),
            ['responseMimeType' => 'application/json'],
        );

        if (isset($response['json']) && is_array($response['json'])) {
            return $response['json'];
        }

        \Log::warning('OCR returned no usable JSON, using placeholder data', [
            'receipt_id' => $receipt->id,
        ]);

        // NOTE(review): this placeholder overwrites the merchant name and date
        // on the receipt with made-up values; kept because it is existing behavior.
        return [
            'merchant_name' => 'Unknown Merchant ' . $receipt->id,
            'receipt_date' => now()->format('Y-m-d'),
            'total_amount' => 0,
            'currency' => 'GBP',
            'lines' => [],
        ];
    }

    /**
     * Extract the text of a PDF receipt and have the model structure it.
     *
     * @return array|null Extracted data, or null when no text could be read,
     *                    the model response is unusable, or an exception occurred.
     */
    private function extractDataFromPdf(string $filePath): ?array
    {
        \Log::info('PDF file detected, processing with LLM', ['file' => $filePath]);

        try {
            $pdfText = $this->extractTextFromPdf($filePath);
            if ($pdfText === '') {
                \Log::warning('Could not extract text from PDF', ['file' => $filePath]);
                return null;
            }

            $response = $this->vertex->generate(
                config('vertex.models.ocr', self::DEFAULT_PDF_MODEL),
                $this->encodePrompt([
                    'receipt_text' => $pdfText,
                    'instructions' => 'Extract all receipt data from this text',
                ]),
                $this->buildSystemPrompt('receipt text', 'in the receipt text'),
                ['responseMimeType' => 'application/json'],
            );

            if (!isset($response['json']) || !is_array($response['json'])) {
                \Log::warning('LLM failed to extract PDF content', ['file' => $filePath]);
                return null;
            }

            return $response['json'];
        } catch (\Exception $e) {
            \Log::error('Error processing PDF with LLM', [
                'file' => $filePath,
                'error' => $e->getMessage(),
            ]);
            return null;
        }
    }

    /**
     * Copy the header-level fields from the model output onto the receipt.
     *
     * Each field is only written when the model supplied a usable value of the
     * expected type, so a malformed response cannot raise a TypeError or blank
     * out existing data. The full payload is always stored in `meta`.
     */
    private function applyExtractedData(Receipt $receipt, array $data): void
    {
        $merchant = $data['merchant_name'] ?? null;
        if (is_string($merchant) && trim($merchant) !== '') {
            $receipt->merchant_name = trim($merchant);
        }

        $rawDate = $data['receipt_date'] ?? null;
        if (is_string($rawDate) && trim($rawDate) !== '') {
            try {
                $receipt->receipt_date = Carbon::parse($rawDate)->format('Y-m-d');
            } catch (\Exception $e) {
                // Keep whatever date the receipt already has.
                \Log::notice('Could not parse receipt date from OCR', [
                    'receipt_id' => $receipt->id,
                    'value' => $rawDate,
                ]);
            }
        }

        // A zero total is treated as "not extracted" and never overwrites the receipt.
        $total = $data['total_amount'] ?? null;
        if (is_numeric($total) && (float) $total !== 0.0) {
            $receipt->total_amount = (float) $total;
        }

        $currency = $data['currency'] ?? null;
        if (is_string($currency) && trim($currency) !== '') {
            $receipt->currency = strtoupper(trim($currency));
        }

        // Store the complete OCR response for future reference.
        $receipt->meta = $data;
    }

    /**
     * Replace the receipt's line items with the ones found by OCR.
     *
     * Lines without a textual description are ignored. When no usable line
     * remains the existing lines are left alone. Delete and inserts run in one
     * transaction on the receipt's connection so a failed insert does not
     * leave the receipt without lines.
     *
     * @param mixed $lines The `lines` entry of the model output (expected: list of arrays).
     */
    private function replaceReceiptLines(Receipt $receipt, mixed $lines): void
    {
        if (!is_array($lines)) {
            return;
        }

        $usableLines = array_values(array_filter(
            $lines,
            static fn ($line): bool => is_array($line)
                && is_string($line['description'] ?? null)
                && trim($line['description']) !== '',
        ));

        if ($usableLines === []) {
            return;
        }

        $receipt->getConnection()->transaction(function () use ($receipt, $usableLines): void {
            $receipt->lines()->delete();

            foreach ($usableLines as $lineData) {
                ReceiptLine::create([
                    'receipt_id' => $receipt->id,
                    'description' => $lineData['description'],
                    'quantity' => $this->toFloat($lineData['quantity'] ?? null, 1.0),
                    'unit_price' => $this->toFloat($lineData['unit_price'] ?? null, 0.0),
                    'line_total' => $this->toFloat($lineData['line_total'] ?? null, 0.0),
                    'vat_rate' => $this->parseVatRate($lineData['vat_rate'] ?? $lineData['tax_rate'] ?? 0),
                    'meta' => $lineData, // Complete line data for future reference
                ]);
            }
        });
    }

    /**
     * Cast a scalar to float; anything else (null, arrays, objects) yields $default.
     */
    private function toFloat(mixed $value, float $default): float
    {
        return is_scalar($value) ? (float) $value : $default;
    }

    /**
     * Parse a VAT rate given as a number, a percentage string ("20%", "20 %"),
     * a status label ("STANDARD", "zero-rated") or a VAT code ("S", "R", "Z", "E").
     *
     * @param mixed $vatRate Raw value from the model output.
     * @return float Rate in percent; 0.0 when the value cannot be interpreted.
     */
    private function parseVatRate(mixed $vatRate): float
    {
        if (is_numeric($vatRate)) {
            return (float) $vatRate;
        }

        if (!is_string($vatRate)) {
            return 0.0;
        }

        $normalized = strtoupper(trim($vatRate));

        // Percentage anywhere in the string, with optional space before the sign.
        if (preg_match('/(\d+(?:\.\d+)?)\s*%/', $normalized, $matches) === 1) {
            return (float) $matches[1];
        }

        // "ZERO-RATED", "ZERO  RATED" and "ZERO_RATED" all look up as "ZERO RATED".
        $lookupKey = preg_replace('/[\s_-]+/', ' ', $normalized) ?? $normalized;

        return self::VAT_STATUS_RATES[$lookupKey] ?? 0.0;
    }

    /**
     * Fill the shared system prompt template.
     *
     * @param string $source   What is being read, e.g. "receipt images".
     * @param string $location Where the data is visible, e.g. "on the receipt".
     */
    private function buildSystemPrompt(string $source, string $location): string
    {
        return strtr(self::SYSTEM_PROMPT_TEMPLATE, [
            '{{source}}' => $source,
            '{{location}}' => $location,
        ]);
    }

    /**
     * JSON-encode the user prompt. Invalid UTF-8 (possible in raw PDF text) is
     * substituted rather than making json_encode() return false.
     *
     * @throws \JsonException When encoding fails for any other reason.
     */
    private function encodePrompt(array $prompt): string
    {
        return json_encode($prompt, JSON_THROW_ON_ERROR | JSON_INVALID_UTF8_SUBSTITUTE);
    }

    /**
     * Load a JPEG/PNG file, downscale it to at most MAX_OCR_WIDTH pixels wide
     * and return it base64-encoded. Works entirely in memory, so no temporary
     * files are created and the image is decoded and re-encoded only once.
     *
     * @return string|null Base64 image data, or null when the type is
     *                     unsupported or the image cannot be read/encoded.
     */
    private function encodeImageForOcr(string $imagePath): ?string
    {
        $extension = strtolower(pathinfo($imagePath, PATHINFO_EXTENSION));

        if (!in_array($extension, ['jpg', 'jpeg', 'png'], true)) {
            \Log::warning('Unsupported file type for OCR', [
                'file' => $imagePath,
                'extension' => $extension,
            ]);
            return null;
        }

        $isPng = $extension === 'png';

        try {
            $image = $isPng ? imagecreatefrompng($imagePath) : imagecreatefromjpeg($imagePath);
            if ($image === false) {
                \Log::warning('Could not load image for OCR', ['path' => $imagePath]);
                return null;
            }

            $image = $this->downscaleToMaxWidth($image, $isPng);

            return base64_encode($this->renderImage($image, $isPng));
        } catch (\Exception $e) {
            \Log::error('Error processing file for OCR', [
                'file' => $imagePath,
                'error' => $e->getMessage(),
            ]);
            return null;
        }
    }

    /**
     * Return a copy of $image scaled to MAX_OCR_WIDTH pixels wide (aspect ratio
     * kept), or $image itself when it is already narrow enough or resizing fails.
     *
     * @param bool $preserveAlpha Keep the alpha channel on the resized canvas (PNG).
     */
    private function downscaleToMaxWidth(\GdImage $image, bool $preserveAlpha): \GdImage
    {
        $originalWidth = imagesx($image);
        $originalHeight = imagesy($image);

        if ($originalWidth <= self::MAX_OCR_WIDTH) {
            return $image;
        }

        $newWidth = self::MAX_OCR_WIDTH;
        // Clamp to 1: an extremely wide image would otherwise truncate to a
        // height of 0, which imagecreatetruecolor() rejects with a ValueError.
        $newHeight = max(1, (int) (($originalHeight * $newWidth) / $originalWidth));

        try {
            $resized = imagecreatetruecolor($newWidth, $newHeight);
            if ($resized === false) {
                return $image;
            }

            if ($preserveAlpha) {
                imagealphablending($resized, false);
                imagesavealpha($resized, true);
                $transparent = imagecolorallocatealpha($resized, 255, 255, 255, 127);
                imagefill($resized, 0, 0, $transparent);
            }

            $copied = imagecopyresampled(
                $resized,
                $image,
                0,
                0,
                0,
                0,
                $newWidth,
                $newHeight,
                $originalWidth,
                $originalHeight,
            );
            if (!$copied) {
                return $image;
            }

            \Log::info('Image resized for OCR', [
                'original' => $originalWidth . 'x' . $originalHeight,
                'resized' => $newWidth . 'x' . $newHeight,
            ]);

            return $resized;
        } catch (\Exception $e) {
            // Best effort: fall back to the full-size image.
            \Log::error('Error resizing image for OCR', ['error' => $e->getMessage()]);
            return $image;
        }
    }

    /**
     * Encode a GD image to PNG (compression 9) or JPEG (quality 90) bytes.
     *
     * @throws \RuntimeException When GD produces no output.
     */
    private function renderImage(\GdImage $image, bool $asPng): string
    {
        ob_start();
        try {
            $written = $asPng ? imagepng($image, null, 9) : imagejpeg($image, null, 90);
        } finally {
            // Always close the buffer, even if GD throws, so it cannot leak
            // into the response output.
            $bytes = ob_get_clean();
        }

        if (!$written || $bytes === false || $bytes === '') {
            throw new \RuntimeException('GD failed to encode image');
        }

        return $bytes;
    }

    /**
     * Read a PDF file and return the text found in its uncompressed text objects.
     *
     * This is a lightweight heuristic, not a PDF parser: text inside compressed
     * content streams is not seen, in which case the result is an empty string.
     *
     * @return string Extracted text, or '' when the file is unreadable or has no such text.
     */
    private function extractTextFromPdf(string $filePath): string
    {
        try {
            $content = file_get_contents($filePath);
            if ($content === false || $content === '') {
                return '';
            }

            $text = $this->parsePdfTextObjects($content);

            \Log::info('Extracted PDF text length', ['length' => strlen($text), 'file' => $filePath]);

            return $text;
        } catch (\Exception $e) {
            \Log::error('Error extracting text from PDF', [
                'file' => $filePath,
                'error' => $e->getMessage(),
            ]);
            return '';
        }
    }

    /**
     * Collect the literal strings "(...)" found between BT and ET operators
     * in raw PDF content and join them with single spaces.
     */
    private function parsePdfTextObjects(string $content): string
    {
        // Word boundaries stop "ET" inside words such as "RETAIL" from ending
        // a text object early.
        $objectCount = preg_match_all('/\bBT\b(.*?)\bET\b/s', $content, $objects);
        if ($objectCount === false || $objectCount === 0) {
            return '';
        }

        $fragments = [];
        foreach ($objects[1] as $object) {
            // A literal string ends at the first ")" that is not backslash-escaped.
            $found = preg_match_all('/\(((?:\\\\.|[^\\\\)])*)\)/s', $object, $strings);
            if ($found === false || $found === 0) {
                continue;
            }

            foreach ($strings[1] as $literal) {
                // Turn the escapes \( \) \\ back into the characters they stand for.
                $fragments[] = preg_replace('/\\\\([()\\\\])/', '$1', $literal) ?? $literal;
            }
        }

        $text = implode(' ', $fragments);
        $text = preg_replace('/\s+/', ' ', $text) ?? $text;

        return trim($text);
    }
}
