<?php
/**
 * PDF indexing maintenance script.
 *
 * Walks every file below the web server's document root, extracts the text of
 * each PDF (PdfHelper first, PdfOcrHelper as a fallback when no text was
 * found) and writes path, title, content and indexing time to the `pdf_index`
 * table via REPLACE INTO.
 *
 * At most $maxPerRun PDFs are (re)indexed per invocation. PDFs whose stored
 * `last_index` is newer than the file's modification time are skipped, so
 * repeated runs advance through the tree instead of re-processing the same
 * first files every time.
 *
 * NOTE(review): $pdo is not defined in this file; it is presumably a PDO
 * handle created by admin/config.php - confirm.
 */
error_reporting(E_ALL);
ini_set('display_errors', 1);
ini_set('log_errors', 1);
ini_set('memory_limit', '2048M');
ini_set('max_execution_time', 0);

require_once __DIR__ . '/../classes/PdfHelper.php';
require_once __DIR__ . '/../classes/PdfOcrHelper.php';
require_once __DIR__ . '/../admin/config.php';

// Safety limit: text extraction/OCR is expensive, so only this many PDFs are
// (re)indexed per run.
$maxPerRun = 30;

// File names and exception messages are arbitrary text; escape them before
// they are written into the HTML response.
$esc = static function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

echo "<pre>STARTE INDEXIERUNG…</pre>";

$docRoot  = isset($_SERVER['DOCUMENT_ROOT']) ? (string) $_SERVER['DOCUMENT_ROOT'] : '';
$basePath = rtrim($docRoot, '/');

// Without a usable document root (e.g. when run from the CLI) there is
// nothing to scan; stop with a clear message instead of an uncaught error.
if ($basePath === '' || !is_dir($basePath)) {
    error_log('PDF_INDEX_ERROR: DOCUMENT_ROOT is empty or not a directory');
    echo "<pre>ERROR: DOCUMENT_ROOT is empty or not a directory</pre>";
    exit(1);
}

// Prepare both statements once; they are re-executed for every file.
try {
    $lookup = $pdo->prepare('SELECT last_index FROM pdf_index WHERE path = :path');
    $upsert = $pdo->prepare("
        REPLACE INTO pdf_index (path, title, content, last_index)
        VALUES (:path, :title, :content, :last_index)
    ");

    // PDO returns false instead of throwing when it is not in exception mode.
    if (!$lookup || !$upsert) {
        throw new RuntimeException('could not prepare pdf_index statements');
    }
} catch (Throwable $e) {
    error_log("PDF_INDEX_ERROR: " . $e->getMessage());
    echo "<pre>ERROR: " . $esc($e->getMessage()) . "</pre>";
    exit(1);
}

// CATCH_GET_CHILD makes the iterator skip directories it cannot open instead
// of aborting the whole run with an exception.
$rii = new RecursiveIteratorIterator(
    new RecursiveDirectoryIterator($basePath, FilesystemIterator::SKIP_DOTS),
    RecursiveIteratorIterator::LEAVES_ONLY,
    RecursiveIteratorIterator::CATCH_GET_CHILD
);

$indexed = 0;
$checked = 0;
$skipped = 0;

$docRootLength = strlen($docRoot);

foreach ($rii as $file) {

    // PDFs only
    if (!$file->isFile() || strtolower($file->getExtension()) !== 'pdf') {
        continue;
    }

    $abs = $file->getPathname();
    $checked++;

    try {

        // Path as stored in the DB: the absolute path with the leading
        // document root removed. Only the prefix is stripped, so a path that
        // happens to contain the document root again further down is kept
        // intact.
        $dbPath = ($docRootLength > 0 && strncmp($abs, $docRoot, $docRootLength) === 0)
            ? substr($abs, $docRootLength)
            : $abs;

        // Skip files that were indexed after their last modification.
        // NOTE(review): assumes last_index reads back in a format strtotime()
        // understands (it is written as 'Y-m-d H:i:s' below) - confirm
        // against the column type. An unparsable value just forces a re-index.
        $lookup->execute([':path' => $dbPath]);
        $lastIndex = $lookup->fetchColumn();
        $lookup->closeCursor();

        if ($lastIndex !== false && $lastIndex !== null) {
            $indexedAt = strtotime((string) $lastIndex);
            if ($indexedAt !== false && $indexedAt > $file->getMTime()) {
                $skipped++;
                continue;
            }
        }

        echo "<pre>CHECK: " . $esc($abs) . "</pre>";
        flush();

        // 1) Standard PDF parser
        $text = PdfHelper::extractText($abs);

        // 2) Nothing extracted -> try OCR
        if ($text === null || trim($text) === '') {
            echo "<pre>→ OCR</pre>";
            flush();
            $text = PdfOcrHelper::extractOcr($abs);
        }

        // 3) Still nothing -> store a placeholder so the file is recorded
        if ($text === null || trim($text) === '') {
            echo "<pre>→ KEIN TEXT</pre>";
            $text = "NO_TEXT";
        }

        $upsert->execute([
            ':path'       => $dbPath,
            ':title'      => basename($abs),
            ':content'    => $text,
            ':last_index' => date('Y-m-d H:i:s'),
        ]);

        $indexed++;

        echo "<pre>INDEXED ✓ " . $esc($dbPath) . "</pre>";
        flush();

        // Safety: do not process all PDFs in one go. The next run continues
        // with the files that are not indexed yet.
        if ($indexed >= $maxPerRun) {
            echo "<pre>STOP SAFETY LIMIT REACHED</pre>";
            break;
        }

    } catch (Throwable $e) {
        // One broken PDF must not stop the run: log it, report it, move on.
        error_log("PDF_INDEX_ERROR: " . $e->getMessage() . " [" . $abs . "]");
        echo "<pre>ERROR: " . $esc($e->getMessage()) . "</pre>";
        flush();
    }
}

echo "<h3>Indexierung abgeschlossen!</h3>";
echo "<p>$indexed PDF-Dateien indexiert.</p>";
echo "<p>$checked PDFs gescannt.</p>";
echo "<p>$skipped PDFs übersprungen (seit letzter Indexierung unverändert).</p>";
