"""Phase 8 tests: Ingest API integration with strategy selection (Sub-Phase 8.3).

Covers:
- POST /api/v1/ingest?strategy=token — existing behavior unchanged
- POST /api/v1/ingest?strategy=question — Q&A chunking applied
- Invalid strategy values return 400
- IngestResponse includes strategy field
- TXT with Q&A format uses question strategy
- Document without Q&A falls back gracefully
"""
import io
import json
from typing import List, Tuple
from unittest.mock import MagicMock

import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
from pypdf import PdfWriter

from app.routers.ingest import router


class _DeterministicEmbedding:
    """Embedding stub that maps each text to a fixed 384-float vector.

    Position ``i`` of a vector holds ``ord(text[i]) / 1000.0``; positions
    beyond the end of the text stay ``0.0`` and characters past index 383
    are ignored, so identical texts always yield identical vectors.
    """

    def name(self) -> str:
        """Return the fixed identifier of this embedding function."""
        return "test_deterministic"

    def __call__(self, input):
        """Embed every text in ``input``; returns one vector per text."""
        return self._embed(input)

    def embed_query(self, input):
        """Embed query texts exactly the same way as ``__call__`` does."""
        return self._embed(input)

    @staticmethod
    def _embed(texts):
        """Build one zero-padded, 384-wide vector for each item of ``texts``."""
        width = 384

        def encode(text):
            # Leading slots come from the code points; the tail is zero padding.
            head = [ord(ch) / 1000.0 for ch in text[:width]]
            return head + [0.0] * (width - len(head))

        return [encode(text) for text in texts]


def _create_real_pdf(content: str) -> bytes:
    """Return the serialized bytes of a PDF holding one blank 200x200 page.

    NOTE(review): ``content`` is accepted but never written into the page,
    so the resulting document carries no text.
    """
    pdf = PdfWriter()
    pdf.add_blank_page(width=200, height=200)
    # Serialize into memory and hand back the raw bytes.
    with io.BytesIO() as stream:
        pdf.write(stream)
        return stream.getvalue()


def _create_text_txt(content: str) -> bytes:
    """Return ``content`` encoded as UTF-8 bytes, usable as a .txt upload body."""
    encoded: bytes = content.encode(encoding="utf-8")
    return encoded


@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield a TestClient with per-test storage paths and deterministic embeddings.

    Setup steps, in order:

    1. Point every storage location (Chroma directory, chunk directory,
       prompts DB, history DB) at a path inside ``tmp_path`` and export the
       paths, plus a dummy embedding model name and LLM key, as environment
       variables.
    2. Clear the cached settings accessors (presumably so the new
       environment is picked up on next access).
    3. Initialise and seed the prompts DB and initialise the history DB.
    4. Patch ``app.core.database.get_embedding_function_settings`` so it
       returns a ``_DeterministicEmbedding`` instance.
    5. Mount the ingest router under ``/api/v1`` on a fresh FastAPI app.

    On teardown the settings caches are cleared again so later tests do not
    see this test's settings.
    """
    chroma_path = str(tmp_path / "chroma_db")
    chunk_path = str(tmp_path / "document_chunk")
    prompts_path = str(tmp_path / "prompts.db")
    history_path = str(tmp_path / "history.db")

    env_overrides = {
        "CHROMA_DB_PATH": chroma_path,
        "DOCUMENT_CHUNK_PATH": chunk_path,
        "PROMPTS_DB_PATH": prompts_path,
        "HISTORY_DB_PATH": history_path,
        "EMBEDDING_MODEL": "test-mock",
        "LLM_API_KEY": "test-key",
    }
    for var_name, var_value in env_overrides.items():
        monkeypatch.setenv(var_name, var_value)

    # Imports stay local and in this order: the caches are cleared only after
    # the environment has been overridden.
    from app.core.config import get_settings
    get_settings.cache_clear()
    from app.core.dependencies import get_settings_cached
    get_settings_cached.cache_clear()

    from app.core.sqlite_db import _get_db, init_prompts_db, init_history_db, seed_default_profiles

    # Close each connection even when initialisation raises, so a failing
    # setup does not leave an open handle on a file inside tmp_path.
    conn = _get_db(prompts_path)
    try:
        init_prompts_db(conn)
        seed_default_profiles(conn)
    finally:
        conn.close()

    hconn = _get_db(history_path)
    try:
        init_history_db(hconn)
    finally:
        hconn.close()

    monkeypatch.setattr(
        "app.core.database.get_embedding_function_settings",
        lambda settings: _DeterministicEmbedding(),
    )

    test_app = FastAPI()
    test_app.include_router(router, prefix="/api/v1")

    yield TestClient(test_app)

    # Teardown: drop settings cached while the overridden environment was active.
    get_settings_cached.cache_clear()
    get_settings.cache_clear()


def test_ingest_with_strategy_token(client):
    """strategy=token is accepted, echoed in the response, and yields chunks."""
    payload = _create_text_txt("This is a test document with enough content to generate chunks.")
    upload = {"file": ("test.txt", payload, "text/plain")}

    response = client.post("/api/v1/ingest?strategy=token", files=upload)

    assert response.status_code == 200
    body = response.json()
    assert body["strategy"] == "token"
    assert body["chunk_count"] > 0


def test_ingest_invalid_strategy_rejected(client):
    """An unknown strategy value is rejected with HTTP 400 naming the strategy."""
    payload = _create_text_txt("test")
    upload = {"file": ("test.txt", payload, "text/plain")}

    response = client.post("/api/v1/ingest?strategy=invalid", files=upload)

    assert response.status_code == 400
    # The error detail must mention the offending parameter (case-insensitive).
    detail = response.json()["detail"]
    assert "strategy" in detail.lower()


def test_ingest_response_includes_strategy(client):
    """A successful ingest response carries a ``strategy`` key."""
    payload = _create_text_txt("Strategy response test content with more text to ensure chunks.")
    upload = {"file": ("test.txt", payload, "text/plain")}

    response = client.post("/api/v1/ingest?strategy=token", files=upload)

    assert response.status_code == 200
    body = response.json()
    assert "strategy" in body


def test_ingest_default_strategy_is_token(client):
    """Omitting the ``strategy`` query parameter results in strategy "token"."""
    payload = _create_text_txt("Default strategy test with enough text to generate output.")
    upload = {"file": ("test.txt", payload, "text/plain")}

    response = client.post("/api/v1/ingest", files=upload)

    assert response.status_code == 200
    reported_strategy = response.json()["strategy"]
    assert reported_strategy == "token"


def test_ingest_question_strategy_txt(client, monkeypatch):
    """A Q&A-formatted TXT upload with strategy=question succeeds and yields chunks.

    The question chunker is replaced by the stub from
    ``_mock_question_chunker`` before the request is sent.
    """
    _mock_question_chunker(monkeypatch)

    payload = _create_text_txt("問A1：test question\n答A1：test answer with more text here to ensure chunking works properly.")
    upload = {"file": ("test.txt", payload, "text/plain")}

    response = client.post("/api/v1/ingest?strategy=question", files=upload)

    assert response.status_code == 200
    body = response.json()
    assert body["strategy"] == "question"
    assert body["chunk_count"] > 0


def test_ingest_question_strategy_no_qa_fallback(client, monkeypatch):
    """Plain text without Q&A markers is ingested without error under strategy=question.

    NOTE(review): the question chunker is swapped for the stub from
    ``_mock_question_chunker``, which returns a fixed chunk regardless of
    input, so the real fallback logic is presumably not exercised here —
    confirm this is intended.
    """
    _mock_question_chunker(monkeypatch)

    payload = _create_text_txt("This is plain text without any Q&A markers, but it needs to be long enough to generate at least one chunk when processed by the tokenizer.")
    upload = {"file": ("plain.txt", payload, "text/plain")}

    response = client.post("/api/v1/ingest?strategy=question", files=upload)

    assert response.status_code == 200
    body = response.json()
    assert body["strategy"] == "question"
    assert body["chunk_count"] > 0


def _mock_question_chunker(monkeypatch):
    """Patch ``app.utils.chunking.QuestionChunkingStrategy`` with a canned stub.

    The stub accepts the ``settings`` / ``llm_client`` constructor arguments,
    ignores them, and always reports a single Q&A chunk together with one
    matching metadata entry.
    """
    canned_chunk = "Question: What is X?\n\nAnswer: X is Y."

    class _MockQuestionChunker:
        """Stand-in chunker that returns one fixed Q&A chunk."""

        def __init__(self, settings=None, llm_client=None):
            qa_entry = {
                "strategy_type": "question",
                "section_type": "qa",
                "question_index": 0,
                "question_id": "A1",
                "question_text": "What is X?",
                "section_heading": "(A) Topic",
                "answer_contains_table": False,
                "source_page_range": [1, 2],
            }
            self._chunk_metadata = [qa_entry]
            self._max_tokens = 3000

        def _keep_first_metadata(self):
            # Rebind to a fresh one-element list so metadata matches the single chunk.
            first_only = self._chunk_metadata[:1]
            self._chunk_metadata = first_only

        def chunk(self, text):
            """Return the canned chunk as a one-element list; ``text`` is ignored."""
            self._keep_first_metadata()
            return [canned_chunk]

        async def chunk_pages(self, pages, overlap_tokens=0):
            """Return the canned chunk paired with the number 1; inputs are ignored."""
            self._keep_first_metadata()
            return [(canned_chunk, 1)]

    patch_target = "app.utils.chunking.QuestionChunkingStrategy"
    monkeypatch.setattr(patch_target, _MockQuestionChunker)