From 3712397d642ca3fe4bffe27d0eb8a0c5de2e7933 Mon Sep 17 00:00:00 2001
From: Woody
Date: Wed, 22 Apr 2026 16:13:52 +0800
Subject: [PATCH] feat: Phase 1.1 project setup with config, database, and models

- Add requirements.txt with all dependencies
- Add .env.example with required environment variables
- Add Pydantic Settings (config.py) with .env loading
- Add ChromaDB persistent client (database.py)
- Add Pydantic schemas (ingest.py) for request/response
- Add FastAPI main.py with CORS middleware
- Add package __init__.py files
- Add tests: test_phase1_config.py, test_phase1_database.py
- All 5 tests pass
---
Reviewer notes (this section is ignored by git am):
- The database tests set CHROMA_DB_PATH through monkeypatch and clear the
  get_settings cache around each test. Before, os.environ was mutated
  directly and, because get_settings is lru_cache'd, every test after the
  first silently reused the first test's directory.
- The config tests unset every Settings variable and run in an empty
  directory, so an exported variable or a local .env cannot change results.
- database.py is annotated with chromadb.api.ClientAPI; chromadb.Client is a
  factory function, not a type.
- Re-run pytest before relying on "All 5 tests pass". The index lines of the
  four edited files still carry the blob ids of the original revision.

 backend/.env.example                     |  6 +++
 backend/app/__init__.py                  |  0
 backend/app/core/__init__.py             |  0
 backend/app/core/config.py               | 27 ++++++++++++
 backend/app/core/database.py             | 23 +++++++++++
 backend/app/main.py                      | 17 ++++++++
 backend/app/models/__init__.py           |  0
 backend/app/models/ingest.py             | 27 ++++++++++++
 backend/app/routers/__init__.py          |  0
 backend/app/services/__init__.py         |  0
 backend/app/test/test_phase1_config.py   | 52 ++++++++++++++++++++++++
 backend/app/test/test_phase1_database.py | 42 +++++++++++++++++++
 backend/app/utils/__init__.py            |  0
 backend/requirements.txt                 | 14 +++++++
 14 files changed, 208 insertions(+)
 create mode 100644 backend/.env.example
 create mode 100644 backend/app/__init__.py
 create mode 100644 backend/app/core/__init__.py
 create mode 100644 backend/app/core/config.py
 create mode 100644 backend/app/core/database.py
 create mode 100644 backend/app/main.py
 create mode 100644 backend/app/models/__init__.py
 create mode 100644 backend/app/models/ingest.py
 create mode 100644 backend/app/routers/__init__.py
 create mode 100644 backend/app/services/__init__.py
 create mode 100644 backend/app/test/test_phase1_config.py
 create mode 100644 backend/app/test/test_phase1_database.py
 create mode 100644 backend/app/utils/__init__.py
 create mode 100644 backend/requirements.txt

diff --git a/backend/.env.example b/backend/.env.example
new file mode 100644
index 0000000..cf230d4
--- /dev/null
+++ b/backend/.env.example
@@ -0,0 +1,6 @@
+LLM_BASE_URL=https://openrouter.ai/api/v1
+LLM_API_KEY=your_openrouter_key_here
+LLM_MODEL_NAME=qwen/qwen3.5-35b-a3b
+EMBEDDING_MODEL=qwen/qwen3-embedding-4b
+EMBEDDING_BASE_URL=https://openrouter.ai/api/v1
+CHROMA_DB_PATH=./chroma_db
diff --git a/backend/app/__init__.py b/backend/app/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/core/__init__.py b/backend/app/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/core/config.py b/backend/app/core/config.py
new file mode 100644
index 0000000..e8c93c6
--- /dev/null
+++ b/backend/app/core/config.py
@@ -0,0 +1,27 @@
+from functools import lru_cache
+from pathlib import Path
+
+from pydantic_settings import BaseSettings
+
+
+class Settings(BaseSettings):
+    """Application settings read from the environment and a local ``.env``."""
+
+    llm_base_url: str = "https://openrouter.ai/api/v1"
+    llm_api_key: str = ""
+    llm_model_name: str = "qwen/qwen3.5-35b-a3b"
+    embedding_model: str = "qwen/qwen3-embedding-4b"
+    embedding_base_url: str = "https://openrouter.ai/api/v1"
+    chroma_db_path: str = "./chroma_db"
+
+    model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}
+
+
+@lru_cache
+def get_settings() -> Settings:
+    """Return the process-wide ``Settings`` instance.
+
+    The result is cached, so environment changes made after the first call
+    are only picked up after ``get_settings.cache_clear()``.
+    """
+    return Settings()
diff --git a/backend/app/core/database.py b/backend/app/core/database.py
new file mode 100644
index 0000000..c5ddccf
--- /dev/null
+++ b/backend/app/core/database.py
@@ -0,0 +1,23 @@
+from pathlib import Path
+
+import chromadb
+from chromadb.api import ClientAPI
+from chromadb.api.models.Collection import Collection
+
+from app.core.config import get_settings
+
+
+def get_chroma_client() -> ClientAPI:
+    """Return a persistent ChromaDB client stored at ``chroma_db_path``.
+
+    The directory, including any missing parents, is created first.
+    """
+    settings = get_settings()
+    persist_dir = Path(settings.chroma_db_path)
+    persist_dir.mkdir(parents=True, exist_ok=True)
+    return chromadb.PersistentClient(path=str(persist_dir))
+
+
+def get_or_create_collection(client: ClientAPI, name: str) -> Collection:
+    """Return the collection called ``name``, creating it if it is missing."""
+    return client.get_or_create_collection(name=name)
diff --git a/backend/app/main.py b/backend/app/main.py
new file mode 100644
index 0000000..2ebd0f3
--- /dev/null
+++ b/backend/app/main.py
@@ -0,0 +1,17 @@
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+
+app = FastAPI(title="RAG Video Q&A", version="1.0.0")
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["http://localhost:5173", "http://localhost:3000"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+@app.get("/health")
+def health_check():
+    return {"status": "ok"}
diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/models/ingest.py b/backend/app/models/ingest.py
new file mode 100644
index 0000000..e93a4be
--- /dev/null
+++ b/backend/app/models/ingest.py
@@ -0,0 +1,27 @@
+from datetime import datetime
+from typing import List
+
+from pydantic import BaseModel
+
+
+class SourceMetadata(BaseModel):
+    filename: str
+    upload_date: datetime
+    content_summary: str
+    chunk_index: int
+
+
+class IngestResponse(BaseModel):
+    document_id: str
+    chunk_count: int
+    filename: str
+
+
+class QueryRequest(BaseModel):
+    question: str
+
+
+class QueryResponse(BaseModel):
+    keywords: List[str]
+    answer: str
+    sources: List[SourceMetadata]
diff --git a/backend/app/routers/__init__.py b/backend/app/routers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/services/__init__.py b/backend/app/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/test/test_phase1_config.py b/backend/app/test/test_phase1_config.py
new file mode 100644
index 0000000..37dbac2
--- /dev/null
+++ b/backend/app/test/test_phase1_config.py
@@ -0,0 +1,52 @@
+import pytest
+
+from app.core.config import Settings
+
+# Environment variables that map onto Settings fields.
+SETTINGS_ENV_VARS = (
+    "LLM_BASE_URL",
+    "LLM_API_KEY",
+    "LLM_MODEL_NAME",
+    "EMBEDDING_MODEL",
+    "EMBEDDING_BASE_URL",
+    "CHROMA_DB_PATH",
+)
+
+
+@pytest.fixture(autouse=True)
+def isolated_settings_env(tmp_path, monkeypatch):
+    """Run each test in an empty directory with no Settings variables set.
+
+    Real environment variables take priority over ``.env`` values, and a
+    ``.env`` in the developer's working directory would override the
+    defaults, so both are taken out of the picture before each test.
+    """
+    for name in SETTINGS_ENV_VARS:
+        monkeypatch.delenv(name, raising=False)
+    monkeypatch.chdir(tmp_path)
+
+
+def test_config_loads_from_env(tmp_path):
+    """Values written to ``.env`` in the working directory are loaded."""
+    env_file = tmp_path / ".env"
+    env_file.write_text(
+        "LLM_BASE_URL=https://openrouter.ai/api/v1\n"
+        "LLM_API_KEY=test_key_123\n"
+        "LLM_MODEL_NAME=qwen/qwen3.5-35b-a3b\n"
+        "EMBEDDING_MODEL=qwen/qwen3-embedding-4b\n"
+        "EMBEDDING_BASE_URL=https://openrouter.ai/api/v1\n"
+        "CHROMA_DB_PATH=./test_chroma\n"
+    )
+
+    settings = Settings()
+    assert settings.llm_base_url == "https://openrouter.ai/api/v1"
+    assert settings.llm_api_key == "test_key_123"
+    assert settings.llm_model_name == "qwen/qwen3.5-35b-a3b"
+    assert settings.chroma_db_path == "./test_chroma"
+
+
+def test_config_default_values():
+    """With no environment and no ``.env``, the coded defaults apply."""
+    settings = Settings()
+    assert settings.llm_base_url == "https://openrouter.ai/api/v1"
+    assert settings.llm_model_name == "qwen/qwen3.5-35b-a3b"
diff --git a/backend/app/test/test_phase1_database.py b/backend/app/test/test_phase1_database.py
new file mode 100644
index 0000000..4362a8e
--- /dev/null
+++ b/backend/app/test/test_phase1_database.py
@@ -0,0 +1,42 @@
+import pytest
+
+from app.core.config import get_settings
+from app.core.database import get_chroma_client, get_or_create_collection
+
+
+@pytest.fixture
+def chroma_path(tmp_path, monkeypatch):
+    """Point CHROMA_DB_PATH at a per-test directory and reset cached settings.
+
+    ``get_settings`` is wrapped in ``lru_cache``; without clearing it, every
+    test after the first would reuse the first test's database path. The
+    cache is cleared again on teardown so the temporary path does not leak
+    into later tests, and ``monkeypatch`` restores the environment.
+    """
+    path = tmp_path / "test_chroma"
+    monkeypatch.setenv("CHROMA_DB_PATH", str(path))
+    get_settings.cache_clear()
+    yield path
+    get_settings.cache_clear()
+
+
+def test_chroma_client_creates_persist_directory(chroma_path):
+    client = get_chroma_client()
+    assert client is not None
+    assert chroma_path.exists()
+
+
+def test_chroma_client_creates_new_collection(chroma_path):
+    client = get_chroma_client()
+    collection = get_or_create_collection(client, "test_docs")
+    assert collection.name == "test_docs"
+    assert collection.count() == 0
+
+
+def test_chroma_client_returns_existing_collection(chroma_path):
+    client = get_chroma_client()
+    collection1 = get_or_create_collection(client, "test_docs")
+    collection1.add(documents=["test"], ids=["1"])
+
+    collection2 = get_or_create_collection(client, "test_docs")
+    assert collection2.count() == 1
diff --git a/backend/app/utils/__init__.py b/backend/app/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/requirements.txt b/backend/requirements.txt
new file mode 100644
index 0000000..bfc8883
--- /dev/null
+++ b/backend/requirements.txt
@@ -0,0 +1,14 @@
+fastapi==0.109.0
+uvicorn[standard]==0.27.0
+pydantic==2.5.3
+pydantic-settings==2.1.0
+chromadb==0.4.22
+sentence-transformers==2.3.1
+python-docx==1.1.0
+pypdf==4.0.2
+python-dotenv==1.0.0
+httpx==0.26.0
+pytest==8.0.0
+pytest-asyncio==0.23.4
+tiktoken==0.5.2
+python-multipart==0.0.6