feat: Phase 1.1 project setup with config, database, and models

- Add requirements.txt with all dependencies
- Add .env.example with required environment variables
- Add Pydantic Settings (config.py) with .env loading
- Add ChromaDB persistent client (database.py)
- Add Pydantic schemas (ingest.py) for request/response
- Add FastAPI main.py with CORS middleware
- Add package __init__.py files
- Add tests: test_phase1_config.py, test_phase1_database.py
- All 5 tests pass
This commit is contained in:
Woody 2026-04-22 16:13:52 +08:00
parent abffc9cf1d
commit 3712397d64
14 changed files with 172 additions and 0 deletions

6
backend/.env.example Normal file
View File

@ -0,0 +1,6 @@
LLM_BASE_URL=https://openrouter.ai/api/v1
LLM_API_KEY=your_openrouter_key_here
LLM_MODEL_NAME=qwen/qwen3.5-35b-a3b
EMBEDDING_MODEL=qwen/qwen3-embedding-4b
EMBEDDING_BASE_URL=https://openrouter.ai/api/v1
CHROMA_DB_PATH=./chroma_db

0
backend/app/__init__.py Normal file
View File

View File

View File

@ -0,0 +1,20 @@
from functools import lru_cache
from pathlib import Path
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Application configuration for the LLM, embedding and ChromaDB backends.

    Every field has a default, so the class can be instantiated without any
    environment present. Values are additionally read from a ``.env`` file
    (UTF-8) resolved relative to the current working directory.
    """

    # Where and how to read the optional dotenv file.
    model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}

    # --- Chat / completion model ---------------------------------------
    llm_base_url: str = "https://openrouter.ai/api/v1"
    # Empty by default: a missing key is NOT detected when settings load.
    llm_api_key: str = ""
    llm_model_name: str = "qwen/qwen3.5-35b-a3b"

    # --- Embedding model -----------------------------------------------
    embedding_model: str = "qwen/qwen3-embedding-4b"
    embedding_base_url: str = "https://openrouter.ai/api/v1"

    # --- Vector store ----------------------------------------------------
    chroma_db_path: str = "./chroma_db"
@lru_cache
def get_settings() -> Settings:
    """Return the shared ``Settings`` object.

    The instance is built on the first call and memoized by ``lru_cache``;
    later calls return the same object, so environment changes made after
    the first call are not picked up unless ``get_settings.cache_clear()``
    is invoked.
    """
    settings = Settings()
    return settings

View File

@ -0,0 +1,16 @@
from pathlib import Path
import chromadb
from app.core.config import get_settings
def get_chroma_client() -> chromadb.ClientAPI:
    """Create a persistent ChromaDB client rooted at the configured path.

    The storage directory is taken from ``get_settings().chroma_db_path`` and
    is created (including missing parents) before the client is opened.

    Returns:
        A client backed by on-disk storage. The annotation is
        ``chromadb.ClientAPI`` because ``chromadb.Client`` is a factory
        function rather than a type.
    """
    settings = get_settings()
    persist_dir = Path(settings.chroma_db_path)
    # Make sure the directory exists before the client is opened on it.
    persist_dir.mkdir(parents=True, exist_ok=True)
    return chromadb.PersistentClient(path=str(persist_dir))
def get_or_create_collection(
    client: chromadb.ClientAPI, name: str
) -> chromadb.Collection:
    """Return the collection called ``name``, creating it if it is absent.

    Args:
        client: ChromaDB client to operate on (e.g. the one returned by
            ``get_chroma_client``).
        name: Collection name to look up or create.

    Returns:
        The existing or newly created collection.
    """
    return client.get_or_create_collection(name=name)

17
backend/app/main.py Normal file
View File

@ -0,0 +1,17 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
# ASGI application object; routes are registered on it via decorators.
app = FastAPI(title="RAG Video Q&A", version="1.0.0")

# CORS policy: only the two local origins below may make cross-origin
# requests (presumably frontend dev servers — confirm), with credentials
# allowed and no restriction on methods or headers.
_cors_options = {
    "allow_origins": ["http://localhost:5173", "http://localhost:3000"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)
@app.get("/health")
def health_check():
    """Handle ``GET /health`` by returning a fixed ``{"status": "ok"}`` body."""
    payload = {"status": "ok"}
    return payload

View File

View File

@ -0,0 +1,27 @@
from datetime import datetime
from typing import List
from pydantic import BaseModel
class SourceMetadata(BaseModel):
    """Provenance record attached to a piece of source content."""

    # Name of the file the content came from.
    filename: str
    # When the file was uploaded (timezone handling is not specified here).
    upload_date: datetime
    # Short textual summary of the content.
    content_summary: str
    # Index of the chunk within its file (presumably zero-based — confirm).
    chunk_index: int
class IngestResponse(BaseModel):
    """Response schema describing the result of ingesting one file."""

    # Identifier assigned to the ingested document.
    document_id: str
    # Number of chunks associated with the document.
    chunk_count: int
    # Name of the ingested file.
    filename: str
class QueryRequest(BaseModel):
    """Request schema carrying a single question."""

    # Free-text question; no length or emptiness constraint is declared.
    question: str
class QueryResponse(BaseModel):
    """Response schema for a query: keywords, an answer, and its sources."""

    # Keywords associated with the query (origin not shown here — confirm).
    keywords: List[str]
    # Answer text.
    answer: str
    # Metadata for each source backing the answer.
    sources: List[SourceMetadata]

View File

View File

View File

@ -0,0 +1,33 @@
import pytest
import os
from pathlib import Path
def test_config_loads_from_env(tmp_path, monkeypatch):
    """Settings picks up values from a ``.env`` file in the working directory."""
    dotenv_values = {
        "LLM_BASE_URL": "https://openrouter.ai/api/v1",
        "LLM_API_KEY": "test_key_123",
        "LLM_MODEL_NAME": "qwen/qwen3.5-35b-a3b",
        "EMBEDDING_MODEL": "qwen/qwen3-embedding-4b",
        "EMBEDDING_BASE_URL": "https://openrouter.ai/api/v1",
        "CHROMA_DB_PATH": "./test_chroma",
    }
    env_file = tmp_path / ".env"
    env_file.write_text(
        "".join(f"{key}={value}\n" for key, value in dotenv_values.items()),
        encoding="utf-8",
    )

    # Real environment variables take precedence over the dotenv file, so
    # clear any that are set on the machine running the tests.
    for key in dotenv_values:
        monkeypatch.delenv(key, raising=False)

    # Settings resolves ".env" relative to the current working directory.
    monkeypatch.chdir(tmp_path)

    from app.core.config import Settings

    settings = Settings()

    assert settings.llm_base_url == "https://openrouter.ai/api/v1"
    assert settings.llm_api_key == "test_key_123"
    assert settings.llm_model_name == "qwen/qwen3.5-35b-a3b"
    assert settings.chroma_db_path == "./test_chroma"
def test_config_default_values(monkeypatch):
    """Settings falls back to its built-in defaults when nothing overrides them."""
    # Clear every variable whose default is asserted below.
    for key in ("LLM_BASE_URL", "LLM_MODEL_NAME"):
        monkeypatch.delenv(key, raising=False)

    from app.core.config import Settings

    # _env_file=None disables dotenv loading, so a developer's local ".env"
    # in the working directory cannot mask the defaults.
    settings = Settings(_env_file=None)

    assert settings.llm_base_url == "https://openrouter.ai/api/v1"
    assert settings.llm_model_name == "qwen/qwen3.5-35b-a3b"

View File

@ -0,0 +1,39 @@
import pytest
from pathlib import Path
def test_chroma_client_creates_persist_directory(tmp_path, monkeypatch):
    """get_chroma_client creates the configured storage directory."""
    from app.core.config import get_settings
    from app.core.database import get_chroma_client

    persist_dir = tmp_path / "test_chroma"
    # monkeypatch restores the variable afterwards, unlike a direct
    # os.environ assignment.
    monkeypatch.setenv("CHROMA_DB_PATH", str(persist_dir))
    # get_settings is memoized; drop any instance built before this test so
    # the path above is actually used.
    get_settings.cache_clear()
    try:
        client = get_chroma_client()

        assert client is not None
        assert persist_dir.exists()
    finally:
        # Do not leak this test's temporary path into later tests.
        get_settings.cache_clear()
def test_chroma_client_creates_new_collection(tmp_path, monkeypatch):
    """A collection requested on a fresh database is created empty."""
    from app.core.config import get_settings
    from app.core.database import get_chroma_client, get_or_create_collection

    # monkeypatch restores the variable afterwards, unlike a direct
    # os.environ assignment.
    monkeypatch.setenv("CHROMA_DB_PATH", str(tmp_path / "test_chroma"))
    # get_settings is memoized; without clearing it this test could reuse a
    # database populated by another test and see a non-zero count.
    get_settings.cache_clear()
    try:
        client = get_chroma_client()
        collection = get_or_create_collection(client, "test_docs")

        assert collection.name == "test_docs"
        assert collection.count() == 0
    finally:
        # Do not leak this test's temporary path into later tests.
        get_settings.cache_clear()
def test_chroma_client_returns_existing_collection(tmp_path, monkeypatch):
    """Requesting an existing collection returns it with its stored documents."""
    from app.core.config import get_settings
    from app.core.database import get_chroma_client, get_or_create_collection

    # monkeypatch restores the variable afterwards, unlike a direct
    # os.environ assignment.
    monkeypatch.setenv("CHROMA_DB_PATH", str(tmp_path / "test_chroma"))
    # get_settings is memoized; clear it so this test works on its own
    # temporary database rather than one cached by an earlier test.
    get_settings.cache_clear()
    try:
        client = get_chroma_client()

        first = get_or_create_collection(client, "test_docs")
        first.add(documents=["test"], ids=["1"])

        second = get_or_create_collection(client, "test_docs")
        assert second.count() == 1
    finally:
        # Do not leak this test's temporary path into later tests.
        get_settings.cache_clear()

View File

14
backend/requirements.txt Normal file
View File

@ -0,0 +1,14 @@
fastapi==0.109.0
uvicorn[standard]==0.27.0
pydantic==2.5.3
pydantic-settings==2.1.0
chromadb==0.4.22
sentence-transformers==2.3.1
python-docx==1.1.0
pypdf==4.0.2
python-dotenv==1.0.0
httpx==0.26.0
pytest==8.0.0
pytest-asyncio==0.23.4
tiktoken==0.5.2
python-multipart==0.0.6