68 lines
2.0 KiB
Python
68 lines
2.0 KiB
Python
import re
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
import pytest
|
|
import sys
|
|
from pathlib import Path
|
|
import importlib.util
|
|
|
|
|
|
# Load the metadata extractor directly from its file path so these tests do
# not depend on the package being importable (avoids package-path issues).
# parents[1] is the directory above the one containing this test file; the
# module is expected at backend/app/utils/metadata.py.
MODULE_PATH = Path(__file__).resolve().parents[1] / "utils" / "metadata.py"
spec = importlib.util.spec_from_file_location("metadata_module", str(MODULE_PATH))
# Validate the spec *before* it is used: checking it only after
# module_from_spec() would let a missing spec surface as an opaque
# AttributeError instead of this explicit failure.
assert spec is not None and spec.loader is not None, (
    f"could not build an import spec for {MODULE_PATH}"
)
metadata_module = importlib.util.module_from_spec(spec)
# Executes the module body; raises FileNotFoundError if MODULE_PATH is absent.
spec.loader.exec_module(metadata_module)
extract_metadata = metadata_module.extract_metadata
|
|
|
|
|
|
def _is_iso8601(s: str) -> bool:
|
|
try:
|
|
datetime.fromisoformat(s)
|
|
return True
|
|
except ValueError:
|
|
return False
|
|
|
|
|
|
def test_extract_metadata_basic(tmp_path):
    """Check per-chunk metadata for an existing file with two chunks.

    The file lives in a directory whose name contains spaces. The first chunk
    is 250 characters long and its summary is expected to be the first 200;
    the second chunk is short and its summary is expected to be unchanged.
    """
    # Create a real file on disk so the path handed to the extractor exists.
    target = tmp_path / "dir with spaces" / "sample.txt"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text("content")

    result = extract_metadata(str(target), ["a" * 250, "short"])

    assert isinstance(result, list)
    assert len(result) == 2

    # First chunk: long content.
    first = result[0]
    assert first["filename"] == "sample.txt"
    assert first["chunk_index"] == 0
    assert first["upload_date"] is not None
    assert _is_iso8601(first["upload_date"])
    assert first["content_summary"] == "a" * 200

    # Second chunk: short content.
    second = result[1]
    assert second["filename"] == "sample.txt"
    assert second["chunk_index"] == 1
    assert second["content_summary"] == "short"
|
|
|
|
|
|
def test_extract_metadata_empty_chunks(tmp_path):
    """An existing file with no chunks should produce an empty list."""
    source = tmp_path / "file.txt"
    source.write_text("data")

    assert extract_metadata(str(source), []) == []
|
|
|
|
|
|
def test_extract_metadata_missing_file_raises(tmp_path):
    """A path that does not exist on disk should raise FileNotFoundError."""
    # Neither the parent directory nor the file is ever created.
    absent_path = str(tmp_path / "nonexistent" / "nofile.txt")

    with pytest.raises(FileNotFoundError):
        extract_metadata(absent_path, ["data"])
|