from __future__ import annotations import sys import base64 from pathlib import Path REPO_ROOT = Path(__file__).resolve().parents[1] for module_name in list(sys.modules): if module_name == "app" or module_name.startswith("app."): del sys.modules[module_name] for path in [ REPO_ROOT / "libs" / "core-domain" / "src", REPO_ROOT / "libs" / "core-shared" / "src", REPO_ROOT / "libs" / "core-db" / "src", REPO_ROOT / "services" / "knowledge-service", ]: sys.path.insert(0, str(path)) from core_db import Base from app.application.services import KnowledgeApplicationService from app.bootstrap.settings import KnowledgeServiceSettings from app.db.session import build_session_factory from app.domain.repositories import ( KnowledgeBaseRepository, KnowledgeChunkRepository, KnowledgeDocumentRepository, ) from app.schemas.knowledge import ( KnowledgeBaseCreateRequest, KnowledgeDocumentCreateRequest, KnowledgeSearchRequest, ) def test_knowledge_search_falls_back_without_pgvector(tmp_path: Path) -> None: settings = KnowledgeServiceSettings( database_url=f"sqlite:///{tmp_path / 'knowledge_service.db'}", embedding_provider="local", ) session_factory = build_session_factory(settings) Base.metadata.create_all(bind=session_factory.kw["bind"]) with session_factory() as db: service = KnowledgeApplicationService( settings=settings, base_repository=KnowledgeBaseRepository(db), document_repository=KnowledgeDocumentRepository(db), chunk_repository=KnowledgeChunkRepository(db), ) base = service.create_base( KnowledgeBaseCreateRequest(tenant_id="t1", code="kb", name="KB") ) _, chunks = service.create_document( KnowledgeDocumentCreateRequest( tenant_id="t1", knowledge_base_id=base.id, title="Refund Policy", content_text="Refunds are available within seven days for eligible orders.", chunk_size=40, chunk_overlap=5, ) ) assert chunks[0].embedding_vector is not None results = service.search( KnowledgeSearchRequest( tenant_id="t1", knowledge_base_id=base.id, query="refund seven days", top_k=3, ) ) assert results assert results[0][3]["retrieval_mode"] == "hybrid" assert results[0][3]["rerank_enabled"] is True assert "citation" in results[0][3] assert results[0][3]["weights"]["rerank"] == settings.retrieval_rerank_weight session_factory.kw["bind"].dispose() def test_create_document_parses_base64_markdown_before_indexing(tmp_path: Path) -> None: settings = KnowledgeServiceSettings( database_url=f"sqlite:///{tmp_path / 'knowledge_service.db'}", embedding_provider="local", ) session_factory = build_session_factory(settings) Base.metadata.create_all(bind=session_factory.kw["bind"]) with session_factory() as db: service = KnowledgeApplicationService( settings=settings, base_repository=KnowledgeBaseRepository(db), document_repository=KnowledgeDocumentRepository(db), chunk_repository=KnowledgeChunkRepository(db), ) base = service.create_base( KnowledgeBaseCreateRequest(tenant_id="t1", code="kb", name="KB") ) encoded = base64.b64encode( "# Refund Policy\nRefunds are available within seven days.".encode("utf-8") ).decode("ascii") document, chunks = service.create_document( KnowledgeDocumentCreateRequest( tenant_id="t1", knowledge_base_id=base.id, title="Refund Policy", source_type="markdown", content_base64=encoded, chunk_size=80, chunk_overlap=0, ) ) assert document.source_type == "markdown" assert document.content_text.startswith("Refund Policy") assert document.metadata_json is not None assert document.metadata_json["parser_metadata"]["parser"] == "knowledge-document-parser-v1" assert chunks assert chunks[0].content_text.startswith("Refund Policy") session_factory.kw["bind"].dispose() Base.metadata.clear()