| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125 |
- from __future__ import annotations
- import sys
- import base64
- from pathlib import Path
# Parent of the directory that contains this file.
REPO_ROOT = Path(__file__).resolve().parents[1]

# Evict any already-imported ``app`` package (and its submodules) from the
# module cache so later ``app.*`` imports are looked up again via sys.path.
for module_name in tuple(sys.modules):
    if module_name.split(".", 1)[0] == "app":
        sys.modules.pop(module_name)

# Each entry is pushed to the front of sys.path in turn, so the last one
# listed (the knowledge-service directory) ends up with the highest priority.
for path in (
    REPO_ROOT.joinpath("libs", "core-domain", "src"),
    REPO_ROOT.joinpath("libs", "core-shared", "src"),
    REPO_ROOT.joinpath("libs", "core-db", "src"),
    REPO_ROOT.joinpath("services", "knowledge-service"),
):
    sys.path.insert(0, str(path))
- from core_db import Base
- from app.application.services import KnowledgeApplicationService
- from app.bootstrap.settings import KnowledgeServiceSettings
- from app.db.session import build_session_factory
- from app.domain.repositories import (
- KnowledgeBaseRepository,
- KnowledgeChunkRepository,
- KnowledgeDocumentRepository,
- )
- from app.schemas.knowledge import (
- KnowledgeBaseCreateRequest,
- KnowledgeDocumentCreateRequest,
- KnowledgeSearchRequest,
- )
def test_knowledge_search_falls_back_without_pgvector(tmp_path: Path) -> None:
    """Index one document on a SQLite database and check the search metadata.

    The service is built against a throwaway SQLite file under ``tmp_path``
    with the ``"local"`` embedding provider. After creating a knowledge base
    and a single chunked document, a search must return at least one hit whose
    metadata reports hybrid retrieval, enabled reranking, a citation, and the
    rerank weight taken from the settings.

    Args:
        tmp_path: pytest-provided temporary directory that holds the database.
    """
    settings = KnowledgeServiceSettings(
        database_url=f"sqlite:///{tmp_path / 'knowledge_service.db'}",
        embedding_provider="local",
    )
    session_factory = build_session_factory(settings)
    # The bind the session factory was configured with; used both to create
    # the schema and to release the database on teardown.
    engine = session_factory.kw["bind"]
    Base.metadata.create_all(bind=engine)

    try:
        with session_factory() as db:
            service = KnowledgeApplicationService(
                settings=settings,
                base_repository=KnowledgeBaseRepository(db),
                document_repository=KnowledgeDocumentRepository(db),
                chunk_repository=KnowledgeChunkRepository(db),
            )
            base = service.create_base(
                KnowledgeBaseCreateRequest(tenant_id="t1", code="kb", name="KB")
            )
            _, chunks = service.create_document(
                KnowledgeDocumentCreateRequest(
                    tenant_id="t1",
                    knowledge_base_id=base.id,
                    title="Refund Policy",
                    content_text="Refunds are available within seven days for eligible orders.",
                    chunk_size=40,
                    chunk_overlap=5,
                )
            )
            assert chunks[0].embedding_vector is not None

            results = service.search(
                KnowledgeSearchRequest(
                    tenant_id="t1",
                    knowledge_base_id=base.id,
                    query="refund seven days",
                    top_k=3,
                )
            )
            assert results
            # Index 3 of a hit is the mapping inspected below; it is keyed by
            # retrieval-related names.
            hit_metadata = results[0][3]
            assert hit_metadata["retrieval_mode"] == "hybrid"
            assert hit_metadata["rerank_enabled"] is True
            assert "citation" in hit_metadata
            assert hit_metadata["weights"]["rerank"] == settings.retrieval_rerank_weight
    finally:
        # Dispose even when a call or assertion above fails, so the SQLite
        # file under tmp_path is not left open by a failing test.
        engine.dispose()
def test_create_document_parses_base64_markdown_before_indexing(tmp_path: Path) -> None:
    """Submit a base64-encoded markdown document and check it is parsed first.

    The document body is supplied only as ``content_base64`` with
    ``source_type="markdown"``. The stored document and its first chunk must
    start with ``"Refund Policy"`` (the input starts with ``"# Refund Policy"``,
    so the heading marker must be gone), and the document metadata must name
    the parser ``"knowledge-document-parser-v1"``.

    Args:
        tmp_path: pytest-provided temporary directory that holds the database.
    """
    settings = KnowledgeServiceSettings(
        database_url=f"sqlite:///{tmp_path / 'knowledge_service.db'}",
        embedding_provider="local",
    )
    session_factory = build_session_factory(settings)
    # The bind the session factory was configured with; used both to create
    # the schema and to release the database on teardown.
    engine = session_factory.kw["bind"]
    Base.metadata.create_all(bind=engine)

    try:
        with session_factory() as db:
            service = KnowledgeApplicationService(
                settings=settings,
                base_repository=KnowledgeBaseRepository(db),
                document_repository=KnowledgeDocumentRepository(db),
                chunk_repository=KnowledgeChunkRepository(db),
            )
            base = service.create_base(
                KnowledgeBaseCreateRequest(tenant_id="t1", code="kb", name="KB")
            )

            markdown_source = "# Refund Policy\nRefunds are available within seven days."
            encoded = base64.b64encode(markdown_source.encode("utf-8")).decode("ascii")

            document, chunks = service.create_document(
                KnowledgeDocumentCreateRequest(
                    tenant_id="t1",
                    knowledge_base_id=base.id,
                    title="Refund Policy",
                    source_type="markdown",
                    content_base64=encoded,
                    chunk_size=80,
                    chunk_overlap=0,
                )
            )

            assert document.source_type == "markdown"
            assert document.content_text.startswith("Refund Policy")
            assert document.metadata_json is not None
            parser_metadata = document.metadata_json["parser_metadata"]
            assert parser_metadata["parser"] == "knowledge-document-parser-v1"
            assert chunks
            assert chunks[0].content_text.startswith("Refund Policy")
    finally:
        # Run teardown even when a call or assertion above fails, so the
        # engine is released and the shared metadata ends up in the same
        # state regardless of the test outcome.
        engine.dispose()
        # Removes every table registered on the shared Base.metadata.
        # NOTE(review): presumably this lets later test modules re-import the
        # models without duplicate-table errors; confirm. It also appears to
        # make this module order-dependent: a test calling create_all() after
        # this point would find no tables to create.
        Base.metadata.clear()
|