test_knowledge_pgvector_fallback.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. from __future__ import annotations
  2. import sys
  3. import base64
  4. from pathlib import Path
  5. REPO_ROOT = Path(__file__).resolve().parents[1]
  6. for module_name in list(sys.modules):
  7. if module_name == "app" or module_name.startswith("app."):
  8. del sys.modules[module_name]
  9. for path in [
  10. REPO_ROOT / "libs" / "core-domain" / "src",
  11. REPO_ROOT / "libs" / "core-shared" / "src",
  12. REPO_ROOT / "libs" / "core-db" / "src",
  13. REPO_ROOT / "services" / "knowledge-service",
  14. ]:
  15. sys.path.insert(0, str(path))
  16. from core_db import Base
  17. from app.application.services import KnowledgeApplicationService
  18. from app.bootstrap.settings import KnowledgeServiceSettings
  19. from app.db.session import build_session_factory
  20. from app.domain.repositories import (
  21. KnowledgeBaseRepository,
  22. KnowledgeChunkRepository,
  23. KnowledgeDocumentRepository,
  24. )
  25. from app.schemas.knowledge import (
  26. KnowledgeBaseCreateRequest,
  27. KnowledgeDocumentCreateRequest,
  28. KnowledgeSearchRequest,
  29. )
  30. def test_knowledge_search_falls_back_without_pgvector(tmp_path: Path) -> None:
  31. settings = KnowledgeServiceSettings(
  32. database_url=f"sqlite:///{tmp_path / 'knowledge_service.db'}",
  33. embedding_provider="local",
  34. )
  35. session_factory = build_session_factory(settings)
  36. Base.metadata.create_all(bind=session_factory.kw["bind"])
  37. with session_factory() as db:
  38. service = KnowledgeApplicationService(
  39. settings=settings,
  40. base_repository=KnowledgeBaseRepository(db),
  41. document_repository=KnowledgeDocumentRepository(db),
  42. chunk_repository=KnowledgeChunkRepository(db),
  43. )
  44. base = service.create_base(
  45. KnowledgeBaseCreateRequest(tenant_id="t1", code="kb", name="KB")
  46. )
  47. _, chunks = service.create_document(
  48. KnowledgeDocumentCreateRequest(
  49. tenant_id="t1",
  50. knowledge_base_id=base.id,
  51. title="Refund Policy",
  52. content_text="Refunds are available within seven days for eligible orders.",
  53. chunk_size=40,
  54. chunk_overlap=5,
  55. )
  56. )
  57. assert chunks[0].embedding_vector is not None
  58. results = service.search(
  59. KnowledgeSearchRequest(
  60. tenant_id="t1",
  61. knowledge_base_id=base.id,
  62. query="refund seven days",
  63. top_k=3,
  64. )
  65. )
  66. assert results
  67. assert results[0][3]["retrieval_mode"] == "hybrid"
  68. assert results[0][3]["rerank_enabled"] is True
  69. assert "citation" in results[0][3]
  70. assert results[0][3]["weights"]["rerank"] == settings.retrieval_rerank_weight
  71. session_factory.kw["bind"].dispose()
  72. def test_create_document_parses_base64_markdown_before_indexing(tmp_path: Path) -> None:
  73. settings = KnowledgeServiceSettings(
  74. database_url=f"sqlite:///{tmp_path / 'knowledge_service.db'}",
  75. embedding_provider="local",
  76. )
  77. session_factory = build_session_factory(settings)
  78. Base.metadata.create_all(bind=session_factory.kw["bind"])
  79. with session_factory() as db:
  80. service = KnowledgeApplicationService(
  81. settings=settings,
  82. base_repository=KnowledgeBaseRepository(db),
  83. document_repository=KnowledgeDocumentRepository(db),
  84. chunk_repository=KnowledgeChunkRepository(db),
  85. )
  86. base = service.create_base(
  87. KnowledgeBaseCreateRequest(tenant_id="t1", code="kb", name="KB")
  88. )
  89. encoded = base64.b64encode(
  90. "# Refund Policy\nRefunds are available within seven days.".encode("utf-8")
  91. ).decode("ascii")
  92. document, chunks = service.create_document(
  93. KnowledgeDocumentCreateRequest(
  94. tenant_id="t1",
  95. knowledge_base_id=base.id,
  96. title="Refund Policy",
  97. source_type="markdown",
  98. content_base64=encoded,
  99. chunk_size=80,
  100. chunk_overlap=0,
  101. )
  102. )
  103. assert document.source_type == "markdown"
  104. assert document.content_text.startswith("Refund Policy")
  105. assert document.metadata_json is not None
  106. assert document.metadata_json["parser_metadata"]["parser"] == "knowledge-document-parser-v1"
  107. assert chunks
  108. assert chunks[0].content_text.startswith("Refund Policy")
  109. session_factory.kw["bind"].dispose()
  110. Base.metadata.clear()