test_knowledge_pgvector_fallback.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. from __future__ import annotations
  2. import sys
  3. import base64
  4. from pathlib import Path
  5. REPO_ROOT = Path(__file__).resolve().parents[1]
  6. for module_name in list(sys.modules):
  7. if module_name == "app" or module_name.startswith("app."):
  8. del sys.modules[module_name]
  9. for path in [
  10. REPO_ROOT / "libs" / "core-domain" / "src",
  11. REPO_ROOT / "libs" / "core-shared" / "src",
  12. REPO_ROOT / "libs" / "core-db" / "src",
  13. REPO_ROOT / "services" / "knowledge-service",
  14. ]:
  15. sys.path.insert(0, str(path))
  16. from core_db import Base
  17. from app.application.services import KnowledgeApplicationService
  18. from app.bootstrap.settings import KnowledgeServiceSettings
  19. from app.db.session import build_session_factory
  20. from app.domain.repositories import (
  21. KnowledgeBaseRepository,
  22. KnowledgeChunkRepository,
  23. KnowledgeDocumentRepository,
  24. )
  25. from app.schemas.knowledge import (
  26. KnowledgeBaseCreateRequest,
  27. KnowledgeDocumentCreateRequest,
  28. KnowledgeSearchRequest,
  29. )
  30. def test_knowledge_search_falls_back_without_pgvector(tmp_path: Path) -> None:
  31. settings = KnowledgeServiceSettings(
  32. database_url=f"sqlite:///{tmp_path / 'knowledge_service.db'}",
  33. embedding_provider="local",
  34. )
  35. session_factory = build_session_factory(settings)
  36. Base.metadata.create_all(bind=session_factory.kw["bind"])
  37. with session_factory() as db:
  38. service = KnowledgeApplicationService(
  39. settings=settings,
  40. base_repository=KnowledgeBaseRepository(db),
  41. document_repository=KnowledgeDocumentRepository(db),
  42. chunk_repository=KnowledgeChunkRepository(db),
  43. )
  44. base = service.create_base(
  45. KnowledgeBaseCreateRequest(tenant_id="t1", code="kb", name="KB")
  46. )
  47. _, chunks = service.create_document(
  48. KnowledgeDocumentCreateRequest(
  49. tenant_id="t1",
  50. knowledge_base_id=base.id,
  51. title="Refund Policy",
  52. content_text="Refunds are available within seven days for eligible orders.",
  53. chunk_size=40,
  54. chunk_overlap=5,
  55. )
  56. )
  57. assert chunks[0].embedding_vector is not None
  58. results = service.search(
  59. KnowledgeSearchRequest(
  60. tenant_id="t1",
  61. knowledge_base_id=base.id,
  62. query="refund seven days",
  63. top_k=3,
  64. )
  65. )
  66. assert results
  67. assert results[0][3]["retrieval_mode"] == "hybrid"
  68. session_factory.kw["bind"].dispose()
  69. def test_create_document_parses_base64_markdown_before_indexing(tmp_path: Path) -> None:
  70. settings = KnowledgeServiceSettings(
  71. database_url=f"sqlite:///{tmp_path / 'knowledge_service.db'}",
  72. embedding_provider="local",
  73. )
  74. session_factory = build_session_factory(settings)
  75. Base.metadata.create_all(bind=session_factory.kw["bind"])
  76. with session_factory() as db:
  77. service = KnowledgeApplicationService(
  78. settings=settings,
  79. base_repository=KnowledgeBaseRepository(db),
  80. document_repository=KnowledgeDocumentRepository(db),
  81. chunk_repository=KnowledgeChunkRepository(db),
  82. )
  83. base = service.create_base(
  84. KnowledgeBaseCreateRequest(tenant_id="t1", code="kb", name="KB")
  85. )
  86. encoded = base64.b64encode(
  87. "# Refund Policy\nRefunds are available within seven days.".encode("utf-8")
  88. ).decode("ascii")
  89. document, chunks = service.create_document(
  90. KnowledgeDocumentCreateRequest(
  91. tenant_id="t1",
  92. knowledge_base_id=base.id,
  93. title="Refund Policy",
  94. source_type="markdown",
  95. content_base64=encoded,
  96. chunk_size=80,
  97. chunk_overlap=0,
  98. )
  99. )
  100. assert document.source_type == "markdown"
  101. assert document.content_text.startswith("Refund Policy")
  102. assert document.metadata_json is not None
  103. assert document.metadata_json["parser_metadata"]["parser"] == "knowledge-document-parser-v1"
  104. assert chunks
  105. assert chunks[0].content_text.startswith("Refund Policy")
  106. session_factory.kw["bind"].dispose()
  107. Base.metadata.clear()