test_knowledge_pgvector_fallback.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. from __future__ import annotations
  2. import base64
  3. import sys
  4. from pathlib import Path
  5. REPO_ROOT = Path(__file__).resolve().parents[1]
  6. for module_name in list(sys.modules):
  7. if module_name == "app" or module_name.startswith("app."):
  8. del sys.modules[module_name]
  9. for path in [
  10. REPO_ROOT / "libs" / "core-domain" / "src",
  11. REPO_ROOT / "libs" / "core-shared" / "src",
  12. REPO_ROOT / "libs" / "core-db" / "src",
  13. REPO_ROOT / "services" / "knowledge-service",
  14. ]:
  15. sys.path.insert(0, str(path))
  16. from app.application.services import KnowledgeApplicationService
  17. from app.bootstrap.settings import KnowledgeServiceSettings
  18. from app.db.session import build_session_factory
  19. from app.domain.repositories import (
  20. KnowledgeBaseRepository,
  21. KnowledgeChunkRepository,
  22. KnowledgeDocumentRepository,
  23. )
  24. from app.schemas.knowledge import (
  25. KnowledgeBaseCreateRequest,
  26. KnowledgeDocumentCreateRequest,
  27. KnowledgeSearchRequest,
  28. )
  29. from core_db import Base
  30. def test_knowledge_search_falls_back_without_pgvector(tmp_path: Path) -> None:
  31. settings = KnowledgeServiceSettings(
  32. database_url=f"sqlite:///{tmp_path / 'knowledge_service.db'}",
  33. embedding_provider="local")
  34. session_factory = build_session_factory(settings)
  35. Base.metadata.create_all(bind=session_factory.kw["bind"])
  36. with session_factory() as db:
  37. service = KnowledgeApplicationService(
  38. settings=settings,
  39. base_repository=KnowledgeBaseRepository(db),
  40. document_repository=KnowledgeDocumentRepository(db),
  41. chunk_repository=KnowledgeChunkRepository(db))
  42. base = service.create_base(
  43. KnowledgeBaseCreateRequest(code="kb", name="KB")
  44. )
  45. _, chunks = service.create_document(
  46. KnowledgeDocumentCreateRequest(
  47. knowledge_base_id=base.id,
  48. title="Refund Policy",
  49. content_text="Refunds are available within seven days for eligible orders.",
  50. chunk_size=40,
  51. chunk_overlap=5)
  52. )
  53. assert chunks[0].embedding_vector is not None
  54. results = service.search(
  55. KnowledgeSearchRequest(
  56. knowledge_base_id=base.id,
  57. query="refund seven days",
  58. top_k=3)
  59. )
  60. assert results
  61. assert results[0][3]["retrieval_mode"] == "hybrid"
  62. assert results[0][3]["rerank_enabled"] is True
  63. assert "citation" in results[0][3]
  64. assert results[0][3]["weights"]["rerank"] == settings.retrieval_rerank_weight
  65. session_factory.kw["bind"].dispose()
  66. def test_create_document_parses_base64_markdown_before_indexing(tmp_path: Path) -> None:
  67. settings = KnowledgeServiceSettings(
  68. database_url=f"sqlite:///{tmp_path / 'knowledge_service.db'}",
  69. embedding_provider="local")
  70. session_factory = build_session_factory(settings)
  71. Base.metadata.create_all(bind=session_factory.kw["bind"])
  72. with session_factory() as db:
  73. service = KnowledgeApplicationService(
  74. settings=settings,
  75. base_repository=KnowledgeBaseRepository(db),
  76. document_repository=KnowledgeDocumentRepository(db),
  77. chunk_repository=KnowledgeChunkRepository(db))
  78. base = service.create_base(
  79. KnowledgeBaseCreateRequest(code="kb", name="KB")
  80. )
  81. encoded = base64.b64encode(
  82. b"# Refund Policy\nRefunds are available within seven days."
  83. ).decode("ascii")
  84. document, chunks = service.create_document(
  85. KnowledgeDocumentCreateRequest(
  86. knowledge_base_id=base.id,
  87. title="Refund Policy",
  88. source_type="markdown",
  89. content_base64=encoded,
  90. chunk_size=80,
  91. chunk_overlap=0)
  92. )
  93. assert document.source_type == "markdown"
  94. assert document.content_text.startswith("Refund Policy")
  95. assert document.metadata_json is not None
  96. assert document.metadata_json["parser_metadata"]["parser"] == "knowledge-document-parser-v1"
  97. assert chunks
  98. assert chunks[0].content_text.startswith("Refund Policy")
  99. session_factory.kw["bind"].dispose()
  100. Base.metadata.clear()