test_knowledge_service_api.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. from pathlib import Path
  2. from tests.conftest import (
  3. build_fastapi_test_client,
  4. build_postgres_database_url,
  5. build_postgres_engine,
  6. prepare_known_service_import,
  7. )
  8. def test_knowledge_service_post_contract_matches_frontend(
  9. tmp_path: Path,
  10. monkeypatch,
  11. ) -> None:
  12. prepare_known_service_import("knowledge-service")
  13. from app.bootstrap.app import create_app
  14. from app.db.models import Base
  15. from core_db import create_session_factory
  16. database_url = build_postgres_database_url(tmp_path, "knowledge-api")
  17. monkeypatch.setenv("AGENT_PLATFORM_DATABASE_URL", database_url)
  18. monkeypatch.setenv("AGENT_PLATFORM_OBJECT_STORAGE_BACKEND", "memory")
  19. engine = build_postgres_engine(database_url)
  20. Base.metadata.create_all(engine)
  21. app = create_app()
  22. app.state.session_factory = create_session_factory(engine)
  23. client = build_fastapi_test_client(app)
  24. storage_health_response = client.post("/knowledge/storage/health", json={})
  25. assert storage_health_response.status_code == 200
  26. assert storage_health_response.json()["data"]["backend"] == "memory"
  27. assert storage_health_response.json()["data"]["available"] is True
  28. base_response = client.post(
  29. "/knowledge/bases/create",
  30. json={
  31. "name": "Product Knowledge",
  32. "description": "Customer-facing docs",
  33. "metadata": {"owner": "support"},
  34. },
  35. )
  36. assert base_response.status_code == 200
  37. base_payload = base_response.json()["data"]
  38. assert base_payload["name"] == "Product Knowledge"
  39. assert "code" not in base_payload
  40. assert base_payload["createdTime"]
  41. list_bases_response = client.post(
  42. "/knowledge/bases/list",
  43. json={"page": 1, "pageSize": 20, "keyword": "Product"},
  44. )
  45. assert list_bases_response.status_code == 200
  46. assert list_bases_response.json()["data"]["total"] == 1
  47. archive_base_response = client.post(
  48. "/knowledge/bases/status",
  49. json={"knowledgeBaseId": base_payload["id"], "status": "archived"},
  50. )
  51. assert archive_base_response.status_code == 200
  52. assert archive_base_response.json()["data"]["status"] == "archived"
  53. restore_base_response = client.post(
  54. "/knowledge/bases/status",
  55. json={"knowledgeBaseId": base_payload["id"], "status": "active"},
  56. )
  57. assert restore_base_response.status_code == 200
  58. assert restore_base_response.json()["data"]["status"] == "active"
  59. document_response = client.post(
  60. "/knowledge/documents/create",
  61. json={
  62. "knowledgeBaseId": base_payload["id"],
  63. "title": "Invoice FAQ",
  64. "sourceType": "markdown",
  65. "contentText": "# Invoice FAQ\nCustomers can download invoices from Billing.",
  66. "chunkSize": 40,
  67. "chunkOverlap": 5,
  68. "asyncMode": False,
  69. "metadata": {"category": "billing"},
  70. },
  71. )
  72. assert document_response.status_code == 200
  73. document_payload = document_response.json()["data"]["document"]
  74. chunk_payloads = document_response.json()["data"]["chunks"]
  75. assert document_payload["knowledgeBaseId"] == base_payload["id"]
  76. assert document_payload["status"] == "indexed"
  77. assert document_payload["objectStorage"]["backend"] == "memory"
  78. assert chunk_payloads
  79. storage_status_response = client.post(
  80. "/knowledge/documents/storage/status",
  81. json={"documentId": document_payload["id"]},
  82. )
  83. assert storage_status_response.status_code == 200
  84. storage_status_payload = storage_status_response.json()["data"]
  85. assert storage_status_payload["exists"] is True
  86. assert storage_status_payload["sizeBytes"] > 0
  87. assert storage_status_payload["checkedTime"]
  88. content_response = client.post(
  89. "/knowledge/documents/content",
  90. json={
  91. "documentId": document_payload["id"],
  92. "includeText": True,
  93. "includeBase64": True,
  94. },
  95. )
  96. assert content_response.status_code == 200
  97. content_payload = content_response.json()["data"]
  98. assert content_payload["contentText"].startswith("# Invoice FAQ")
  99. assert content_payload["contentBase64"]
  100. reindex_response = client.post(
  101. "/knowledge/documents/reindex",
  102. json={
  103. "documentId": document_payload["id"],
  104. "chunkSize": 40,
  105. "chunkOverlap": 5,
  106. "asyncMode": False,
  107. },
  108. )
  109. assert reindex_response.status_code == 200
  110. assert reindex_response.json()["data"]["chunks"]
  111. chunks_response = client.post(
  112. "/knowledge/chunks/list",
  113. json={"page": 1, "pageSize": 20, "documentId": document_payload["id"]},
  114. )
  115. assert chunks_response.status_code == 200
  116. assert chunks_response.json()["data"]["total"] == len(chunk_payloads)
  117. search_response = client.post(
  118. "/knowledge/search/query",
  119. json={
  120. "knowledgeBaseId": base_payload["id"],
  121. "query": "download invoices",
  122. "topK": 3,
  123. "filters": {"status": "indexed"},
  124. },
  125. )
  126. assert search_response.status_code == 200
  127. search_payload = search_response.json()["data"]
  128. assert search_payload
  129. assert search_payload[0]["document"]["id"] == document_payload["id"]
  130. assert "scoreDetails" in search_payload[0]
  131. settings_response = client.post(
  132. "/knowledge/settings/update",
  133. json={
  134. "knowledgeBaseId": base_payload["id"],
  135. "retrievalMode": "hybrid",
  136. "embeddingModelId": "auto",
  137. "rerankModelId": "auto",
  138. "chunkSize": 600,
  139. "chunkOverlap": 80,
  140. "topK": 6,
  141. "minScore": 0.1,
  142. "maxCandidates": 40,
  143. "keywordWeight": 0.4,
  144. "vectorWeight": 0.45,
  145. "rerankWeight": 0.15,
  146. "queryRewrite": True,
  147. "requireCitations": True,
  148. },
  149. )
  150. assert settings_response.status_code == 200
  151. assert settings_response.json()["data"]["knowledgeBaseId"] == base_payload["id"]
  152. assert settings_response.json()["data"]["chunkSize"] == 600
  153. from app.infrastructure.object_storage import InMemoryObjectStorage
  154. InMemoryObjectStorage(bucket="agent-platform-knowledge").delete_object(
  155. object_key=document_payload["objectStorage"]["objectKey"])
  156. missing_storage_status_response = client.post(
  157. "/knowledge/documents/storage/status",
  158. json={"documentId": document_payload["id"]},
  159. )
  160. assert missing_storage_status_response.status_code == 200
  161. assert missing_storage_status_response.json()["data"]["exists"] is False
  162. missing_reindex_response = client.post(
  163. "/knowledge/documents/reindex",
  164. json={
  165. "documentId": document_payload["id"],
  166. "chunkSize": 40,
  167. "chunkOverlap": 5,
  168. "asyncMode": False,
  169. },
  170. )
  171. assert missing_reindex_response.status_code == 503
  172. document_detail_response = client.post(
  173. "/knowledge/documents/detail",
  174. json={"documentId": document_payload["id"]},
  175. )
  176. assert document_detail_response.status_code == 200
  177. assert document_detail_response.json()["data"]["status"] == "failed"
  178. delete_document_response = client.post(
  179. "/knowledge/documents/delete",
  180. json={"documentId": document_payload["id"]},
  181. )
  182. assert delete_document_response.status_code == 200
  183. assert delete_document_response.json()["data"]["deleted"] is True
  184. assert delete_document_response.json()["data"]["objectDeleted"] is False
  185. engine.dispose()
  186. Base.metadata.clear()