routes.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. from datetime import datetime
  2. from typing import TypeVar
  3. from core_domain import ServiceHealth
  4. from fastapi import APIRouter, Depends, HTTPException
  5. from sqlalchemy import text
  6. from sqlalchemy.orm import Session
  7. from app.application.document_parsers import DocumentParseError
  8. from app.application.services import (
  9. KnowledgeApplicationService,
  10. KnowledgeIndexingError,
  11. build_knowledge_application_service,
  12. )
  13. from app.bootstrap.settings import KnowledgeServiceSettings
  14. from app.db.session import get_db
  15. from app.infrastructure.object_storage import ObjectStorageError
  16. from app.schemas.knowledge import (
  17. ApiResponse,
  18. DeleteData,
  19. KnowledgeBaseCreateRequestDto,
  20. KnowledgeBaseDeleteRequestDto,
  21. KnowledgeBaseDetailRequestDto,
  22. KnowledgeBaseDto,
  23. KnowledgeBaseListRequestDto,
  24. KnowledgeBaseStatusRequestDto,
  25. KnowledgeBaseStatusUpdateRequest,
  26. KnowledgeBaseUpdateRequestDto,
  27. KnowledgeBaseReindexData,
  28. KnowledgeBaseReindexRequestDto,
  29. KnowledgeChunkDetailRequestDto,
  30. KnowledgeChunkDeleteRequestDto,
  31. KnowledgeChunkDto,
  32. KnowledgeChunkListRequestDto,
  33. KnowledgeDocumentCreateRequestDto,
  34. KnowledgeDocumentContentData,
  35. KnowledgeDocumentContentRequestDto,
  36. KnowledgeDocumentDeleteRequestDto,
  37. KnowledgeDocumentDetailRequestDto,
  38. KnowledgeDocumentDto,
  39. KnowledgeDocumentIngestData,
  40. KnowledgeDocumentListRequestDto,
  41. KnowledgeDocumentParseRequest,
  42. KnowledgeDocumentParseRequestDto,
  43. KnowledgeDocumentParseData,
  44. KnowledgeDocumentReindexRequestDto,
  45. KnowledgeDocumentStorageStatusData,
  46. KnowledgeDocumentStorageStatusRequestDto,
  47. KnowledgeDocumentStatusRequestDto,
  48. KnowledgeDocumentUpdateRequestDto,
  49. KnowledgeIndexJobData,
  50. KnowledgeIndexJobDetailRequestDto,
  51. KnowledgeIndexJobListRequestDto,
  52. KnowledgeIndexJobRetryRequestDto,
  53. KnowledgeSearchRequestDto,
  54. KnowledgeSearchRequest,
  55. KnowledgeSearchResultDto,
  56. KnowledgeSettingsDto,
  57. KnowledgeSettingsUpdateRequestDto,
  58. KnowledgeStorageHealthData,
  59. KnowledgeStorageHealthRequestDto,
  60. PageResult,
  61. )
# Router on which every endpoint in this module is registered.
router = APIRouter()
# Generic payload type used by the ``ok`` and ``paginate`` helpers.
T = TypeVar("T")
  64. def ok(data: T) -> ApiResponse[T]:
  65. return ApiResponse[T](
  66. data=data,
  67. requestId="",
  68. serverTime=datetime.utcnow())
  69. def paginate(items: list[T], *, page: int, page_size: int) -> PageResult[T]:
  70. start = (page - 1) * page_size
  71. return PageResult.from_items(
  72. items=items[start:start + page_size],
  73. total=len(items),
  74. page=page,
  75. page_size=page_size)
  76. def get_knowledge_settings() -> KnowledgeServiceSettings:
  77. return KnowledgeServiceSettings()
  78. def get_knowledge_application_service(
  79. db: Session = Depends(get_db),
  80. settings: KnowledgeServiceSettings = Depends(get_knowledge_settings)) -> KnowledgeApplicationService:
  81. return build_knowledge_application_service(db=db, settings=settings)
  82. @router.get("/health", response_model=ServiceHealth)
  83. def health_check(db: Session = Depends(get_db)) -> ServiceHealth:
  84. db.execute(text("SELECT 1"))
  85. return ServiceHealth(service="knowledge-service", status="ok", database="ok")
  86. @router.post("/storage/health", response_model=ApiResponse[KnowledgeStorageHealthData])
  87. def storage_health_contract(
  88. payload: KnowledgeStorageHealthRequestDto,
  89. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeStorageHealthData]:
  90. try:
  91. health = service.read_storage_health()
  92. except ObjectStorageError as exc:
  93. health = {
  94. "backend": service.settings.object_storage_backend,
  95. "bucket": service.settings.object_storage_bucket,
  96. "available": False,
  97. "message": str(exc),
  98. }
  99. return ok(KnowledgeStorageHealthData(
  100. backend=str(health.get("backend") or ""),
  101. bucket=str(health.get("bucket") or ""),
  102. available=bool(health.get("available")),
  103. message=str(health["message"]) if health.get("message") is not None else None,
  104. checkedTime=datetime.utcnow()))
  105. @router.post("/bases/list", response_model=ApiResponse[PageResult[KnowledgeBaseDto]])
  106. def list_bases_contract(
  107. payload: KnowledgeBaseListRequestDto,
  108. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[PageResult[KnowledgeBaseDto]]:
  109. items = [
  110. KnowledgeBaseDto.from_entity(item)
  111. for item in service.list_bases_filtered(
  112. keyword=payload.keyword,
  113. status=payload.status)
  114. ]
  115. return ok(paginate(items, page=payload.page, page_size=payload.pageSize))
  116. @router.post("/bases/create", response_model=ApiResponse[KnowledgeBaseDto])
  117. def create_base_contract(
  118. payload: KnowledgeBaseCreateRequestDto,
  119. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeBaseDto]:
  120. return ok(KnowledgeBaseDto.from_entity(service.create_base_from_contract(payload)))
  121. @router.post("/bases/detail", response_model=ApiResponse[KnowledgeBaseDto])
  122. def detail_base_contract(
  123. payload: KnowledgeBaseDetailRequestDto,
  124. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeBaseDto]:
  125. entity = service.base_repository.get_by_id(knowledge_base_id=payload.knowledgeBaseId)
  126. if entity is None:
  127. raise HTTPException(status_code=404, detail=f"knowledge base not found: {payload.knowledgeBaseId}")
  128. return ok(KnowledgeBaseDto.from_entity(entity))
  129. @router.post("/bases/update", response_model=ApiResponse[KnowledgeBaseDto])
  130. def update_base_contract(
  131. payload: KnowledgeBaseUpdateRequestDto,
  132. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeBaseDto]:
  133. entity = service.update_base_from_contract(payload)
  134. if entity is None:
  135. raise HTTPException(status_code=404, detail=f"knowledge base not found: {payload.knowledgeBaseId}")
  136. return ok(KnowledgeBaseDto.from_entity(entity))
  137. @router.post("/bases/status", response_model=ApiResponse[KnowledgeBaseDto])
  138. def update_base_status_contract(
  139. payload: KnowledgeBaseStatusRequestDto,
  140. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeBaseDto]:
  141. entity = service.update_base_status(
  142. knowledge_base_id=payload.knowledgeBaseId,
  143. payload=KnowledgeBaseStatusUpdateRequest(status=payload.status))
  144. if entity is None:
  145. raise HTTPException(status_code=404, detail=f"knowledge base not found: {payload.knowledgeBaseId}")
  146. return ok(KnowledgeBaseDto.from_entity(entity))
  147. @router.post("/bases/delete", response_model=ApiResponse[DeleteData])
  148. def delete_base_contract(
  149. payload: KnowledgeBaseDeleteRequestDto,
  150. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[DeleteData]:
  151. try:
  152. deleted = service.delete_base(knowledge_base_id=payload.knowledgeBaseId)
  153. except ObjectStorageError as exc:
  154. raise HTTPException(status_code=503, detail=str(exc)) from exc
  155. return ok(DeleteData(
  156. deleted=deleted,
  157. knowledgeBaseId=payload.knowledgeBaseId))
  158. @router.post("/documents/list", response_model=ApiResponse[PageResult[KnowledgeDocumentDto]])
  159. def list_documents_contract(
  160. payload: KnowledgeDocumentListRequestDto,
  161. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[PageResult[KnowledgeDocumentDto]]:
  162. items = [
  163. KnowledgeDocumentDto.from_entity(item)
  164. for item in service.list_documents_filtered(
  165. knowledge_base_id=payload.knowledgeBaseId,
  166. keyword=payload.keyword,
  167. status=payload.status,
  168. source_type=payload.sourceType)
  169. ]
  170. return ok(paginate(items, page=payload.page, page_size=payload.pageSize))
  171. @router.post("/documents/create", response_model=ApiResponse[KnowledgeDocumentIngestData])
  172. def create_document_contract(
  173. payload: KnowledgeDocumentCreateRequestDto,
  174. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeDocumentIngestData]:
  175. try:
  176. result = service.create_document_from_contract_result(payload)
  177. except KnowledgeIndexingError as exc:
  178. raise HTTPException(status_code=503, detail=str(exc)) from exc
  179. except ObjectStorageError as exc:
  180. raise HTTPException(status_code=503, detail=str(exc)) from exc
  181. except ValueError as exc:
  182. raise HTTPException(status_code=422, detail=str(exc)) from exc
  183. return ok(KnowledgeDocumentIngestData(
  184. document=KnowledgeDocumentDto.from_entity(result.document),
  185. chunks=[KnowledgeChunkDto.from_entity(item) for item in result.chunks],
  186. queued=result.queued,
  187. job=result.job))
  188. @router.post("/documents/detail", response_model=ApiResponse[KnowledgeDocumentDto])
  189. def detail_document_contract(
  190. payload: KnowledgeDocumentDetailRequestDto,
  191. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeDocumentDto]:
  192. entity = service.document_repository.get_by_id(document_id=payload.documentId)
  193. if entity is None:
  194. raise HTTPException(status_code=404, detail=f"knowledge document not found: {payload.documentId}")
  195. return ok(KnowledgeDocumentDto.from_entity(entity))
  196. @router.post("/documents/update", response_model=ApiResponse[KnowledgeDocumentDto])
  197. def update_document_contract(
  198. payload: KnowledgeDocumentUpdateRequestDto,
  199. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeDocumentDto]:
  200. entity = service.update_document_from_contract(payload)
  201. if entity is None:
  202. raise HTTPException(status_code=404, detail=f"knowledge document not found: {payload.documentId}")
  203. return ok(KnowledgeDocumentDto.from_entity(entity))
  204. @router.post("/documents/status", response_model=ApiResponse[KnowledgeDocumentDto])
  205. def update_document_status_contract(
  206. payload: KnowledgeDocumentStatusRequestDto,
  207. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeDocumentDto]:
  208. entity = service.document_repository.update_status(
  209. document_id=payload.documentId,
  210. status=payload.status)
  211. if entity is None:
  212. raise HTTPException(status_code=404, detail=f"knowledge document not found: {payload.documentId}")
  213. return ok(KnowledgeDocumentDto.from_entity(entity))
  214. @router.post("/documents/delete", response_model=ApiResponse[DeleteData])
  215. def delete_document_contract(
  216. payload: KnowledgeDocumentDeleteRequestDto,
  217. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[DeleteData]:
  218. try:
  219. result = service.delete_document_result(document_id=payload.documentId)
  220. except ObjectStorageError as exc:
  221. raise HTTPException(status_code=503, detail=str(exc)) from exc
  222. return ok(DeleteData(
  223. deleted=bool(result.get("deleted")),
  224. documentId=payload.documentId,
  225. objectDeleted=bool(result.get("objectDeleted"))))
  226. @router.post("/documents/reindex", response_model=ApiResponse[KnowledgeDocumentIngestData])
  227. def reindex_document_contract(
  228. payload: KnowledgeDocumentReindexRequestDto,
  229. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeDocumentIngestData]:
  230. try:
  231. result = service.reindex_document_from_contract_result(payload)
  232. except KnowledgeIndexingError as exc:
  233. raise HTTPException(status_code=503, detail=str(exc)) from exc
  234. except ObjectStorageError as exc:
  235. raise HTTPException(status_code=503, detail=str(exc)) from exc
  236. except ValueError as exc:
  237. raise HTTPException(status_code=422, detail=str(exc)) from exc
  238. if result is None:
  239. raise HTTPException(status_code=404, detail=f"knowledge document not found: {payload.documentId}")
  240. return ok(KnowledgeDocumentIngestData(
  241. document=KnowledgeDocumentDto.from_entity(result.document),
  242. chunks=[KnowledgeChunkDto.from_entity(item) for item in result.chunks],
  243. queued=result.queued,
  244. job=result.job))
  245. @router.post("/jobs/list", response_model=ApiResponse[PageResult[KnowledgeIndexJobData]])
  246. def list_index_jobs_contract(
  247. payload: KnowledgeIndexJobListRequestDto,
  248. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[PageResult[KnowledgeIndexJobData]]:
  249. items = service.list_index_jobs(
  250. knowledge_base_id=payload.knowledgeBaseId,
  251. document_id=payload.documentId,
  252. status=payload.status)
  253. return ok(paginate(items, page=payload.page, page_size=payload.pageSize))
  254. @router.post("/jobs/detail", response_model=ApiResponse[KnowledgeIndexJobData])
  255. def detail_index_job_contract(
  256. payload: KnowledgeIndexJobDetailRequestDto,
  257. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeIndexJobData]:
  258. job = service.detail_index_job(document_id=payload.documentId)
  259. if job is None:
  260. raise HTTPException(status_code=404, detail=f"knowledge index job not found for document: {payload.documentId}")
  261. return ok(job)
  262. @router.post("/jobs/retry", response_model=ApiResponse[KnowledgeDocumentIngestData])
  263. def retry_index_job_contract(
  264. payload: KnowledgeIndexJobRetryRequestDto,
  265. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeDocumentIngestData]:
  266. result = service.reindex_document_from_contract_result(
  267. KnowledgeDocumentReindexRequestDto(
  268. documentId=payload.documentId,
  269. chunkSize=payload.chunkSize,
  270. chunkOverlap=payload.chunkOverlap,
  271. asyncMode=True))
  272. if result is None:
  273. raise HTTPException(status_code=404, detail=f"knowledge document not found: {payload.documentId}")
  274. return ok(KnowledgeDocumentIngestData(
  275. document=KnowledgeDocumentDto.from_entity(result.document),
  276. chunks=[KnowledgeChunkDto.from_entity(item) for item in result.chunks],
  277. queued=result.queued,
  278. job=result.job))
  279. @router.post("/jobs/reindex-base", response_model=ApiResponse[KnowledgeBaseReindexData])
  280. def reindex_base_contract(
  281. payload: KnowledgeBaseReindexRequestDto,
  282. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeBaseReindexData]:
  283. if service.base_repository.get_by_id(knowledge_base_id=payload.knowledgeBaseId) is None:
  284. raise HTTPException(status_code=404, detail=f"knowledge base not found: {payload.knowledgeBaseId}")
  285. jobs = service.reindex_base_from_contract(payload)
  286. return ok(KnowledgeBaseReindexData(
  287. knowledgeBaseId=payload.knowledgeBaseId,
  288. queuedCount=len(jobs),
  289. jobs=jobs))
  290. @router.post("/documents/content", response_model=ApiResponse[KnowledgeDocumentContentData])
  291. def read_document_content_contract(
  292. payload: KnowledgeDocumentContentRequestDto,
  293. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeDocumentContentData]:
  294. try:
  295. result = service.read_document_content(
  296. document_id=payload.documentId,
  297. include_text=payload.includeText,
  298. include_base64=payload.includeBase64)
  299. except ObjectStorageError as exc:
  300. raise HTTPException(status_code=503, detail=str(exc)) from exc
  301. except ValueError as exc:
  302. raise HTTPException(status_code=422, detail=str(exc)) from exc
  303. if result is None:
  304. raise HTTPException(status_code=404, detail=f"knowledge document not found: {payload.documentId}")
  305. return ok(KnowledgeDocumentContentData.model_validate(result))
  306. @router.post("/documents/storage/status", response_model=ApiResponse[KnowledgeDocumentStorageStatusData])
  307. def read_document_storage_status_contract(
  308. payload: KnowledgeDocumentStorageStatusRequestDto,
  309. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeDocumentStorageStatusData]:
  310. try:
  311. result = service.read_document_storage_status(document_id=payload.documentId)
  312. except ObjectStorageError as exc:
  313. raise HTTPException(status_code=503, detail=str(exc)) from exc
  314. if result is None:
  315. raise HTTPException(status_code=404, detail=f"knowledge document not found: {payload.documentId}")
  316. return ok(KnowledgeDocumentStorageStatusData.model_validate({
  317. **result,
  318. "checkedTime": datetime.utcnow(),
  319. }))
  320. @router.post("/documents/parse", response_model=ApiResponse[KnowledgeDocumentParseData])
  321. def parse_document_contract(
  322. payload: KnowledgeDocumentParseRequestDto,
  323. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeDocumentParseData]:
  324. try:
  325. parsed = service.parse_document(
  326. KnowledgeDocumentParseRequest(
  327. source_type=payload.sourceType,
  328. source_uri=payload.sourceUri,
  329. content_text=payload.contentText,
  330. content_base64=payload.contentBase64))
  331. except DocumentParseError as exc:
  332. raise HTTPException(status_code=422, detail=str(exc)) from exc
  333. return ok(KnowledgeDocumentParseData(
  334. contentText=parsed.content_text,
  335. sourceType=parsed.source_type,
  336. metadata=parsed.metadata_json))
  337. @router.post("/chunks/list", response_model=ApiResponse[PageResult[KnowledgeChunkDto]])
  338. def list_chunks_contract(
  339. payload: KnowledgeChunkListRequestDto,
  340. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[PageResult[KnowledgeChunkDto]]:
  341. items = [
  342. KnowledgeChunkDto.from_entity(item)
  343. for item in service.list_chunks_filtered(
  344. knowledge_base_id=payload.knowledgeBaseId,
  345. document_id=payload.documentId,
  346. keyword=payload.keyword)
  347. ]
  348. return ok(paginate(items, page=payload.page, page_size=payload.pageSize))
  349. @router.post("/chunks/detail", response_model=ApiResponse[KnowledgeChunkDto])
  350. def detail_chunk_contract(
  351. payload: KnowledgeChunkDetailRequestDto,
  352. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeChunkDto]:
  353. entity = service.chunk_repository.get_by_id(chunk_id=payload.chunkId)
  354. if entity is None:
  355. raise HTTPException(status_code=404, detail=f"knowledge chunk not found: {payload.chunkId}")
  356. return ok(KnowledgeChunkDto.from_entity(entity))
  357. @router.post("/chunks/delete", response_model=ApiResponse[DeleteData])
  358. def delete_chunk_contract(
  359. payload: KnowledgeChunkDeleteRequestDto,
  360. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[DeleteData]:
  361. return ok(DeleteData(
  362. deleted=service.delete_chunk(chunk_id=payload.chunkId),
  363. chunkId=payload.chunkId))
  364. @router.post("/search/query", response_model=ApiResponse[list[KnowledgeSearchResultDto]])
  365. def search_contract(
  366. payload: KnowledgeSearchRequestDto,
  367. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[list[KnowledgeSearchResultDto]]:
  368. results = [
  369. KnowledgeSearchResultDto(
  370. chunk=KnowledgeChunkDto.from_entity(chunk),
  371. document=KnowledgeDocumentDto.from_entity(document),
  372. score=score,
  373. scoreDetails=score_json)
  374. for chunk, document, score, score_json in service.search(
  375. KnowledgeSearchRequest(
  376. knowledge_base_id=payload.knowledgeBaseId,
  377. query=payload.query,
  378. top_k=payload.topK,
  379. filters_json=payload.filters))
  380. ]
  381. return ok(results)
  382. @router.post("/settings/detail", response_model=ApiResponse[KnowledgeSettingsDto])
  383. def detail_settings_contract(
  384. payload: KnowledgeBaseDetailRequestDto,
  385. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeSettingsDto]:
  386. return ok(service.read_settings(knowledge_base_id=payload.knowledgeBaseId))
  387. @router.post("/settings/update", response_model=ApiResponse[KnowledgeSettingsDto])
  388. def update_settings_contract(
  389. payload: KnowledgeSettingsUpdateRequestDto,
  390. service: KnowledgeApplicationService = Depends(get_knowledge_application_service)) -> ApiResponse[KnowledgeSettingsDto]:
  391. return ok(service.update_settings(payload))