routes.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. from core_domain import ServiceHealth
  2. from fastapi import APIRouter, Depends, HTTPException, Query
  3. from sqlalchemy import text
  4. from sqlalchemy.orm import Session
  5. from app.application.services import (
  6. RuntimeApplicationService,
  7. RuntimeDebugSnapshot,
  8. build_runtime_application_service,
  9. )
  10. from app.bootstrap.settings import RuntimeServiceSettings
  11. from app.db.session import get_db
  12. from app.infrastructure.code_runner_client import CodeRunnerClientError
  13. from app.infrastructure.human_client import HumanServiceClientError
  14. from app.infrastructure.model_gateway_client import ModelGatewayClientError
  15. from app.infrastructure.tool_client import ToolServiceClientError
  16. from app.infrastructure.workflow_client import WorkflowServiceClientError
  17. from app.schemas.run import (
  18. ExecutionLogListRequest,
  19. ExecutionLogResponse,
  20. HumanNodeResumeRequest,
  21. NodeArtifactListRequest,
  22. NodeArtifactResponse,
  23. NodeRunListRequest,
  24. NodeRunExecuteRequest,
  25. NodeRunExecuteResponse,
  26. NodeRunResponse,
  27. NodeRunStatusUpdateRequest,
  28. RunBootstrapResponse,
  29. RunCreateRequest,
  30. RunExecuteRequest,
  31. RunExecuteResponse,
  32. RuntimeDebugContinueRequest,
  33. RuntimeDebugSnapshotRequest,
  34. RuntimeDebugSnapshotResponse,
  35. RuntimeDebugStepResponse,
  36. TraceSpanListRequest,
  37. TraceSpanResponse,
  38. WorkerExecuteNextRequest,
  39. WorkerExecuteNextResponse,
  40. WorkflowRunListRequest,
  41. WorkflowRunResponse,
  42. WorkflowRunStatusUpdateRequest,
  43. )
# Router on which the endpoint decorators in this module register their handlers.
router = APIRouter()
  45. def build_runtime_debug_snapshot_response(snapshot: RuntimeDebugSnapshot) -> RuntimeDebugSnapshotResponse:
  46. return RuntimeDebugSnapshotResponse(
  47. run=WorkflowRunResponse.from_entity(snapshot.run),
  48. node_runs=[
  49. NodeRunResponse.from_entity(item)
  50. for item in snapshot.node_runs
  51. ],
  52. run_state_json=snapshot.run_state_json,
  53. node_output_json_by_node_id=snapshot.node_output_json_by_node_id,
  54. node_output_text_by_node_id=snapshot.node_output_text_by_node_id,
  55. queued_node_ids=snapshot.queued_node_ids,
  56. running_node_ids=snapshot.running_node_ids,
  57. completed_node_ids=snapshot.completed_node_ids,
  58. failed_node_ids=snapshot.failed_node_ids,
  59. execution_logs=[
  60. ExecutionLogResponse.from_entity(item)
  61. for item in snapshot.execution_logs
  62. ],
  63. node_artifacts=[
  64. NodeArtifactResponse.from_entity(item)
  65. for item in snapshot.node_artifacts
  66. ],
  67. trace_spans=[
  68. TraceSpanResponse.from_entity(item)
  69. for item in snapshot.trace_spans
  70. ])
  71. def get_runtime_settings() -> RuntimeServiceSettings:
  72. return RuntimeServiceSettings()
  73. def get_runtime_application_service(
  74. db: Session = Depends(get_db),
  75. settings: RuntimeServiceSettings = Depends(get_runtime_settings)) -> RuntimeApplicationService:
  76. return build_runtime_application_service(db=db, settings=settings)
  77. @router.get("/health", response_model=ServiceHealth)
  78. def health_check(db: Session = Depends(get_db)) -> ServiceHealth:
  79. db.execute(text("SELECT 1"))
  80. return ServiceHealth(service="runtime-service", status="ok", database="ok")
  81. @router.post("/runs", response_model=RunBootstrapResponse)
  82. def create_run(
  83. payload: RunCreateRequest,
  84. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> RunBootstrapResponse:
  85. try:
  86. workflow_run, initial_node = service.create_run(payload)
  87. except (
  88. CodeRunnerClientError,
  89. ModelGatewayClientError,
  90. HumanServiceClientError,
  91. ToolServiceClientError,
  92. WorkflowServiceClientError) as exc:
  93. raise HTTPException(status_code=502, detail=str(exc)) from exc
  94. return RunBootstrapResponse(
  95. run=WorkflowRunResponse.from_entity(workflow_run),
  96. initial_node=NodeRunResponse.from_entity(initial_node) if initial_node else None)
  97. @router.get("/runs", response_model=list[WorkflowRunResponse])
  98. def list_runs(
  99. session_id: str | None = Query(default=None),
  100. limit: int = Query(default=50, ge=1, le=500),
  101. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> list[WorkflowRunResponse]:
  102. return [
  103. WorkflowRunResponse.from_entity(item)
  104. for item in service.list_runs(session_id=session_id, limit=limit)
  105. ]
  106. @router.post("/runs/list", response_model=list[WorkflowRunResponse])
  107. def list_runs_post(
  108. payload: WorkflowRunListRequest,
  109. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> list[WorkflowRunResponse]:
  110. return [
  111. WorkflowRunResponse.from_entity(item)
  112. for item in service.list_runs(session_id=payload.session_id, limit=payload.limit)
  113. ]
  114. @router.get("/node-runs", response_model=list[NodeRunResponse])
  115. def list_node_runs(
  116. run_id: str = Query(...),
  117. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> list[NodeRunResponse]:
  118. return [
  119. NodeRunResponse.from_entity(item)
  120. for item in service.list_node_runs(run_id=run_id)
  121. ]
  122. @router.post("/node-runs/list", response_model=list[NodeRunResponse])
  123. def list_node_runs_post(
  124. payload: NodeRunListRequest,
  125. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> list[NodeRunResponse]:
  126. return [
  127. NodeRunResponse.from_entity(item)
  128. for item in service.list_node_runs(run_id=payload.run_id)
  129. ]
  130. @router.get("/execution-logs", response_model=list[ExecutionLogResponse])
  131. def list_execution_logs(
  132. run_id: str | None = Query(default=None),
  133. node_run_id: str | None = Query(default=None),
  134. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> list[ExecutionLogResponse]:
  135. return [
  136. ExecutionLogResponse.from_entity(item)
  137. for item in service.list_execution_logs(
  138. run_id=run_id,
  139. node_run_id=node_run_id)
  140. ]
  141. @router.post("/execution-logs/list", response_model=list[ExecutionLogResponse])
  142. def list_execution_logs_post(
  143. payload: ExecutionLogListRequest,
  144. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> list[ExecutionLogResponse]:
  145. return [
  146. ExecutionLogResponse.from_entity(item)
  147. for item in service.list_execution_logs(
  148. run_id=payload.run_id,
  149. node_run_id=payload.node_run_id)
  150. ]
  151. @router.get("/node-artifacts", response_model=list[NodeArtifactResponse])
  152. def list_node_artifacts(
  153. run_id: str | None = Query(default=None),
  154. node_run_id: str | None = Query(default=None),
  155. artifact_type: str | None = Query(default=None),
  156. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> list[NodeArtifactResponse]:
  157. return [
  158. NodeArtifactResponse.from_entity(item)
  159. for item in service.list_node_artifacts(
  160. run_id=run_id,
  161. node_run_id=node_run_id,
  162. artifact_type=artifact_type)
  163. ]
  164. @router.post("/node-artifacts/list", response_model=list[NodeArtifactResponse])
  165. def list_node_artifacts_post(
  166. payload: NodeArtifactListRequest,
  167. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> list[NodeArtifactResponse]:
  168. return [
  169. NodeArtifactResponse.from_entity(item)
  170. for item in service.list_node_artifacts(
  171. run_id=payload.run_id,
  172. node_run_id=payload.node_run_id,
  173. artifact_type=payload.artifact_type)
  174. ]
  175. @router.get("/trace-spans", response_model=list[TraceSpanResponse])
  176. def list_trace_spans(
  177. run_id: str | None = Query(default=None),
  178. node_run_id: str | None = Query(default=None),
  179. span_type: str | None = Query(default=None),
  180. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> list[TraceSpanResponse]:
  181. return [
  182. TraceSpanResponse.from_entity(item)
  183. for item in service.list_trace_spans(
  184. run_id=run_id,
  185. node_run_id=node_run_id,
  186. span_type=span_type)
  187. ]
  188. @router.post("/trace-spans/list", response_model=list[TraceSpanResponse])
  189. def list_trace_spans_post(
  190. payload: TraceSpanListRequest,
  191. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> list[TraceSpanResponse]:
  192. return [
  193. TraceSpanResponse.from_entity(item)
  194. for item in service.list_trace_spans(
  195. run_id=payload.run_id,
  196. node_run_id=payload.node_run_id,
  197. span_type=payload.span_type)
  198. ]
  199. @router.post("/runs/{run_id}/status", response_model=WorkflowRunResponse)
  200. def update_run_status(
  201. run_id: str,
  202. payload: WorkflowRunStatusUpdateRequest,
  203. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> WorkflowRunResponse:
  204. entity = service.update_run_status(run_id=run_id, payload=payload)
  205. if entity is None:
  206. raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
  207. return WorkflowRunResponse.from_entity(entity)
  208. @router.post("/node-runs/{node_run_id}/status", response_model=NodeRunResponse)
  209. def update_node_run_status(
  210. node_run_id: str,
  211. payload: NodeRunStatusUpdateRequest,
  212. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> NodeRunResponse:
  213. entity = service.update_node_run_status(node_run_id=node_run_id, payload=payload)
  214. if entity is None:
  215. raise HTTPException(status_code=404, detail=f"node_run not found: {node_run_id}")
  216. return NodeRunResponse.from_entity(entity)
  217. @router.post("/node-runs/{node_run_id}/execute", response_model=NodeRunExecuteResponse)
  218. def execute_node_run(
  219. node_run_id: str,
  220. payload: NodeRunExecuteRequest,
  221. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> NodeRunExecuteResponse:
  222. try:
  223. result = service.execute_node_run(node_run_id=node_run_id, payload=payload)
  224. except (
  225. CodeRunnerClientError,
  226. ModelGatewayClientError,
  227. HumanServiceClientError,
  228. ToolServiceClientError,
  229. WorkflowServiceClientError) as exc:
  230. raise HTTPException(status_code=502, detail=str(exc)) from exc
  231. if result is None:
  232. raise HTTPException(status_code=404, detail=f"node_run not found: {node_run_id}")
  233. workflow_run, node_run, executor_name = result
  234. return NodeRunExecuteResponse(
  235. run=WorkflowRunResponse.from_entity(workflow_run),
  236. node_run=NodeRunResponse.from_entity(node_run),
  237. executor_name=executor_name)
  238. @router.post("/runs/{run_id}/execute-next", response_model=NodeRunExecuteResponse)
  239. def execute_next_node_run(
  240. run_id: str,
  241. payload: NodeRunExecuteRequest,
  242. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> NodeRunExecuteResponse:
  243. try:
  244. result = service.execute_next_node_run(
  245. run_id=run_id,
  246. payload=payload)
  247. except (
  248. CodeRunnerClientError,
  249. ModelGatewayClientError,
  250. HumanServiceClientError,
  251. ToolServiceClientError,
  252. WorkflowServiceClientError) as exc:
  253. raise HTTPException(status_code=502, detail=str(exc)) from exc
  254. if result is None:
  255. raise HTTPException(status_code=404, detail=f"queued node_run not found for run: {run_id}")
  256. workflow_run, node_run, executor_name = result
  257. return NodeRunExecuteResponse(
  258. run=WorkflowRunResponse.from_entity(workflow_run),
  259. node_run=NodeRunResponse.from_entity(node_run),
  260. executor_name=executor_name)
  261. @router.post("/runs/{run_id}/execute", response_model=RunExecuteResponse)
  262. def execute_run(
  263. run_id: str,
  264. payload: RunExecuteRequest,
  265. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> RunExecuteResponse:
  266. try:
  267. result = service.execute_run(
  268. run_id=run_id,
  269. payload=payload)
  270. except (
  271. CodeRunnerClientError,
  272. ModelGatewayClientError,
  273. HumanServiceClientError,
  274. ToolServiceClientError,
  275. WorkflowServiceClientError) as exc:
  276. raise HTTPException(status_code=502, detail=str(exc)) from exc
  277. if result is None:
  278. raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
  279. workflow_run, node_runs, executor_names = result
  280. return RunExecuteResponse(
  281. run=WorkflowRunResponse.from_entity(workflow_run),
  282. node_runs=[NodeRunResponse.from_entity(item) for item in node_runs],
  283. executor_names=executor_names)
  284. @router.get("/runs/{run_id}/debug/snapshot", response_model=RuntimeDebugSnapshotResponse)
  285. def get_runtime_debug_snapshot(
  286. run_id: str,
  287. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> RuntimeDebugSnapshotResponse:
  288. snapshot = service.get_debug_snapshot(run_id=run_id)
  289. if snapshot is None:
  290. raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
  291. return build_runtime_debug_snapshot_response(snapshot)
  292. @router.post("/runs/debug/snapshot", response_model=RuntimeDebugSnapshotResponse)
  293. def get_runtime_debug_snapshot_post(
  294. payload: RuntimeDebugSnapshotRequest,
  295. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> RuntimeDebugSnapshotResponse:
  296. snapshot = service.get_debug_snapshot(run_id=payload.run_id)
  297. if snapshot is None:
  298. raise HTTPException(status_code=404, detail=f"workflow_run not found: {payload.run_id}")
  299. return build_runtime_debug_snapshot_response(snapshot)
  300. @router.post("/runs/{run_id}/debug/pause", response_model=RuntimeDebugSnapshotResponse)
  301. def pause_runtime_debug_run(
  302. run_id: str,
  303. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> RuntimeDebugSnapshotResponse:
  304. snapshot = service.pause_run(run_id=run_id)
  305. if snapshot is None:
  306. raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
  307. return build_runtime_debug_snapshot_response(snapshot)
  308. @router.post("/runs/{run_id}/debug/resume", response_model=RuntimeDebugSnapshotResponse)
  309. def resume_runtime_debug_run(
  310. run_id: str,
  311. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> RuntimeDebugSnapshotResponse:
  312. snapshot = service.resume_run(run_id=run_id)
  313. if snapshot is None:
  314. raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
  315. return build_runtime_debug_snapshot_response(snapshot)
  316. @router.post("/runs/{run_id}/debug/step", response_model=RuntimeDebugStepResponse)
  317. def step_runtime_debug_run(
  318. run_id: str,
  319. payload: NodeRunExecuteRequest,
  320. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> RuntimeDebugStepResponse:
  321. result = service.step_debug_run(
  322. run_id=run_id,
  323. worker_key=payload.worker_key)
  324. if result is None:
  325. raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
  326. snapshot, executed_node_runs, executor_names, reason = result
  327. return RuntimeDebugStepResponse(
  328. snapshot=build_runtime_debug_snapshot_response(snapshot),
  329. executed_node_runs=[NodeRunResponse.from_entity(item) for item in executed_node_runs],
  330. executor_names=executor_names,
  331. reason=reason)
  332. @router.post("/runs/{run_id}/debug/continue", response_model=RuntimeDebugStepResponse)
  333. def continue_runtime_debug_run(
  334. run_id: str,
  335. payload: RuntimeDebugContinueRequest,
  336. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> RuntimeDebugStepResponse:
  337. result = service.continue_debug_run(
  338. run_id=run_id,
  339. payload=payload)
  340. if result is None:
  341. raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
  342. snapshot, executed_node_runs, executor_names, paused_before_node_id, reason = result
  343. return RuntimeDebugStepResponse(
  344. snapshot=build_runtime_debug_snapshot_response(snapshot),
  345. executed_node_runs=[NodeRunResponse.from_entity(item) for item in executed_node_runs],
  346. executor_names=executor_names,
  347. paused_before_node_id=paused_before_node_id,
  348. reason=reason)
  349. @router.post("/node-runs/{node_run_id}/resume-human", response_model=NodeRunExecuteResponse)
  350. def resume_human_node_run(
  351. node_run_id: str,
  352. payload: HumanNodeResumeRequest,
  353. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> NodeRunExecuteResponse:
  354. try:
  355. result = service.resume_human_node_run(
  356. node_run_id=node_run_id,
  357. payload=payload)
  358. except (
  359. CodeRunnerClientError,
  360. ModelGatewayClientError,
  361. HumanServiceClientError,
  362. ToolServiceClientError,
  363. WorkflowServiceClientError) as exc:
  364. raise HTTPException(status_code=502, detail=str(exc)) from exc
  365. if result is None:
  366. raise HTTPException(
  367. status_code=404,
  368. detail=f"human node_run not found or task mismatch: {node_run_id}")
  369. workflow_run, node_run, executor_name = result
  370. return NodeRunExecuteResponse(
  371. run=WorkflowRunResponse.from_entity(workflow_run),
  372. node_run=NodeRunResponse.from_entity(node_run),
  373. executor_name=executor_name)
  374. @router.post("/workers/execute-next", response_model=WorkerExecuteNextResponse)
  375. def execute_next_worker_task(
  376. payload: WorkerExecuteNextRequest,
  377. settings: RuntimeServiceSettings = Depends(get_runtime_settings),
  378. service: RuntimeApplicationService = Depends(get_runtime_application_service)) -> WorkerExecuteNextResponse:
  379. try:
  380. result = service.execute_next_claimed_node_run(
  381. worker_key=payload.worker_key,
  382. lease_seconds=payload.lease_seconds or settings.worker_lease_seconds)
  383. except (
  384. CodeRunnerClientError,
  385. ModelGatewayClientError,
  386. HumanServiceClientError,
  387. ToolServiceClientError,
  388. WorkflowServiceClientError) as exc:
  389. raise HTTPException(status_code=502, detail=str(exc)) from exc
  390. if result is None:
  391. raise HTTPException(status_code=404, detail="queued worker task not found")
  392. workflow_run, node_run, executor_name, released_lease_count = result
  393. return WorkerExecuteNextResponse(
  394. run=WorkflowRunResponse.from_entity(workflow_run),
  395. node_run=NodeRunResponse.from_entity(node_run),
  396. executor_name=executor_name,
  397. released_lease_count=released_lease_count)