Jelajahi Sumber

feat: add runtime workflow debugger api

Jax Docker 1 bulan lalu
induk
melakukan
4490c6492a

+ 120 - 1
services/runtime-service/app/api/routes.py

@@ -3,7 +3,11 @@ from sqlalchemy import text
 from sqlalchemy.orm import Session
 
 from core_domain import ServiceHealth
-from app.application.services import RuntimeApplicationService, build_runtime_application_service
+from app.application.services import (
+    RuntimeApplicationService,
+    RuntimeDebugSnapshot,
+    build_runtime_application_service,
+)
 from app.bootstrap.settings import RuntimeServiceSettings
 from app.db.session import get_db
 from app.infrastructure.code_runner_client import CodeRunnerClientError
@@ -23,6 +27,9 @@ from app.schemas.run import (
     RunCreateRequest,
     RunExecuteRequest,
     RunExecuteResponse,
+    RuntimeDebugContinueRequest,
+    RuntimeDebugSnapshotResponse,
+    RuntimeDebugStepResponse,
     TraceSpanResponse,
     WorkerExecuteNextRequest,
     WorkerExecuteNextResponse,
@@ -33,6 +40,35 @@ from app.schemas.run import (
 router = APIRouter()
 
 
+def build_runtime_debug_snapshot_response(snapshot: RuntimeDebugSnapshot) -> RuntimeDebugSnapshotResponse:
+    """Convert a service-layer debug snapshot into its API response schema.
+
+    Entity collections are mapped through each response model's
+    ``from_entity``; every other field is copied from ``snapshot`` as-is.
+    """
+    run_response = WorkflowRunResponse.from_entity(snapshot.run)
+    node_run_responses = [NodeRunResponse.from_entity(entity) for entity in snapshot.node_runs]
+    log_responses = [ExecutionLogResponse.from_entity(entity) for entity in snapshot.execution_logs]
+    artifact_responses = [NodeArtifactResponse.from_entity(entity) for entity in snapshot.node_artifacts]
+    span_responses = [TraceSpanResponse.from_entity(entity) for entity in snapshot.trace_spans]
+    return RuntimeDebugSnapshotResponse(
+        run=run_response,
+        node_runs=node_run_responses,
+        run_state_json=snapshot.run_state_json,
+        node_output_json_by_node_id=snapshot.node_output_json_by_node_id,
+        node_output_text_by_node_id=snapshot.node_output_text_by_node_id,
+        queued_node_ids=snapshot.queued_node_ids,
+        running_node_ids=snapshot.running_node_ids,
+        completed_node_ids=snapshot.completed_node_ids,
+        failed_node_ids=snapshot.failed_node_ids,
+        execution_logs=log_responses,
+        node_artifacts=artifact_responses,
+        trace_spans=span_responses,
+    )
+
+
 def get_runtime_settings() -> RuntimeServiceSettings:
     return RuntimeServiceSettings()
 
@@ -268,6 +304,89 @@ def execute_run(
     )
 
 
+@router.get("/runs/{run_id}/debug/snapshot", response_model=RuntimeDebugSnapshotResponse)
+def get_runtime_debug_snapshot(
+    run_id: str,
+    tenant_id: str = Query(...),
+    service: RuntimeApplicationService = Depends(get_runtime_application_service),
+) -> RuntimeDebugSnapshotResponse:
+    # Read-only endpoint: returns the current debug snapshot of a run.
+    # The service yields None when the run is missing or belongs to another
+    # tenant; both cases are reported as 404.
+    debug_snapshot = service.get_debug_snapshot(tenant_id=tenant_id, run_id=run_id)
+    if debug_snapshot is not None:
+        return build_runtime_debug_snapshot_response(debug_snapshot)
+    raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
+
+
+@router.post("/runs/{run_id}/debug/pause", response_model=RuntimeDebugSnapshotResponse)
+def pause_runtime_debug_run(
+    run_id: str,
+    tenant_id: str = Query(...),
+    service: RuntimeApplicationService = Depends(get_runtime_application_service),
+) -> RuntimeDebugSnapshotResponse:
+    # Asks the service to pause the run and returns the resulting snapshot.
+    # A None result from the service is reported as 404.
+    paused_snapshot = service.pause_run(tenant_id=tenant_id, run_id=run_id)
+    if paused_snapshot is not None:
+        return build_runtime_debug_snapshot_response(paused_snapshot)
+    raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
+
+
+@router.post("/runs/{run_id}/debug/resume", response_model=RuntimeDebugSnapshotResponse)
+def resume_runtime_debug_run(
+    run_id: str,
+    tenant_id: str = Query(...),
+    service: RuntimeApplicationService = Depends(get_runtime_application_service),
+) -> RuntimeDebugSnapshotResponse:
+    # Asks the service to resume the run and returns the resulting snapshot.
+    # A None result from the service is reported as 404.
+    resumed_snapshot = service.resume_run(tenant_id=tenant_id, run_id=run_id)
+    if resumed_snapshot is not None:
+        return build_runtime_debug_snapshot_response(resumed_snapshot)
+    raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
+
+
+@router.post("/runs/{run_id}/debug/step", response_model=RuntimeDebugStepResponse)
+def step_runtime_debug_run(
+    run_id: str,
+    payload: NodeRunExecuteRequest,
+    tenant_id: str = Query(...),
+    service: RuntimeApplicationService = Depends(get_runtime_application_service),
+) -> RuntimeDebugStepResponse:
+    # Single-step endpoint: only ``worker_key`` from the request body is
+    # forwarded to the service. A None result is reported as 404.
+    step_outcome = service.step_debug_run(
+        tenant_id=tenant_id,
+        run_id=run_id,
+        worker_key=payload.worker_key,
+    )
+    if step_outcome is None:
+        raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
+
+    debug_snapshot, stepped_node_runs, stepped_executor_names, stop_reason = step_outcome
+    snapshot_response = build_runtime_debug_snapshot_response(debug_snapshot)
+    node_run_responses = [NodeRunResponse.from_entity(node_run) for node_run in stepped_node_runs]
+    return RuntimeDebugStepResponse(
+        snapshot=snapshot_response,
+        executed_node_runs=node_run_responses,
+        executor_names=stepped_executor_names,
+        reason=stop_reason,
+    )
+
+
+@router.post("/runs/{run_id}/debug/continue", response_model=RuntimeDebugStepResponse)
+def continue_runtime_debug_run(
+    run_id: str,
+    payload: RuntimeDebugContinueRequest,
+    tenant_id: str = Query(...),
+    service: RuntimeApplicationService = Depends(get_runtime_application_service),
+) -> RuntimeDebugStepResponse:
+    # Continue endpoint: the whole request body (worker key, step limit,
+    # breakpoints) is handed to the service. A None result is reported as 404.
+    continue_outcome = service.continue_debug_run(
+        tenant_id=tenant_id,
+        run_id=run_id,
+        payload=payload,
+    )
+    if continue_outcome is None:
+        raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
+
+    (
+        debug_snapshot,
+        stepped_node_runs,
+        stepped_executor_names,
+        breakpoint_node_id,
+        stop_reason,
+    ) = continue_outcome
+    snapshot_response = build_runtime_debug_snapshot_response(debug_snapshot)
+    node_run_responses = [NodeRunResponse.from_entity(node_run) for node_run in stepped_node_runs]
+    return RuntimeDebugStepResponse(
+        snapshot=snapshot_response,
+        executed_node_runs=node_run_responses,
+        executor_names=stepped_executor_names,
+        paused_before_node_id=breakpoint_node_id,
+        reason=stop_reason,
+    )
+
+
 @router.post("/node-runs/{node_run_id}/resume-human", response_model=NodeRunExecuteResponse)
 def resume_human_node_run(
     node_run_id: str,

+ 266 - 1
services/runtime-service/app/application/services.py

@@ -1,3 +1,4 @@
+from dataclasses import dataclass
 from datetime import datetime, timedelta
 
 from sqlalchemy.orm import Session
@@ -13,7 +14,7 @@ from core_domain import (
     WorkflowRunStatus,
 )
 
-from app.db.models import NodeRun, WorkflowRun
+from app.db.models import ExecutionLog, NodeArtifact, NodeRun, TraceSpan, WorkflowRun
 from app.domain.repositories import (
     ExecutionLogRepository,
     NodeArtifactRepository,
@@ -43,12 +44,29 @@ from app.schemas.run import (
     NodeRunStatusUpdateRequest,
     RunCreateRequest,
     RunExecuteRequest,
+    RuntimeDebugContinueRequest,
     WorkflowRunStatusUpdateRequest,
 )
 from core_shared import JSONValue, try_build_redis_client
 from core_shared.task_queue import TaskQueuePublisher
 
 
+@dataclass(frozen=True)
+class RuntimeDebugSnapshot:
+    # Point-in-time view of one workflow run, as returned by the debugger
+    # operations of RuntimeApplicationService. The dataclass is frozen, so
+    # fields cannot be reassigned; the contained lists/dicts are still mutable.
+
+    # The workflow run entity the snapshot describes.
+    run: WorkflowRun
+    # All node runs listed for this run.
+    node_runs: list[NodeRun]
+    # Run-level state and per-node outputs (JSON and text), keyed by node id.
+    run_state_json: dict[str, JSONValue]
+    node_output_json_by_node_id: dict[str, dict[str, JSONValue]]
+    node_output_text_by_node_id: dict[str, str]
+    # Node ids grouped by node-run status: pending/queued, running,
+    # completed/skipped, and failed respectively.
+    queued_node_ids: list[str]
+    running_node_ids: list[str]
+    completed_node_ids: list[str]
+    failed_node_ids: list[str]
+    # Observability records scoped to this run.
+    execution_logs: list[ExecutionLog]
+    node_artifacts: list[NodeArtifact]
+    trace_spans: list[TraceSpan]
+
+
 class RuntimeApplicationService:
     def __init__(
         self,
@@ -197,6 +215,17 @@ class RuntimeApplicationService:
             span_type=span_type,
         )
 
+    def get_debug_snapshot(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str,
+    ) -> RuntimeDebugSnapshot | None:
+        """Return a debug snapshot of the run, or None if it is not visible.
+
+        "Not visible" means the run does not exist or its ``tenant_id`` differs
+        from the one supplied.
+        """
+        run = self.workflow_run_repository.get_by_id(run_id)
+        visible_to_tenant = run is not None and run.tenant_id == tenant_id
+        if not visible_to_tenant:
+            return None
+        return self._build_debug_snapshot(run)
+
     def update_run_status(
         self,
         run_id: str,
@@ -295,6 +324,9 @@ class RuntimeApplicationService:
         if workflow_run is None:
             return None
 
+        if workflow_run.status == "paused":
+            return workflow_run, node_run, "debug_paused"
+
         if node_run.status in {"completed", "failed", "skipped"}:
             executor_name = self.execution_dispatcher.resolve_executor(
                 node_run.node_type
@@ -536,6 +568,200 @@ class RuntimeApplicationService:
             return None
         return final_run, executed_node_runs, executor_names
 
+    def pause_run(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str,
+        reason: str = "debug_pause",
+    ) -> RuntimeDebugSnapshot | None:
+        """Pause a workflow run and return its debug snapshot.
+
+        Returns None when the run is missing, belongs to another tenant, or
+        cannot be re-read/updated. A run whose status is completed, failed or
+        cancelled keeps that status. The ``debug_run_paused`` event is logged
+        in either case.
+        """
+        terminal_statuses = {"completed", "failed", "cancelled"}
+
+        run = self.workflow_run_repository.get_by_id(run_id)
+        if run is None or run.tenant_id != tenant_id:
+            return None
+
+        # Re-derive the run status from its node runs, then re-read the run so
+        # the terminal check below sees the refreshed status.
+        self._sync_workflow_run_status_from_nodes(tenant_id=tenant_id, run_id=run_id)
+        refreshed_run = self.workflow_run_repository.get_by_id(run_id)
+        if refreshed_run is None:
+            return None
+
+        snapshot_run = refreshed_run
+        if refreshed_run.status not in terminal_statuses:
+            snapshot_run = self.workflow_run_repository.update_status(
+                run_id=run_id,
+                status="paused",
+            )
+            if snapshot_run is None:
+                return None
+
+        self._log_event(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=None,
+            event_type="debug_run_paused",
+            message=f"workflow run paused: {reason}",
+            detail_json={"reason": reason},
+        )
+        return self._build_debug_snapshot(snapshot_run)
+
+    def resume_run(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str,
+        reason: str = "debug_resume",
+    ) -> RuntimeDebugSnapshot | None:
+        """Set a workflow run back to ``running`` and return its debug snapshot.
+
+        Returns None when the run is missing, belongs to another tenant, or the
+        status update finds no run.
+
+        A run that is already completed, failed or cancelled is never flipped
+        back to ``running``: its snapshot is returned unchanged and no
+        ``debug_run_resumed`` event is logged. This mirrors the terminal-status
+        guard used by ``pause_run``.
+        """
+        workflow_run = self.workflow_run_repository.get_by_id(run_id)
+        if workflow_run is None or workflow_run.tenant_id != tenant_id:
+            return None
+        # Guard: resurrecting a finished run would leave it "running" with
+        # nothing left to execute.
+        if workflow_run.status in {"completed", "failed", "cancelled"}:
+            return self._build_debug_snapshot(workflow_run)
+        resumed_run = self.workflow_run_repository.update_status(
+            run_id=run_id,
+            status="running",
+        )
+        if resumed_run is None:
+            return None
+        self._log_event(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=None,
+            event_type="debug_run_resumed",
+            message=f"workflow run resumed: {reason}",
+            detail_json={"reason": reason},
+        )
+        return self._build_debug_snapshot(resumed_run)
+
+    def step_debug_run(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str,
+        worker_key: str | None = None,
+    ) -> tuple[RuntimeDebugSnapshot, list[NodeRun], list[str], str] | None:
+        """Execute at most one queued node, then pause the run again.
+
+        Returns None when the run is missing, belongs to another tenant, or
+        cannot be re-read/updated afterwards. Otherwise returns
+        ``(snapshot, executed_node_runs, executor_names, reason)`` where
+        ``reason`` is ``"step_completed"`` if a node run was executed and
+        ``"no_queued_node"`` if ``execute_next_node_run`` returned nothing.
+        """
+        terminal_statuses = {"completed", "failed", "cancelled"}
+
+        run = self.workflow_run_repository.get_by_id(run_id)
+        if run is None or run.tenant_id != tenant_id:
+            return None
+
+        # Lift the pause for the duration of the step so the node can execute.
+        if run.status == "paused":
+            self.workflow_run_repository.update_status(run_id=run_id, status="running")
+
+        step_result = self.execute_next_node_run(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            payload=NodeRunExecuteRequest(worker_key=worker_key),
+        )
+        executed_node_runs: list[NodeRun]
+        executor_names: list[str]
+        if step_result is None:
+            executed_node_runs, executor_names = [], []
+            reason = "no_queued_node"
+        else:
+            _run, stepped_node_run, stepped_executor = step_result
+            executed_node_runs = [stepped_node_run]
+            executor_names = [stepped_executor]
+            reason = "step_completed"
+
+        # Refresh the run status from its nodes; re-pause unless it finished.
+        self._sync_workflow_run_status_from_nodes(tenant_id=tenant_id, run_id=run_id)
+        refreshed_run = self.workflow_run_repository.get_by_id(run_id)
+        if refreshed_run is None:
+            return None
+        snapshot_run = refreshed_run
+        if refreshed_run.status not in terminal_statuses:
+            snapshot_run = self.workflow_run_repository.update_status(
+                run_id=run_id,
+                status="paused",
+            )
+            if snapshot_run is None:
+                return None
+
+        last_node_run_id = executed_node_runs[-1].id if executed_node_runs else None
+        self._log_event(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=last_node_run_id,
+            event_type="debug_step_finished",
+            message=f"debug step finished: {reason}",
+            detail_json={
+                "reason": reason,
+                "executed_node_ids": [node_run.node_id for node_run in executed_node_runs],
+            },
+        )
+        return self._build_debug_snapshot(snapshot_run), executed_node_runs, executor_names, reason
+
+    def continue_debug_run(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str,
+        payload: RuntimeDebugContinueRequest,
+    ) -> tuple[RuntimeDebugSnapshot, list[NodeRun], list[str], str | None, str] | None:
+        """Execute queued nodes until a breakpoint, a stop condition or the step limit.
+
+        Returns None when the run is missing, belongs to another tenant, or
+        cannot be re-read/updated afterwards. Otherwise returns
+        ``(snapshot, executed_node_runs, executor_names, paused_before_node_id, reason)``.
+
+        ``reason`` is one of:
+          * ``"no_queued_node"``    - nothing left in the queue
+          * ``"breakpoint_hit"``    - next queued node is a breakpoint; it is
+            NOT executed and its id is returned as ``paused_before_node_id``
+          * ``"node_not_found"``    - ``execute_node_run`` returned None
+          * ``"node_<status>"``     - an executed node ended in a status other
+            than ``completed``
+          * ``"max_steps_reached"`` - ``payload.max_steps`` iterations ran
+            without any of the above
+
+        Unless the run ends up completed, failed or cancelled, it is set back
+        to ``paused`` before the snapshot is taken.
+        """
+        workflow_run = self.workflow_run_repository.get_by_id(run_id)
+        if workflow_run is None or workflow_run.tenant_id != tenant_id:
+            return None
+        # Lift the pause while continuing so nodes can execute.
+        if workflow_run.status == "paused":
+            self.workflow_run_repository.update_status(run_id=run_id, status="running")
+
+        breakpoint_node_ids = set(payload.breakpoint_node_ids)
+        executed_node_runs: list[NodeRun] = []
+        executor_names: list[str] = []
+        paused_before_node_id: str | None = None
+        # Every exit path of the loop below (each break and the else clause)
+        # assigns ``reason``, so no placeholder value is needed.
+        reason: str
+
+        for _ in range(payload.max_steps):
+            next_node_run = self.node_run_repository.get_next_queued_by_run(
+                tenant_id=tenant_id,
+                run_id=run_id,
+            )
+            if next_node_run is None:
+                reason = "no_queued_node"
+                break
+            if next_node_run.node_id in breakpoint_node_ids:
+                paused_before_node_id = next_node_run.node_id
+                reason = "breakpoint_hit"
+                break
+
+            step_result = self.execute_node_run(
+                node_run_id=next_node_run.id,
+                payload=NodeRunExecuteRequest(worker_key=payload.worker_key),
+            )
+            if step_result is None:
+                reason = "node_not_found"
+                break
+            _, node_run, executor_name = step_result
+            executed_node_runs.append(node_run)
+            executor_names.append(executor_name)
+            if node_run.status != "completed":
+                reason = f"node_{node_run.status}"
+                break
+        else:
+            # NOTE(review): also reported when the final allowed step happened
+            # to drain the queue; the queue is not re-checked here.
+            reason = "max_steps_reached"
+
+        self._sync_workflow_run_status_from_nodes(tenant_id=tenant_id, run_id=run_id)
+        latest_run = self.workflow_run_repository.get_by_id(run_id)
+        if latest_run is None:
+            return None
+        if latest_run.status in {"completed", "failed", "cancelled"}:
+            paused_run = latest_run
+        else:
+            paused_run = self.workflow_run_repository.update_status(
+                run_id=run_id,
+                status="paused",
+            )
+            if paused_run is None:
+                return None
+        self._log_event(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=executed_node_runs[-1].id if executed_node_runs else None,
+            event_type="debug_continue_paused",
+            message=f"debug continue paused: {reason}",
+            detail_json={
+                "reason": reason,
+                "paused_before_node_id": paused_before_node_id,
+                "executed_node_ids": [item.node_id for item in executed_node_runs],
+                # Sorted so the logged payload does not depend on set
+                # iteration order.
+                "breakpoint_node_ids": sorted(breakpoint_node_ids),
+            },
+        )
+        return (
+            self._build_debug_snapshot(paused_run),
+            executed_node_runs,
+            executor_names,
+            paused_before_node_id,
+            reason,
+        )
+
     def resume_human_node_run(
         self,
         *,
@@ -827,6 +1053,45 @@ class RuntimeApplicationService:
 
         return run_state_json, node_output_json_by_node_id, node_output_text_by_node_id
 
+    def _build_debug_snapshot(self, workflow_run: WorkflowRun) -> RuntimeDebugSnapshot:
+        """Assemble a RuntimeDebugSnapshot for ``workflow_run``.
+
+        Collects the run's node runs, state/output maps, execution logs, node
+        artifacts and trace spans, and groups node ids by node-run status.
+        Node runs whose status falls in none of the groups appear only in
+        ``node_runs``.
+        """
+        scope = {"tenant_id": workflow_run.tenant_id, "run_id": workflow_run.id}
+
+        node_runs = self.node_run_repository.list_by_run(**scope)
+        state_json, output_json_by_node, output_text_by_node = self._build_run_state_maps(**scope)
+
+        # One pass over the node runs, preserving their order within each group.
+        queued_ids: list[str] = []
+        running_ids: list[str] = []
+        completed_ids: list[str] = []
+        failed_ids: list[str] = []
+        for node_run in node_runs:
+            status = node_run.status
+            if status in {"pending", "queued"}:
+                queued_ids.append(node_run.node_id)
+            elif status == "running":
+                running_ids.append(node_run.node_id)
+            elif status in {"completed", "skipped"}:
+                completed_ids.append(node_run.node_id)
+            elif status == "failed":
+                failed_ids.append(node_run.node_id)
+
+        logs = self.execution_log_repository.list_by_scope(**scope)
+        artifacts = self.node_artifact_repository.list_by_scope(**scope)
+        spans = self.trace_span_repository.list_by_scope(**scope)
+
+        return RuntimeDebugSnapshot(
+            run=workflow_run,
+            node_runs=node_runs,
+            run_state_json=state_json,
+            node_output_json_by_node_id=output_json_by_node,
+            node_output_text_by_node_id=output_text_by_node,
+            queued_node_ids=queued_ids,
+            running_node_ids=running_ids,
+            completed_node_ids=completed_ids,
+            failed_node_ids=failed_ids,
+            execution_logs=logs,
+            node_artifacts=artifacts,
+            trace_spans=spans,
+        )
+
     def _build_node_timing(
         self,
         node_config_json: dict[str, JSONValue],

+ 29 - 0
services/runtime-service/app/schemas/run.py

@@ -75,6 +75,12 @@ class RunExecuteResponse(BaseModel):
     executor_names: list[str]
 
 
+class RuntimeDebugContinueRequest(BaseModel):
+    # Request body of the debug "continue" endpoint.
+
+    # Worker key forwarded to each node execution; may be omitted.
+    worker_key: str | None = None
+    # Maximum number of loop iterations for one continue call (1..500).
+    max_steps: int = Field(default=32, ge=1, le=500)
+    # Node ids to stop before: when the next queued node is one of these,
+    # it is not executed and the run is paused.
+    breakpoint_node_ids: list[str] = Field(default_factory=list)
+
+
 class WorkerExecuteNextRequest(BaseModel):
     worker_key: str
     lease_seconds: int | None = Field(default=None, gt=0)
@@ -149,3 +155,26 @@ class TraceSpanResponse(BaseModel):
     @classmethod
     def from_entity(cls, entity: "TraceSpan") -> "TraceSpanResponse":
         return cls.model_validate(entity, from_attributes=True)
+
+
+class RuntimeDebugSnapshotResponse(BaseModel):
+    # API representation of a run's debug snapshot; field-for-field
+    # counterpart of the service-layer RuntimeDebugSnapshot.
+
+    # The run and all of its node runs.
+    run: WorkflowRunResponse
+    node_runs: list[NodeRunResponse]
+    # Run-level state and per-node outputs (JSON and text), keyed by node id.
+    run_state_json: dict[str, JSONValue]
+    node_output_json_by_node_id: dict[str, dict[str, JSONValue]]
+    node_output_text_by_node_id: dict[str, str]
+    # Node ids grouped by node-run status.
+    queued_node_ids: list[str]
+    running_node_ids: list[str]
+    completed_node_ids: list[str]
+    failed_node_ids: list[str]
+    # Observability records for the run.
+    execution_logs: list[ExecutionLogResponse]
+    node_artifacts: list[NodeArtifactResponse]
+    trace_spans: list[TraceSpanResponse]
+
+
+class RuntimeDebugStepResponse(BaseModel):
+    # Response of the debug "step" and "continue" endpoints.
+
+    # Snapshot of the run taken after the operation finished.
+    snapshot: RuntimeDebugSnapshotResponse
+    # Node runs executed by this call, with the executor name for each.
+    executed_node_runs: list[NodeRunResponse]
+    executor_names: list[str]
+    # Id of the breakpoint node the run stopped in front of; only the
+    # "continue" endpoint sets it, otherwise it stays None.
+    paused_before_node_id: str | None = None
+    # Short machine-readable explanation of why execution stopped.
+    reason: str

+ 151 - 0
tests/test_runtime_debugger.py

@@ -0,0 +1,151 @@
+from __future__ import annotations
+
+from collections.abc import Generator
+from datetime import datetime
+from pathlib import Path
+
+from sqlalchemy.orm import Session
+
+from tests.conftest import build_fastapi_test_client, prepare_service_import
+
+
+def test_runtime_debugger_pause_step_and_breakpoint_continue(tmp_path: Path) -> None:
+    # End-to-end debugger flow over the HTTP API for a two-node workflow
+    # (start -> answer): pause, blocked execute-next, single step,
+    # continue-to-breakpoint, then continue to completion.
+    prepare_service_import(
+        "runtime-service",
+        libs=("core-domain", "core-shared", "core-db", "core-events", "core-dsl"),
+    )
+
+    # Service modules are imported only after prepare_service_import has run.
+    from app.api.routes import get_runtime_application_service
+    from app.application.services import RuntimeApplicationService
+    from app.bootstrap.app import create_app
+    from app.bootstrap.settings import RuntimeServiceSettings
+    from app.db.session import build_session_factory
+    from app.domain.repositories import (
+        ExecutionLogRepository,
+        NodeArtifactRepository,
+        NodeRunRepository,
+        TraceSpanRepository,
+        WorkflowRunRepository,
+    )
+    from app.infrastructure.executors import build_node_execution_dispatcher
+    from core_db import Base
+    from core_domain import WorkflowVersionContract
+
+    # Stand-in for the workflow client: always serves the fixed "debug_flow"
+    # workflow version and asserts it is asked for the expected ids.
+    class FakeWorkflowClient:
+        def get_workflow_version(
+            self,
+            *,
+            tenant_id: str,
+            workflow_version_id: str,
+        ) -> WorkflowVersionContract:
+            assert tenant_id == "t1"
+            assert workflow_version_id == "wv1"
+            return WorkflowVersionContract(
+                id="wv1",
+                tenant_id="t1",
+                workflow_id="wf1",
+                version_no=1,
+                status="published",
+                created_time=datetime.utcnow(),
+                dsl_json={
+                    "code": "debug_flow",
+                    "nodes": [
+                        {"id": "start", "type": "template", "config": {"template": "hello"}},
+                        {"id": "answer", "type": "answer", "config": {"text": "done"}},
+                    ],
+                    "edges": [{"source": "start", "target": "answer"}],
+                },
+            )
+
+    # File-backed SQLite database under the pytest tmp_path, with all tables created.
+    settings = RuntimeServiceSettings(database_url=f"sqlite:///{tmp_path / 'runtime.db'}")
+    session_factory = build_session_factory(settings)
+    engine = session_factory.kw["bind"]
+    Base.metadata.create_all(bind=engine)
+
+    # Dependency override: builds the application service on a fresh session
+    # per request and closes the session afterwards.
+    def override_service() -> Generator[RuntimeApplicationService, None, None]:
+        db: Session = session_factory()
+        try:
+            yield RuntimeApplicationService(
+                workflow_run_repository=WorkflowRunRepository(db),
+                node_run_repository=NodeRunRepository(db),
+                execution_log_repository=ExecutionLogRepository(db),
+                node_artifact_repository=NodeArtifactRepository(db),
+                trace_span_repository=TraceSpanRepository(db),
+                execution_dispatcher=build_node_execution_dispatcher(),
+                workflow_client=FakeWorkflowClient(),
+            )
+        finally:
+            db.close()
+
+    app = create_app()
+    app.state.session_factory = session_factory
+    app.dependency_overrides[get_runtime_application_service] = override_service
+    client = build_fastapi_test_client(app)
+
+    # 1. Create the run.
+    create_response = client.post(
+        "/runtime/runs",
+        json={
+            "tenant_id": "t1",
+            "app_id": "app1",
+            "app_version_id": "av1",
+            "workflow_id": "wf1",
+            "workflow_version_id": "wv1",
+        },
+    )
+    assert create_response.status_code == 200
+    run_id = create_response.json()["run"]["id"]
+
+    # 2. Pause it before anything has executed: only "start" is queued.
+    pause_response = client.post(
+        f"/runtime/runs/{run_id}/debug/pause",
+        params={"tenant_id": "t1"},
+    )
+    assert pause_response.status_code == 200
+    assert pause_response.json()["run"]["status"] == "paused"
+    assert pause_response.json()["queued_node_ids"] == ["start"]
+
+    # 3. The regular execute-next endpoint must not run nodes of a paused run:
+    #    it answers with executor "debug_paused" and the node stays queued.
+    protected_execute_response = client.post(
+        f"/runtime/runs/{run_id}/execute-next",
+        params={"tenant_id": "t1"},
+        json={"worker_key": "debugger"},
+    )
+    assert protected_execute_response.status_code == 200
+    assert protected_execute_response.json()["executor_name"] == "debug_paused"
+    assert protected_execute_response.json()["node_run"]["status"] == "queued"
+
+    # 4. A debug step executes exactly "start" and leaves the run paused
+    #    with "answer" queued next.
+    step_response = client.post(
+        f"/runtime/runs/{run_id}/debug/step",
+        params={"tenant_id": "t1"},
+        json={"worker_key": "debugger"},
+    )
+    assert step_response.status_code == 200
+    step_payload = step_response.json()
+    assert step_payload["reason"] == "step_completed"
+    assert [item["node_id"] for item in step_payload["executed_node_runs"]] == ["start"]
+    assert step_payload["snapshot"]["run"]["status"] == "paused"
+    assert step_payload["snapshot"]["completed_node_ids"] == ["start"]
+    assert step_payload["snapshot"]["queued_node_ids"] == ["answer"]
+
+    # 5. Continue with a breakpoint on "answer": nothing is executed and the
+    #    response names the node execution stopped in front of.
+    breakpoint_response = client.post(
+        f"/runtime/runs/{run_id}/debug/continue",
+        params={"tenant_id": "t1"},
+        json={"worker_key": "debugger", "breakpoint_node_ids": ["answer"], "max_steps": 5},
+    )
+    assert breakpoint_response.status_code == 200
+    breakpoint_payload = breakpoint_response.json()
+    assert breakpoint_payload["reason"] == "breakpoint_hit"
+    assert breakpoint_payload["paused_before_node_id"] == "answer"
+    assert breakpoint_payload["executed_node_runs"] == []
+    assert breakpoint_payload["snapshot"]["queued_node_ids"] == ["answer"]
+
+    # 6. Continue without breakpoints: the run finishes with both nodes completed.
+    finish_response = client.post(
+        f"/runtime/runs/{run_id}/debug/continue",
+        params={"tenant_id": "t1"},
+        json={"worker_key": "debugger", "max_steps": 5},
+    )
+    assert finish_response.status_code == 200
+    finish_payload = finish_response.json()
+    assert finish_payload["snapshot"]["run"]["status"] == "completed"
+    assert finish_payload["snapshot"]["completed_node_ids"] == ["start", "answer"]
+    assert finish_payload["snapshot"]["queued_node_ids"] == []
+
+    # Dispose of the engine once the test is done with the database.
+    engine.dispose()