Explorar el Código

feat: add runtime workflow bootstrap and transitions

Jax Docker hace 2 meses
padre
commit
d272c7b9d2

+ 31 - 0
README.md

@@ -123,6 +123,15 @@ Invoke-RestMethod -Method Post `
   -Body '{"tenant_id":"t1","app_id":"app-1","app_version_id":"appv-1","workflow_id":"wf-1","workflow_version_id":"wfv-1","session_id":"sess-1","initial_node":{"node_id":"start","node_type":"llm"}}'
 ```
 
+如果不传 `initial_node`,`runtime-service` 会调用 `workflow-service` 读取对应的 `workflow version`,并从 DSL 中自动推导首节点:
+
+```powershell
+Invoke-RestMethod -Method Post `
+  -Uri http://127.0.0.1:8003/runtime/runs `
+  -ContentType "application/json" `
+  -Body '{"tenant_id":"t1","app_id":"app-1","app_version_id":"appv-1","workflow_id":"wf-1","workflow_version_id":"wfv-1","session_id":"sess-1"}'
+```
+
 一条链直接派发到 runtime:
 
 ```powershell
@@ -148,6 +157,28 @@ Invoke-RestMethod -Method Post `
   -Body '{"tenant_id":"t1","tool_id":"tool-1","input_schema_json":{"query":{"type":"string"}},"invoke_config_json":{"method":"GET","path":"/products/search"}}'
 ```
 
+运行状态推进示例:
+
+```powershell
+Invoke-RestMethod -Method Post `
+  -Uri http://127.0.0.1:8003/runtime/node-runs/node-run-id/status `
+  -ContentType "application/json" `
+  -Body '{"status":"running","worker_key":"runtime-worker-1"}'
+```
+
+```powershell
+Invoke-RestMethod -Method Post `
+  -Uri http://127.0.0.1:8003/runtime/runs/run-id/status `
+  -ContentType "application/json" `
+  -Body '{"status":"completed"}'
+```
+
+说明:
+
+- 当你调用 `node-runs/{node_run_id}/status` 更新节点状态时,`runtime-service` 会自动聚合当前运行下所有 `node_run` 的状态,并同步刷新 `workflow_run.status`
+- 当前规则按以下优先级判断:任一节点 `failed` 则运行 `failed`;否则有节点 `running` 则运行 `running`;否则全部节点都为 `completed` 或 `skipped` 则运行 `completed`;其余情况(例如仍有 `queued`/`pending` 节点)运行保持 `running`
+- 当某个 `node_run` 被更新为 `completed` 时,`runtime-service` 还会基于 `workflow version` 的 DSL 自动查找后继节点,并为尚未存在 `node_run` 的后继节点创建新的 `queued` 状态 `node_run`(同一运行下已存在相同 `node_id` 的 `node_run` 时会跳过,不重复创建)
+
 ## 目录结构
 
 ```text

+ 1 - 1
libs/core-domain/pyproject.toml

@@ -8,6 +8,7 @@ version = "0.1.0"
 description = "Domain models for agent platform."
 requires-python = ">=3.11"
 dependencies = [
+  "core-shared",
   "pydantic>=2.7,<3.0",
 ]
 
@@ -16,4 +17,3 @@ package-dir = {"" = "src"}
 
 [tool.setuptools.packages.find]
 where = ["src"]
-

+ 6 - 0
libs/core-domain/src/core_domain/__init__.py

@@ -1,18 +1,24 @@
 from .runtime_contracts import (
     InitialNodeContract,
     NodeRunContract,
+    NodeRunStatusUpdateContract,
     RunBootstrapContract,
     RunCreateContract,
+    WorkflowRunStatusUpdateContract,
     WorkflowRunContract,
 )
 from .service import ServiceDescriptor, ServiceHealth
+from .workflow_contracts import WorkflowVersionContract
 
 __all__ = [
     "InitialNodeContract",
     "NodeRunContract",
+    "NodeRunStatusUpdateContract",
     "RunBootstrapContract",
     "RunCreateContract",
     "ServiceDescriptor",
     "ServiceHealth",
+    "WorkflowRunStatusUpdateContract",
     "WorkflowRunContract",
+    "WorkflowVersionContract",
 ]

+ 19 - 3
libs/core-domain/src/core_domain/runtime_contracts.py

@@ -1,12 +1,16 @@
 from datetime import datetime
+from typing import Literal
 
 from pydantic import BaseModel
 
+NodeRunStatus = Literal["pending", "queued", "running", "completed", "failed", "skipped"]
+WorkflowRunStatus = Literal["pending", "running", "completed", "failed", "cancelled", "paused"]
+
 
 class InitialNodeContract(BaseModel):
     node_id: str
     node_type: str
-    status: str = "queued"
+    status: NodeRunStatus = "queued"
 
 
 class RunCreateContract(BaseModel):
@@ -35,7 +39,7 @@ class WorkflowRunContract(BaseModel):
     parent_run_id: str | None = None
     root_run_id: str | None = None
     run_type: str
-    status: str
+    status: WorkflowRunStatus
     trigger_type: str
     priority: int
     current_node_count: int
@@ -50,7 +54,7 @@ class NodeRunContract(BaseModel):
     node_id: str
     node_type: str
     attempt_no: int
-    status: str
+    status: NodeRunStatus
     queued_time: datetime | None = None
     created_time: datetime
 
@@ -59,3 +63,15 @@ class RunBootstrapContract(BaseModel):
     run: WorkflowRunContract
     initial_node: NodeRunContract | None = None
 
+
+class WorkflowRunStatusUpdateContract(BaseModel):
+    """Payload for changing the status of a workflow run."""
+
+    # Target status; restricted to the WorkflowRunStatus literals.
+    status: WorkflowRunStatus
+    # Optional error details; both default to None when omitted.
+    error_code: str | None = None
+    error_message: str | None = None
+
+
+class NodeRunStatusUpdateContract(BaseModel):
+    """Payload for changing the status of a single node run."""
+
+    # Target status; restricted to the NodeRunStatus literals.
+    status: NodeRunStatus
+    # Optional worker key to record on the node run.
+    worker_key: str | None = None
+    # Optional error details; both default to None when omitted.
+    error_code: str | None = None
+    error_message: str | None = None

+ 19 - 0
libs/core-domain/src/core_domain/workflow_contracts.py

@@ -0,0 +1,19 @@
+from datetime import datetime
+
+from pydantic import BaseModel
+
+from core_shared import JSONValue
+
+
+class WorkflowVersionContract(BaseModel):
+    """Serialized representation of one workflow version."""
+
+    id: str
+    tenant_id: str
+    workflow_id: str
+    version_no: int
+    # Workflow DSL document as a JSON object; may be absent.
+    dsl_json: dict[str, JSONValue] | None = None
+    # presumably the compiled form of the DSL — TODO confirm against the producer
+    compiled_plan_json: dict[str, JSONValue] | None = None
+    schema_version: str | None = None
+    checksum: str | None = None
+    # Free-form string here (not a Literal), unlike the runtime status types.
+    status: str
+    created_time: datetime
+

+ 48 - 4
services/runtime-service/app/api/routes.py

@@ -1,20 +1,37 @@
-from fastapi import APIRouter, Depends, Query
+from fastapi import APIRouter, Depends, HTTPException, Query
 from sqlalchemy import text
 from sqlalchemy.orm import Session
 
 from core_domain import ServiceHealth
 from app.application.services import RuntimeApplicationService
+from app.bootstrap.settings import RuntimeServiceSettings
 from app.db.session import get_db
 from app.domain.repositories import NodeRunRepository, WorkflowRunRepository
-from app.schemas.run import RunBootstrapResponse, RunCreateRequest, NodeRunResponse, WorkflowRunResponse
+from app.infrastructure.workflow_client import WorkflowServiceClient, WorkflowServiceClientError
+from app.schemas.run import (
+    NodeRunResponse,
+    NodeRunStatusUpdateRequest,
+    RunBootstrapResponse,
+    RunCreateRequest,
+    WorkflowRunResponse,
+    WorkflowRunStatusUpdateRequest,
+)
 
 router = APIRouter()
 
 
-def get_runtime_application_service(db: Session = Depends(get_db)) -> RuntimeApplicationService:
+def get_runtime_settings() -> RuntimeServiceSettings:
+    """Build and return a new RuntimeServiceSettings instance on every call."""
+    return RuntimeServiceSettings()
+
+
+def get_runtime_application_service(
+    db: Session = Depends(get_db),
+    settings: RuntimeServiceSettings = Depends(get_runtime_settings),
+) -> RuntimeApplicationService:
     return RuntimeApplicationService(
         workflow_run_repository=WorkflowRunRepository(db),
         node_run_repository=NodeRunRepository(db),
+        workflow_client=WorkflowServiceClient(base_url=settings.workflow_service_url),
     )
 
 
@@ -29,7 +46,10 @@ def create_run(
     payload: RunCreateRequest,
     service: RuntimeApplicationService = Depends(get_runtime_application_service),
 ) -> RunBootstrapResponse:
-    workflow_run, initial_node = service.create_run(payload)
+    try:
+        workflow_run, initial_node = service.create_run(payload)
+    except WorkflowServiceClientError as exc:
+        raise HTTPException(status_code=502, detail=str(exc)) from exc
     return RunBootstrapResponse(
         run=WorkflowRunResponse.from_entity(workflow_run),
         initial_node=NodeRunResponse.from_entity(initial_node) if initial_node else None,
@@ -58,3 +78,27 @@ def list_node_runs(
         NodeRunResponse.from_entity(item)
         for item in service.list_node_runs(tenant_id=tenant_id, run_id=run_id)
     ]
+
+
+@router.post("/runs/{run_id}/status", response_model=WorkflowRunResponse)
+def update_run_status(
+    run_id: str,
+    payload: WorkflowRunStatusUpdateRequest,
+    service: RuntimeApplicationService = Depends(get_runtime_application_service),
+) -> WorkflowRunResponse:
+    """Set the status of a workflow run and return the updated run.
+
+    Raises:
+        HTTPException: 404 when the service returns no run for ``run_id``.
+    """
+    entity = service.update_run_status(run_id=run_id, payload=payload)
+    if entity is None:
+        raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
+    return WorkflowRunResponse.from_entity(entity)
+
+
+@router.post("/node-runs/{node_run_id}/status", response_model=NodeRunResponse)
+def update_node_run_status(
+    node_run_id: str,
+    payload: NodeRunStatusUpdateRequest,
+    service: RuntimeApplicationService = Depends(get_runtime_application_service),
+) -> NodeRunResponse:
+    """Set the status of a node run and return the updated node run.
+
+    Raises:
+        HTTPException: 404 when the service returns no node run for
+            ``node_run_id``; 502 when the workflow-service lookup performed
+            while handling the update fails.
+    """
+    try:
+        entity = service.update_node_run_status(node_run_id=node_run_id, payload=payload)
+    except WorkflowServiceClientError as exc:
+        # Marking a node completed makes the service fetch the workflow version
+        # to schedule successors; report an upstream failure as 502, the same
+        # way create_run does, instead of an unhandled 500.
+        raise HTTPException(status_code=502, detail=str(exc)) from exc
+    if entity is None:
+        raise HTTPException(status_code=404, detail=f"node_run not found: {node_run_id}")
+    return NodeRunResponse.from_entity(entity)

+ 132 - 8
services/runtime-service/app/application/services.py

@@ -1,6 +1,10 @@
+from core_domain import InitialNodeContract, NodeRunStatus, WorkflowRunStatus
+
 from app.db.models import NodeRun, WorkflowRun
 from app.domain.repositories import NodeRunRepository, WorkflowRunRepository
-from app.schemas.run import RunCreateRequest
+from app.infrastructure.planner import derive_initial_node, derive_successor_nodes
+from app.infrastructure.workflow_client import WorkflowServiceClient
+from app.schemas.run import NodeRunStatusUpdateRequest, RunCreateRequest, WorkflowRunStatusUpdateRequest
 
 
 class RuntimeApplicationService:
@@ -8,11 +12,14 @@ class RuntimeApplicationService:
         self,
         workflow_run_repository: WorkflowRunRepository,
         node_run_repository: NodeRunRepository,
+        workflow_client: WorkflowServiceClient | None = None,
     ) -> None:
         self.workflow_run_repository = workflow_run_repository
         self.node_run_repository = node_run_repository
+        self.workflow_client = workflow_client
 
     def create_run(self, payload: RunCreateRequest) -> tuple[WorkflowRun, NodeRun | None]:
+        initial_node = payload.initial_node or self._plan_initial_node(payload)
         workflow_run = self.workflow_run_repository.create(
             tenant_id=payload.tenant_id,
             app_id=payload.app_id,
@@ -27,21 +34,21 @@ class RuntimeApplicationService:
             priority=payload.priority,
         )
 
-        initial_node = None
-        if payload.initial_node is not None:
+        node_run = None
+        if initial_node is not None:
             self.workflow_run_repository.update_node_count(
                 run_id=workflow_run.id,
                 current_node_count=1,
             )
-            initial_node = self.node_run_repository.create(
+            node_run = self.node_run_repository.create(
                 tenant_id=payload.tenant_id,
                 run_id=workflow_run.id,
-                node_id=payload.initial_node.node_id,
-                node_type=payload.initial_node.node_type,
-                status=payload.initial_node.status,
+                node_id=initial_node.node_id,
+                node_type=initial_node.node_type,
+                status=initial_node.status,
             )
 
-        return workflow_run, initial_node
+        return workflow_run, node_run
 
     def list_runs(self, tenant_id: str, session_id: str | None = None) -> list[WorkflowRun]:
         return self.workflow_run_repository.list_by_scope(tenant_id=tenant_id, session_id=session_id)
@@ -49,3 +56,120 @@ class RuntimeApplicationService:
     def list_node_runs(self, tenant_id: str, run_id: str) -> list[NodeRun]:
         return self.node_run_repository.list_by_run(tenant_id=tenant_id, run_id=run_id)
 
+    def update_run_status(
+        self,
+        run_id: str,
+        payload: WorkflowRunStatusUpdateRequest,
+    ) -> WorkflowRun | None:
+        """Apply the requested status and error fields to a workflow run.
+
+        Delegates to the workflow run repository and returns whatever it
+        returns (``None`` when the run does not exist).
+        """
+        return self.workflow_run_repository.update_status(
+            run_id=run_id,
+            status=payload.status,
+            error_code=payload.error_code,
+            error_message=payload.error_message,
+        )
+
+    def update_node_run_status(
+        self,
+        node_run_id: str,
+        payload: NodeRunStatusUpdateRequest,
+    ) -> NodeRun | None:
+        """Update a node run, schedule successors on completion, and resync the run.
+
+        Returns the updated node run, or ``None`` when the repository finds no
+        node run with ``node_run_id``.
+        """
+        node_run = self.node_run_repository.update_status(
+            node_run_id=node_run_id,
+            status=payload.status,
+            worker_key=payload.worker_key,
+            error_code=payload.error_code,
+            error_message=payload.error_message,
+        )
+        if node_run is None:
+            return None
+
+        # Successors are created before the run status is aggregated so the
+        # aggregation below sees the newly created node runs as well.
+        # NOTE(review): the node status was already written above; if successor
+        # scheduling raises, the aggregation below is skipped — confirm intended.
+        if payload.status == "completed":
+            self._schedule_successor_nodes(node_run)
+
+        self._sync_workflow_run_status_from_nodes(
+            tenant_id=node_run.tenant_id,
+            run_id=node_run.run_id,
+        )
+        return node_run
+
+    def _plan_initial_node(self, payload: RunCreateRequest) -> InitialNodeContract | None:
+        """Derive the first node of a run from the referenced workflow version.
+
+        Returns ``None`` when no workflow client is configured; otherwise
+        fetches the workflow version and returns the result of
+        ``derive_initial_node`` for it.
+        """
+        if self.workflow_client is None:
+            return None
+        workflow_version = self.workflow_client.get_workflow_version(
+            tenant_id=payload.tenant_id,
+            workflow_version_id=payload.workflow_version_id,
+        )
+        return derive_initial_node(workflow_version)
+
+    def _schedule_successor_nodes(self, node_run: NodeRun) -> None:
+        """Create node runs for the successors of ``node_run`` that have none yet.
+
+        Does nothing when no workflow client is configured, when the owning
+        workflow run cannot be loaded, or when no successors are derived.
+        """
+        if self.workflow_client is None:
+            return
+
+        workflow_run = self.workflow_run_repository.get_by_id(node_run.run_id)
+        if workflow_run is None:
+            return
+
+        workflow_version = self.workflow_client.get_workflow_version(
+            tenant_id=node_run.tenant_id,
+            workflow_version_id=workflow_run.workflow_version_id,
+        )
+        successor_nodes = derive_successor_nodes(workflow_version, node_run.node_id)
+        if not successor_nodes:
+            return
+
+        # Look up which successor node ids already have a node run in this run
+        # so they are not created a second time.
+        # NOTE(review): this lookup happens once, before the loop, so an id that
+        # appears twice in successor_nodes would not be filtered — confirm.
+        existing_nodes = self.node_run_repository.list_by_run_and_node_ids(
+            tenant_id=node_run.tenant_id,
+            run_id=node_run.run_id,
+            node_ids=[item.node_id for item in successor_nodes],
+        )
+        existing_node_ids = {item.node_id for item in existing_nodes}
+
+        for successor in successor_nodes:
+            if successor.node_id in existing_node_ids:
+                continue
+            self.node_run_repository.create(
+                tenant_id=node_run.tenant_id,
+                run_id=node_run.run_id,
+                node_id=successor.node_id,
+                node_type=successor.node_type,
+                status=successor.status,
+            )
+
+    def _sync_workflow_run_status_from_nodes(self, *, tenant_id: str, run_id: str) -> None:
+        """Recompute a workflow run's node count and status from its node runs.
+
+        Leaves the run untouched when it has no node runs.
+        """
+        node_runs = self.node_run_repository.list_by_run(tenant_id=tenant_id, run_id=run_id)
+        if not node_runs:
+            return
+
+        # The stored node count is refreshed to the total number of node runs.
+        self.workflow_run_repository.update_node_count(
+            run_id=run_id,
+            current_node_count=len(node_runs),
+        )
+
+        # NOTE(review): the derived status is written without checking the run's
+        # current status, so a run set to "cancelled" or "paused" would be
+        # overwritten by the next node update — confirm this is intended.
+        next_status, error_code, error_message = self._derive_run_status(node_runs)
+        self.workflow_run_repository.update_status(
+            run_id=run_id,
+            status=next_status,
+            error_code=error_code,
+            error_message=error_message,
+        )
+
+    def _derive_run_status(
+        self,
+        node_runs: list[NodeRun],
+    ) -> tuple[WorkflowRunStatus, str | None, str | None]:
+        """Aggregate node run statuses into ``(run_status, error_code, error_message)``.
+
+        Precedence: any failed node makes the run "failed" (error details come
+        from the first failed node in list order); otherwise the run is
+        "completed" only when there is at least one node and every node is
+        "completed" or "skipped"; in every other case it is "running".
+        """
+        observed = {entry.status for entry in node_runs}
+
+        if "failed" in observed:
+            for entry in node_runs:
+                if entry.status == "failed":
+                    return "failed", entry.error_code, entry.error_message
+            return "failed", None, None
+
+        # "running" is not a terminal status, so a running node can never
+        # satisfy the subset check and falls through to the default below.
+        if observed and observed <= {"completed", "skipped"}:
+            return "completed", None, None
+
+        return "running", None, None

+ 1 - 1
services/runtime-service/app/bootstrap/settings.py

@@ -5,4 +5,4 @@ class RuntimeServiceSettings(ServiceSettings):
     service_name: str = "runtime-service"
     service_port: int = 8003
     database_url: str = "sqlite:///./runtime_service.db"
-
+    workflow_service_url: str = "http://127.0.0.1:8002"

+ 77 - 0
services/runtime-service/app/domain/repositories.py

@@ -4,6 +4,7 @@ from sqlalchemy import select
 from sqlalchemy.orm import Session
 
 from app.db.models import NodeRun, WorkflowRun
+from core_domain import NodeRunStatus, WorkflowRunStatus
 
 
 class WorkflowRunRepository:
@@ -63,6 +64,35 @@ class WorkflowRunRepository:
         entity.current_node_count = current_node_count
         self.db.commit()
 
+    def get_by_id(self, run_id: str) -> WorkflowRun | None:
+        """Load a workflow run by primary key; ``None`` when it does not exist."""
+        # NOTE(review): unlike the list queries, this lookup is not scoped by tenant_id.
+        return self.db.get(WorkflowRun, run_id)
+
+    def update_status(
+        self,
+        *,
+        run_id: str,
+        status: WorkflowRunStatus,
+        error_code: str | None = None,
+        error_message: str | None = None,
+    ) -> WorkflowRun | None:
+        """Persist a new status and error fields on a workflow run.
+
+        Returns the refreshed entity, or ``None`` when no run with ``run_id``
+        exists (nothing is written in that case).
+        """
+        entity = self.db.get(WorkflowRun, run_id)
+        if entity is None:
+            return None
+
+        # Error fields are overwritten unconditionally, so omitted values
+        # clear any previously stored error.
+        entity.status = status
+        entity.error_code = error_code
+        entity.error_message = error_message
+
+        # NOTE(review): datetime.utcnow() yields a naive datetime and is
+        # deprecated since Python 3.12 — confirm the column expects naive UTC.
+        now = datetime.utcnow()
+        # started_time is stamped only the first time the run becomes running.
+        if status == "running" and entity.started_time is None:
+            entity.started_time = now
+        # finished_time is (re)stamped on every transition into these statuses.
+        if status in {"completed", "failed", "cancelled"}:
+            entity.finished_time = now
+
+        self.db.commit()
+        self.db.refresh(entity)
+        return entity
+
 
 class NodeRunRepository:
     def __init__(self, db: Session) -> None:
@@ -100,3 +130,50 @@ class NodeRunRepository:
         )
         return list(self.db.scalars(stmt))
 
+    def list_by_run_and_node_ids(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str,
+        node_ids: list[str],
+    ) -> list[NodeRun]:
+        """Return the node runs of one run whose ``node_id`` is in ``node_ids``.
+
+        An empty ``node_ids`` returns ``[]`` without issuing a query.
+        """
+        if not node_ids:
+            return []
+        stmt = (
+            select(NodeRun)
+            .where(NodeRun.tenant_id == tenant_id)
+            .where(NodeRun.run_id == run_id)
+            .where(NodeRun.node_id.in_(node_ids))
+        )
+        return list(self.db.scalars(stmt))
+
+    def get_by_id(self, node_run_id: str) -> NodeRun | None:
+        """Load a node run by primary key; ``None`` when it does not exist."""
+        return self.db.get(NodeRun, node_run_id)
+
+    def update_status(
+        self,
+        *,
+        node_run_id: str,
+        status: NodeRunStatus,
+        worker_key: str | None = None,
+        error_code: str | None = None,
+        error_message: str | None = None,
+    ) -> NodeRun | None:
+        """Persist a new status, worker key and error fields on a node run.
+
+        Returns the refreshed entity, or ``None`` when no node run with
+        ``node_run_id`` exists (nothing is written in that case).
+        """
+        entity = self.db.get(NodeRun, node_run_id)
+        if entity is None:
+            return None
+
+        # NOTE(review): worker_key and the error fields are overwritten
+        # unconditionally, so an update that omits worker_key clears the one
+        # stored by an earlier update — confirm this is intended.
+        entity.status = status
+        entity.worker_key = worker_key
+        entity.error_code = error_code
+        entity.error_message = error_message
+
+        # NOTE(review): datetime.utcnow() yields a naive datetime and is
+        # deprecated since Python 3.12 — confirm the column expects naive UTC.
+        now = datetime.utcnow()
+        # started_time is stamped only the first time the node becomes running.
+        if status == "running" and entity.started_time is None:
+            entity.started_time = now
+        # finished_time is (re)stamped on every transition into these statuses.
+        if status in {"completed", "failed", "skipped"}:
+            entity.finished_time = now
+
+        self.db.commit()
+        self.db.refresh(entity)
+        return entity

+ 1 - 0
services/runtime-service/app/infrastructure/__init__.py

@@ -0,0 +1 @@
+

+ 104 - 0
services/runtime-service/app/infrastructure/planner.py

@@ -0,0 +1,104 @@
+from core_domain import InitialNodeContract, WorkflowVersionContract
+from core_shared import JSONValue
+
+
+def derive_initial_node(workflow_version: WorkflowVersionContract) -> InitialNodeContract | None:
+    """Pick the node a run should start with from the workflow DSL.
+
+    The first node (in DSL order) that has string ``id``/``type`` and is not
+    the target of any edge wins. If every such node has an incoming edge, the
+    first listed node is used when it has string ``id``/``type``. Returns
+    ``None`` when the DSL has no usable ``nodes`` list or no candidate fits.
+    """
+    dsl = workflow_version.dsl_json
+    raw_nodes = dsl.get("nodes") if isinstance(dsl, dict) else None
+    if not isinstance(raw_nodes, list):
+        return None
+
+    candidates: list[dict[str, JSONValue]] = [
+        entry for entry in raw_nodes if isinstance(entry, dict)
+    ]
+    if not candidates:
+        return None
+
+    targeted = _collect_incoming_targets(dsl.get("edges"))
+
+    # Preferred choice: a well-formed node that no edge points at.
+    for entry in candidates:
+        ident, kind = entry.get("id"), entry.get("type")
+        if not (isinstance(ident, str) and isinstance(kind, str)):
+            continue
+        if ident not in targeted:
+            return InitialNodeContract(node_id=ident, node_type=kind, status="queued")
+
+    # Fallback: no root was found, so start from the first listed node.
+    head = candidates[0]
+    ident, kind = head.get("id"), head.get("type")
+    if isinstance(ident, str) and isinstance(kind, str):
+        return InitialNodeContract(node_id=ident, node_type=kind, status="queued")
+
+    return None
+
+
+def derive_successor_nodes(
+    workflow_version: WorkflowVersionContract,
+    current_node_id: str,
+) -> list[InitialNodeContract]:
+    """List the nodes that directly follow ``current_node_id`` in the workflow DSL.
+
+    Each successor is returned as a "queued" node contract. Edge targets that
+    do not match a node with string ``id``/``type`` are dropped. Returns an
+    empty list when the DSL, its ``nodes`` or its ``edges`` are not usable.
+    """
+    dsl = workflow_version.dsl_json
+    if not isinstance(dsl, dict):
+        return []
+
+    raw_nodes, raw_edges = dsl.get("nodes"), dsl.get("edges")
+    if not (isinstance(raw_nodes, list) and isinstance(raw_edges, list)):
+        return []
+
+    type_by_id = _build_node_type_map(raw_nodes)
+    return [
+        InitialNodeContract(
+            node_id=target_id,
+            node_type=type_by_id[target_id],
+            status="queued",
+        )
+        for target_id in _collect_successor_ids(raw_edges, current_node_id)
+        if target_id in type_by_id
+    ]
+
+
+def _collect_incoming_targets(edges_value: JSONValue | None) -> set[str]:
+    """Return the set of node ids that appear as a string ``target`` of an edge.
+
+    Anything that is not a list yields an empty set; list entries that are not
+    dicts, or whose ``target`` is not a string, are ignored.
+    """
+    if not isinstance(edges_value, list):
+        return set()
+
+    return {
+        edge["target"]
+        for edge in edges_value
+        if isinstance(edge, dict) and isinstance(edge.get("target"), str)
+    }
+
+
+def _build_node_type_map(nodes_value: list[JSONValue]) -> dict[str, str]:
+    """Map node ``id`` to node ``type`` for every well-formed node entry.
+
+    Entries that are not dicts, or whose ``id``/``type`` are not both strings,
+    are ignored. When an id occurs more than once the last type wins.
+    """
+    return {
+        entry["id"]: entry["type"]
+        for entry in nodes_value
+        if isinstance(entry, dict)
+        and isinstance(entry.get("id"), str)
+        and isinstance(entry.get("type"), str)
+    }
+
+
+def _collect_successor_ids(edges_value: list[JSONValue], current_node_id: str) -> list[str]:
+    """Return the distinct targets of edges leaving ``current_node_id``, in edge order.
+
+    Entries that are not dicts, or whose ``source``/``target`` are not both
+    strings, are ignored. A target reached through several parallel edges is
+    reported once, so a consumer that acts once per returned id does not act
+    on the same successor twice.
+    """
+    # A dict keeps insertion order, giving an ordered de-duplication.
+    successor_ids: dict[str, None] = {}
+    for item in edges_value:
+        if not isinstance(item, dict):
+            continue
+        source = item.get("source")
+        target = item.get("target")
+        if isinstance(source, str) and isinstance(target, str) and source == current_node_id:
+            successor_ids.setdefault(target)
+    return list(successor_ids)

+ 26 - 0
services/runtime-service/app/infrastructure/workflow_client.py

@@ -0,0 +1,26 @@
+import httpx
+
+from core_domain import WorkflowVersionContract
+
+
+class WorkflowServiceClientError(Exception):
+    """Raised when a request to workflow-service fails."""
+
+    pass
+
+
+class WorkflowServiceClient:
+    """Synchronous HTTP client for reading data from workflow-service."""
+
+    def __init__(self, base_url: str, timeout_seconds: float = 10.0) -> None:
+        """Store the service base URL (trailing slashes removed) and request timeout."""
+        self.base_url = base_url.rstrip("/")
+        self.timeout_seconds = timeout_seconds
+
+    def get_workflow_version(self, *, tenant_id: str, workflow_version_id: str) -> WorkflowVersionContract:
+        """Fetch one workflow version and parse it into a ``WorkflowVersionContract``.
+
+        Raises:
+            WorkflowServiceClientError: when the request fails, the service
+                answers with an error status, or the response body is not a
+                valid workflow version document.
+        """
+        try:
+            with httpx.Client(timeout=self.timeout_seconds) as client:
+                response = client.get(
+                    f"{self.base_url}/workflows/versions/{workflow_version_id}",
+                    params={"tenant_id": tenant_id},
+                )
+                response.raise_for_status()
+                return WorkflowVersionContract.model_validate(response.json())
+        except httpx.HTTPError as exc:
+            raise WorkflowServiceClientError(f"workflow-service request failed: {exc}") from exc
+        except ValueError as exc:
+            # A non-JSON body (json.JSONDecodeError) and a payload that fails
+            # model validation (pydantic ValidationError) are both ValueError
+            # subclasses; wrap them so callers only handle one error type.
+            raise WorkflowServiceClientError(
+                f"workflow-service returned an invalid response: {exc}"
+            ) from exc
+

+ 10 - 0
services/runtime-service/app/schemas/run.py

@@ -3,8 +3,10 @@ from typing import TYPE_CHECKING
 from core_domain import (
     InitialNodeContract,
     NodeRunContract,
+    NodeRunStatusUpdateContract,
     RunBootstrapContract,
     RunCreateContract,
+    WorkflowRunStatusUpdateContract,
     WorkflowRunContract,
 )
 
@@ -37,3 +39,11 @@ class NodeRunResponse(NodeRunContract):
 class RunBootstrapResponse(RunBootstrapContract):
     run: WorkflowRunResponse
     initial_node: NodeRunResponse | None = None
+
+
+class WorkflowRunStatusUpdateRequest(WorkflowRunStatusUpdateContract):
+    """Request schema for updating a workflow run's status; adds no fields to the contract."""
+
+    pass
+
+
+class NodeRunStatusUpdateRequest(NodeRunStatusUpdateContract):
+    """Request schema for updating a node run's status; adds no fields to the contract."""
+
+    pass

+ 1 - 0
services/runtime-service/pyproject.toml

@@ -10,6 +10,7 @@ requires-python = ">=3.11"
 dependencies = [
   "alembic>=1.13,<2.0",
   "fastapi>=0.111,<1.0",
+  "httpx>=0.27,<1.0",
   "uvicorn[standard]>=0.30,<1.0",
   "pydantic>=2.7,<3.0",
   "sqlalchemy>=2.0,<3.0",

+ 13 - 1
services/workflow-service/app/api/routes.py

@@ -1,4 +1,4 @@
-from fastapi import APIRouter, Depends, Query
+from fastapi import APIRouter, Depends, HTTPException, Query
 from sqlalchemy import text
 from sqlalchemy.orm import Session
 
@@ -122,3 +122,15 @@ def list_workflow_versions(
 ) -> list[WorkflowVersionResponse]:
     items = service.list_workflow_versions(tenant_id=tenant_id, workflow_id=workflow_id)
     return [WorkflowVersionResponse.from_entity(item) for item in items]
+
+
+@router.get("/versions/{workflow_version_id}", response_model=WorkflowVersionResponse)
+def get_workflow_version(
+    workflow_version_id: str,
+    tenant_id: str = Query(...),
+    service: WorkflowApplicationService = Depends(get_workflow_application_service),
+) -> WorkflowVersionResponse:
+    """Return a single workflow version for the given tenant.
+
+    ``tenant_id`` is a required query parameter.
+
+    Raises:
+        HTTPException: 404 when the service returns no matching version.
+    """
+    entity = service.get_workflow_version(tenant_id=tenant_id, workflow_version_id=workflow_version_id)
+    if entity is None:
+        raise HTTPException(status_code=404, detail=f"workflow_version not found: {workflow_version_id}")
+    return WorkflowVersionResponse.from_entity(entity)

+ 6 - 0
services/workflow-service/app/application/services.py

@@ -64,6 +64,12 @@ class WorkflowApplicationService:
             workflow_id=workflow_id,
         )
 
+    def get_workflow_version(self, tenant_id: str, workflow_version_id: str) -> WorkflowVersion | None:
+        """Look up one workflow version via the repository; ``None`` when not found."""
+        return self.workflow_version_repository.get_by_id(
+            tenant_id=tenant_id,
+            workflow_version_id=workflow_version_id,
+        )
+
     def create_app_version(self, payload: AppVersionCreateRequest) -> AppVersion:
         return self.app_version_repository.create(
             tenant_id=payload.tenant_id,

+ 8 - 0
services/workflow-service/app/domain/repositories.py

@@ -156,6 +156,14 @@ class WorkflowVersionRepository:
         )
         return list(self.db.scalars(stmt))
 
+    def get_by_id(self, *, tenant_id: str, workflow_version_id: str) -> WorkflowVersion | None:
+        """Return the workflow version with this id within the tenant, or ``None``."""
+        # Filtering on tenant_id as well as id keeps the lookup tenant-scoped.
+        stmt = (
+            select(WorkflowVersion)
+            .where(WorkflowVersion.tenant_id == tenant_id)
+            .where(WorkflowVersion.id == workflow_version_id)
+        )
+        return self.db.scalar(stmt)
+
     def _next_version_no(self, workflow_id: str) -> int:
         stmt = select(func.max(WorkflowVersion.version_no)).where(
             WorkflowVersion.workflow_id == workflow_id

+ 2 - 11
services/workflow-service/app/schemas/workflow.py

@@ -2,6 +2,7 @@ from datetime import datetime
 from typing import TYPE_CHECKING
 
 from pydantic import BaseModel
+from core_domain import WorkflowVersionContract
 from core_shared import JSONValue
 
 if TYPE_CHECKING:
@@ -41,17 +42,7 @@ class WorkflowVersionCreateRequest(BaseModel):
     status: str = "draft"
 
 
-class WorkflowVersionResponse(BaseModel):
-    id: str
-    tenant_id: str
-    workflow_id: str
-    version_no: int
-    dsl_json: dict[str, JSONValue] | None = None
-    compiled_plan_json: dict[str, JSONValue] | None = None
-    schema_version: str | None = None
-    checksum: str | None = None
-    status: str
-    created_time: datetime
+class WorkflowVersionResponse(WorkflowVersionContract):
 
     @classmethod
     def from_entity(cls, entity: "WorkflowVersion") -> "WorkflowVersionResponse":