Browse Source

feat: expand runtime execution and gateway observability

Jax Docker 2 months ago
parent
commit
5cbe3a11fc
86 changed files with 5042 additions and 130 deletions
  1. 15 0
      .dockerignore
  2. 5 1
      .gitignore
  3. 383 0
      README.md
  4. 4 0
      deployments/docker/.env.example
  5. 182 0
      deployments/docker/docker-compose.yml
  6. 25 0
      deployments/docker/python-service.Dockerfile
  7. 35 0
      libs/core-domain/src/core_domain/__init__.py
  8. 18 0
      libs/core-domain/src/core_domain/code_contracts.py
  9. 36 0
      libs/core-domain/src/core_domain/execution_contracts.py
  10. 25 0
      libs/core-domain/src/core_domain/model_contracts.py
  11. 5 0
      libs/core-domain/src/core_domain/runtime_contracts.py
  12. 47 0
      libs/core-domain/src/core_domain/tool_contracts.py
  13. 1 1
      libs/core-dsl/pyproject.toml
  14. 18 3
      libs/core-dsl/src/core_dsl/__init__.py
  15. 36 2
      libs/core-dsl/src/core_dsl/workflow.py
  16. 5 2
      libs/core-shared/src/core_shared/types.py
  17. 2 0
      pyproject.toml
  18. 394 0
      scripts/smoke_runtime_no_key.py
  19. 57 0
      services/api-gateway/alembic/versions/20260423_0001_add_gateway_request_audit.py
  20. 215 1
      services/api-gateway/app/api/routes.py
  21. 2 0
      services/api-gateway/app/bootstrap/app.py
  22. 8 1
      services/api-gateway/app/bootstrap/settings.py
  23. 2 1
      services/api-gateway/app/db/models/__init__.py
  24. 20 0
      services/api-gateway/app/db/models/gateway_request_audit.py
  25. 1 0
      services/api-gateway/app/domain/__init__.py
  26. 60 0
      services/api-gateway/app/domain/repositories.py
  27. 1 0
      services/api-gateway/app/infrastructure/__init__.py
  28. 51 0
      services/api-gateway/app/infrastructure/audit.py
  29. 112 0
      services/api-gateway/app/infrastructure/proxy.py
  30. 80 0
      services/api-gateway/app/infrastructure/request_context.py
  31. 42 0
      services/api-gateway/app/schemas/gateway.py
  32. 1 0
      services/api-gateway/pyproject.toml
  33. 1 0
      services/code-runner-service/app/__init__.py
  34. 1 0
      services/code-runner-service/app/api/__init__.py
  35. 39 0
      services/code-runner-service/app/api/routes.py
  36. 1 0
      services/code-runner-service/app/application/__init__.py
  37. 29 0
      services/code-runner-service/app/application/services.py
  38. 1 0
      services/code-runner-service/app/bootstrap/__init__.py
  39. 12 0
      services/code-runner-service/app/bootstrap/app.py
  40. 8 0
      services/code-runner-service/app/bootstrap/settings.py
  41. 1 0
      services/code-runner-service/app/infrastructure/__init__.py
  42. 98 0
      services/code-runner-service/app/infrastructure/runner.py
  43. 3 0
      services/code-runner-service/app/main.py
  44. 22 0
      services/code-runner-service/pyproject.toml
  45. 1 0
      services/model-gateway-service/app/__init__.py
  46. 1 0
      services/model-gateway-service/app/api/__init__.py
  47. 40 0
      services/model-gateway-service/app/api/routes.py
  48. 1 0
      services/model-gateway-service/app/application/__init__.py
  49. 26 0
      services/model-gateway-service/app/application/services.py
  50. 1 0
      services/model-gateway-service/app/bootstrap/__init__.py
  51. 15 0
      services/model-gateway-service/app/bootstrap/app.py
  52. 10 0
      services/model-gateway-service/app/bootstrap/settings.py
  53. 1 0
      services/model-gateway-service/app/infrastructure/__init__.py
  54. 98 0
      services/model-gateway-service/app/infrastructure/provider.py
  55. 3 0
      services/model-gateway-service/app/main.py
  56. 23 0
      services/model-gateway-service/pyproject.toml
  57. 27 0
      services/runtime-service/alembic/versions/20260423_0002_add_node_run_outputs.py
  58. 50 0
      services/runtime-service/alembic/versions/20260423_0003_add_execution_logs.py
  59. 56 0
      services/runtime-service/alembic/versions/20260423_0004_add_node_artifacts.py
  60. 60 0
      services/runtime-service/alembic/versions/20260423_0005_add_trace_spans.py
  61. 178 2
      services/runtime-service/app/api/routes.py
  62. 430 5
      services/runtime-service/app/application/services.py
  63. 3 0
      services/runtime-service/app/bootstrap/settings.py
  64. 4 2
      services/runtime-service/app/db/models/__init__.py
  65. 17 0
      services/runtime-service/app/db/models/execution_log.py
  66. 21 0
      services/runtime-service/app/db/models/node_artifact.py
  67. 4 1
      services/runtime-service/app/db/models/node_run.py
  68. 25 0
      services/runtime-service/app/db/models/trace_span.py
  69. 197 1
      services/runtime-service/app/domain/repositories.py
  70. 19 0
      services/runtime-service/app/infrastructure/__init__.py
  71. 28 0
      services/runtime-service/app/infrastructure/code_runner_client.py
  72. 192 0
      services/runtime-service/app/infrastructure/context.py
  73. 1056 0
      services/runtime-service/app/infrastructure/executors.py
  74. 28 0
      services/runtime-service/app/infrastructure/model_gateway_client.py
  75. 96 71
      services/runtime-service/app/infrastructure/planner.py
  76. 30 0
      services/runtime-service/app/infrastructure/tool_client.py
  77. 85 1
      services/runtime-service/app/schemas/run.py
  78. 2 0
      services/runtime-service/pyproject.toml
  79. 20 1
      services/tool-service/app/api/routes.py
  80. 25 0
      services/tool-service/app/application/services.py
  81. 23 0
      services/tool-service/app/domain/repositories.py
  82. 14 31
      services/tool-service/app/schemas/tool.py
  83. 1 1
      services/tool-service/pyproject.toml
  84. 4 1
      services/workflow-service/app/api/routes.py
  85. 22 1
      services/workflow-service/app/application/services.py
  86. 1 0
      services/workflow-service/pyproject.toml

+ 15 - 0
.dockerignore

@@ -0,0 +1,15 @@
+.venv
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.pytest_cache
+.ruff_cache
+.mypy_cache
+dist
+build
+.git
+.idea
+.vscode
+htmlcov
+.coverage

+ 5 - 1
.gitignore

@@ -1,8 +1,13 @@
 .venv/
+.tmp/
 __pycache__/
 *.pyc
 *.pyo
 *.pyd
+*.db
+*.sqlite
+*.sqlite3
+*.egg-info/
 .pytest_cache/
 .ruff_cache/
 .mypy_cache/
@@ -13,4 +18,3 @@ htmlcov/
 .idea/
 .vscode/
 .DS_Store
-

+ 383 - 0
README.md

@@ -12,6 +12,7 @@
 ## 当前已创建的服务
 
 - `api-gateway`
+- `model-gateway-service`
 - `session-service`
 - `workflow-service`
 - `runtime-service`
@@ -208,3 +209,385 @@ tests/
 3. 接入 PostgreSQL / Redis
 4. 增加 Docker Compose
 5. 开始实现应用、流程、运行三条主链路
+
+## Runtime Execute APIs
+
+`runtime-service` now includes a typed executor skeleton for these node types:
+
+- `llm`
+- `tool`
+- `code`
+- `answer`
+- `if-else`
+- `assigner`
+- `knowledge-retrieval`
+- `template-transform`
+
+Execute a specific queued node:
+
+```powershell
+Invoke-RestMethod -Method Post `
+  -Uri http://127.0.0.1:8003/runtime/node-runs/node-run-id/execute `
+  -ContentType "application/json" `
+  -Body '{"worker_key":"runtime-worker-1"}'
+```
+
+Execute the next queued node in a run:
+
+```powershell
+Invoke-RestMethod -Method Post `
+  -Uri "http://127.0.0.1:8003/runtime/runs/run-id/execute-next?tenant_id=t1" `
+  -ContentType "application/json" `
+  -Body '{"worker_key":"runtime-worker-1"}'
+```
+
+Execute queued nodes in sequence until the run finishes, becomes blocked, or reaches `max_steps`:
+
+```powershell
+Invoke-RestMethod -Method Post `
+  -Uri "http://127.0.0.1:8003/runtime/runs/run-id/execute?tenant_id=t1" `
+  -ContentType "application/json" `
+  -Body '{"worker_key":"runtime-worker-1","max_steps":16}'
+```
+
+Node execution results are now persisted on `node_run`:
+
+- `output_text`
+- `output_json`
+
+Node execution artifacts are also persisted on `node_artifact`:
+
+- `artifact_type`
+- `content_text`
+- `content_json`
+- `storage_uri`
+- `size_bytes`
+
+Query artifacts:
+
+```powershell
+Invoke-RestMethod `
+  -Uri "http://127.0.0.1:8003/runtime/node-artifacts?tenant_id=t1&run_id=run-id"
+```
+
+Trace spans are persisted on `trace_span` for timeline and latency analysis:
+
+- `span_type`
+- `name`
+- `status`
+- `started_time`
+- `ended_time`
+- `duration_ms`
+- `attributes_json`
+- `error_code`
+- `error_message`
+
+Query trace spans:
+
+```powershell
+Invoke-RestMethod `
+  -Uri "http://127.0.0.1:8003/runtime/trace-spans?tenant_id=t1&run_id=run-id"
+```
+
+Current behavior:
+
+- `answer` nodes persist rendered text to `output_text`
+- `assigner` nodes write `state_updates` to `output_json`
+- `condition` / `if-else` nodes write `condition_result` and `route` to `output_json`
+- `template-transform` nodes render text or JSON using previous node outputs and run state
+- `knowledge-retrieval` / `retriever` nodes run keyword retrieval over inline or HTTP JSON documents
+- `tool` nodes persist resolved binding/tool metadata to `output_json`
+- default executors persist basic executor metadata to `output_json`
+
+Runtime template context:
+
+- `state.xxx`: values written by previous `assigner` nodes
+- `nodes.node_id.output.xxx`: structured output from a previous node
+- `nodes.node_id.text`: text output from a previous node
+- `current.node_id`: current node id
+
+Assigner node config example:
+
+```json
+{
+  "id": "seed-state",
+  "type": "assigner",
+  "config": {
+    "assignments": {
+      "score": 7,
+      "user_name": "Alice"
+    }
+  }
+}
+```
+
+Condition node config example:
+
+```json
+{
+  "id": "check-score",
+  "type": "if-else",
+  "config": {
+    "expression": "state.score >= 5"
+  }
+}
+```
+
+Conditional edge example:
+
+```json
+[
+  {"source": "check-score", "target": "high-path", "condition": "true"},
+  {"source": "check-score", "target": "low-path", "condition": "false"}
+]
+```
+
+Template node config example:
+
+```json
+{
+  "id": "high-path",
+  "type": "template-transform",
+  "config": {
+    "template": "{{state.user_name}} passed with score {{state.score}}"
+  }
+}
+```
+
+Retriever node config example:
+
+```json
+{
+  "id": "retrieve-docs",
+  "type": "knowledge-retrieval",
+  "config": {
+    "query_template": "{{state.query}}",
+    "top_k": 2,
+    "documents": [
+      {
+        "id": "refund",
+        "title": "Refund Policy",
+        "text": "Refund policy allows returns within seven days."
+      },
+      {
+        "id": "shipping",
+        "title": "Shipping Policy",
+        "text": "Shipping usually takes three to five business days."
+      }
+    ]
+  }
+}
+```
+
+Retriever output is persisted to `node_run.output_json.retrieved_documents`. Template nodes can consume it:
+
+```json
+{
+  "id": "render-answer",
+  "type": "template-transform",
+  "config": {
+    "template": "Top doc: {{nodes.retrieve-docs.output.retrieved_documents.0.title}}"
+  }
+}
+```
+
+Retriever nodes can also load documents from an HTTP JSON source:
+
+```json
+{
+  "id": "retrieve-remote-docs",
+  "type": "retriever",
+  "config": {
+    "query": "refund policy",
+    "source_url": "http://127.0.0.1:9000/documents",
+    "top_k": 3
+  }
+}
+```
+
+The HTTP source should return either a document list or an object with a `documents` list.
+
+Run the no-key runtime smoke test after local services are running:
+
+```powershell
+.\.venv\Scripts\python scripts\smoke_runtime_no_key.py
+```
+
+Run the same smoke test through `api-gateway`:
+
+```powershell
+$env:AGENT_PLATFORM_SMOKE_WORKFLOW_URL="http://127.0.0.1:8000/gateway/workflows"
+$env:AGENT_PLATFORM_SMOKE_RUNTIME_URL="http://127.0.0.1:8000/gateway/runtime"
+.\.venv\Scripts\python scripts\smoke_runtime_no_key.py
+```
+
+## API Gateway
+
+`api-gateway` provides a unified entrypoint:
+
+- `GET /gateway/services/health`
+- `/gateway/workflows/**` -> `workflow-service /workflows/**`
+- `/gateway/sessions/**` -> `session-service /sessions/**`
+- `/gateway/runtime/**` -> `runtime-service /runtime/**`
+- `/gateway/tools/**` -> `tool-service /tools/**`
+- `/gateway/models/**` -> `model-gateway-service /models/**`
+- `/gateway/code/**` -> `code-runner-service /code/**`
+
+Gateway readiness:
+
+```powershell
+Invoke-RestMethod -Uri "http://127.0.0.1:8000/ready"
+```
+
+Downstream health:
+
+```powershell
+Invoke-RestMethod -Uri "http://127.0.0.1:8000/gateway/services/health"
+```
+
+Gateway request context:
+
+- An incoming `x-request-id` is reused; otherwise the gateway generates one.
+- An incoming `x-tenant-id` is reused; otherwise the gateway falls back to the `tenant_id` query parameter, and then to `public`.
+- The gateway forwards both `x-request-id` and `x-tenant-id` to downstream services.
+- The gateway writes request audit records to `gateway_request_audit`.
+
+Query gateway audits:
+
+```powershell
+Invoke-RestMethod `
+  -Uri "http://127.0.0.1:8000/gateway/audits?tenant_id=t1&limit=20" `
+  -Headers @{"x-tenant-id"="t1"}
+```
+
+HTTP tool node config example:
+
+```json
+{
+  "id": "search-products",
+  "type": "tool",
+  "config": {
+    "tool_binding_id": "binding-1",
+    "query": {
+      "keyword": "milk"
+    }
+  }
+}
+```
+
+Supported HTTP tool config resolution order:
+
+- URL: `config.url` or `invoke_config_json.url`
+- Base URL: `config.base_url` or `binding.config_json.base_url` or `invoke_config_json.base_url`
+- Path: `config.path` or `invoke_config_json.path`
+- Method: `invoke_config_json.method`, default `GET`
+- Query params: merge `invoke_config_json.query` + `config.query`
+- Body JSON: merge `invoke_config_json.body` + `config.body`
+- Headers: merge `invoke_config_json.headers` + `binding.config_json.headers` + `config.headers`
+
+LLM node config example:
+
+```json
+{
+  "id": "draft-answer",
+  "type": "llm",
+  "config": {
+    "model": "gpt-4o-mini",
+    "system_prompt": "You are a customer support assistant.",
+    "prompt": "Summarize the user intent in Chinese.",
+    "temperature": 0.2,
+    "max_tokens": 400
+  }
+}
+```
+
+`llm` nodes also support explicit `messages`:
+
+```json
+{
+  "id": "rewrite-message",
+  "type": "llm",
+  "config": {
+    "model": "gpt-4o-mini",
+    "messages": [
+      {"role": "system", "content": "You are a concise editor."},
+      {"role": "user", "content": "Rewrite this sentence in a warmer tone."}
+    ]
+  }
+}
+```
+
+`runtime-service` sends `llm` execution requests to `model-gateway-service`, which forwards them to an OpenAI-compatible `/chat/completions` provider.
+
+Recommended environment variables for `model-gateway-service`:
+
+```powershell
+$env:AGENT_PLATFORM_PROVIDER_BASE_URL="https://api.openai.com/v1"
+$env:AGENT_PLATFORM_PROVIDER_API_KEY="your-api-key"
+$env:AGENT_PLATFORM_DEFAULT_MODEL="gpt-4o-mini"
+```
+
+Code node config example:
+
+```json
+{
+  "id": "compute-summary",
+  "type": "code",
+  "config": {
+    "language": "python",
+    "timeout_seconds": 5,
+    "input_json": {
+      "numbers": [1, 2, 3, 4]
+    },
+    "code": "total = sum(payload['numbers'])\nresult = {'total': total, 'count': len(payload['numbers'])}\nprint(f'total={total}')"
+  }
+}
+```
+
+`runtime-service` sends `code` execution requests to `code-runner-service`. Current `python` execution contract:
+
+- input payload is available as `payload`
+- execution result should be assigned to `result`
+- `print(...)` output is captured into `node_run.output_text`
+- structured `result` is captured into `node_run.output_json.result_json`
+
+Recommended environment variables for `code-runner-service`:
+
+```powershell
+$env:AGENT_PLATFORM_PYTHON_BIN="python"
+$env:AGENT_PLATFORM_MAX_TIMEOUT_SECONDS="30"
+```
+
+## Docker Compose
+
+Files:
+
+- `deployments/docker/docker-compose.yml`
+- `deployments/docker/python-service.Dockerfile`
+- `deployments/docker/.env.example`
+
+Start all services locally:
+
+```powershell
+cd D:\workspace\auto-platform
+Copy-Item .\deployments\docker\.env.example .\.env
+docker compose -f .\deployments\docker\docker-compose.yml up --build
+```
+
+Start in detached mode:
+
+```powershell
+docker compose -f .\deployments\docker\docker-compose.yml up --build -d
+```
+
+Stop and remove containers:
+
+```powershell
+docker compose -f .\deployments\docker\docker-compose.yml down
+```
+
+Important notes:
+
+- `workflow-service`, `session-service`, `runtime-service`, `tool-service`, and `api-gateway` use SQLite files mounted under `/data`
+- `runtime-service` reaches `workflow-service`, `tool-service`, `model-gateway-service`, and `code-runner-service` through internal service URLs that `docker-compose.yml` sets as environment variables
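+- `model-gateway-service` defaults to `http://host.docker.internal:11434/v1` when `AGENT_PLATFORM_PROVIDER_BASE_URL` is not set (note that `.env.example` sets it to `https://api.openai.com/v1`); set it in `.env` if you want OpenAI or another OpenAI-compatible provider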

+ 4 - 0
deployments/docker/.env.example

@@ -0,0 +1,4 @@
+AGENT_PLATFORM_PROVIDER_BASE_URL=https://api.openai.com/v1
+AGENT_PLATFORM_PROVIDER_API_KEY=replace-me
+AGENT_PLATFORM_DEFAULT_MODEL=gpt-4o-mini
+AGENT_PLATFORM_MAX_TIMEOUT_SECONDS=30

+ 182 - 0
deployments/docker/docker-compose.yml

@@ -0,0 +1,182 @@
+services:
+  workflow-service:
+    build:
+      context: ../..
+      dockerfile: deployments/docker/python-service.Dockerfile
+      args:
+        SERVICE_PATH: services/workflow-service
+    container_name: agent-platform-workflow-service
+    command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8002"]
+    environment:
+      AGENT_PLATFORM_DATABASE_URL: sqlite:////data/workflow_service.db
+    ports:
+      - "8002:8002"
+    volumes:
+      - workflow_service_data:/data
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8002/workflows/health').read()"]
+      interval: 15s
+      timeout: 5s
+      retries: 5
+
+  session-service:
+    build:
+      context: ../..
+      dockerfile: deployments/docker/python-service.Dockerfile
+      args:
+        SERVICE_PATH: services/session-service
+    container_name: agent-platform-session-service
+    command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001"]
+    environment:
+      AGENT_PLATFORM_DATABASE_URL: sqlite:////data/session_service.db
+      AGENT_PLATFORM_RUNTIME_SERVICE_URL: http://runtime-service:8003
+    ports:
+      - "8001:8001"
+    volumes:
+      - session_service_data:/data
+    depends_on:
+      runtime-service:
+        condition: service_started
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8001/sessions/health').read()"]
+      interval: 15s
+      timeout: 5s
+      retries: 5
+
+  tool-service:
+    build:
+      context: ../..
+      dockerfile: deployments/docker/python-service.Dockerfile
+      args:
+        SERVICE_PATH: services/tool-service
+    container_name: agent-platform-tool-service
+    command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8004"]
+    environment:
+      AGENT_PLATFORM_DATABASE_URL: sqlite:////data/tool_service.db
+    ports:
+      - "8004:8004"
+    volumes:
+      - tool_service_data:/data
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8004/tools/health').read()"]
+      interval: 15s
+      timeout: 5s
+      retries: 5
+
+  model-gateway-service:
+    build:
+      context: ../..
+      dockerfile: deployments/docker/python-service.Dockerfile
+      args:
+        SERVICE_PATH: services/model-gateway-service
+    container_name: agent-platform-model-gateway-service
+    command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8005"]
+    environment:
+      AGENT_PLATFORM_PROVIDER_BASE_URL: ${AGENT_PLATFORM_PROVIDER_BASE_URL:-http://host.docker.internal:11434/v1}
+      AGENT_PLATFORM_PROVIDER_API_KEY: ${AGENT_PLATFORM_PROVIDER_API_KEY:-}
+      AGENT_PLATFORM_DEFAULT_MODEL: ${AGENT_PLATFORM_DEFAULT_MODEL:-}
+    ports:
+      - "8005:8005"
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8005/models/health').read()"]
+      interval: 15s
+      timeout: 5s
+      retries: 5
+
+  code-runner-service:
+    build:
+      context: ../..
+      dockerfile: deployments/docker/python-service.Dockerfile
+      args:
+        SERVICE_PATH: services/code-runner-service
+    container_name: agent-platform-code-runner-service
+    command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8006"]
+    environment:
+      AGENT_PLATFORM_PYTHON_BIN: python
+      AGENT_PLATFORM_MAX_TIMEOUT_SECONDS: ${AGENT_PLATFORM_MAX_TIMEOUT_SECONDS:-30}
+    ports:
+      - "8006:8006"
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8006/code/health').read()"]
+      interval: 15s
+      timeout: 5s
+      retries: 5
+
+  runtime-service:
+    build:
+      context: ../..
+      dockerfile: deployments/docker/python-service.Dockerfile
+      args:
+        SERVICE_PATH: services/runtime-service
+    container_name: agent-platform-runtime-service
+    command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8003"]
+    environment:
+      AGENT_PLATFORM_DATABASE_URL: sqlite:////data/runtime_service.db
+      AGENT_PLATFORM_WORKFLOW_SERVICE_URL: http://workflow-service:8002
+      AGENT_PLATFORM_TOOL_SERVICE_URL: http://tool-service:8004
+      AGENT_PLATFORM_MODEL_GATEWAY_SERVICE_URL: http://model-gateway-service:8005
+      AGENT_PLATFORM_CODE_RUNNER_SERVICE_URL: http://code-runner-service:8006
+    ports:
+      - "8003:8003"
+    volumes:
+      - runtime_service_data:/data
+    depends_on:
+      workflow-service:
+        condition: service_started
+      tool-service:
+        condition: service_started
+      model-gateway-service:
+        condition: service_started
+      code-runner-service:
+        condition: service_started
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8003/runtime/health').read()"]
+      interval: 15s
+      timeout: 5s
+      retries: 5
+
+  api-gateway:
+    build:
+      context: ../..
+      dockerfile: deployments/docker/python-service.Dockerfile
+      args:
+        SERVICE_PATH: services/api-gateway
+    container_name: agent-platform-api-gateway
+    command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
+    environment:
+      AGENT_PLATFORM_DATABASE_URL: sqlite:////data/api_gateway.db
+      AGENT_PLATFORM_WORKFLOW_SERVICE_URL: http://workflow-service:8002
+      AGENT_PLATFORM_SESSION_SERVICE_URL: http://session-service:8001
+      AGENT_PLATFORM_RUNTIME_SERVICE_URL: http://runtime-service:8003
+      AGENT_PLATFORM_TOOL_SERVICE_URL: http://tool-service:8004
+      AGENT_PLATFORM_MODEL_GATEWAY_SERVICE_URL: http://model-gateway-service:8005
+      AGENT_PLATFORM_CODE_RUNNER_SERVICE_URL: http://code-runner-service:8006
+    ports:
+      - "8000:8000"
+    volumes:
+      - api_gateway_data:/data
+    depends_on:
+      workflow-service:
+        condition: service_started
+      session-service:
+        condition: service_started
+      runtime-service:
+        condition: service_started
+      tool-service:
+        condition: service_started
+      model-gateway-service:
+        condition: service_started
+      code-runner-service:
+        condition: service_started
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health').read()"]
+      interval: 15s
+      timeout: 5s
+      retries: 5
+
+volumes:
+  api_gateway_data:
+  workflow_service_data:
+  session_service_data:
+  runtime_service_data:
+  tool_service_data:

+ 25 - 0
deployments/docker/python-service.Dockerfile

@@ -0,0 +1,25 @@
+FROM python:3.11-slim
+
+ARG SERVICE_PATH
+
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+WORKDIR /workspace
+
+RUN pip install --no-cache-dir --upgrade pip
+
+COPY libs ./libs
+COPY services ./services
+
+RUN pip install --no-cache-dir \
+    -e ./libs/core-shared \
+    -e ./libs/core-domain \
+    -e ./libs/core-dsl \
+    -e ./libs/core-events \
+    -e ./libs/core-db \
+    -e ./${SERVICE_PATH}
+
+WORKDIR /workspace/${SERVICE_PATH}
+
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

+ 35 - 0
libs/core-domain/src/core_domain/__init__.py

@@ -1,23 +1,58 @@
+from .code_contracts import CodeExecutionRequestContract, CodeExecutionResponseContract
+from .execution_contracts import (
+    NodeExecutionContextContract,
+    NodeExecutionRequestContract,
+    NodeExecutionResultContract,
+    RunExecutionRequestContract,
+)
+from .model_contracts import (
+    ChatCompletionRequestContract,
+    ChatCompletionResponseContract,
+    ChatMessageContract,
+)
 from .runtime_contracts import (
     InitialNodeContract,
     NodeRunContract,
+    NodeRunStatus,
     NodeRunStatusUpdateContract,
     RunBootstrapContract,
     RunCreateContract,
+    WorkflowRunStatus,
     WorkflowRunStatusUpdateContract,
     WorkflowRunContract,
 )
 from .service import ServiceDescriptor, ServiceHealth
+from .tool_contracts import (
+    ToolBindingContract,
+    ToolBindingDetailContract,
+    ToolDefinitionContract,
+    ToolVersionContract,
+)
 from .workflow_contracts import WorkflowVersionContract
 
 __all__ = [
+    "CodeExecutionRequestContract",
+    "CodeExecutionResponseContract",
+    "ChatCompletionRequestContract",
+    "ChatCompletionResponseContract",
+    "ChatMessageContract",
     "InitialNodeContract",
+    "NodeExecutionContextContract",
+    "NodeExecutionRequestContract",
+    "NodeExecutionResultContract",
+    "RunExecutionRequestContract",
     "NodeRunContract",
+    "NodeRunStatus",
     "NodeRunStatusUpdateContract",
     "RunBootstrapContract",
     "RunCreateContract",
     "ServiceDescriptor",
     "ServiceHealth",
+    "ToolBindingContract",
+    "ToolBindingDetailContract",
+    "ToolDefinitionContract",
+    "ToolVersionContract",
+    "WorkflowRunStatus",
     "WorkflowRunStatusUpdateContract",
     "WorkflowRunContract",
     "WorkflowVersionContract",

+ 18 - 0
libs/core-domain/src/core_domain/code_contracts.py

@@ -0,0 +1,18 @@
+from pydantic import BaseModel, Field
+
+from core_shared import JSONValue
+
+
+class CodeExecutionRequestContract(BaseModel):
+    language: str = "python"
+    code: str
+    input_json: dict[str, JSONValue] = Field(default_factory=dict)
+    timeout_seconds: int = 10
+
+
+class CodeExecutionResponseContract(BaseModel):
+    success: bool
+    stdout: str = ""
+    stderr: str = ""
+    output_json: dict[str, JSONValue] = Field(default_factory=dict)
+    error_message: str | None = None

+ 36 - 0
libs/core-domain/src/core_domain/execution_contracts.py

@@ -0,0 +1,36 @@
+from pydantic import BaseModel, Field
+
+from core_shared import JSONValue
+
+from .runtime_contracts import NodeRunStatus
+
+
+class NodeExecutionRequestContract(BaseModel):
+    worker_key: str | None = None
+
+
+class RunExecutionRequestContract(BaseModel):
+    worker_key: str | None = None
+    max_steps: int = 32
+
+
+class NodeExecutionContextContract(BaseModel):
+    tenant_id: str
+    run_id: str
+    node_run_id: str
+    node_id: str
+    node_type: str
+    node_config_json: dict[str, JSONValue]
+    run_state_json: dict[str, JSONValue] = Field(default_factory=dict)
+    node_output_json_by_node_id: dict[str, dict[str, JSONValue]] = Field(default_factory=dict)
+    node_output_text_by_node_id: dict[str, str] = Field(default_factory=dict)
+    worker_key: str | None = None
+
+
+class NodeExecutionResultContract(BaseModel):
+    status: NodeRunStatus
+    worker_key: str | None = None
+    error_code: str | None = None
+    error_message: str | None = None
+    output_text: str | None = None
+    output_json: dict[str, JSONValue] | None = None

+ 25 - 0
libs/core-domain/src/core_domain/model_contracts.py

@@ -0,0 +1,25 @@
+from pydantic import BaseModel, Field
+
+from core_shared import JSONValue
+
+
+class ChatMessageContract(BaseModel):
+    role: str
+    content: str
+    name: str | None = None
+
+
+class ChatCompletionRequestContract(BaseModel):
+    model: str | None = None
+    messages: list[ChatMessageContract] = Field(default_factory=list)
+    temperature: float | None = None
+    max_tokens: int | None = None
+    metadata_json: dict[str, JSONValue] = Field(default_factory=dict)
+
+
+class ChatCompletionResponseContract(BaseModel):
+    model: str | None = None
+    content: str
+    finish_reason: str | None = None
+    usage_json: dict[str, JSONValue] = Field(default_factory=dict)
+    raw_response_json: dict[str, JSONValue] = Field(default_factory=dict)

+ 5 - 0
libs/core-domain/src/core_domain/runtime_contracts.py

@@ -2,6 +2,7 @@ from datetime import datetime
 from typing import Literal
 
 from pydantic import BaseModel
+from core_shared import JSONValue
 
 NodeRunStatus = Literal["pending", "queued", "running", "completed", "failed", "skipped"]
 WorkflowRunStatus = Literal["pending", "running", "completed", "failed", "cancelled", "paused"]
@@ -55,6 +56,8 @@ class NodeRunContract(BaseModel):
     node_type: str
     attempt_no: int
     status: NodeRunStatus
+    output_text: str | None = None
+    output_json: dict[str, JSONValue] | None = None
     queued_time: datetime | None = None
     created_time: datetime
 
@@ -75,3 +78,5 @@ class NodeRunStatusUpdateContract(BaseModel):
     worker_key: str | None = None
     error_code: str | None = None
     error_message: str | None = None
+    output_text: str | None = None
+    output_json: dict[str, JSONValue] | None = None

+ 47 - 0
libs/core-domain/src/core_domain/tool_contracts.py

@@ -0,0 +1,47 @@
+from datetime import datetime
+
+from pydantic import BaseModel
+
+from core_shared import JSONValue
+
+
+class ToolDefinitionContract(BaseModel):
+    id: str
+    tenant_id: str
+    plugin_id: str | None = None
+    code: str
+    name: str
+    tool_type: str
+    description: str | None = None
+    created_time: datetime
+
+
+class ToolVersionContract(BaseModel):
+    id: str
+    tenant_id: str
+    tool_id: str
+    version_no: int
+    input_schema_json: dict[str, JSONValue] | None = None
+    output_schema_json: dict[str, JSONValue] | None = None
+    invoke_config_json: dict[str, JSONValue] | None = None
+    timeout_ms: int | None = None
+    retry_policy_json: dict[str, JSONValue] | None = None
+    created_time: datetime
+
+
+class ToolBindingContract(BaseModel):
+    id: str
+    tenant_id: str
+    app_id: str
+    tool_version_id: str
+    credential_id: str | None = None
+    binding_scope: str
+    enabled: bool
+    config_json: dict[str, JSONValue] | None = None
+    created_time: datetime
+
+
+class ToolBindingDetailContract(BaseModel):
+    binding: ToolBindingContract
+    tool_version: ToolVersionContract
+    tool_definition: ToolDefinitionContract

+ 1 - 1
libs/core-dsl/pyproject.toml

@@ -8,6 +8,7 @@ version = "0.1.0"
 description = "Workflow DSL models for agent platform."
 requires-python = ">=3.11"
 dependencies = [
+  "core-shared",
   "pydantic>=2.7,<3.0",
 ]
 
@@ -16,4 +17,3 @@ package-dir = {"" = "src"}
 
 [tool.setuptools.packages.find]
 where = ["src"]
-

+ 18 - 3
libs/core-dsl/src/core_dsl/__init__.py

@@ -1,4 +1,19 @@
-from .workflow import EdgeDefinition, NodeDefinition, WorkflowDefinition
-
-__all__ = ["EdgeDefinition", "NodeDefinition", "WorkflowDefinition"]
+from .workflow import (
+    EdgeDefinition,
+    NodeDefinition,
+    WorkflowDefinition,
+    get_initial_node_definition,
+    get_node_definition,
+    get_successor_node_definitions,
+    parse_workflow_definition,
+)
 
+__all__ = [
+    "EdgeDefinition",
+    "NodeDefinition",
+    "WorkflowDefinition",
+    "get_initial_node_definition",
+    "get_node_definition",
+    "get_successor_node_definitions",
+    "parse_workflow_definition",
+]

+ 36 - 2
libs/core-dsl/src/core_dsl/workflow.py

@@ -1,10 +1,13 @@
 from pydantic import BaseModel, Field
 
+from core_shared import JSONValue
+
 
 class NodeDefinition(BaseModel):
     id: str
     type: str
-    config: dict = Field(default_factory=dict)
+    name: str | None = None
+    config: dict[str, JSONValue] = Field(default_factory=dict)
 
 
 class EdgeDefinition(BaseModel):
@@ -15,7 +18,38 @@ class EdgeDefinition(BaseModel):
 
 class WorkflowDefinition(BaseModel):
     code: str
-    name: str
+    name: str = "workflow"
     nodes: list[NodeDefinition] = Field(default_factory=list)
     edges: list[EdgeDefinition] = Field(default_factory=list)
 
+
+def parse_workflow_definition(payload: dict[str, JSONValue] | None) -> WorkflowDefinition | None:
+    """Validate a raw DSL payload into a ``WorkflowDefinition``.
+
+    Returns ``None`` when no payload is supplied. Any error raised by
+    ``WorkflowDefinition.model_validate`` propagates to the caller.
+    """
+    return None if payload is None else WorkflowDefinition.model_validate(payload)
+
+
+def get_node_definition(workflow: WorkflowDefinition, node_id: str) -> NodeDefinition | None:
+    """Return the first node whose ``id`` equals *node_id*, or ``None`` if absent."""
+    matching_nodes = (candidate for candidate in workflow.nodes if candidate.id == node_id)
+    return next(matching_nodes, None)
+
+
+def get_initial_node_definition(workflow: WorkflowDefinition) -> NodeDefinition | None:
+    """Pick the node a run should start from.
+
+    The first node (in declaration order) that is not the target of any edge
+    wins. If every node has an incoming edge, the first declared node is used.
+    Returns ``None`` for a workflow without nodes.
+    """
+    targeted_ids = set()
+    for edge in workflow.edges:
+        targeted_ids.add(edge.target)
+
+    root = next((node for node in workflow.nodes if node.id not in targeted_ids), None)
+    if root is not None:
+        return root
+    return workflow.nodes[0] if workflow.nodes else None
+
+
+def get_successor_node_definitions(
+    workflow: WorkflowDefinition,
+    current_node_id: str,
+) -> list[NodeDefinition]:
+    """List the nodes reachable from *current_node_id* through one edge.
+
+    Results follow edge declaration order. Edges whose target id does not
+    match any declared node are ignored.
+    """
+    nodes_by_id = {}
+    for node in workflow.nodes:
+        nodes_by_id[node.id] = node
+
+    successors = []
+    for edge in workflow.edges:
+        if edge.source != current_node_id:
+            continue
+        if edge.target in nodes_by_id:
+            successors.append(nodes_by_id[edge.target])
+    return successors

+ 5 - 2
libs/core-shared/src/core_shared/types.py

@@ -1,5 +1,8 @@
 from typing import TypeAlias
+from typing_extensions import TypeAliasType
 
 JSONPrimitive: TypeAlias = str | int | float | bool | None
-JSONValue: TypeAlias = JSONPrimitive | dict[str, "JSONValue"] | list["JSONValue"]
-
+JSONValue = TypeAliasType(
+    "JSONValue",
+    JSONPrimitive | dict[str, "JSONValue"] | list["JSONValue"],
+)

+ 2 - 0
pyproject.toml

@@ -6,6 +6,8 @@ members = [
   "libs/core-events",
   "libs/core-shared",
   "services/api-gateway",
+  "services/code-runner-service",
+  "services/model-gateway-service",
   "services/session-service",
   "services/workflow-service",
   "services/runtime-service",

+ 394 - 0
scripts/smoke_runtime_no_key.py

@@ -0,0 +1,394 @@
+from __future__ import annotations
+
+import json
+import os
+import sys
+import uuid
+from dataclasses import dataclass
+
+import httpx
+
+
+WORKFLOW_SERVICE_URL = os.getenv(
+    "AGENT_PLATFORM_SMOKE_WORKFLOW_URL",
+    "http://127.0.0.1:8002/workflows",
+)
+RUNTIME_SERVICE_URL = os.getenv(
+    "AGENT_PLATFORM_SMOKE_RUNTIME_URL",
+    "http://127.0.0.1:8003/runtime",
+)
+TENANT_ID = os.getenv("AGENT_PLATFORM_SMOKE_TENANT_ID", "t-smoke")
+
+
+@dataclass(frozen=True)
+class SmokeScenario:
+    """Input score and expected outcome for one if/else smoke run."""
+
+    # Value assigned to ``state.score`` by the workflow's seed node.
+    score: int
+    # Node id that must appear among the executed node runs.
+    expected_branch_node_id: str
+    # Exact ``output_text`` expected from that branch node.
+    expected_output_text: str
+
+
+SCENARIOS = (
+    SmokeScenario(
+        score=7,
+        expected_branch_node_id="high_path",
+        expected_output_text="Alice passed with score 7",
+    ),
+    SmokeScenario(
+        score=3,
+        expected_branch_node_id="low_path",
+        expected_output_text="Alice did not pass; score 3",
+    ),
+)
+
+
+def main() -> int:
+    """Create a throwaway app and workflow, run every scenario, print results.
+
+    Returns ``0`` once all scenarios have completed; failures surface as
+    exceptions raised by the scenario helpers.
+    """
+    suffix = uuid.uuid4().hex[:8]
+    tenant_headers = {"x-tenant-id": TENANT_ID}
+    with httpx.Client(timeout=20.0, headers=tenant_headers) as client:
+        app_id = create_app(client, suffix)
+        workflow_id = create_workflow(client, app_id, suffix)
+
+        results: list[dict[str, object]] = [
+            run_scenario(client, app_id, workflow_id, suffix, scenario)
+            for scenario in SCENARIOS
+        ]
+        results.append(run_retriever_scenario(client, app_id, workflow_id, suffix))
+
+    report = json.dumps(results, ensure_ascii=False, indent=2)
+    print(report)
+    return 0
+
+
+def create_app(client: httpx.Client, unique_suffix: str) -> str:
+    """Create a smoke-test app via the workflow service and return its id."""
+    request_body = {
+        "tenant_id": TENANT_ID,
+        "code": f"smoke-app-{unique_suffix}",
+        "name": f"Smoke App {unique_suffix}",
+    }
+    response = client.post(f"{WORKFLOW_SERVICE_URL}/apps", json=request_body)
+    response.raise_for_status()
+    return str(response.json()["id"])
+
+
+def create_workflow(client: httpx.Client, app_id: str, unique_suffix: str) -> str:
+    """Create a smoke-test workflow under *app_id* and return its id."""
+    request_body = {
+        "tenant_id": TENANT_ID,
+        "app_id": app_id,
+        "code": f"smoke-flow-{unique_suffix}",
+        "name": f"Smoke Flow {unique_suffix}",
+    }
+    response = client.post(WORKFLOW_SERVICE_URL, json=request_body)
+    response.raise_for_status()
+    return str(response.json()["id"])
+
+
+def run_scenario(
+    client: httpx.Client,
+    app_id: str,
+    workflow_id: str,
+    unique_suffix: str,
+    scenario: SmokeScenario,
+) -> dict[str, object]:
+    """Run one if/else smoke scenario and verify which branch executed.
+
+    Creates a workflow version seeded with ``scenario.score``, executes a run
+    against it, and checks that at least three artifacts and three trace spans
+    were recorded, that the expected branch node produced the expected text,
+    and that the opposite branch node did not run.
+
+    Returns:
+        A summary dict with the score, executed node ids, branch output text
+        and the artifact / trace span counts.
+
+    Raises:
+        AssertionError: if any expectation is not met.
+        httpx.HTTPStatusError: if a service call returns an error status.
+    """
+    workflow_version_id = create_workflow_version(client, workflow_id, unique_suffix, scenario.score)
+    app_version_id = create_app_version(client, app_id, workflow_version_id)
+    run_id = create_run(client, app_id, app_version_id, workflow_id, workflow_version_id)
+    execute_run(client, run_id)
+    node_runs = list_node_runs(client, run_id)
+    artifacts = list_node_artifacts(client, run_id)
+    if len(artifacts) < 3:
+        raise AssertionError(f"expected at least 3 artifacts, got {len(artifacts)}")
+    trace_spans = list_trace_spans(client, run_id)
+    if len(trace_spans) < 3:
+        raise AssertionError(f"expected at least 3 trace spans, got {len(trace_spans)}")
+
+    node_map = {str(item["node_id"]): item for item in node_runs}
+    expected_node = node_map.get(scenario.expected_branch_node_id)
+    # Raise explicitly rather than using ``assert``: assert statements are
+    # stripped under ``python -O``, which would skip this check and leave a
+    # bare KeyError as the only symptom.
+    if expected_node is None:
+        raise AssertionError(
+            f"expected branch node not found: {scenario.expected_branch_node_id}"
+        )
+    actual_output_text = expected_node.get("output_text")
+    if actual_output_text != scenario.expected_output_text:
+        raise AssertionError(
+            f"unexpected output_text for {scenario.expected_branch_node_id}: {actual_output_text!r}"
+        )
+
+    # Exactly one of the two branch nodes may run for a given score.
+    other_branch_node_id = "low_path" if scenario.expected_branch_node_id == "high_path" else "high_path"
+    if other_branch_node_id in node_map:
+        raise AssertionError(f"unexpected branch node executed: {other_branch_node_id}")
+
+    return {
+        "score": scenario.score,
+        "executed_node_ids": [str(item["node_id"]) for item in node_runs],
+        "branch_output_text": actual_output_text,
+        "artifact_count": len(artifacts),
+        "trace_span_count": len(trace_spans),
+    }
+
+
+def run_retriever_scenario(
+    client: httpx.Client,
+    app_id: str,
+    workflow_id: str,
+    unique_suffix: str,
+) -> dict[str, object]:
+    """Run the knowledge-retrieval smoke workflow and verify its outputs.
+
+    Checks that at least three artifacts and three trace spans exist, that the
+    ``render_answer`` node produced the expected text, and that the
+    ``retrieve_docs`` node ran and returned an object as ``output_json``.
+
+    Raises:
+        AssertionError: if any expectation is not met.
+    """
+    version_id = create_retriever_workflow_version(client, workflow_id, unique_suffix)
+    app_version_id = create_app_version(client, app_id, version_id)
+    run_id = create_run(client, app_id, app_version_id, workflow_id, version_id)
+    execute_run(client, run_id)
+
+    node_runs = list_node_runs(client, run_id)
+    artifacts = list_node_artifacts(client, run_id)
+    artifact_count = len(artifacts)
+    if artifact_count < 3:
+        raise AssertionError(f"expected at least 3 retriever artifacts, got {artifact_count}")
+    trace_spans = list_trace_spans(client, run_id)
+    span_count = len(trace_spans)
+    if span_count < 3:
+        raise AssertionError(f"expected at least 3 retriever trace spans, got {span_count}")
+
+    runs_by_node_id = {str(node_run["node_id"]): node_run for node_run in node_runs}
+
+    # The rendered answer is checked before the retrieval node itself.
+    answer_node = runs_by_node_id.get("render_answer")
+    if answer_node is None:
+        raise AssertionError("retriever answer node was not executed")
+    answer_text = answer_node.get("output_text")
+    if answer_text != "Top doc: Refund Policy":
+        raise AssertionError(f"unexpected retriever answer text: {answer_text!r}")
+
+    retrieve_node = runs_by_node_id.get("retrieve_docs")
+    if retrieve_node is None:
+        raise AssertionError("retriever node was not executed")
+    if not isinstance(retrieve_node.get("output_json"), dict):
+        raise AssertionError("retriever output_json must be an object")
+
+    return {
+        "scenario": "retriever",
+        "executed_node_ids": [str(node_run["node_id"]) for node_run in node_runs],
+        "answer_text": answer_text,
+        "artifact_count": artifact_count,
+        "trace_span_count": span_count,
+    }
+
+
+def create_workflow_version(
+    client: httpx.Client,
+    workflow_id: str,
+    unique_suffix: str,
+    score: int,
+) -> str:
+    """Create an active version carrying the if/else smoke DSL; return its id."""
+    request_body = {
+        "tenant_id": TENANT_ID,
+        "workflow_id": workflow_id,
+        "status": "active",
+        "dsl_json": build_workflow_dsl(unique_suffix, score),
+    }
+    response = client.post(f"{WORKFLOW_SERVICE_URL}/versions", json=request_body)
+    response.raise_for_status()
+    return str(response.json()["id"])
+
+
+def create_retriever_workflow_version(
+    client: httpx.Client,
+    workflow_id: str,
+    unique_suffix: str,
+) -> str:
+    """Create an active version carrying the retriever smoke DSL; return its id."""
+    request_body = {
+        "tenant_id": TENANT_ID,
+        "workflow_id": workflow_id,
+        "status": "active",
+        "dsl_json": build_retriever_workflow_dsl(unique_suffix),
+    }
+    response = client.post(f"{WORKFLOW_SERVICE_URL}/versions", json=request_body)
+    response.raise_for_status()
+    return str(response.json()["id"])
+
+
+def create_app_version(client: httpx.Client, app_id: str, workflow_version_id: str) -> str:
+    """Create an active app version pointing at *workflow_version_id*; return its id."""
+    request_body = {
+        "tenant_id": TENANT_ID,
+        "app_id": app_id,
+        "workflow_version_id": workflow_version_id,
+        "status": "active",
+    }
+    response = client.post(f"{WORKFLOW_SERVICE_URL}/apps/versions", json=request_body)
+    response.raise_for_status()
+    return str(response.json()["id"])
+
+
+def create_run(
+    client: httpx.Client,
+    app_id: str,
+    app_version_id: str,
+    workflow_id: str,
+    workflow_version_id: str,
+) -> str:
+    """Create a run through the runtime service and return ``run.id`` from its reply."""
+    request_body = {
+        "tenant_id": TENANT_ID,
+        "app_id": app_id,
+        "app_version_id": app_version_id,
+        "workflow_id": workflow_id,
+        "workflow_version_id": workflow_version_id,
+    }
+    response = client.post(f"{RUNTIME_SERVICE_URL}/runs", json=request_body)
+    response.raise_for_status()
+    run = response.json()["run"]
+    return str(run["id"])
+
+
+def execute_run(client: httpx.Client, run_id: str) -> None:
+    """Ask the runtime service to execute *run_id* with a cap of 8 steps."""
+    execute_url = f"{RUNTIME_SERVICE_URL}/runs/{run_id}/execute"
+    client.post(
+        execute_url,
+        params={"tenant_id": TENANT_ID},
+        json={"max_steps": 8},
+    ).raise_for_status()
+
+
+def list_node_runs(client: httpx.Client, run_id: str) -> list[dict[str, object]]:
+    """Fetch the node runs of *run_id*, keeping only the dict items.
+
+    Raises:
+        AssertionError: if the response body is not a JSON list.
+    """
+    query = {"tenant_id": TENANT_ID, "run_id": run_id}
+    response = client.get(f"{RUNTIME_SERVICE_URL}/node-runs", params=query)
+    response.raise_for_status()
+    rows = response.json()
+    if isinstance(rows, list):
+        return [row for row in rows if isinstance(row, dict)]
+    raise AssertionError("node-runs response must be a list")
+
+
+def list_node_artifacts(client: httpx.Client, run_id: str) -> list[dict[str, object]]:
+    """Fetch the node artifacts of *run_id*, keeping only the dict items.
+
+    Raises:
+        AssertionError: if the response body is not a JSON list.
+    """
+    query = {"tenant_id": TENANT_ID, "run_id": run_id}
+    response = client.get(f"{RUNTIME_SERVICE_URL}/node-artifacts", params=query)
+    response.raise_for_status()
+    rows = response.json()
+    if isinstance(rows, list):
+        return [row for row in rows if isinstance(row, dict)]
+    raise AssertionError("node-artifacts response must be a list")
+
+
+def list_trace_spans(client: httpx.Client, run_id: str) -> list[dict[str, object]]:
+    """Fetch the trace spans of *run_id*, keeping only the dict items.
+
+    Raises:
+        AssertionError: if the response body is not a JSON list.
+    """
+    query = {"tenant_id": TENANT_ID, "run_id": run_id}
+    response = client.get(f"{RUNTIME_SERVICE_URL}/trace-spans", params=query)
+    response.raise_for_status()
+    rows = response.json()
+    if isinstance(rows, list):
+        return [row for row in rows if isinstance(row, dict)]
+    raise AssertionError("trace-spans response must be a list")
+
+
+def build_workflow_dsl(unique_suffix: str, score: int) -> dict[str, object]:
+    """Build the if/else smoke workflow DSL for a given *score*.
+
+    The workflow seeds ``score`` and ``user_name`` into state, evaluates
+    ``state.score >= 5`` and routes to ``high_path`` or ``low_path``.
+    """
+    seed_state = {
+        "id": "seed_state",
+        "type": "assigner",
+        "config": {
+            "assignments": {
+                "score": score,
+                "user_name": "Alice",
+            },
+        },
+    }
+    check_score = {
+        "id": "check_score",
+        "type": "if-else",
+        "config": {
+            "expression": "state.score >= 5",
+        },
+    }
+    high_path = {
+        "id": "high_path",
+        "type": "template-transform",
+        "config": {
+            "template": "{{state.user_name}} passed with score {{state.score}}",
+        },
+    }
+    low_path = {
+        "id": "low_path",
+        "type": "template-transform",
+        "config": {
+            "template": "{{state.user_name}} did not pass; score {{state.score}}",
+        },
+    }
+    edges = [
+        {"source": "seed_state", "target": "check_score"},
+        {"source": "check_score", "target": "high_path", "condition": "true"},
+        {"source": "check_score", "target": "low_path", "condition": "false"},
+    ]
+    return {
+        "code": f"smoke-flow-{unique_suffix}-{score}",
+        "name": f"Smoke Flow {score}",
+        "nodes": [seed_state, check_score, high_path, low_path],
+        "edges": edges,
+    }
+
+
+def build_retriever_workflow_dsl(unique_suffix: str) -> dict[str, object]:
+    """Build the linear retriever smoke workflow DSL.
+
+    The workflow seeds a query, runs a ``knowledge-retrieval`` node over two
+    inline documents with ``top_k`` of 1, and renders the first retrieved
+    document's title through a template node.
+    """
+    documents = [
+        {
+            "id": "shipping",
+            "title": "Shipping Policy",
+            "text": "Shipping usually takes three to five business days.",
+        },
+        {
+            "id": "refund",
+            "title": "Refund Policy",
+            "text": "Refund policy allows returns within seven days after delivery.",
+        },
+    ]
+    seed_query = {
+        "id": "seed_query",
+        "type": "assigner",
+        "config": {
+            "assignments": {
+                "query": "refund policy",
+            },
+        },
+    }
+    retrieve_docs = {
+        "id": "retrieve_docs",
+        "type": "knowledge-retrieval",
+        "config": {
+            "query_template": "{{state.query}}",
+            "top_k": 1,
+            "documents": documents,
+        },
+    }
+    render_answer = {
+        "id": "render_answer",
+        "type": "template-transform",
+        "config": {
+            "template": "Top doc: {{nodes.retrieve_docs.output.retrieved_documents.0.title}}",
+        },
+    }
+    return {
+        "code": f"smoke-retriever-{unique_suffix}",
+        "name": "Smoke Retriever Flow",
+        "nodes": [seed_query, retrieve_docs, render_answer],
+        "edges": [
+            {"source": "seed_query", "target": "retrieve_docs"},
+            {"source": "retrieve_docs", "target": "render_answer"},
+        ],
+    }
+
+
+if __name__ == "__main__":
+    # SystemExit does not derive from Exception, so the normal exit raised
+    # here passes straight through the handler below.
+    try:
+        raise SystemExit(main())
+    except Exception as exc:
+        # Print a one-line summary to stderr, then re-raise so the original
+        # exception still propagates.
+        print(f"smoke test failed: {exc}", file=sys.stderr)
+        raise

+ 57 - 0
services/api-gateway/alembic/versions/20260423_0001_add_gateway_request_audit.py

@@ -0,0 +1,57 @@
+"""add gateway request audit
+
+Revision ID: 20260423_0001
+Revises:
+Create Date: 2026-04-23 19:00:00
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+import sqlalchemy as sa
+
+
+revision: str = "20260423_0001"
+down_revision: str | None = None
+branch_labels: Sequence[str] | None = None
+depends_on: Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Create the ``gateway_request_audit`` table and its five non-unique indexes."""
+    op.create_table(
+        "gateway_request_audit",
+        sa.Column("request_id", sa.String(length=64), nullable=False),
+        sa.Column("method", sa.String(length=16), nullable=False),
+        sa.Column("path", sa.String(length=512), nullable=False),
+        sa.Column("query_string", sa.Text(), nullable=True),
+        sa.Column("target_service", sa.String(length=64), nullable=True),
+        sa.Column("target_url", sa.String(length=1024), nullable=True),
+        sa.Column("status_code", sa.Integer(), nullable=True),
+        sa.Column("duration_ms", sa.Integer(), nullable=False),
+        sa.Column("client_host", sa.String(length=128), nullable=True),
+        sa.Column("user_agent", sa.String(length=512), nullable=True),
+        sa.Column("error_message", sa.Text(), nullable=True),
+        sa.Column("id", sa.String(length=36), nullable=False),
+        sa.Column("tenant_id", sa.String(length=36), nullable=False),
+        sa.Column("created_by", sa.String(length=36), nullable=True),
+        sa.Column("updated_by", sa.String(length=36), nullable=True),
+        sa.Column("created_time", sa.DateTime(), nullable=False),
+        sa.Column("updated_time", sa.DateTime(), nullable=False),
+        sa.Column("deleted_time", sa.DateTime(), nullable=True),
+        sa.Column("version", sa.Integer(), nullable=False),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    # Indexes on request_id, path, target_service, status_code and tenant_id.
+    op.create_index("ix_gateway_request_audit_request_id", "gateway_request_audit", ["request_id"], unique=False)
+    op.create_index("ix_gateway_request_audit_path", "gateway_request_audit", ["path"], unique=False)
+    op.create_index("ix_gateway_request_audit_target_service", "gateway_request_audit", ["target_service"], unique=False)
+    op.create_index("ix_gateway_request_audit_status_code", "gateway_request_audit", ["status_code"], unique=False)
+    op.create_index("ix_gateway_request_audit_tenant_id", "gateway_request_audit", ["tenant_id"], unique=False)
+
+
+def downgrade() -> None:
+    """Drop the indexes in reverse creation order, then drop the table."""
+    op.drop_index("ix_gateway_request_audit_tenant_id", table_name="gateway_request_audit")
+    op.drop_index("ix_gateway_request_audit_status_code", table_name="gateway_request_audit")
+    op.drop_index("ix_gateway_request_audit_target_service", table_name="gateway_request_audit")
+    op.drop_index("ix_gateway_request_audit_path", table_name="gateway_request_audit")
+    op.drop_index("ix_gateway_request_audit_request_id", table_name="gateway_request_audit")
+    op.drop_table("gateway_request_audit")

+ 215 - 1
services/api-gateway/app/api/routes.py

@@ -1,9 +1,15 @@
-from fastapi import APIRouter, Depends
+import asyncio
+
+from fastapi import APIRouter, Depends, Query, Request, Response
 from sqlalchemy import text
 from sqlalchemy.orm import Session
 
 from core_domain import ServiceDescriptor, ServiceHealth
+from app.bootstrap.settings import ApiGatewaySettings
 from app.db.session import get_db
+from app.domain.repositories import GatewayRequestAuditRepository
+from app.infrastructure.proxy import ProxyServiceName, ProxyTarget, ServiceProxy
+from app.schemas.gateway import GatewayRequestAuditResponse, GatewayServicesHealthResponse
 
 router = APIRouter()
 
@@ -18,3 +24,211 @@ def health_check(db: Session = Depends(get_db)) -> ServiceDescriptor:
 def readiness_check(db: Session = Depends(get_db)) -> ServiceHealth:
     db.execute(text("SELECT 1"))
     return ServiceHealth(service="api-gateway", status="ok", database="ok")
+
+
+# Lists audit rows for one tenant, optionally narrowed by request id and/or
+# target service; `limit` is validated to the range 1..500 (default 100).
+@router.get("/gateway/audits", response_model=list[GatewayRequestAuditResponse])
+def list_gateway_audits(
+    tenant_id: str = Query(...),
+    request_id: str | None = Query(default=None),
+    target_service: str | None = Query(default=None),
+    limit: int = Query(default=100, ge=1, le=500),
+    db: Session = Depends(get_db),
+) -> list[GatewayRequestAuditResponse]:
+    repository = GatewayRequestAuditRepository(db)
+    audits = repository.list_by_scope(
+        tenant_id=tenant_id,
+        request_id=request_id,
+        target_service=target_service,
+        limit=limit,
+    )
+    responses = []
+    for audit in audits:
+        responses.append(GatewayRequestAuditResponse.from_entity(audit))
+    return responses
+
+
+def get_gateway_settings() -> ApiGatewaySettings:
+    """Dependency provider: construct a new ``ApiGatewaySettings`` on every call."""
+    return ApiGatewaySettings()
+
+
+def get_service_proxy(settings: ApiGatewaySettings = Depends(get_gateway_settings)) -> ServiceProxy:
+    """Dependency provider: build a ``ServiceProxy`` using ``proxy_timeout_seconds``."""
+    return ServiceProxy(timeout_seconds=settings.proxy_timeout_seconds)
+
+
+def build_proxy_targets(settings: ApiGatewaySettings) -> dict[ProxyServiceName, ProxyTarget]:
+    """Map each downstream service name to a ``ProxyTarget`` built from settings.
+
+    Each target's health path is its path prefix followed by ``/health``.
+    """
+    routing_table: tuple[tuple[ProxyServiceName, str, str], ...] = (
+        ("workflow-service", settings.workflow_service_url, "/workflows"),
+        ("session-service", settings.session_service_url, "/sessions"),
+        ("runtime-service", settings.runtime_service_url, "/runtime"),
+        ("tool-service", settings.tool_service_url, "/tools"),
+        ("model-gateway-service", settings.model_gateway_service_url, "/models"),
+        ("code-runner-service", settings.code_runner_service_url, "/code"),
+    )
+    return {
+        service_name: ProxyTarget(
+            service_name=service_name,
+            base_url=base_url,
+            path_prefix=path_prefix,
+            health_path=f"{path_prefix}/health",
+        )
+        for service_name, base_url, path_prefix in routing_table
+    }
+
+
+# Checks every proxy target concurrently using the downstream health timeout;
+# the overall status is "ok" only when every service reports "ok", otherwise
+# it is "degraded".
+@router.get("/gateway/services/health", response_model=GatewayServicesHealthResponse)
+async def downstream_health_check(
+    settings: ApiGatewaySettings = Depends(get_gateway_settings),
+) -> GatewayServicesHealthResponse:
+    targets = build_proxy_targets(settings)
+    health_proxy = ServiceProxy(timeout_seconds=settings.downstream_health_timeout_seconds)
+    pending_checks = [health_proxy.check_health(target) for target in targets.values()]
+    downstream_services = await asyncio.gather(*pending_checks)
+
+    any_unhealthy = any(item.status != "ok" for item in downstream_services)
+    return GatewayServicesHealthResponse(
+        status="degraded" if any_unhealthy else "ok",
+        downstream_services=downstream_services,
+    )
+
+
+# Registered for both the bare prefix and any sub-path; `path` stays "" for
+# the bare prefix. Forwards the request to the "workflow-service" target.
+@router.api_route(
+    "/gateway/workflows",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+@router.api_route(
+    "/gateway/workflows/{path:path}",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+async def proxy_workflow_service(
+    request: Request,
+    path: str = "",
+    settings: ApiGatewaySettings = Depends(get_gateway_settings),
+    proxy: ServiceProxy = Depends(get_service_proxy),
+) -> Response:
+    return await proxy.forward(
+        request=request,
+        target=build_proxy_targets(settings)["workflow-service"],
+        path=path,
+    )
+
+
+# Registered for both the bare prefix and any sub-path; `path` stays "" for
+# the bare prefix. Forwards the request to the "session-service" target.
+@router.api_route(
+    "/gateway/sessions",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+@router.api_route(
+    "/gateway/sessions/{path:path}",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+async def proxy_session_service(
+    request: Request,
+    path: str = "",
+    settings: ApiGatewaySettings = Depends(get_gateway_settings),
+    proxy: ServiceProxy = Depends(get_service_proxy),
+) -> Response:
+    return await proxy.forward(
+        request=request,
+        target=build_proxy_targets(settings)["session-service"],
+        path=path,
+    )
+
+
+# Registered for both the bare prefix and any sub-path; `path` stays "" for
+# the bare prefix. Forwards the request to the "runtime-service" target.
+@router.api_route(
+    "/gateway/runtime",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+@router.api_route(
+    "/gateway/runtime/{path:path}",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+async def proxy_runtime_service(
+    request: Request,
+    path: str = "",
+    settings: ApiGatewaySettings = Depends(get_gateway_settings),
+    proxy: ServiceProxy = Depends(get_service_proxy),
+) -> Response:
+    return await proxy.forward(
+        request=request,
+        target=build_proxy_targets(settings)["runtime-service"],
+        path=path,
+    )
+
+
+# Registered for both the bare prefix and any sub-path; `path` stays "" for
+# the bare prefix. Forwards the request to the "tool-service" target.
+@router.api_route(
+    "/gateway/tools",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+@router.api_route(
+    "/gateway/tools/{path:path}",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+async def proxy_tool_service(
+    request: Request,
+    path: str = "",
+    settings: ApiGatewaySettings = Depends(get_gateway_settings),
+    proxy: ServiceProxy = Depends(get_service_proxy),
+) -> Response:
+    return await proxy.forward(
+        request=request,
+        target=build_proxy_targets(settings)["tool-service"],
+        path=path,
+    )
+
+
+# Registered for both the bare prefix and any sub-path; `path` stays "" for
+# the bare prefix. Forwards the request to the "model-gateway-service" target.
+@router.api_route(
+    "/gateway/models",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+@router.api_route(
+    "/gateway/models/{path:path}",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+async def proxy_model_gateway_service(
+    request: Request,
+    path: str = "",
+    settings: ApiGatewaySettings = Depends(get_gateway_settings),
+    proxy: ServiceProxy = Depends(get_service_proxy),
+) -> Response:
+    return await proxy.forward(
+        request=request,
+        target=build_proxy_targets(settings)["model-gateway-service"],
+        path=path,
+    )
+
+
+# Registered for both the bare prefix and any sub-path; `path` stays "" for
+# the bare prefix. Forwards the request to the "code-runner-service" target.
+@router.api_route(
+    "/gateway/code",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+@router.api_route(
+    "/gateway/code/{path:path}",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+async def proxy_code_runner_service(
+    request: Request,
+    path: str = "",
+    settings: ApiGatewaySettings = Depends(get_gateway_settings),
+    proxy: ServiceProxy = Depends(get_service_proxy),
+) -> Response:
+    return await proxy.forward(
+        request=request,
+        target=build_proxy_targets(settings)["code-runner-service"],
+        path=path,
+    )

+ 2 - 0
services/api-gateway/app/bootstrap/app.py

@@ -3,6 +3,7 @@ from fastapi import FastAPI
 from app.api.routes import router
 from app.bootstrap.settings import ApiGatewaySettings
 from app.db.session import build_session_factory
+from app.infrastructure.request_context import GatewayRequestContextMiddleware
 
 
 def create_app() -> FastAPI:
@@ -13,5 +14,6 @@ def create_app() -> FastAPI:
     )
     app.state.settings = settings
     app.state.session_factory = build_session_factory(settings)
+    app.add_middleware(GatewayRequestContextMiddleware)
     app.include_router(router)
     return app

+ 8 - 1
services/api-gateway/app/bootstrap/settings.py

@@ -5,4 +5,11 @@ class ApiGatewaySettings(ServiceSettings):
     service_name: str = "api-gateway"
     service_port: int = 8000
     database_url: str = "sqlite:///./api_gateway.db"
-
+    workflow_service_url: str = "http://127.0.0.1:8002"
+    session_service_url: str = "http://127.0.0.1:8001"
+    runtime_service_url: str = "http://127.0.0.1:8003"
+    tool_service_url: str = "http://127.0.0.1:8004"
+    model_gateway_service_url: str = "http://127.0.0.1:8005"
+    code_runner_service_url: str = "http://127.0.0.1:8006"
+    proxy_timeout_seconds: float = 30.0
+    downstream_health_timeout_seconds: float = 2.0

+ 2 - 1
services/api-gateway/app/db/models/__init__.py

@@ -1,4 +1,5 @@
 from core_db import Base
 
-__all__ = ["Base"]
+from .gateway_request_audit import GatewayRequestAudit
 
+__all__ = ["Base", "GatewayRequestAudit"]

+ 20 - 0
services/api-gateway/app/db/models/gateway_request_audit.py

@@ -0,0 +1,20 @@
+from sqlalchemy import Integer, String, Text
+from sqlalchemy.orm import Mapped, mapped_column
+
+from core_db import AuditMixin, Base, TenantMixin, VersionMixin
+
+
+class GatewayRequestAudit(TenantMixin, AuditMixin, VersionMixin, Base):
+    """ORM model for one row of the ``gateway_request_audit`` table."""
+
+    __tablename__ = "gateway_request_audit"
+
+    # Indexed lookup columns: request_id, path, target_service, status_code.
+    request_id: Mapped[str] = mapped_column(String(64), index=True)
+    method: Mapped[str] = mapped_column(String(16))
+    path: Mapped[str] = mapped_column(String(512), index=True)
+    query_string: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # Target columns are nullable; presumably they stay NULL for requests that
+    # were never proxied (confirm against the code that writes audits).
+    target_service: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True)
+    target_url: Mapped[str | None] = mapped_column(String(1024), nullable=True)
+    status_code: Mapped[int | None] = mapped_column(Integer, nullable=True, index=True)
+    duration_ms: Mapped[int] = mapped_column(Integer)
+    client_host: Mapped[str | None] = mapped_column(String(128), nullable=True)
+    user_agent: Mapped[str | None] = mapped_column(String(512), nullable=True)
+    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)

+ 1 - 0
services/api-gateway/app/domain/__init__.py

@@ -0,0 +1 @@
+

+ 60 - 0
services/api-gateway/app/domain/repositories.py

@@ -0,0 +1,60 @@
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from app.db.models import GatewayRequestAudit
+
+
+class GatewayRequestAuditRepository:
+    """Persistence helper for ``GatewayRequestAudit`` rows bound to one session."""
+
+    def __init__(self, db: Session) -> None:
+        self.db = db
+
+    def create(
+        self,
+        *,
+        tenant_id: str,
+        request_id: str,
+        method: str,
+        path: str,
+        query_string: str | None,
+        target_service: str | None,
+        target_url: str | None,
+        status_code: int | None,
+        duration_ms: int,
+        client_host: str | None,
+        user_agent: str | None,
+        error_message: str | None,
+    ) -> GatewayRequestAudit:
+        """Insert one audit row, commit, and return the refreshed entity."""
+        column_values = {
+            "tenant_id": tenant_id,
+            "request_id": request_id,
+            "method": method,
+            "path": path,
+            "query_string": query_string,
+            "target_service": target_service,
+            "target_url": target_url,
+            "status_code": status_code,
+            "duration_ms": duration_ms,
+            "client_host": client_host,
+            "user_agent": user_agent,
+            "error_message": error_message,
+        }
+        record = GatewayRequestAudit(**column_values)
+        session = self.db
+        session.add(record)
+        session.commit()
+        session.refresh(record)
+        return record
+
+    def list_by_scope(
+        self,
+        *,
+        tenant_id: str,
+        request_id: str | None = None,
+        target_service: str | None = None,
+        limit: int = 100,
+    ) -> list[GatewayRequestAudit]:
+        """Return up to *limit* rows for a tenant, newest ``created_time`` first.
+
+        ``request_id`` and ``target_service`` narrow the result only when given.
+        """
+        criteria = [GatewayRequestAudit.tenant_id == tenant_id]
+        if request_id is not None:
+            criteria.append(GatewayRequestAudit.request_id == request_id)
+        if target_service is not None:
+            criteria.append(GatewayRequestAudit.target_service == target_service)
+
+        # Multiple criteria passed to ``where`` are combined with AND.
+        query = (
+            select(GatewayRequestAudit)
+            .where(*criteria)
+            .order_by(GatewayRequestAudit.created_time.desc())
+            .limit(limit)
+        )
+        return list(self.db.scalars(query))

+ 1 - 0
services/api-gateway/app/infrastructure/__init__.py

@@ -0,0 +1 @@
+

+ 51 - 0
services/api-gateway/app/infrastructure/audit.py

@@ -0,0 +1,51 @@
+from time import perf_counter
+
+from fastapi import Request
+from sqlalchemy.orm import sessionmaker
+
+from app.domain.repositories import GatewayRequestAuditRepository
+from app.infrastructure.request_context import get_gateway_request_context
+
+
+def mark_gateway_target(
+    request: Request,
+    *,
+    target_service: str,
+    target_url: str,
+) -> None:
+    """Store the chosen downstream service and URL on the request's gateway context."""
+    gateway_context = get_gateway_request_context(request)
+    gateway_context.target_service, gateway_context.target_url = target_service, target_url
+
+
+def persist_gateway_audit(
+    *,
+    request: Request,
+    session_factory: sessionmaker,
+    status_code: int | None,
+    error_message: str | None = None,
+) -> None:
+    """Write one audit row for *request* using a short-lived session.
+
+    The duration is measured from the context's ``started_perf_counter`` to the
+    moment this function runs, truncated to whole milliseconds. The session is
+    always closed, even when the insert fails.
+    """
+    context = get_gateway_request_context(request)
+    elapsed_seconds = perf_counter() - context.started_perf_counter
+    client = request.client
+
+    audit_fields = {
+        "tenant_id": context.tenant_id,
+        "request_id": context.request_id,
+        "method": request.method,
+        "path": request.url.path,
+        # An empty query string is stored as NULL.
+        "query_string": request.url.query or None,
+        "target_service": context.target_service,
+        "target_url": context.target_url,
+        "status_code": status_code,
+        "duration_ms": int(elapsed_seconds * 1000),
+        "client_host": None if client is None else client.host,
+        "user_agent": request.headers.get("user-agent"),
+        "error_message": error_message,
+    }
+
+    db = session_factory()
+    try:
+        GatewayRequestAuditRepository(db).create(**audit_fields)
+    finally:
+        db.close()

+ 112 - 0
services/api-gateway/app/infrastructure/proxy.py

@@ -0,0 +1,112 @@
+from dataclasses import dataclass
+from typing import Literal
+
+import httpx
+from fastapi import Request, Response
+
+from app.infrastructure.audit import mark_gateway_target
+from app.infrastructure.request_context import REQUEST_ID_HEADER, TENANT_ID_HEADER, get_gateway_request_context
+from app.schemas.gateway import DownstreamServiceHealth
+
+# Closed set of downstream service names accepted as ProxyTarget.service_name.
+ProxyServiceName = Literal[
+    "workflow-service",
+    "session-service",
+    "runtime-service",
+    "tool-service",
+    "model-gateway-service",
+    "code-runner-service",
+]
+
+
+@dataclass(frozen=True)
+class ProxyTarget:
+    """Immutable description of one downstream service the gateway can reach."""
+
+    # Name recorded for auditing and reported in health results.
+    service_name: ProxyServiceName
+    # Service root; a trailing slash is stripped before paths are appended.
+    base_url: str
+    # Appended directly after base_url when building a forward URL.
+    path_prefix: str
+    # Appended directly after base_url when building the health-check URL.
+    health_path: str
+
+
+class ServiceProxy:
+    """Forward gateway requests to downstream services and probe their health.
+
+    Every call opens its own ``httpx.AsyncClient`` configured with
+    ``timeout_seconds``.
+    """
+
+    def __init__(self, *, timeout_seconds: float) -> None:
+        self.timeout_seconds = timeout_seconds
+
+    async def forward(
+        self,
+        *,
+        request: Request,
+        target: ProxyTarget,
+        path: str,
+    ) -> Response:
+        """Replay ``request`` against ``target`` and return the upstream reply.
+
+        Args:
+            request: Inbound request whose method, headers, query parameters
+                and body are forwarded.
+            target: Downstream service to call.
+            path: Path below the target's prefix.
+
+        Returns:
+            A response carrying the upstream status code, filtered headers
+            and body.
+
+        Raises:
+            httpx.HTTPError: Transport failures are not handled here and
+                propagate to the caller.
+        """
+        target_url = build_target_url(target=target, path=path)
+        mark_gateway_target(
+            request,
+            target_service=target.service_name,
+            target_url=target_url,
+        )
+        headers = build_forward_headers(request)
+        request_context = get_gateway_request_context(request)
+        # The gateway's own values win over anything the client sent.
+        headers[REQUEST_ID_HEADER] = request_context.request_id
+        headers[TENANT_ID_HEADER] = request_context.tenant_id
+        body = await request.body()
+
+        async with httpx.AsyncClient(timeout=self.timeout_seconds) as client:
+            upstream_response = await client.request(
+                method=request.method,
+                url=target_url,
+                # multi_items() keeps repeated keys (``?a=1&a=2``). Passing the
+                # multi-dict itself is read through ``.items()``, which yields
+                # only one value per key and silently drops the rest.
+                params=request.query_params.multi_items(),
+                headers=headers,
+                content=body,
+            )
+
+        return Response(
+            content=upstream_response.content,
+            status_code=upstream_response.status_code,
+            headers=build_response_headers(upstream_response),
+            media_type=upstream_response.headers.get("content-type"),
+        )
+
+    async def check_health(self, target: ProxyTarget) -> DownstreamServiceHealth:
+        """GET the target's health URL and summarise the outcome.
+
+        Transport errors are reported as an ``"error"`` result instead of being
+        raised; a non-success status is reported with the response text as the
+        error message.
+        """
+        health_url = f"{target.base_url.rstrip('/')}{target.health_path}"
+        try:
+            async with httpx.AsyncClient(timeout=self.timeout_seconds) as client:
+                response = await client.get(health_url)
+        except httpx.HTTPError as exc:
+            return DownstreamServiceHealth(
+                service=target.service_name,
+                status="error",
+                url=health_url,
+                error_message=str(exc),
+            )
+
+        return DownstreamServiceHealth(
+            service=target.service_name,
+            status="ok" if response.is_success else "error",
+            url=health_url,
+            status_code=response.status_code,
+            error_message=None if response.is_success else response.text,
+        )
+
+
+def build_target_url(*, target: ProxyTarget, path: str) -> str:
+    """Join the target's base URL, its path prefix and ``path`` into one URL.
+
+    Slashes surrounding ``path`` are stripped. When nothing remains, the
+    prefix URL is returned without a trailing slash.
+    """
+    prefix_url = target.base_url.rstrip("/") + target.path_prefix
+    relative_path = path.strip("/")
+    return f"{prefix_url}/{relative_path}" if relative_path else prefix_url
+
+
+def build_forward_headers(request: Request) -> dict[str, str]:
+    """Copy the inbound headers that may be passed on to the upstream call.
+
+    ``host``, ``content-length``, ``connection`` and the request-id and
+    tenant-id headers are left out; names are compared case-insensitively.
+    """
+    skipped_headers = {"host", "content-length", "connection", REQUEST_ID_HEADER, TENANT_ID_HEADER}
+    forwarded: dict[str, str] = {}
+    for name, value in request.headers.items():
+        if name.lower() in skipped_headers:
+            continue
+        forwarded[name] = value
+    return forwarded
+
+
+def build_response_headers(response: httpx.Response) -> dict[str, str]:
+    """Copy the upstream headers that remain valid for the relayed body.
+
+    The gateway relays ``response.content``, which httpx has already decoded
+    (gzip/deflate/...), and re-frames the body itself. Headers describing the
+    original encoding or framing are therefore dropped; keeping
+    ``content-encoding`` would make clients try to decompress a plain body.
+    """
+    skipped_headers = {
+        "content-length",
+        "content-encoding",
+        "transfer-encoding",
+        "connection",
+    }
+    return {
+        key: value
+        for key, value in response.headers.items()
+        if key.lower() not in skipped_headers
+    }

+ 80 - 0
services/api-gateway/app/infrastructure/request_context.py

@@ -0,0 +1,80 @@
+from dataclasses import dataclass
+from time import perf_counter
+from uuid import uuid4
+
+from fastapi import Request, Response
+from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
+
+# Header names are lowercase so they can be compared against lowercased keys.
+REQUEST_ID_HEADER = "x-request-id"
+TENANT_ID_HEADER = "x-tenant-id"
+# Tenant used when neither the header nor the query string names one.
+DEFAULT_TENANT_ID = "public"
+
+
+@dataclass
+class GatewayRequestContext:
+    """Mutable per-request state shared by the middleware, proxy and audit code."""
+
+    request_id: str
+    tenant_id: str
+    # perf_counter() reading taken when the context was created; used to
+    # compute the request duration.
+    started_perf_counter: float
+    # Filled in by mark_gateway_target once a downstream target is chosen.
+    target_service: str | None = None
+    target_url: str | None = None
+
+
+class GatewayRequestContextMiddleware(BaseHTTPMiddleware):
+    """Attach a gateway context to each request and audit its outcome.
+
+    The request id comes from the inbound header or a fresh UUID; the tenant
+    id from ``resolve_tenant_id``. Both are echoed on the response. Auditing is
+    best-effort: a failure to persist the audit record is logged and never
+    changes the response or hides the handler's own exception.
+    """
+
+    async def dispatch(
+        self,
+        request: Request,
+        call_next: RequestResponseEndpoint,
+    ) -> Response:
+        """Run the downstream handler inside a gateway context."""
+        request_id = request.headers.get(REQUEST_ID_HEADER) or str(uuid4())
+        tenant_id = resolve_tenant_id(request)
+        request.state.gateway_context = GatewayRequestContext(
+            request_id=request_id,
+            tenant_id=tenant_id,
+            started_perf_counter=perf_counter(),
+        )
+        try:
+            response = await call_next(request)
+        except Exception as exc:
+            # Record the failure as a 500, then let the original error continue.
+            self._persist_audit(request, status_code=500, error_message=str(exc))
+            raise
+
+        self._persist_audit(request, status_code=response.status_code)
+        response.headers[REQUEST_ID_HEADER] = request_id
+        response.headers[TENANT_ID_HEADER] = tenant_id
+        return response
+
+    @staticmethod
+    def _persist_audit(
+        request: Request,
+        *,
+        status_code: int | None,
+        error_message: str | None = None,
+    ) -> None:
+        """Persist the audit record, logging instead of raising on failure."""
+        import logging
+
+        # Imported lazily: app.infrastructure.audit imports this module.
+        from app.infrastructure.audit import persist_gateway_audit
+
+        try:
+            persist_gateway_audit(
+                request=request,
+                session_factory=request.app.state.session_factory,
+                status_code=status_code,
+                error_message=error_message,
+            )
+        except Exception:
+            logging.getLogger(__name__).exception(
+                "failed to persist gateway audit record"
+            )
+
+
+def resolve_tenant_id(request: Request) -> str:
+    """Pick the tenant id for a request.
+
+    Precedence: the tenant header, then the ``tenant_id`` query parameter,
+    then ``DEFAULT_TENANT_ID``. Empty values count as absent.
+    """
+    return (
+        request.headers.get(TENANT_ID_HEADER)
+        or request.query_params.get("tenant_id")
+        or DEFAULT_TENANT_ID
+    )
+
+
+def get_gateway_request_context(request: Request) -> GatewayRequestContext:
+    """Return the gateway context attached to ``request``.
+
+    When no context has been attached yet, a default one is created (fresh
+    request id, default tenant, timer started now) and stored on
+    ``request.state``. Later calls for the same request then see the same
+    object, so values written by one caller are visible to the next.
+    """
+    context = getattr(request.state, "gateway_context", None)
+    if isinstance(context, GatewayRequestContext):
+        return context
+    fallback_context = GatewayRequestContext(
+        request_id=str(uuid4()),
+        tenant_id=DEFAULT_TENANT_ID,
+        started_perf_counter=perf_counter(),
+    )
+    request.state.gateway_context = fallback_context
+    return fallback_context

+ 42 - 0
services/api-gateway/app/schemas/gateway.py

@@ -0,0 +1,42 @@
+from pydantic import BaseModel
+from datetime import datetime
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from app.db.models import GatewayRequestAudit
+
+
+class DownstreamServiceHealth(BaseModel):
+    """Result of probing one downstream service's health endpoint."""
+
+    service: str
+    status: str
+    # URL that was probed.
+    url: str
+    status_code: int | None = None
+    error_message: str | None = None
+
+
+class GatewayServicesHealthResponse(BaseModel):
+    """Gateway health payload listing the health of each downstream service."""
+
+    service: str = "api-gateway"
+    status: str
+    downstream_services: list[DownstreamServiceHealth]
+
+
+class GatewayRequestAuditResponse(BaseModel):
+    """API representation of one stored gateway request audit record."""
+
+    id: str
+    tenant_id: str
+    request_id: str
+    method: str
+    path: str
+    query_string: str | None = None
+    target_service: str | None = None
+    target_url: str | None = None
+    status_code: int | None = None
+    duration_ms: int
+    client_host: str | None = None
+    user_agent: str | None = None
+    error_message: str | None = None
+    created_time: datetime
+
+    @classmethod
+    def from_entity(cls, entity: "GatewayRequestAudit") -> "GatewayRequestAuditResponse":
+        """Build the response by reading same-named attributes off ``entity``."""
+        return cls.model_validate(entity, from_attributes=True)

+ 1 - 0
services/api-gateway/pyproject.toml

@@ -10,6 +10,7 @@ requires-python = ">=3.11"
 dependencies = [
   "alembic>=1.13,<2.0",
   "fastapi>=0.111,<1.0",
+  "httpx>=0.27,<1.0",
   "sqlalchemy>=2.0,<3.0",
   "uvicorn[standard]>=0.30,<1.0",
   "core-db",

+ 1 - 0
services/code-runner-service/app/__init__.py

@@ -0,0 +1 @@
+

+ 1 - 0
services/code-runner-service/app/api/__init__.py

@@ -0,0 +1 @@
+

+ 39 - 0
services/code-runner-service/app/api/routes.py

@@ -0,0 +1,39 @@
+from fastapi import APIRouter, Depends, HTTPException
+
+from core_domain import CodeExecutionRequestContract, CodeExecutionResponseContract, ServiceHealth
+from app.application.services import CodeRunnerApplicationService
+from app.bootstrap.settings import CodeRunnerServiceSettings
+from app.infrastructure.runner import CodeRunnerError, PythonCodeRunner
+
+# The code-runner endpoints defined below are registered on this router.
+router = APIRouter()
+
+
+def get_code_runner_settings() -> CodeRunnerServiceSettings:
+    """Dependency provider: build a fresh settings object for the request."""
+    settings = CodeRunnerServiceSettings()
+    return settings
+
+
+def get_code_runner_application_service(
+    settings: CodeRunnerServiceSettings = Depends(get_code_runner_settings),
+) -> CodeRunnerApplicationService:
+    """Dependency provider: wire a runner and the settings into the service."""
+    python_runner = PythonCodeRunner(settings=settings)
+    return CodeRunnerApplicationService(runner=python_runner, settings=settings)
+
+
+@router.get("/health", response_model=ServiceHealth)
+def health_check(
+    settings: CodeRunnerServiceSettings = Depends(get_code_runner_settings),
+) -> ServiceHealth:
+    """Report the service as healthy.
+
+    The ``database`` field carries the configured Python interpreter
+    (``settings.python_bin``).
+    """
+    interpreter = settings.python_bin
+    return ServiceHealth(
+        service="code-runner-service",
+        status="ok",
+        database=interpreter,
+    )
+
+
+@router.post("/execute", response_model=CodeExecutionResponseContract)
+def execute_code(
+    payload: CodeExecutionRequestContract,
+    service: CodeRunnerApplicationService = Depends(get_code_runner_application_service),
+) -> CodeExecutionResponseContract:
+    """Execute the submitted code and return the execution result.
+
+    Raises:
+        HTTPException: 422 carrying the runner's message when the service
+            raises ``CodeRunnerError``.
+    """
+    try:
+        execution_result = service.execute_code(payload)
+    except CodeRunnerError as exc:
+        raise HTTPException(status_code=422, detail=str(exc)) from exc
+    return execution_result

+ 1 - 0
services/code-runner-service/app/application/__init__.py

@@ -0,0 +1 @@
+

+ 29 - 0
services/code-runner-service/app/application/services.py

@@ -0,0 +1,29 @@
+from core_domain import CodeExecutionRequestContract, CodeExecutionResponseContract
+
+from app.bootstrap.settings import CodeRunnerServiceSettings
+from app.infrastructure.runner import PythonCodeRunner
+
+
+class CodeRunnerApplicationService:
+    """Validate code execution requests and hand them to the runner."""
+
+    def __init__(
+        self,
+        *,
+        runner: PythonCodeRunner,
+        settings: CodeRunnerServiceSettings,
+    ) -> None:
+        self.settings = settings
+        self.runner = runner
+
+    def execute_code(self, payload: CodeExecutionRequestContract) -> CodeExecutionResponseContract:
+        """Run ``payload`` with its timeout capped at the configured maximum.
+
+        Requests for any language other than ``"python"`` are answered with an
+        unsuccessful result instead of being executed.
+        """
+        if payload.language == "python":
+            capped_timeout = min(payload.timeout_seconds, self.settings.max_timeout_seconds)
+            resolved_payload = payload.model_copy(update={"timeout_seconds": capped_timeout})
+            return self.runner.execute(resolved_payload)
+
+        return CodeExecutionResponseContract(
+            success=False,
+            error_message=f"unsupported language: {payload.language}",
+        )

+ 1 - 0
services/code-runner-service/app/bootstrap/__init__.py

@@ -0,0 +1 @@
+

+ 12 - 0
services/code-runner-service/app/bootstrap/app.py

@@ -0,0 +1,12 @@
+from fastapi import FastAPI
+
+from app.api.routes import router
+
+
+def create_app() -> FastAPI:
+    """Build the code-runner FastAPI app with its routes under ``/code``."""
+    application = FastAPI(title="agent-platform code-runner-service", version="0.1.0")
+    application.include_router(router, prefix="/code", tags=["code"])
+    return application

+ 8 - 0
services/code-runner-service/app/bootstrap/settings.py

@@ -0,0 +1,8 @@
+from core_shared import ServiceSettings
+
+
+class CodeRunnerServiceSettings(ServiceSettings):
+    """Settings for the code-runner service."""
+
+    service_name: str = "code-runner-service"
+    service_port: int = 8006
+    # Interpreter executable used to launch the runner script.
+    python_bin: str = "python"
+    # Upper bound applied to a request's timeout_seconds.
+    max_timeout_seconds: int = 30

+ 1 - 0
services/code-runner-service/app/infrastructure/__init__.py

@@ -0,0 +1 @@
+

+ 98 - 0
services/code-runner-service/app/infrastructure/runner.py

@@ -0,0 +1,98 @@
+import json
+import subprocess
+import tempfile
+from pathlib import Path
+
+from core_domain import CodeExecutionRequestContract, CodeExecutionResponseContract
+from core_shared import JSONValue
+
+from app.bootstrap.settings import CodeRunnerServiceSettings
+
+
+class CodeRunnerError(Exception):
+    """Raised when the runner process cannot be started."""
+
+
+class PythonCodeRunner:
+    """Run submitted Python code in a separate interpreter process."""
+
+    def __init__(self, *, settings: CodeRunnerServiceSettings) -> None:
+        self.settings = settings
+
+    def execute(self, payload: CodeExecutionRequestContract) -> CodeExecutionResponseContract:
+        """Execute ``payload.code`` and collect its output.
+
+        The generated script and the JSON-encoded input are written to a
+        temporary directory that is removed afterwards. The child is run with
+        ``payload.timeout_seconds`` as its time limit.
+
+        Returns:
+            The captured stdout/stderr, the parsed result marker and a success
+            flag derived from the exit code; on timeout, an unsuccessful
+            result carrying whatever stderr was captured.
+
+        Raises:
+            CodeRunnerError: The interpreter process could not be started.
+        """
+        script_content = _build_python_runner_script(payload.code)
+        with tempfile.TemporaryDirectory(prefix="agent-platform-code-") as temp_dir:
+            temp_path = Path(temp_dir)
+            script_file = temp_path / "runner.py"
+            input_file = temp_path / "input.json"
+
+            script_file.write_text(script_content, encoding="utf-8")
+            input_file.write_text(
+                json.dumps(payload.input_json, ensure_ascii=False),
+                encoding="utf-8",
+            )
+
+            try:
+                completed = subprocess.run(
+                    [self.settings.python_bin, str(script_file), str(input_file)],
+                    capture_output=True,
+                    text=True,
+                    encoding="utf-8",
+                    timeout=payload.timeout_seconds,
+                    check=False,
+                )
+            except subprocess.TimeoutExpired as exc:
+                return CodeExecutionResponseContract(
+                    success=False,
+                    stderr=self._captured_text(exc.stderr),
+                    error_message=f"code execution timed out after {payload.timeout_seconds} seconds",
+                )
+            except OSError as exc:
+                raise CodeRunnerError(f"failed to start python runner: {exc}") from exc
+
+        stdout = completed.stdout
+        stderr = completed.stderr
+        output_json = _extract_result_json(stdout)
+        success = completed.returncode == 0
+        error_message = None if success else f"python exited with code {completed.returncode}"
+        return CodeExecutionResponseContract(
+            success=success,
+            stdout=stdout,
+            stderr=stderr,
+            output_json=output_json,
+            error_message=error_message,
+        )
+
+    @staticmethod
+    def _captured_text(captured: str | bytes | None) -> str:
+        """Normalise output attached to ``TimeoutExpired`` to ``str``.
+
+        The attribute is bytes whenever output was captured, even with
+        ``text=True``, and may be ``None`` when nothing was observed.
+        """
+        if captured is None:
+            return ""
+        if isinstance(captured, bytes):
+            return captured.decode("utf-8", errors="replace")
+        return captured
+
+
+def _build_python_runner_script(user_code: str) -> str:
+    """Return the source of the wrapper script that runs ``user_code``.
+
+    The script loads the JSON input file named by its first argument, runs the
+    user code with ``payload`` and ``result`` in its namespace, and finally
+    prints a ``__RESULT_JSON__=`` line holding the JSON-encoded ``result``.
+    """
+    # NOTE(security): the generated script exec()s caller-supplied code. Any
+    # isolation has to come from how and where the child process is run.
+    #
+    # repr() yields a Python string literal. A JSON string literal is not
+    # equivalent: json.dumps escapes non-BMP characters as surrogate pairs,
+    # which Python reads back as two lone surrogates that exec() cannot compile.
+    code_literal = repr(user_code)
+    return (
+        "import json\n"
+        "import pathlib\n"
+        "import sys\n"
+        "\n"
+        "input_path = pathlib.Path(sys.argv[1])\n"
+        "payload = json.loads(input_path.read_text(encoding='utf-8'))\n"
+        "namespace = {\n"
+        "    'payload': payload,\n"
+        "    'result': None,\n"
+        "}\n"
+        f"exec({code_literal}, namespace, namespace)\n"
+        "print('\\n__RESULT_JSON__=' + json.dumps(namespace.get('result'), ensure_ascii=False))\n"
+    )
+
+
+def _extract_result_json(stdout: str) -> dict[str, JSONValue]:
+    """Parse the last ``__RESULT_JSON__=`` line found in ``stdout``.
+
+    Returns the decoded object with string keys, ``{"result": value}`` for a
+    non-object value, and ``{}`` when there is no marker line or the last
+    marker line does not hold valid JSON.
+    """
+    marker = "__RESULT_JSON__="
+    # Search from the end: only the final marker line counts.
+    marker_line = next(
+        (line for line in reversed(stdout.splitlines()) if line.startswith(marker)),
+        None,
+    )
+    if marker_line is None:
+        return {}
+    try:
+        decoded = json.loads(marker_line[len(marker) :])
+    except json.JSONDecodeError:
+        return {}
+    if not isinstance(decoded, dict):
+        return {"result": decoded}
+    return {str(key): value for key, value in decoded.items()}

+ 3 - 0
services/code-runner-service/app/main.py

@@ -0,0 +1,3 @@
+from app.bootstrap.app import create_app
+
+# Module-level FastAPI application built by create_app().
+app = create_app()

+ 22 - 0
services/code-runner-service/pyproject.toml

@@ -0,0 +1,22 @@
+[build-system]
+requires = ["setuptools>=68"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "code-runner-service"
+version = "0.1.0"
+description = "Code runner service for agent platform."
+requires-python = ">=3.11"
+dependencies = [
+  "fastapi>=0.111,<1.0",
+  "pydantic>=2.7,<3.0",
+  "uvicorn[standard]>=0.30,<1.0",
+  "core-domain",
+  "core-shared",
+]
+
+[tool.setuptools]
+package-dir = {"" = "."}
+
+[tool.setuptools.packages.find]
+where = ["."]

+ 1 - 0
services/model-gateway-service/app/__init__.py

@@ -0,0 +1 @@
+

+ 1 - 0
services/model-gateway-service/app/api/__init__.py

@@ -0,0 +1 @@
+

+ 40 - 0
services/model-gateway-service/app/api/routes.py

@@ -0,0 +1,40 @@
+from fastapi import APIRouter, Depends, HTTPException
+
+from core_domain import ChatCompletionRequestContract, ChatCompletionResponseContract, ServiceHealth
+from app.application.services import ModelGatewayApplicationService
+from app.bootstrap.settings import ModelGatewayServiceSettings
+from app.infrastructure.provider import ModelProviderClient, ModelProviderClientError
+
+# The model-gateway endpoints defined below are registered on this router.
+router = APIRouter()
+
+
+def get_model_gateway_settings() -> ModelGatewayServiceSettings:
+    """Dependency provider: build a fresh settings object for the request."""
+    settings = ModelGatewayServiceSettings()
+    return settings
+
+
+def get_model_gateway_application_service(
+    settings: ModelGatewayServiceSettings = Depends(get_model_gateway_settings),
+) -> ModelGatewayApplicationService:
+    """Dependency provider: wire a provider client and settings into the service."""
+    provider_client = ModelProviderClient(settings=settings)
+    return ModelGatewayApplicationService(
+        provider_client=provider_client,
+        settings=settings,
+    )
+
+
+@router.get("/health", response_model=ServiceHealth)
+def health_check(
+    settings: ModelGatewayServiceSettings = Depends(get_model_gateway_settings),
+) -> ServiceHealth:
+    """Report the service as healthy.
+
+    The ``database`` field says whether a provider base URL is set:
+    ``"configured"`` or ``"missing"``.
+    """
+    if settings.provider_base_url:
+        provider_status = "configured"
+    else:
+        provider_status = "missing"
+    return ServiceHealth(
+        service="model-gateway-service",
+        status="ok",
+        database=provider_status,
+    )
+
+
+@router.post("/chat-completions", response_model=ChatCompletionResponseContract)
+def create_chat_completion(
+    payload: ChatCompletionRequestContract,
+    service: ModelGatewayApplicationService = Depends(get_model_gateway_application_service),
+) -> ChatCompletionResponseContract:
+    """Create a chat completion through the configured provider.
+
+    Raises:
+        HTTPException: 502 carrying the provider error message when the
+            service raises ``ModelProviderClientError``.
+    """
+    try:
+        completion = service.create_chat_completion(payload)
+    except ModelProviderClientError as exc:
+        raise HTTPException(status_code=502, detail=str(exc)) from exc
+    return completion

+ 1 - 0
services/model-gateway-service/app/application/__init__.py

@@ -0,0 +1 @@
+

+ 26 - 0
services/model-gateway-service/app/application/services.py

@@ -0,0 +1,26 @@
+from core_domain import ChatCompletionRequestContract, ChatCompletionResponseContract
+
+from app.bootstrap.settings import ModelGatewayServiceSettings
+from app.infrastructure.provider import ModelProviderClient
+
+
+class ModelGatewayApplicationService:
+    """Resolve request defaults and delegate chat completions to the provider."""
+
+    def __init__(
+        self,
+        *,
+        provider_client: ModelProviderClient,
+        settings: ModelGatewayServiceSettings,
+    ) -> None:
+        self.settings = settings
+        self.provider_client = provider_client
+
+    def create_chat_completion(
+        self,
+        payload: ChatCompletionRequestContract,
+    ) -> ChatCompletionResponseContract:
+        """Send ``payload`` to the provider, defaulting the model when unset.
+
+        A falsy ``payload.model`` is replaced by ``settings.default_model``.
+        """
+        effective_model = payload.model or self.settings.default_model
+        resolved_payload = payload.model_copy(update={"model": effective_model})
+        return self.provider_client.create_chat_completion(resolved_payload)

+ 1 - 0
services/model-gateway-service/app/bootstrap/__init__.py

@@ -0,0 +1 @@
+

+ 15 - 0
services/model-gateway-service/app/bootstrap/app.py

@@ -0,0 +1,15 @@
+from fastapi import FastAPI
+
+from app.api.routes import router
+from app.bootstrap.settings import ModelGatewayServiceSettings
+
+
+def create_app() -> FastAPI:
+    """Build the model-gateway FastAPI app with its routes under ``/models``.
+
+    The loaded settings are kept on ``app.state.settings``.
+    """
+    application = FastAPI(title="agent-platform model-gateway-service", version="0.1.0")
+    application.state.settings = ModelGatewayServiceSettings()
+    application.include_router(router, prefix="/models", tags=["models"])
+    return application

+ 10 - 0
services/model-gateway-service/app/bootstrap/settings.py

@@ -0,0 +1,10 @@
+from core_shared import ServiceSettings
+
+
+class ModelGatewayServiceSettings(ServiceSettings):
+    """Settings for the model-gateway service."""
+
+    service_name: str = "model-gateway-service"
+    service_port: int = 8005
+    provider_type: str = "openai_compatible"
+    # Base URL that "/chat/completions" is appended to.
+    provider_base_url: str = "http://127.0.0.1:11434/v1"
+    # Sent as a Bearer token when set.
+    provider_api_key: str | None = None
+    # Used when a request does not name a model.
+    default_model: str | None = None

+ 1 - 0
services/model-gateway-service/app/infrastructure/__init__.py

@@ -0,0 +1 @@
+

+ 98 - 0
services/model-gateway-service/app/infrastructure/provider.py

@@ -0,0 +1,98 @@
+import httpx
+
+from core_domain import ChatCompletionRequestContract, ChatCompletionResponseContract
+from core_shared import JSONValue
+
+from app.bootstrap.settings import ModelGatewayServiceSettings
+
+
+class ModelProviderClientError(Exception):
+    """Raised when a chat completion cannot be obtained from the provider."""
+
+
+class ModelProviderClient:
+    """HTTP client for the configured chat-completions provider endpoint."""
+
+    def __init__(self, *, settings: ModelGatewayServiceSettings) -> None:
+        self.settings = settings
+
+    def create_chat_completion(
+        self,
+        payload: ChatCompletionRequestContract,
+    ) -> ChatCompletionResponseContract:
+        """POST ``payload`` to ``<provider_base_url>/chat/completions``.
+
+        ``temperature`` and ``max_tokens`` are only sent when set, and the API
+        key is sent as a Bearer token when configured.
+
+        Raises:
+            ModelProviderClientError: No model was given, the HTTP request
+                failed or returned an error status, or the response body was
+                not valid JSON.
+        """
+        if payload.model is None:
+            raise ModelProviderClientError("model is required for chat completion")
+
+        request_payload = {
+            "model": payload.model,
+            "messages": [item.model_dump(mode="json") for item in payload.messages],
+        }
+        if payload.temperature is not None:
+            request_payload["temperature"] = payload.temperature
+        if payload.max_tokens is not None:
+            request_payload["max_tokens"] = payload.max_tokens
+
+        request_headers: dict[str, str] = {"content-type": "application/json"}
+        if self.settings.provider_api_key:
+            request_headers["authorization"] = f"Bearer {self.settings.provider_api_key}"
+
+        try:
+            with httpx.Client(timeout=60.0) as client:
+                response = client.post(
+                    f"{self.settings.provider_base_url.rstrip('/')}/chat/completions",
+                    json=request_payload,
+                    headers=request_headers,
+                )
+                response.raise_for_status()
+        except httpx.HTTPError as exc:
+            raise ModelProviderClientError(f"model provider request failed: {exc}") from exc
+
+        # A 2xx reply with a non-JSON body (HTML error page, empty body) must
+        # surface as a provider error, not as an unhandled ValueError.
+        try:
+            response_body = response.json()
+        except ValueError as exc:
+            raise ModelProviderClientError(
+                f"model provider returned a non-JSON response: {exc}"
+            ) from exc
+
+        response_json = _coerce_json_dict(response_body)
+        content = _extract_response_content(response_json)
+        finish_reason = _extract_finish_reason(response_json)
+        usage_json = _extract_usage_json(response_json)
+        return ChatCompletionResponseContract(
+            model=payload.model,
+            content=content,
+            finish_reason=finish_reason,
+            usage_json=usage_json,
+            raw_response_json=response_json,
+        )
+
+
+def _coerce_json_dict(payload: JSONValue) -> dict[str, JSONValue]:
+    """Return ``payload`` as a dict with string keys, or ``{}`` if not a dict."""
+    if not isinstance(payload, dict):
+        return {}
+    coerced: dict[str, JSONValue] = {}
+    for key, value in payload.items():
+        coerced[str(key)] = value
+    return coerced
+
+
+def _extract_response_content(payload: dict[str, JSONValue]) -> str:
+    """Return the text of the first choice, or ``""`` when there is none.
+
+    ``choices[0].message.content`` is preferred; ``choices[0].text`` is the
+    fallback. Only string values are accepted.
+    """
+    choices = payload.get("choices")
+    if not isinstance(choices, list) or not choices:
+        return ""
+    first_choice = choices[0]
+    if not isinstance(first_choice, dict):
+        return ""
+
+    message = first_choice.get("message")
+    if isinstance(message, dict):
+        message_content = message.get("content")
+        if isinstance(message_content, str):
+            return message_content
+
+    text = first_choice.get("text")
+    return text if isinstance(text, str) else ""
+
+
+def _extract_finish_reason(payload: dict[str, JSONValue]) -> str | None:
+    """Return ``choices[0].finish_reason`` when it is a string, else ``None``."""
+    choices = payload.get("choices")
+    if not isinstance(choices, list) or not choices:
+        return None
+    first_choice = choices[0]
+    if not isinstance(first_choice, dict):
+        return None
+    finish_reason = first_choice.get("finish_reason")
+    return finish_reason if isinstance(finish_reason, str) else None
+
+
+def _extract_usage_json(payload: dict[str, JSONValue]) -> dict[str, JSONValue]:
+    """Return the ``usage`` object with string keys, or ``{}`` if not a dict."""
+    usage = payload.get("usage")
+    if not isinstance(usage, dict):
+        return {}
+    return {str(name): amount for name, amount in usage.items()}

+ 3 - 0
services/model-gateway-service/app/main.py

@@ -0,0 +1,3 @@
+from app.bootstrap.app import create_app
+
+# Module-level FastAPI application built by create_app().
+app = create_app()

+ 23 - 0
services/model-gateway-service/pyproject.toml

@@ -0,0 +1,23 @@
+[build-system]
+requires = ["setuptools>=68"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "model-gateway-service"
+version = "0.1.0"
+description = "Model gateway service for agent platform."
+requires-python = ">=3.11"
+dependencies = [
+  "fastapi>=0.111,<1.0",
+  "httpx>=0.27,<1.0",
+  "pydantic>=2.7,<3.0",
+  "uvicorn[standard]>=0.30,<1.0",
+  "core-domain",
+  "core-shared",
+]
+
+[tool.setuptools]
+package-dir = {"" = "."}
+
+[tool.setuptools.packages.find]
+where = ["."]

+ 27 - 0
services/runtime-service/alembic/versions/20260423_0002_add_node_run_outputs.py

@@ -0,0 +1,27 @@
+"""add node run outputs
+
+Revision ID: 20260423_0002
+Revises: 20260422_0001
+Create Date: 2026-04-23 17:20:00
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+import sqlalchemy as sa
+
+
+revision: str = "20260423_0002"
+down_revision: str | None = "20260422_0001"
+branch_labels: Sequence[str] | None = None
+depends_on: Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Add the nullable ``output_text`` and ``output_json`` columns to ``node_run``."""
+    new_columns = (
+        sa.Column("output_text", sa.Text(), nullable=True),
+        sa.Column("output_json", sa.JSON(), nullable=True),
+    )
+    for column in new_columns:
+        op.add_column("node_run", column)
+
+
+def downgrade() -> None:
+    """Drop the output columns from ``node_run`` in reverse order of creation."""
+    for column_name in ("output_json", "output_text"):
+        op.drop_column("node_run", column_name)

+ 50 - 0
services/runtime-service/alembic/versions/20260423_0003_add_execution_logs.py

@@ -0,0 +1,50 @@
+"""add execution logs
+
+Revision ID: 20260423_0003
+Revises: 20260423_0002
+Create Date: 2026-04-23 16:30:00
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+import sqlalchemy as sa
+
+
+revision: str = "20260423_0003"
+down_revision: str | None = "20260423_0002"
+branch_labels: Sequence[str] | None = None
+depends_on: Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Create the ``execution_log`` table and its non-unique lookup indexes."""
+    table_name = "execution_log"
+    op.create_table(
+        table_name,
+        sa.Column("run_id", sa.String(length=36), nullable=False),
+        sa.Column("node_run_id", sa.String(length=36), nullable=True),
+        sa.Column("event_type", sa.String(length=64), nullable=False),
+        sa.Column("level", sa.String(length=16), nullable=False),
+        sa.Column("message", sa.Text(), nullable=False),
+        sa.Column("detail_json", sa.JSON(), nullable=True),
+        sa.Column("id", sa.String(length=36), nullable=False),
+        sa.Column("tenant_id", sa.String(length=36), nullable=False),
+        sa.Column("created_by", sa.String(length=36), nullable=True),
+        sa.Column("updated_by", sa.String(length=36), nullable=True),
+        sa.Column("created_time", sa.DateTime(), nullable=False),
+        sa.Column("updated_time", sa.DateTime(), nullable=False),
+        sa.Column("deleted_time", sa.DateTime(), nullable=True),
+        sa.Column("version", sa.Integer(), nullable=False),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    # One single-column index per lookup column, named ix_<table>_<column>.
+    for column_name in ("run_id", "node_run_id", "event_type", "tenant_id"):
+        op.create_index(
+            f"ix_{table_name}_{column_name}",
+            table_name,
+            [column_name],
+            unique=False,
+        )
+
+
+def downgrade() -> None:
+    """Drop the ``execution_log`` indexes (reverse creation order), then the table."""
+    table_name = "execution_log"
+    for column_name in ("tenant_id", "event_type", "node_run_id", "run_id"):
+        op.drop_index(f"ix_{table_name}_{column_name}", table_name=table_name)
+    op.drop_table(table_name)

+ 56 - 0
services/runtime-service/alembic/versions/20260423_0004_add_node_artifacts.py

@@ -0,0 +1,56 @@
+"""add node artifacts
+
+Revision ID: 20260423_0004
+Revises: 20260423_0003
+Create Date: 2026-04-23 17:30:00
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+import sqlalchemy as sa
+
+
+revision: str = "20260423_0004"
+down_revision: str | None = "20260423_0003"
+branch_labels: Sequence[str] | None = None
+depends_on: Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Create the ``node_artifact`` table and its non-unique lookup indexes."""
+    table_name = "node_artifact"
+    op.create_table(
+        table_name,
+        sa.Column("run_id", sa.String(length=36), nullable=False),
+        sa.Column("node_run_id", sa.String(length=36), nullable=False),
+        sa.Column("node_id", sa.String(length=128), nullable=False),
+        sa.Column("artifact_type", sa.String(length=64), nullable=False),
+        sa.Column("name", sa.String(length=128), nullable=False),
+        sa.Column("mime_type", sa.String(length=128), nullable=True),
+        sa.Column("content_text", sa.Text(), nullable=True),
+        sa.Column("content_json", sa.JSON(), nullable=True),
+        sa.Column("storage_uri", sa.String(length=512), nullable=True),
+        sa.Column("size_bytes", sa.Integer(), nullable=True),
+        sa.Column("id", sa.String(length=36), nullable=False),
+        sa.Column("tenant_id", sa.String(length=36), nullable=False),
+        sa.Column("created_by", sa.String(length=36), nullable=True),
+        sa.Column("updated_by", sa.String(length=36), nullable=True),
+        sa.Column("created_time", sa.DateTime(), nullable=False),
+        sa.Column("updated_time", sa.DateTime(), nullable=False),
+        sa.Column("deleted_time", sa.DateTime(), nullable=True),
+        sa.Column("version", sa.Integer(), nullable=False),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    # One single-column index per lookup column, named ix_<table>_<column>.
+    indexed_columns = ("run_id", "node_run_id", "node_id", "artifact_type", "tenant_id")
+    for column_name in indexed_columns:
+        op.create_index(
+            f"ix_{table_name}_{column_name}",
+            table_name,
+            [column_name],
+            unique=False,
+        )
+
+
+def downgrade() -> None:
+    """Drop the ``node_artifact`` indexes (reverse creation order), then the table."""
+    table_name = "node_artifact"
+    indexed_columns = ("tenant_id", "artifact_type", "node_id", "node_run_id", "run_id")
+    for column_name in indexed_columns:
+        op.drop_index(f"ix_{table_name}_{column_name}", table_name=table_name)
+    op.drop_table(table_name)

+ 60 - 0
services/runtime-service/alembic/versions/20260423_0005_add_trace_spans.py

@@ -0,0 +1,60 @@
+"""add trace spans
+
+Revision ID: 20260423_0005
+Revises: 20260423_0004
+Create Date: 2026-04-23 18:00:00
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+import sqlalchemy as sa
+
+
+revision: str = "20260423_0005"
+down_revision: str | None = "20260423_0004"
+branch_labels: Sequence[str] | None = None
+depends_on: Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Create the ``trace_span`` table and its non-unique lookup indexes."""
+    table_name = "trace_span"
+    op.create_table(
+        table_name,
+        sa.Column("run_id", sa.String(length=36), nullable=False),
+        sa.Column("node_run_id", sa.String(length=36), nullable=True),
+        sa.Column("parent_span_id", sa.String(length=36), nullable=True),
+        sa.Column("span_type", sa.String(length=64), nullable=False),
+        sa.Column("name", sa.String(length=128), nullable=False),
+        sa.Column("status", sa.String(length=32), nullable=False),
+        sa.Column("started_time", sa.DateTime(), nullable=False),
+        sa.Column("ended_time", sa.DateTime(), nullable=True),
+        sa.Column("duration_ms", sa.Integer(), nullable=True),
+        sa.Column("attributes_json", sa.JSON(), nullable=True),
+        sa.Column("error_code", sa.String(length=64), nullable=True),
+        sa.Column("error_message", sa.Text(), nullable=True),
+        sa.Column("id", sa.String(length=36), nullable=False),
+        sa.Column("tenant_id", sa.String(length=36), nullable=False),
+        sa.Column("created_by", sa.String(length=36), nullable=True),
+        sa.Column("updated_by", sa.String(length=36), nullable=True),
+        sa.Column("created_time", sa.DateTime(), nullable=False),
+        sa.Column("updated_time", sa.DateTime(), nullable=False),
+        sa.Column("deleted_time", sa.DateTime(), nullable=True),
+        sa.Column("version", sa.Integer(), nullable=False),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    # One single-column index per lookup column, named ix_<table>_<column>.
+    indexed_columns = (
+        "run_id",
+        "node_run_id",
+        "parent_span_id",
+        "span_type",
+        "status",
+        "tenant_id",
+    )
+    for column_name in indexed_columns:
+        op.create_index(
+            f"ix_{table_name}_{column_name}",
+            table_name,
+            [column_name],
+            unique=False,
+        )
+
+
+def downgrade() -> None:
+    """Drop the ``trace_span`` indexes (reverse creation order), then the table."""
+    table_name = "trace_span"
+    indexed_columns = (
+        "tenant_id",
+        "status",
+        "span_type",
+        "parent_span_id",
+        "node_run_id",
+        "run_id",
+    )
+    for column_name in indexed_columns:
+        op.drop_index(f"ix_{table_name}_{column_name}", table_name=table_name)
+    op.drop_table(table_name)

+ 178 - 2
services/runtime-service/app/api/routes.py

@@ -6,13 +6,30 @@ from core_domain import ServiceHealth
 from app.application.services import RuntimeApplicationService
 from app.bootstrap.settings import RuntimeServiceSettings
 from app.db.session import get_db
-from app.domain.repositories import NodeRunRepository, WorkflowRunRepository
+from app.domain.repositories import (
+    ExecutionLogRepository,
+    NodeArtifactRepository,
+    NodeRunRepository,
+    TraceSpanRepository,
+    WorkflowRunRepository,
+)
+from app.infrastructure.code_runner_client import CodeRunnerClient, CodeRunnerClientError
+from app.infrastructure.executors import build_node_execution_dispatcher_with_clients
+from app.infrastructure.model_gateway_client import ModelGatewayClient, ModelGatewayClientError
+from app.infrastructure.tool_client import ToolServiceClient, ToolServiceClientError
 from app.infrastructure.workflow_client import WorkflowServiceClient, WorkflowServiceClientError
 from app.schemas.run import (
+    ExecutionLogResponse,
+    NodeArtifactResponse,
+    NodeRunExecuteRequest,
+    NodeRunExecuteResponse,
     NodeRunResponse,
     NodeRunStatusUpdateRequest,
     RunBootstrapResponse,
     RunCreateRequest,
+    RunExecuteRequest,
+    RunExecuteResponse,
+    TraceSpanResponse,
     WorkflowRunResponse,
     WorkflowRunStatusUpdateRequest,
 )
@@ -31,6 +48,14 @@ def get_runtime_application_service(
     return RuntimeApplicationService(
         workflow_run_repository=WorkflowRunRepository(db),
         node_run_repository=NodeRunRepository(db),
+        execution_log_repository=ExecutionLogRepository(db),
+        node_artifact_repository=NodeArtifactRepository(db),
+        trace_span_repository=TraceSpanRepository(db),
+        execution_dispatcher=build_node_execution_dispatcher_with_clients(
+            code_runner_client=CodeRunnerClient(base_url=settings.code_runner_service_url),
+            model_gateway_client=ModelGatewayClient(base_url=settings.model_gateway_service_url),
+            tool_client=ToolServiceClient(base_url=settings.tool_service_url),
+        ),
         workflow_client=WorkflowServiceClient(base_url=settings.workflow_service_url),
     )
 
@@ -48,7 +73,12 @@ def create_run(
 ) -> RunBootstrapResponse:
     try:
         workflow_run, initial_node = service.create_run(payload)
-    except WorkflowServiceClientError as exc:
+    except (
+        CodeRunnerClientError,
+        ModelGatewayClientError,
+        ToolServiceClientError,
+        WorkflowServiceClientError,
+    ) as exc:
         raise HTTPException(status_code=502, detail=str(exc)) from exc
     return RunBootstrapResponse(
         run=WorkflowRunResponse.from_entity(workflow_run),
@@ -80,6 +110,61 @@ def list_node_runs(
     ]
 
 
+@router.get("/execution-logs", response_model=list[ExecutionLogResponse])
+def list_execution_logs(
+    tenant_id: str = Query(...),
+    run_id: str | None = Query(default=None),
+    node_run_id: str | None = Query(default=None),
+    service: RuntimeApplicationService = Depends(get_runtime_application_service),
+) -> list[ExecutionLogResponse]:
+    return [
+        ExecutionLogResponse.from_entity(item)
+        for item in service.list_execution_logs(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=node_run_id,
+        )
+    ]
+
+
+@router.get("/node-artifacts", response_model=list[NodeArtifactResponse])
+def list_node_artifacts(
+    tenant_id: str = Query(...),
+    run_id: str | None = Query(default=None),
+    node_run_id: str | None = Query(default=None),
+    artifact_type: str | None = Query(default=None),
+    service: RuntimeApplicationService = Depends(get_runtime_application_service),
+) -> list[NodeArtifactResponse]:
+    return [
+        NodeArtifactResponse.from_entity(item)
+        for item in service.list_node_artifacts(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=node_run_id,
+            artifact_type=artifact_type,
+        )
+    ]
+
+
+@router.get("/trace-spans", response_model=list[TraceSpanResponse])
+def list_trace_spans(
+    tenant_id: str = Query(...),
+    run_id: str | None = Query(default=None),
+    node_run_id: str | None = Query(default=None),
+    span_type: str | None = Query(default=None),
+    service: RuntimeApplicationService = Depends(get_runtime_application_service),
+) -> list[TraceSpanResponse]:
+    return [
+        TraceSpanResponse.from_entity(item)
+        for item in service.list_trace_spans(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=node_run_id,
+            span_type=span_type,
+        )
+    ]
+
+
 @router.post("/runs/{run_id}/status", response_model=WorkflowRunResponse)
 def update_run_status(
     run_id: str,
@@ -102,3 +187,94 @@ def update_node_run_status(
     if entity is None:
         raise HTTPException(status_code=404, detail=f"node_run not found: {node_run_id}")
     return NodeRunResponse.from_entity(entity)
+
+
+@router.post("/node-runs/{node_run_id}/execute", response_model=NodeRunExecuteResponse)
+def execute_node_run(
+    node_run_id: str,
+    payload: NodeRunExecuteRequest,
+    service: RuntimeApplicationService = Depends(get_runtime_application_service),
+) -> NodeRunExecuteResponse:
+    try:
+        result = service.execute_node_run(node_run_id=node_run_id, payload=payload)
+    except (
+        CodeRunnerClientError,
+        ModelGatewayClientError,
+        ToolServiceClientError,
+        WorkflowServiceClientError,
+    ) as exc:
+        raise HTTPException(status_code=502, detail=str(exc)) from exc
+
+    if result is None:
+        raise HTTPException(status_code=404, detail=f"node_run not found: {node_run_id}")
+
+    workflow_run, node_run, executor_name = result
+    return NodeRunExecuteResponse(
+        run=WorkflowRunResponse.from_entity(workflow_run),
+        node_run=NodeRunResponse.from_entity(node_run),
+        executor_name=executor_name,
+    )
+
+
+@router.post("/runs/{run_id}/execute-next", response_model=NodeRunExecuteResponse)
+def execute_next_node_run(
+    run_id: str,
+    payload: NodeRunExecuteRequest,
+    tenant_id: str = Query(...),
+    service: RuntimeApplicationService = Depends(get_runtime_application_service),
+) -> NodeRunExecuteResponse:
+    try:
+        result = service.execute_next_node_run(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            payload=payload,
+        )
+    except (
+        CodeRunnerClientError,
+        ModelGatewayClientError,
+        ToolServiceClientError,
+        WorkflowServiceClientError,
+    ) as exc:
+        raise HTTPException(status_code=502, detail=str(exc)) from exc
+
+    if result is None:
+        raise HTTPException(status_code=404, detail=f"queued node_run not found for run: {run_id}")
+
+    workflow_run, node_run, executor_name = result
+    return NodeRunExecuteResponse(
+        run=WorkflowRunResponse.from_entity(workflow_run),
+        node_run=NodeRunResponse.from_entity(node_run),
+        executor_name=executor_name,
+    )
+
+
+@router.post("/runs/{run_id}/execute", response_model=RunExecuteResponse)
+def execute_run(
+    run_id: str,
+    payload: RunExecuteRequest,
+    tenant_id: str = Query(...),
+    service: RuntimeApplicationService = Depends(get_runtime_application_service),
+) -> RunExecuteResponse:
+    try:
+        result = service.execute_run(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            payload=payload,
+        )
+    except (
+        CodeRunnerClientError,
+        ModelGatewayClientError,
+        ToolServiceClientError,
+        WorkflowServiceClientError,
+    ) as exc:
+        raise HTTPException(status_code=502, detail=str(exc)) from exc
+
+    if result is None:
+        raise HTTPException(status_code=404, detail=f"workflow_run not found: {run_id}")
+
+    workflow_run, node_runs, executor_names = result
+    return RunExecuteResponse(
+        run=WorkflowRunResponse.from_entity(workflow_run),
+        node_runs=[NodeRunResponse.from_entity(item) for item in node_runs],
+        executor_names=executor_names,
+    )

+ 430 - 5
services/runtime-service/app/application/services.py

@@ -1,10 +1,30 @@
-from core_domain import InitialNodeContract, NodeRunStatus, WorkflowRunStatus
+from core_domain import (
+    InitialNodeContract,
+    NodeExecutionContextContract,
+    NodeExecutionResultContract,
+    NodeRunStatus,
+    WorkflowRunStatus,
+)
 
 from app.db.models import NodeRun, WorkflowRun
-from app.domain.repositories import NodeRunRepository, WorkflowRunRepository
-from app.infrastructure.planner import derive_initial_node, derive_successor_nodes
+from app.domain.repositories import (
+    ExecutionLogRepository,
+    NodeArtifactRepository,
+    NodeRunRepository,
+    TraceSpanRepository,
+    WorkflowRunRepository,
+)
+from app.infrastructure.executors import NodeExecutionDispatcher
+from app.infrastructure.planner import derive_initial_node, derive_node_config, derive_successor_nodes
 from app.infrastructure.workflow_client import WorkflowServiceClient
-from app.schemas.run import NodeRunStatusUpdateRequest, RunCreateRequest, WorkflowRunStatusUpdateRequest
+from app.schemas.run import (
+    NodeRunExecuteRequest,
+    NodeRunStatusUpdateRequest,
+    RunCreateRequest,
+    RunExecuteRequest,
+    WorkflowRunStatusUpdateRequest,
+)
+from core_shared import JSONValue
 
 
 class RuntimeApplicationService:
@@ -12,10 +32,18 @@ class RuntimeApplicationService:
         self,
         workflow_run_repository: WorkflowRunRepository,
         node_run_repository: NodeRunRepository,
+        execution_log_repository: ExecutionLogRepository,
+        node_artifact_repository: NodeArtifactRepository,
+        trace_span_repository: TraceSpanRepository,
+        execution_dispatcher: NodeExecutionDispatcher,
         workflow_client: WorkflowServiceClient | None = None,
     ) -> None:
         self.workflow_run_repository = workflow_run_repository
         self.node_run_repository = node_run_repository
+        self.execution_log_repository = execution_log_repository
+        self.node_artifact_repository = node_artifact_repository
+        self.trace_span_repository = trace_span_repository
+        self.execution_dispatcher = execution_dispatcher
         self.workflow_client = workflow_client
 
     def create_run(self, payload: RunCreateRequest) -> tuple[WorkflowRun, NodeRun | None]:
@@ -47,6 +75,31 @@ class RuntimeApplicationService:
                 node_type=initial_node.node_type,
                 status=initial_node.status,
             )
+            self._log_event(
+                tenant_id=payload.tenant_id,
+                run_id=workflow_run.id,
+                node_run_id=node_run.id,
+                event_type="node_queued",
+                message=f"initial node queued: {initial_node.node_id}",
+                detail_json={
+                    "node_id": initial_node.node_id,
+                    "node_type": initial_node.node_type,
+                    "status": initial_node.status,
+                },
+            )
+
+        self._log_event(
+            tenant_id=payload.tenant_id,
+            run_id=workflow_run.id,
+            node_run_id=node_run.id if node_run is not None else None,
+            event_type="run_created",
+            message="workflow run created",
+            detail_json={
+                "workflow_id": payload.workflow_id,
+                "workflow_version_id": payload.workflow_version_id,
+                "session_id": payload.session_id,
+            },
+        )
 
         return workflow_run, node_run
 
@@ -56,6 +109,46 @@ class RuntimeApplicationService:
     def list_node_runs(self, tenant_id: str, run_id: str) -> list[NodeRun]:
         """Return the node runs stored for ``run_id`` within ``tenant_id``."""
         repository = self.node_run_repository
         return repository.list_by_run(tenant_id=tenant_id, run_id=run_id)
 
+    def list_execution_logs(
+        self,
+        tenant_id: str,
+        run_id: str | None = None,
+        node_run_id: str | None = None,
+    ):
+        return self.execution_log_repository.list_by_scope(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=node_run_id,
+        )
+
+    def list_node_artifacts(
+        self,
+        tenant_id: str,
+        run_id: str | None = None,
+        node_run_id: str | None = None,
+        artifact_type: str | None = None,
+    ):
+        return self.node_artifact_repository.list_by_scope(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=node_run_id,
+            artifact_type=artifact_type,
+        )
+
+    def list_trace_spans(
+        self,
+        tenant_id: str,
+        run_id: str | None = None,
+        node_run_id: str | None = None,
+        span_type: str | None = None,
+    ):
+        return self.trace_span_repository.list_by_scope(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=node_run_id,
+            span_type=span_type,
+        )
+
     def update_run_status(
         self,
         run_id: str,
@@ -79,10 +172,26 @@ class RuntimeApplicationService:
             worker_key=payload.worker_key,
             error_code=payload.error_code,
             error_message=payload.error_message,
+            output_text=payload.output_text,
+            output_json=payload.output_json,
         )
         if node_run is None:
             return None
 
+        self._log_event(
+            tenant_id=node_run.tenant_id,
+            run_id=node_run.run_id,
+            node_run_id=node_run.id,
+            event_type="node_status_updated",
+            message=f"node status updated to {payload.status}",
+            detail_json={
+                "node_id": node_run.node_id,
+                "node_type": node_run.node_type,
+                "status": payload.status,
+                "error_code": payload.error_code,
+            },
+        )
+
         if payload.status == "completed":
             self._schedule_successor_nodes(node_run)
 
@@ -92,6 +201,190 @@ class RuntimeApplicationService:
         )
         return node_run
 
+    def execute_node_run(
+        self,
+        node_run_id: str,
+        payload: NodeRunExecuteRequest,
+    ) -> tuple[WorkflowRun, NodeRun, str] | None:
+        node_run = self.node_run_repository.get_by_id(node_run_id)
+        if node_run is None:
+            return None
+
+        workflow_run = self.workflow_run_repository.get_by_id(node_run.run_id)
+        if workflow_run is None:
+            return None
+
+        if node_run.status in {"completed", "failed", "skipped"}:
+            executor_name = self.execution_dispatcher.resolve_executor(node_run.node_type).executor_name
+            return workflow_run, node_run, executor_name
+
+        running_node_run = self.node_run_repository.update_status(
+            node_run_id=node_run_id,
+            status="running",
+            worker_key=payload.worker_key,
+        )
+        if running_node_run is None:
+            return None
+
+        self._log_event(
+            tenant_id=running_node_run.tenant_id,
+            run_id=running_node_run.run_id,
+            node_run_id=running_node_run.id,
+            event_type="node_execution_started",
+            message=f"executing node {running_node_run.node_id}",
+            detail_json={
+                "node_id": running_node_run.node_id,
+                "node_type": running_node_run.node_type,
+                "worker_key": payload.worker_key,
+            },
+        )
+
+        context = self._build_execution_context(
+            workflow_run=workflow_run,
+            node_run=running_node_run,
+            worker_key=payload.worker_key,
+        )
+        executor_name = self.execution_dispatcher.resolve_executor(
+            running_node_run.node_type
+        ).executor_name
+        trace_span = self.trace_span_repository.start(
+            tenant_id=running_node_run.tenant_id,
+            run_id=running_node_run.run_id,
+            node_run_id=running_node_run.id,
+            parent_span_id=None,
+            span_type="node_execution",
+            name=f"{running_node_run.node_type}:{running_node_run.node_id}",
+            attributes_json={
+                "node_id": running_node_run.node_id,
+                "node_type": running_node_run.node_type,
+                "executor_name": executor_name,
+                "worker_key": payload.worker_key,
+            },
+        )
+
+        try:
+            result, executor_name = self.execution_dispatcher.execute(context=context, request=payload)
+        except Exception as exc:
+            result = NodeExecutionResultContract(
+                status="failed",
+                worker_key=payload.worker_key,
+                error_code="executor_error",
+                error_message=str(exc),
+            )
+
+        final_node_run = self.update_node_run_status(
+            node_run_id=running_node_run.id,
+            payload=NodeRunStatusUpdateRequest(
+                status=result.status,
+                worker_key=result.worker_key,
+                error_code=result.error_code,
+                error_message=result.error_message,
+                output_text=result.output_text,
+                output_json=result.output_json,
+            ),
+        )
+        if final_node_run is None:
+            return None
+
+        self.trace_span_repository.finish(
+            span_id=trace_span.id,
+            status="ok" if final_node_run.status == "completed" else "error",
+            error_code=final_node_run.error_code,
+            error_message=final_node_run.error_message,
+            attributes_json={
+                "node_status": final_node_run.status,
+                "executor_name": executor_name,
+                "has_output_text": final_node_run.output_text is not None,
+                "has_output_json": final_node_run.output_json is not None,
+            },
+        )
+        self._persist_node_execution_artifact(final_node_run)
+
+        self._log_event(
+            tenant_id=final_node_run.tenant_id,
+            run_id=final_node_run.run_id,
+            node_run_id=final_node_run.id,
+            event_type="node_execution_finished",
+            message=f"node execution finished with status {final_node_run.status}",
+            detail_json={
+                "node_id": final_node_run.node_id,
+                "node_type": final_node_run.node_type,
+                "executor_name": executor_name,
+                "status": final_node_run.status,
+            },
+        )
+
+        workflow_run = self.workflow_run_repository.get_by_id(final_node_run.run_id)
+        if workflow_run is None:
+            return None
+        return workflow_run, final_node_run, executor_name
+
+    def execute_next_node_run(
+        self,
+        tenant_id: str,
+        run_id: str,
+        payload: NodeRunExecuteRequest,
+    ) -> tuple[WorkflowRun, NodeRun, str] | None:
+        next_node_run = self.node_run_repository.get_next_queued_by_run(
+            tenant_id=tenant_id,
+            run_id=run_id,
+        )
+        if next_node_run is None:
+            return None
+        return self.execute_node_run(node_run_id=next_node_run.id, payload=payload)
+
+    def execute_run(
+        self,
+        tenant_id: str,
+        run_id: str,
+        payload: RunExecuteRequest,
+    ) -> tuple[WorkflowRun, list[NodeRun], list[str]] | None:
+        workflow_run = self.workflow_run_repository.get_by_id(run_id)
+        if workflow_run is None or workflow_run.tenant_id != tenant_id:
+            return None
+
+        executed_node_runs: list[NodeRun] = []
+        executor_names: list[str] = []
+
+        for _ in range(payload.max_steps):
+            step_result = self.execute_next_node_run(
+                tenant_id=tenant_id,
+                run_id=run_id,
+                payload=NodeRunExecuteRequest(worker_key=payload.worker_key),
+            )
+            if step_result is None:
+                break
+
+            workflow_run, node_run, executor_name = step_result
+            executed_node_runs.append(node_run)
+            executor_names.append(executor_name)
+
+            if node_run.status != "completed":
+                break
+
+        final_run = self.workflow_run_repository.get_by_id(run_id)
+        if final_run is None:
+            return None
+        return final_run, executed_node_runs, executor_names
+
+    def _persist_node_execution_artifact(self, node_run: NodeRun) -> None:
+        if node_run.output_text is None and node_run.output_json is None:
+            return
+
+        size_bytes = len(node_run.output_text.encode("utf-8")) if node_run.output_text else None
+        self.node_artifact_repository.create(
+            tenant_id=node_run.tenant_id,
+            run_id=node_run.run_id,
+            node_run_id=node_run.id,
+            node_id=node_run.node_id,
+            artifact_type="execution_result",
+            name=f"{node_run.node_id}-execution-result",
+            mime_type="application/json" if node_run.output_json is not None else "text/plain",
+            content_text=node_run.output_text,
+            content_json=node_run.output_json,
+            size_bytes=size_bytes,
+        )
+
     def _plan_initial_node(self, payload: RunCreateRequest) -> InitialNodeContract | None:
         if self.workflow_client is None:
             return None
@@ -101,6 +394,21 @@ class RuntimeApplicationService:
         )
         return derive_initial_node(workflow_version)
 
+    def _resolve_node_config(
+        self,
+        *,
+        tenant_id: str,
+        workflow_version_id: str,
+        node_id: str,
+    ) -> dict[str, JSONValue]:
+        if self.workflow_client is None:
+            return {}
+        workflow_version = self.workflow_client.get_workflow_version(
+            tenant_id=tenant_id,
+            workflow_version_id=workflow_version_id,
+        )
+        return derive_node_config(workflow_version, node_id)
+
     def _schedule_successor_nodes(self, node_run: NodeRun) -> None:
         if self.workflow_client is None:
             return
@@ -113,7 +421,20 @@ class RuntimeApplicationService:
             tenant_id=node_run.tenant_id,
             workflow_version_id=workflow_run.workflow_version_id,
         )
-        successor_nodes = derive_successor_nodes(workflow_version, node_run.node_id)
+        run_state_json, node_output_json_by_node_id, node_output_text_by_node_id = (
+            self._build_run_state_maps(
+                tenant_id=node_run.tenant_id,
+                run_id=node_run.run_id,
+            )
+        )
+        successor_nodes = derive_successor_nodes(
+            workflow_version,
+            node_run.node_id,
+            current_output_json=node_run.output_json,
+            run_state_json=run_state_json,
+            node_output_json_by_node_id=node_output_json_by_node_id,
+            node_output_text_by_node_id=node_output_text_by_node_id,
+        )
         if not successor_nodes:
             return
 
@@ -134,6 +455,78 @@ class RuntimeApplicationService:
                 node_type=successor.node_type,
                 status=successor.status,
             )
+            self._log_event(
+                tenant_id=node_run.tenant_id,
+                run_id=node_run.run_id,
+                node_run_id=None,
+                event_type="node_queued",
+                message=f"successor node queued: {successor.node_id}",
+                detail_json={
+                    "node_id": successor.node_id,
+                    "node_type": successor.node_type,
+                    "status": successor.status,
+                    "source_node_id": node_run.node_id,
+                },
+            )
+
+    def _build_execution_context(
+        self,
+        *,
+        workflow_run: WorkflowRun,
+        node_run: NodeRun,
+        worker_key: str | None,
+    ) -> NodeExecutionContextContract:
+        run_state_json, node_output_json_by_node_id, node_output_text_by_node_id = (
+            self._build_run_state_maps(
+                tenant_id=node_run.tenant_id,
+                run_id=node_run.run_id,
+            )
+        )
+        return NodeExecutionContextContract(
+            tenant_id=node_run.tenant_id,
+            run_id=node_run.run_id,
+            node_run_id=node_run.id,
+            node_id=node_run.node_id,
+            node_type=node_run.node_type,
+            node_config_json=self._resolve_node_config(
+                tenant_id=node_run.tenant_id,
+                workflow_version_id=workflow_run.workflow_version_id,
+                node_id=node_run.node_id,
+            ),
+            run_state_json=run_state_json,
+            node_output_json_by_node_id=node_output_json_by_node_id,
+            node_output_text_by_node_id=node_output_text_by_node_id,
+            worker_key=worker_key,
+        )
+
+    def _build_run_state_maps(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str,
+    ) -> tuple[
+        dict[str, JSONValue],
+        dict[str, dict[str, JSONValue]],
+        dict[str, str],
+    ]:
+        node_runs = self.node_run_repository.list_by_run(tenant_id=tenant_id, run_id=run_id)
+        run_state_json: dict[str, JSONValue] = {}
+        node_output_json_by_node_id: dict[str, dict[str, JSONValue]] = {}
+        node_output_text_by_node_id: dict[str, str] = {}
+
+        for item in node_runs:
+            if item.output_json is not None:
+                node_output_json_by_node_id[item.node_id] = dict(item.output_json)
+
+                state_updates = item.output_json.get("state_updates")
+                if isinstance(state_updates, dict):
+                    for state_key, state_value in state_updates.items():
+                        run_state_json[str(state_key)] = state_value
+
+            if item.output_text is not None:
+                node_output_text_by_node_id[item.node_id] = item.output_text
+
+        return run_state_json, node_output_json_by_node_id, node_output_text_by_node_id
 
     def _sync_workflow_run_status_from_nodes(self, *, tenant_id: str, run_id: str) -> None:
         node_runs = self.node_run_repository.list_by_run(tenant_id=tenant_id, run_id=run_id)
@@ -152,6 +545,17 @@ class RuntimeApplicationService:
             error_code=error_code,
             error_message=error_message,
         )
+        self._log_event(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=None,
+            event_type="run_status_synced",
+            message=f"workflow run status synced to {next_status}",
+            detail_json={
+                "status": next_status,
+                "error_code": error_code,
+            },
+        )
 
     def _derive_run_status(
         self,
@@ -173,3 +577,24 @@ class RuntimeApplicationService:
             return "completed", None, None
 
         return "running", None, None
+
+    def _log_event(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str,
+        node_run_id: str | None,
+        event_type: str,
+        message: str,
+        detail_json: dict[str, JSONValue] | None,
+        level: str = "info",
+    ) -> None:
+        self.execution_log_repository.create(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=node_run_id,
+            event_type=event_type,
+            level=level,
+            message=message,
+            detail_json=detail_json,
+        )

+ 3 - 0
services/runtime-service/app/bootstrap/settings.py

@@ -6,3 +6,6 @@ class RuntimeServiceSettings(ServiceSettings):
     service_port: int = 8003
     database_url: str = "sqlite:///./runtime_service.db"
     workflow_service_url: str = "http://127.0.0.1:8002"
+    tool_service_url: str = "http://127.0.0.1:8004"
+    model_gateway_service_url: str = "http://127.0.0.1:8005"
+    code_runner_service_url: str = "http://127.0.0.1:8006"

+ 4 - 2
services/runtime-service/app/db/models/__init__.py

@@ -1,7 +1,9 @@
 from core_db import Base
 
+from .execution_log import ExecutionLog
+from .node_artifact import NodeArtifact
 from .node_run import NodeRun
+from .trace_span import TraceSpan
 from .workflow_run import WorkflowRun
 
-__all__ = ["Base", "NodeRun", "WorkflowRun"]
-
+__all__ = ["Base", "ExecutionLog", "NodeArtifact", "NodeRun", "TraceSpan", "WorkflowRun"]

+ 17 - 0
services/runtime-service/app/db/models/execution_log.py

@@ -0,0 +1,17 @@
+from sqlalchemy import String, Text
+from sqlalchemy.dialects.sqlite import JSON
+from sqlalchemy.orm import Mapped, mapped_column
+
+from core_db import AuditMixin, Base, TenantMixin, VersionMixin
+from core_shared import JSONValue
+
+
+class ExecutionLog(TenantMixin, AuditMixin, VersionMixin, Base):
+    """ORM model for rows of the ``execution_log`` table.
+
+    Further columns are contributed by the ``TenantMixin``, ``AuditMixin`` and
+    ``VersionMixin`` bases, which are defined in ``core_db``.
+    """
+
+    __tablename__ = "execution_log"
+
+    # Workflow run this entry belongs to (indexed).
+    run_id: Mapped[str] = mapped_column(String(36), index=True)
+    # Node run this entry refers to; nullable, so an entry may carry only a run_id.
+    node_run_id: Mapped[str | None] = mapped_column(String(36), nullable=True, index=True)
+    # Short event identifier, at most 64 characters (indexed).
+    event_type: Mapped[str] = mapped_column(String(64), index=True)
+    # Log level string; defaults to "info".
+    level: Mapped[str] = mapped_column(String(16), default="info")
+    # Free-form message text.
+    message: Mapped[str] = mapped_column(Text)
+    # Optional JSON payload with additional details about the event.
+    detail_json: Mapped[dict[str, JSONValue] | None] = mapped_column(JSON, nullable=True)

+ 21 - 0
services/runtime-service/app/db/models/node_artifact.py

@@ -0,0 +1,21 @@
+from sqlalchemy import Integer, String, Text
+from sqlalchemy.dialects.sqlite import JSON
+from sqlalchemy.orm import Mapped, mapped_column
+
+from core_db import AuditMixin, Base, TenantMixin, VersionMixin
+from core_shared import JSONValue
+
+
+class NodeArtifact(TenantMixin, AuditMixin, VersionMixin, Base):
+    """ORM model for rows of the ``node_artifact`` table.
+
+    Further columns are contributed by the ``TenantMixin``, ``AuditMixin`` and
+    ``VersionMixin`` bases, which are defined in ``core_db``.
+    """
+
+    __tablename__ = "node_artifact"
+
+    # Workflow run the artifact belongs to (indexed).
+    run_id: Mapped[str] = mapped_column(String(36), index=True)
+    # Node run the artifact belongs to (indexed, not nullable).
+    node_run_id: Mapped[str] = mapped_column(String(36), index=True)
+    # Node identifier, at most 128 characters (indexed).
+    node_id: Mapped[str] = mapped_column(String(128), index=True)
+    # Artifact category, at most 64 characters (indexed).
+    artifact_type: Mapped[str] = mapped_column(String(64), index=True)
+    # Artifact name, at most 128 characters.
+    name: Mapped[str] = mapped_column(String(128))
+    # Optional MIME type of the content.
+    mime_type: Mapped[str | None] = mapped_column(String(128), nullable=True)
+    # Optional inline text content.
+    content_text: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # Optional inline JSON content.
+    content_json: Mapped[dict[str, JSONValue] | None] = mapped_column(JSON, nullable=True)
+    # Optional URI, at most 512 characters; presumably for content stored
+    # outside this table -- TODO confirm against the code that sets it.
+    storage_uri: Mapped[str | None] = mapped_column(String(512), nullable=True)
+    # Optional content size in bytes.
+    size_bytes: Mapped[int | None] = mapped_column(Integer, nullable=True)

+ 4 - 1
services/runtime-service/app/db/models/node_run.py

@@ -1,9 +1,11 @@
 from datetime import datetime
 
 from sqlalchemy import DateTime, Integer, String, Text
+from sqlalchemy.dialects.sqlite import JSON
 from sqlalchemy.orm import Mapped, mapped_column
 
 from core_db import AuditMixin, Base, TenantMixin, VersionMixin
+from core_shared import JSONValue
 
 
 class NodeRun(TenantMixin, AuditMixin, VersionMixin, Base):
@@ -20,6 +22,7 @@ class NodeRun(TenantMixin, AuditMixin, VersionMixin, Base):
     queued_time: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
     started_time: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
     finished_time: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+    output_text: Mapped[str | None] = mapped_column(Text, nullable=True)
+    output_json: Mapped[dict[str, JSONValue] | None] = mapped_column(JSON, nullable=True)
     error_code: Mapped[str | None] = mapped_column(String(64), nullable=True)
     error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
-

+ 25 - 0
services/runtime-service/app/db/models/trace_span.py

@@ -0,0 +1,25 @@
+from datetime import datetime
+
+from sqlalchemy import DateTime, Integer, String, Text
+from sqlalchemy.dialects.sqlite import JSON
+from sqlalchemy.orm import Mapped, mapped_column
+
+from core_db import AuditMixin, Base, TenantMixin, VersionMixin
+from core_shared import JSONValue
+
+
+class TraceSpan(TenantMixin, AuditMixin, VersionMixin, Base):
+    # ORM mapping for the "trace_span" table: one timed span recorded for a
+    # run, optionally tied to a node run and to a parent span.
+    __tablename__ = "trace_span"
+
+    run_id: Mapped[str] = mapped_column(String(36), index=True)
+    # Nullable: a span does not have to belong to a node run.
+    node_run_id: Mapped[str | None] = mapped_column(String(36), nullable=True, index=True)
+    # Nullable: a span without a parent has no parent_span_id.
+    parent_span_id: Mapped[str | None] = mapped_column(String(36), nullable=True, index=True)
+    span_type: Mapped[str] = mapped_column(String(64), index=True)
+    name: Mapped[str] = mapped_column(String(128))
+    # New rows default to "running".
+    status: Mapped[str] = mapped_column(String(32), default="running", index=True)
+    started_time: Mapped[datetime] = mapped_column(DateTime)
+    # End time and duration stay NULL until they are filled in.
+    ended_time: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+    duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    # NOTE(review): JSON is imported from sqlalchemy.dialects.sqlite at the top
+    # of this file — confirm that is intended if another backend is used.
+    attributes_json: Mapped[dict[str, JSONValue] | None] = mapped_column(JSON, nullable=True)
+    error_code: Mapped[str | None] = mapped_column(String(64), nullable=True)
+    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)

+ 197 - 1
services/runtime-service/app/domain/repositories.py

@@ -3,8 +3,9 @@ from datetime import datetime
 from sqlalchemy import select
 from sqlalchemy.orm import Session
 
-from app.db.models import NodeRun, WorkflowRun
+from app.db.models import ExecutionLog, NodeArtifact, NodeRun, TraceSpan, WorkflowRun
 from core_domain import NodeRunStatus, WorkflowRunStatus
+from core_shared import JSONValue
 
 
 class WorkflowRunRepository:
@@ -150,6 +151,17 @@ class NodeRunRepository:
     def get_by_id(self, node_run_id: str) -> NodeRun | None:
         return self.db.get(NodeRun, node_run_id)
 
+    def get_next_queued_by_run(self, *, tenant_id: str, run_id: str) -> NodeRun | None:
+        """Return the earliest-created queued node run of a run, or ``None``.
+
+        The lookup is restricted to ``tenant_id`` and ``run_id`` and only
+        considers rows whose status is ``"queued"``.
+        """
+        oldest_queued = (
+            select(NodeRun)
+            .where(
+                NodeRun.tenant_id == tenant_id,
+                NodeRun.run_id == run_id,
+                NodeRun.status == "queued",
+            )
+            .order_by(NodeRun.created_time.asc())
+            .limit(1)
+        )
+        return self.db.scalar(oldest_queued)
+
     def update_status(
         self,
         *,
@@ -158,6 +170,8 @@ class NodeRunRepository:
         worker_key: str | None = None,
         error_code: str | None = None,
         error_message: str | None = None,
+        output_text: str | None = None,
+        output_json: dict[str, JSONValue] | None = None,
     ) -> NodeRun | None:
         entity = self.db.get(NodeRun, node_run_id)
         if entity is None:
@@ -167,6 +181,8 @@ class NodeRunRepository:
         entity.worker_key = worker_key
         entity.error_code = error_code
         entity.error_message = error_message
+        entity.output_text = output_text
+        entity.output_json = output_json
 
         now = datetime.utcnow()
         if status == "running" and entity.started_time is None:
@@ -177,3 +193,183 @@ class NodeRunRepository:
         self.db.commit()
         self.db.refresh(entity)
         return entity
+
+
+class ExecutionLogRepository:
+    """Persistence helper for ``ExecutionLog`` rows."""
+
+    def __init__(self, db: Session) -> None:
+        self.db = db
+
+    def create(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str,
+        node_run_id: str | None,
+        event_type: str,
+        level: str,
+        message: str,
+        detail_json: dict[str, JSONValue] | None,
+    ) -> ExecutionLog:
+        """Insert one log entry, commit, and return the refreshed row."""
+        session = self.db
+        log_entry = ExecutionLog(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=node_run_id,
+            event_type=event_type,
+            level=level,
+            message=message,
+            detail_json=detail_json,
+        )
+        session.add(log_entry)
+        session.commit()
+        session.refresh(log_entry)
+        return log_entry
+
+    def list_by_scope(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str | None = None,
+        node_run_id: str | None = None,
+    ) -> list[ExecutionLog]:
+        """List a tenant's log entries, oldest first.
+
+        ``run_id`` and ``node_run_id`` narrow the result only when given.
+        """
+        filters = [ExecutionLog.tenant_id == tenant_id]
+        if run_id is not None:
+            filters.append(ExecutionLog.run_id == run_id)
+        if node_run_id is not None:
+            filters.append(ExecutionLog.node_run_id == node_run_id)
+        query = (
+            select(ExecutionLog)
+            .where(*filters)
+            .order_by(ExecutionLog.created_time.asc())
+        )
+        return list(self.db.scalars(query))
+
+
+class NodeArtifactRepository:
+    """Persistence helper for ``NodeArtifact`` rows."""
+
+    def __init__(self, db: Session) -> None:
+        self.db = db
+
+    def create(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str,
+        node_run_id: str,
+        node_id: str,
+        artifact_type: str,
+        name: str,
+        mime_type: str | None,
+        content_text: str | None,
+        content_json: dict[str, JSONValue] | None,
+        storage_uri: str | None = None,
+        size_bytes: int | None = None,
+    ) -> NodeArtifact:
+        """Insert one artifact, commit, and return the refreshed row."""
+        session = self.db
+        artifact = NodeArtifact(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=node_run_id,
+            node_id=node_id,
+            artifact_type=artifact_type,
+            name=name,
+            mime_type=mime_type,
+            content_text=content_text,
+            content_json=content_json,
+            storage_uri=storage_uri,
+            size_bytes=size_bytes,
+        )
+        session.add(artifact)
+        session.commit()
+        session.refresh(artifact)
+        return artifact
+
+    def list_by_scope(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str | None = None,
+        node_run_id: str | None = None,
+        artifact_type: str | None = None,
+    ) -> list[NodeArtifact]:
+        """List a tenant's artifacts, oldest first.
+
+        ``run_id``, ``node_run_id`` and ``artifact_type`` narrow the result
+        only when given.
+        """
+        filters = [NodeArtifact.tenant_id == tenant_id]
+        if run_id is not None:
+            filters.append(NodeArtifact.run_id == run_id)
+        if node_run_id is not None:
+            filters.append(NodeArtifact.node_run_id == node_run_id)
+        if artifact_type is not None:
+            filters.append(NodeArtifact.artifact_type == artifact_type)
+        query = (
+            select(NodeArtifact)
+            .where(*filters)
+            .order_by(NodeArtifact.created_time.asc())
+        )
+        return list(self.db.scalars(query))
+
+
+class TraceSpanRepository:
+    """Persistence helper for ``TraceSpan`` rows."""
+
+    def __init__(self, db: Session) -> None:
+        self.db = db
+
+    def start(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str,
+        node_run_id: str | None,
+        parent_span_id: str | None,
+        span_type: str,
+        name: str,
+        attributes_json: dict[str, JSONValue] | None = None,
+    ) -> TraceSpan:
+        """Insert a span in ``"running"`` status stamped with the current UTC time."""
+        session = self.db
+        span = TraceSpan(
+            tenant_id=tenant_id,
+            run_id=run_id,
+            node_run_id=node_run_id,
+            parent_span_id=parent_span_id,
+            span_type=span_type,
+            name=name,
+            status="running",
+            started_time=datetime.utcnow(),
+            attributes_json=attributes_json,
+        )
+        session.add(span)
+        session.commit()
+        session.refresh(span)
+        return span
+
+    def finish(
+        self,
+        *,
+        span_id: str,
+        status: str,
+        error_code: str | None = None,
+        error_message: str | None = None,
+        attributes_json: dict[str, JSONValue] | None = None,
+    ) -> TraceSpan | None:
+        """Close a span and record its outcome.
+
+        Returns ``None`` when no span with ``span_id`` exists. Otherwise the
+        end time and millisecond duration are stored, the error fields are
+        overwritten, and any given attributes are merged over the stored ones.
+        """
+        session = self.db
+        span = session.get(TraceSpan, span_id)
+        if span is None:
+            return None
+
+        finished_at = datetime.utcnow()
+        elapsed_seconds = (finished_at - span.started_time).total_seconds()
+
+        span.status = status
+        span.ended_time = finished_at
+        span.duration_ms = int(elapsed_seconds * 1000)
+        span.error_code = error_code
+        span.error_message = error_message
+        if attributes_json is not None:
+            # Build a new dict so the stored value is reassigned, not mutated.
+            merged_attributes = dict(span.attributes_json or {})
+            merged_attributes.update(attributes_json)
+            span.attributes_json = merged_attributes
+
+        session.commit()
+        session.refresh(span)
+        return span
+
+    def list_by_scope(
+        self,
+        *,
+        tenant_id: str,
+        run_id: str | None = None,
+        node_run_id: str | None = None,
+        span_type: str | None = None,
+    ) -> list[TraceSpan]:
+        """List a tenant's spans ordered by start time.
+
+        ``run_id``, ``node_run_id`` and ``span_type`` narrow the result only
+        when given.
+        """
+        filters = [TraceSpan.tenant_id == tenant_id]
+        if run_id is not None:
+            filters.append(TraceSpan.run_id == run_id)
+        if node_run_id is not None:
+            filters.append(TraceSpan.node_run_id == node_run_id)
+        if span_type is not None:
+            filters.append(TraceSpan.span_type == span_type)
+        query = (
+            select(TraceSpan)
+            .where(*filters)
+            .order_by(TraceSpan.started_time.asc())
+        )
+        return list(self.db.scalars(query))

+ 19 - 0
services/runtime-service/app/infrastructure/__init__.py

@@ -1 +1,20 @@
+from .code_runner_client import CodeRunnerClient, CodeRunnerClientError
+from .model_gateway_client import ModelGatewayClient, ModelGatewayClientError
+from .executors import (
+    NodeExecutionDispatcher,
+    build_node_execution_dispatcher,
+    build_node_execution_dispatcher_with_clients,
+)
+from .tool_client import ToolServiceClient, ToolServiceClientError
 
+# Names re-exported from the submodules imported above; this list defines what
+# a star-import of this package exposes.
+__all__ = [
+    "CodeRunnerClient",
+    "CodeRunnerClientError",
+    "NodeExecutionDispatcher",
+    "ModelGatewayClient",
+    "ModelGatewayClientError",
+    "ToolServiceClient",
+    "ToolServiceClientError",
+    "build_node_execution_dispatcher",
+    "build_node_execution_dispatcher_with_clients",
+]

+ 28 - 0
services/runtime-service/app/infrastructure/code_runner_client.py

@@ -0,0 +1,28 @@
+import httpx
+
+from core_domain import CodeExecutionRequestContract, CodeExecutionResponseContract
+
+
+class CodeRunnerClientError(Exception):
+    """Raised when a request to the code-runner service fails."""
+
+
+class CodeRunnerClient:
+    """Synchronous HTTP client for the code-runner service."""
+
+    def __init__(self, base_url: str, timeout_seconds: float = 60.0) -> None:
+        # Trailing slashes are stripped so endpoint paths can be appended as-is.
+        self.base_url = base_url.rstrip("/")
+        self.timeout_seconds = timeout_seconds
+
+    def execute_code(
+        self,
+        payload: CodeExecutionRequestContract,
+    ) -> CodeExecutionResponseContract:
+        """POST ``payload`` to ``/code/execute`` and return the parsed response.
+
+        Raises:
+            CodeRunnerClientError: if the request cannot be sent, the service
+                answers with an error status, or the response body is not
+                valid JSON matching the response contract.
+        """
+        try:
+            with httpx.Client(timeout=self.timeout_seconds) as client:
+                response = client.post(
+                    f"{self.base_url}/code/execute",
+                    json=payload.model_dump(mode="json"),
+                )
+                response.raise_for_status()
+                return CodeExecutionResponseContract.model_validate(response.json())
+        except (httpx.HTTPError, httpx.InvalidURL) as exc:
+            # InvalidURL is not part of the HTTPError hierarchy, so it is
+            # listed explicitly to keep a bad base_url inside this wrapper.
+            raise CodeRunnerClientError(f"code-runner-service request failed: {exc}") from exc
+        except ValueError as exc:
+            # JSON decode errors and contract validation errors are both
+            # ValueError subclasses; callers only catch CodeRunnerClientError.
+            raise CodeRunnerClientError(
+                f"code-runner-service returned an invalid response: {exc}"
+            ) from exc

+ 192 - 0
services/runtime-service/app/infrastructure/context.py

@@ -0,0 +1,192 @@
+import json
+import re
+from collections.abc import Callable
+
+from core_shared import JSONValue
+
+# Matches one ``{{ expr }}`` placeholder; the named group ``expr`` captures the
+# brace-free text between the delimiters with surrounding whitespace excluded.
+TEMPLATE_PATTERN = re.compile(r"\{\{\s*(?P<expr>[^{}]+?)\s*\}\}")
+# Supported comparison operators. Two-character operators are listed before
+# ">" and "<" so that ">=" / "<=" are recognised ahead of their first character.
+COMPARISON_OPERATORS = ("==", "!=", ">=", "<=", ">", "<")
+
+
+def build_template_context(
+    *,
+    node_id: str,
+    node_type: str,
+    run_state_json: dict[str, JSONValue],
+    node_output_json_by_node_id: dict[str, dict[str, JSONValue]],
+    node_output_text_by_node_id: dict[str, str],
+) -> dict[str, JSONValue]:
+    """Assemble the lookup context used for templates and conditions.
+
+    The result has three top-level keys: ``state`` (the run state), ``nodes``
+    (one ``{"output", "text"}`` entry per node that has JSON output) and
+    ``current`` (id, type and outputs of the node identified by ``node_id``).
+    """
+    nodes_view: dict[str, JSONValue] = {}
+    for other_node_id, other_output_json in node_output_json_by_node_id.items():
+        nodes_view[other_node_id] = {
+            "output": other_output_json,
+            "text": node_output_text_by_node_id.get(other_node_id),
+        }
+
+    current_view: dict[str, JSONValue] = {
+        "node_id": node_id,
+        "node_type": node_type,
+        "output": node_output_json_by_node_id.get(node_id, {}),
+        "text": node_output_text_by_node_id.get(node_id),
+    }
+
+    return {
+        "state": run_state_json,
+        "nodes": nodes_view,
+        "current": current_view,
+    }
+
+
+def render_template_string(template: str, context: dict[str, JSONValue]) -> str:
+    """Replace every ``{{ expr }}`` placeholder in ``template`` with its value.
+
+    ``None`` renders as an empty string, dicts and lists render as compact
+    ASCII-only JSON, and every other value renders through ``str()``.
+    """
+
+    def substitute(placeholder: re.Match[str]) -> str:
+        resolved = resolve_expression(context, placeholder.group("expr").strip())
+        if resolved is None:
+            return ""
+        if not isinstance(resolved, (dict, list)):
+            return str(resolved)
+        return json.dumps(resolved, ensure_ascii=True, separators=(",", ":"))
+
+    return TEMPLATE_PATTERN.sub(substitute, template)
+
+
+def render_json_value(value: JSONValue, context: dict[str, JSONValue]) -> JSONValue:
+    """Render templates in every string found inside a JSON-like value.
+
+    Lists and dicts are rebuilt recursively (dict keys are converted with
+    ``str()``); values of any other type are returned unchanged.
+    """
+    if isinstance(value, dict):
+        rendered_mapping: dict[str, JSONValue] = {}
+        for raw_key, raw_item in value.items():
+            rendered_mapping[str(raw_key)] = render_json_value(raw_item, context)
+        return rendered_mapping
+    if isinstance(value, list):
+        return [render_json_value(element, context) for element in value]
+    if isinstance(value, str):
+        return render_template_string(value, context)
+    return value
+
+
+def _find_comparison_operator(expression: str) -> tuple[int, str] | None:
+    """Return ``(index, operator)`` for the leftmost operator outside quotes."""
+    active_quote: str | None = None
+    for index, char in enumerate(expression):
+        if active_quote is not None:
+            # Inside a quoted literal: only the matching quote ends it.
+            if char == active_quote:
+                active_quote = None
+            continue
+        if char in ("'", '"'):
+            active_quote = char
+            continue
+        # COMPARISON_OPERATORS lists two-character operators first, so ">="
+        # wins over ">" at the same position.
+        for operator in COMPARISON_OPERATORS:
+            if expression.startswith(operator, index):
+                return index, operator
+    return None
+
+
+def evaluate_condition_expression(expression: str, context: dict[str, JSONValue]) -> bool:
+    """Evaluate a condition expression against ``context``.
+
+    A blank expression is ``False``. If the expression contains a comparison
+    operator, the text on each side of the leftmost operator found outside a
+    quoted literal is resolved and the two values are compared. Otherwise the
+    whole expression is resolved and coerced to a boolean.
+    """
+    stripped_expression = expression.strip()
+    if not stripped_expression:
+        return False
+
+    # Operators inside quoted literals (e.g. ``state.x != "a==b"``) must not
+    # be used as the split point, so the search is quote-aware.
+    located = _find_comparison_operator(stripped_expression)
+    if located is None:
+        resolved = resolve_expression(context, stripped_expression)
+        return coerce_bool(resolved)
+
+    position, operator = located
+    left_text = stripped_expression[:position]
+    right_text = stripped_expression[position + len(operator):]
+    left_value = resolve_expression(context, left_text.strip())
+    right_value = resolve_expression(context, right_text.strip())
+    return compare_values(left_value, right_value, operator)
+
+
+def resolve_expression(context: dict[str, JSONValue], expression: str) -> JSONValue:
+    """Turn an expression into a value.
+
+    Tried in order: empty text (``None``), a single- or double-quoted string
+    literal, the case-insensitive keywords ``true`` / ``false`` / ``null``,
+    an integer literal, a float literal, and finally a dotted reference into
+    ``context``.
+    """
+    if expression == "":
+        return None
+
+    for quote in ('"', "'"):
+        if expression.startswith(quote) and expression.endswith(quote):
+            return expression[1:-1]
+
+    keywords: dict[str, JSONValue] = {"true": True, "false": False, "null": None}
+    keyword = expression.lower()
+    if keyword in keywords:
+        return keywords[keyword]
+
+    for parse_number in (try_parse_int, try_parse_float):
+        number = parse_number(expression)
+        if number is not None:
+            return number
+
+    return resolve_reference(context, expression)
+
+
+def resolve_reference(context: dict[str, JSONValue], path: str) -> JSONValue:
+    """Follow a dot-separated ``path`` through nested dicts and lists.
+
+    Dict segments are looked up by key; list segments must be a non-negative
+    decimal index. Returns ``None`` for an empty segment, a missing key, an
+    out-of-range index, or a segment applied to a value that is neither a
+    dict nor a list.
+    """
+    current: JSONValue = context
+    for segment in path.split("."):
+        if not segment:
+            return None
+        if isinstance(current, dict):
+            current = current.get(segment)
+        elif isinstance(current, list) and segment.isdecimal():
+            # isdecimal() only accepts characters that int() can parse;
+            # isdigit() also accepts e.g. superscript digits, which raise.
+            index = int(segment)
+            if index >= len(current):
+                return None
+            current = current[index]
+        else:
+            return None
+    return current
+
+
+def coerce_bool(value: JSONValue) -> bool:
+    """Map a JSON-like value to a boolean.
+
+    ``None`` is false, numbers are true when non-zero, lists and dicts are
+    true when non-empty, and strings are true unless (after trimming and
+    lower-casing) they are one of ``""``, ``"false"``, ``"0"``, ``"null"`` or
+    ``"none"``. Any other type is false.
+    """
+    if value is None:
+        return False
+    # bool must be handled before the numeric branch: bool is an int subclass.
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, (int, float)):
+        return value != 0
+    if isinstance(value, str):
+        falsy_words = {"", "false", "0", "null", "none"}
+        return value.strip().lower() not in falsy_words
+    if isinstance(value, (list, dict)):
+        return len(value) > 0
+    return False
+
+
+def compare_values(left: JSONValue, right: JSONValue, operator: str) -> bool:
+    """Apply the comparison named by ``operator`` to ``left`` and ``right``.
+
+    ``==`` and ``!=`` compare the values directly; the four ordering
+    operators are delegated to ``compare_order``. An unknown operator yields
+    ``False``.
+    """
+    if operator == "==":
+        return left == right
+    if operator == "!=":
+        return left != right
+
+    ordering_predicates = {
+        ">": lambda x, y: x > y,
+        "<": lambda x, y: x < y,
+        ">=": lambda x, y: x >= y,
+        "<=": lambda x, y: x <= y,
+    }
+    predicate = ordering_predicates.get(operator)
+    if predicate is None:
+        return False
+    return compare_order(left, right, predicate)
+
+
+def compare_order(
+    left: JSONValue,
+    right: JSONValue,
+    operator: Callable[[int | float | str, int | float | str], bool],
+) -> bool:
+    """Apply an ordering predicate when both operands are comparable.
+
+    The predicate is only called for two numbers or two strings; any other
+    combination of types yields ``False``.
+    """
+    numeric_types = (int, float)
+    both_numeric = isinstance(left, numeric_types) and isinstance(right, numeric_types)
+    both_text = isinstance(left, str) and isinstance(right, str)
+    if not (both_numeric or both_text):
+        return False
+    return bool(operator(left, right))
+
+
+def try_parse_int(value: str) -> int | None:
+    """Parse ``value`` as an optionally signed decimal integer.
+
+    Returns ``None`` for anything else, including empty text, a bare sign,
+    and text containing ``.`` or an exponent.
+    """
+    digits = value[1:] if value.startswith(("+", "-")) else value
+    # isdecimal() matches exactly the characters int() can parse; isdigit()
+    # also accepts e.g. superscript digits, for which int() raises ValueError.
+    if not digits.isdecimal():
+        return None
+    return int(value)
+
+
+def try_parse_float(value: str) -> float | None:
+    """Parse ``value`` as a finite float literal.
+
+    Returns ``None`` when ``float()`` rejects the text, when the result is
+    not finite (``nan``, ``inf``, ``infinity`` or an overflowing literal), or
+    when the text is integral and written without ``.`` or an exponent.
+    """
+    import math
+
+    try:
+        parsed = float(value)
+    except ValueError:
+        return None
+    # float() accepts "nan" and "inf"; those are not valid JSON numbers, and
+    # treating the bare words as literals would shadow reference lookup.
+    if not math.isfinite(parsed):
+        return None
+    if parsed.is_integer() and "." not in value and "e" not in value.lower():
+        return None
+    return parsed

+ 1056 - 0
services/runtime-service/app/infrastructure/executors.py

@@ -0,0 +1,1056 @@
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+import re
+
+import httpx
+from core_domain import (
+    ChatCompletionRequestContract,
+    ChatMessageContract,
+    CodeExecutionRequestContract,
+    NodeExecutionContextContract,
+    NodeExecutionRequestContract,
+    NodeExecutionResultContract,
+    ToolBindingDetailContract,
+)
+from core_shared import JSONValue
+
+from .code_runner_client import CodeRunnerClient, CodeRunnerClientError
+from .context import (
+    build_template_context,
+    coerce_bool,
+    evaluate_condition_expression,
+    render_json_value,
+    render_template_string,
+    resolve_expression,
+)
+from .model_gateway_client import ModelGatewayClient, ModelGatewayClientError
+from .tool_client import ToolServiceClient, ToolServiceClientError
+
+
+class NodeExecutor(ABC):
+    """Abstract interface for objects that execute a single workflow node."""
+
+    # Identifier of the executor; concrete executors in this module use it to
+    # build a default worker key.
+    executor_name: str
+    # Node type names the executor declares support for.
+    supported_node_types: frozenset[str]
+
+    @abstractmethod
+    def execute(
+        self,
+        context: NodeExecutionContextContract,
+        request: NodeExecutionRequestContract,
+    ) -> NodeExecutionResultContract:
+        """Execute the node described by ``context`` and return its result."""
+        raise NotImplementedError
+
+
+class CompletedNodeExecutor(NodeExecutor):
+    """Executor that reports every node as completed without doing any work."""
+
+    def __init__(self, *, executor_name: str, supported_node_types: frozenset[str]) -> None:
+        self.supported_node_types = supported_node_types
+        self.executor_name = executor_name
+
+    def execute(
+        self,
+        context: NodeExecutionContextContract,
+        request: NodeExecutionRequestContract,
+    ) -> NodeExecutionResultContract:
+        """Return a completed result carrying the executor name and node type.
+
+        The request's worker key is used when set; otherwise one is derived
+        from the executor name and the node type.
+        """
+        fallback_worker_key = f"{self.executor_name}:{context.node_type}"
+        summary_json = {
+            "executor_name": self.executor_name,
+            "node_type": context.node_type,
+        }
+        return NodeExecutionResultContract(
+            status="completed",
+            worker_key=request.worker_key or fallback_worker_key,
+            output_json=summary_json,
+        )
+
+
+class DefaultNodeExecutor(CompletedNodeExecutor):
+    """Completed-only executor that declares no supported node types."""
+
+    def __init__(self) -> None:
+        no_node_types: frozenset[str] = frozenset()
+        super().__init__(
+            supported_node_types=no_node_types,
+            executor_name="default-executor",
+        )
+
+
+class LLMNodeExecutor(CompletedNodeExecutor):
+    """Executor for ``llm`` nodes that sends a chat completion to the model gateway."""
+
+    def __init__(self, model_gateway_client: ModelGatewayClient | None = None) -> None:
+        super().__init__(
+            supported_node_types=frozenset({"llm"}),
+            executor_name="llm-executor",
+        )
+        self.model_gateway_client = model_gateway_client
+
+    def execute(
+        self,
+        context: NodeExecutionContextContract,
+        request: NodeExecutionRequestContract,
+    ) -> NodeExecutionResultContract:
+        """Render the node config, call the gateway and wrap the response.
+
+        Fails with ``llm_config_missing`` when no chat request can be built
+        from the rendered config, ``llm_gateway_missing`` when no client is
+        configured, and ``llm_request_failed`` when the gateway call raises
+        ``ModelGatewayClientError``.
+        """
+        worker_key = request.worker_key or f"{self.executor_name}:{context.node_type}"
+
+        def failure(error_code: str, error_message: str) -> NodeExecutionResultContract:
+            return NodeExecutionResultContract(
+                status="failed",
+                worker_key=worker_key,
+                error_code=error_code,
+                error_message=error_message,
+            )
+
+        template_context = _build_executor_template_context(context)
+        rendered_config_json = _render_config_json(context.node_config_json, template_context)
+        chat_request = _build_chat_completion_request(rendered_config_json)
+
+        # The config check comes before the client check, so a node with both
+        # problems reports the config error.
+        if chat_request is None:
+            return failure("llm_config_missing", "llm node config requires prompt or messages")
+        gateway = self.model_gateway_client
+        if gateway is None:
+            return failure("llm_gateway_missing", "model gateway client is not configured")
+
+        try:
+            completion = gateway.create_chat_completion(chat_request)
+        except ModelGatewayClientError as exc:
+            return failure("llm_request_failed", str(exc))
+
+        completion_json = {
+            "executor_name": self.executor_name,
+            "model": completion.model,
+            "finish_reason": completion.finish_reason,
+            "usage_json": completion.usage_json,
+            "raw_response_json": completion.raw_response_json,
+        }
+        return NodeExecutionResultContract(
+            status="completed",
+            worker_key=worker_key,
+            output_text=completion.content,
+            output_json=completion_json,
+        )
+
+
+class ToolNodeExecutor(CompletedNodeExecutor):
+    """Executor for ``tool`` nodes.
+
+    When a tool binding id and a tool client are available, the binding is
+    looked up and HTTP tools are invoked. In every other case the node is
+    reported as completed with only descriptive metadata.
+    """
+
+    def __init__(self, tool_client: ToolServiceClient | None = None) -> None:
+        super().__init__(
+            executor_name="tool-executor",
+            supported_node_types=frozenset({"tool"}),
+        )
+        self.tool_client = tool_client
+
+    def execute(
+        self,
+        context: NodeExecutionContextContract,
+        request: NodeExecutionRequestContract,
+    ) -> NodeExecutionResultContract:
+        """Resolve the configured tool and run it when it is an HTTP tool.
+
+        Fails with ``tool_config_missing`` when neither ``tool_binding_id``
+        nor ``tool_code`` is configured, ``tool_binding_lookup_failed`` when
+        the binding lookup raises ``ToolServiceClientError``, and
+        ``tool_binding_disabled`` when the binding is not enabled.
+        """
+        tool_binding_id = _read_string_value(context.node_config_json, "tool_binding_id")
+        tool_code = _read_string_value(context.node_config_json, "tool_code")
+        worker_key = request.worker_key or f"{self.executor_name}:{context.node_type}"
+
+        if tool_binding_id is None and tool_code is None:
+            return NodeExecutionResultContract(
+                status="failed",
+                worker_key=worker_key,
+                error_code="tool_config_missing",
+                error_message="tool node config requires tool_binding_id or tool_code",
+            )
+
+        # NOTE(review): a binding id without a configured tool client falls
+        # through to the metadata-only completion below — confirm intended.
+        if tool_binding_id is not None and self.tool_client is not None:
+            try:
+                detail = self.tool_client.get_tool_binding_detail(
+                    tenant_id=context.tenant_id,
+                    binding_id=tool_binding_id,
+                )
+            except ToolServiceClientError as exc:
+                return NodeExecutionResultContract(
+                    status="failed",
+                    worker_key=worker_key,
+                    error_code="tool_binding_lookup_failed",
+                    error_message=str(exc),
+                )
+            if not detail.binding.enabled:
+                return NodeExecutionResultContract(
+                    status="failed",
+                    worker_key=worker_key,
+                    error_code="tool_binding_disabled",
+                    error_message=f"tool binding is disabled: {tool_binding_id}",
+                )
+
+            resolved_tool_code = detail.tool_definition.code
+            resolved_tool_version_id = detail.tool_version.id
+            resolved_tool_name = detail.tool_definition.name
+            # None means the tool is not an HTTP tool; fall through to the
+            # metadata-only completion in that case.
+            invoke_result = self._invoke_http_tool(
+                context=context,
+                detail=detail,
+                worker_key=worker_key,
+            )
+            if invoke_result is not None:
+                return invoke_result
+        else:
+            resolved_tool_code = tool_code
+            resolved_tool_version_id = None
+            resolved_tool_name = None
+
+        return NodeExecutionResultContract(
+            status="completed",
+            worker_key=worker_key,
+            output_text=f"tool node completed: {resolved_tool_code or 'unknown-tool'}",
+            output_json={
+                "executor_name": self.executor_name,
+                "tool_binding_id": tool_binding_id,
+                "tool_code": resolved_tool_code,
+                "tool_version_id": resolved_tool_version_id,
+                "tool_name": resolved_tool_name,
+            },
+        )
+
+    def _invoke_http_tool(
+        self,
+        *,
+        context: NodeExecutionContextContract,
+        detail: ToolBindingDetailContract,
+        worker_key: str,
+    ) -> NodeExecutionResultContract | None:
+        """Perform the HTTP request described by an ``http`` tool binding.
+
+        Returns ``None`` when the tool type is not ``"http"``. Otherwise the
+        result is completed (carrying the response) or failed with
+        ``tool_http_url_missing`` / ``tool_http_request_failed``.
+        """
+        if detail.tool_definition.tool_type != "http":
+            return None
+
+        invoke_config_json = detail.tool_version.invoke_config_json or {}
+        binding_config_json = detail.binding.config_json or {}
+        render_context = _build_executor_template_context(context)
+        # Sources are passed to _merge_json_dicts in this order: tool version
+        # config, binding config (headers only), then node config.
+        request_headers = _merge_json_dicts(
+            _render_json_dict(_read_dict_value(invoke_config_json, "headers"), render_context),
+            _render_json_dict(_read_dict_value(binding_config_json, "headers"), render_context),
+            _render_json_dict(_read_dict_value(context.node_config_json, "headers"), render_context),
+        )
+        request_query = _merge_json_dicts(
+            _render_json_dict(_read_dict_value(invoke_config_json, "query"), render_context),
+            _render_json_dict(_read_dict_value(context.node_config_json, "query"), render_context),
+        )
+        request_body = _merge_json_dicts(
+            _render_json_dict(_read_dict_value(invoke_config_json, "body"), render_context),
+            _render_json_dict(_read_dict_value(context.node_config_json, "body"), render_context),
+        )
+
+        method = (_read_string_value(invoke_config_json, "method") or "GET").upper()
+        # The first configured value wins: node config, then binding config
+        # (base_url only), then tool version config.
+        base_url = (
+            _read_string_value(context.node_config_json, "base_url")
+            or _read_string_value(binding_config_json, "base_url")
+            or _read_string_value(invoke_config_json, "base_url")
+        )
+        path = _read_string_value(context.node_config_json, "path") or _read_string_value(
+            invoke_config_json, "path"
+        )
+        url = _read_string_value(context.node_config_json, "url") or _read_string_value(
+            invoke_config_json, "url"
+        )
+
+        resolved_url = _resolve_http_url(url=url, base_url=base_url, path=path)
+        if resolved_url is None:
+            return NodeExecutionResultContract(
+                status="failed",
+                worker_key=worker_key,
+                error_code="tool_http_url_missing",
+                error_message="http tool requires url or base_url with path",
+            )
+
+        # A missing or zero timeout falls back to 10 seconds.
+        timeout_ms = detail.tool_version.timeout_ms or 10000
+
+        try:
+            with httpx.Client(timeout=timeout_ms / 1000) as client:
+                response = client.request(
+                    method=method,
+                    url=resolved_url,
+                    params=_coerce_http_params(request_query),
+                    headers=_coerce_http_headers(request_headers),
+                    json=request_body if request_body else None,
+                )
+                response.raise_for_status()
+        except (httpx.HTTPError, httpx.InvalidURL) as exc:
+            # InvalidURL is outside the HTTPError hierarchy; catching it keeps
+            # a malformed configured URL a failed node rather than a crash.
+            return NodeExecutionResultContract(
+                status="failed",
+                worker_key=worker_key,
+                error_code="tool_http_request_failed",
+                error_message=str(exc),
+                output_json={
+                    "executor_name": self.executor_name,
+                    "tool_binding_id": detail.binding.id,
+                    "tool_code": detail.tool_definition.code,
+                    "request_url": resolved_url,
+                    "request_method": method,
+                },
+            )
+
+        # The raw body text is kept only when the response is not JSON.
+        response_json = _try_parse_json_response(response)
+        response_text = None if response_json is not None else response.text
+        return NodeExecutionResultContract(
+            status="completed",
+            worker_key=worker_key,
+            output_text=response_text,
+            output_json={
+                "executor_name": self.executor_name,
+                "tool_binding_id": detail.binding.id,
+                "tool_code": detail.tool_definition.code,
+                "tool_version_id": detail.tool_version.id,
+                "tool_name": detail.tool_definition.name,
+                "request_url": resolved_url,
+                "request_method": method,
+                "response_status_code": response.status_code,
+                "response_headers": dict(response.headers),
+                "response_json": response_json,
+            },
+        )
+
+
+class CodeNodeExecutor(CompletedNodeExecutor):
+    """Executor for ``code`` nodes that delegates to the code-runner service."""
+
+    def __init__(self, code_runner_client: CodeRunnerClient | None = None) -> None:
+        super().__init__(
+            supported_node_types=frozenset({"code"}),
+            executor_name="code-executor",
+        )
+        self.code_runner_client = code_runner_client
+
+    def execute(
+        self,
+        context: NodeExecutionContextContract,
+        request: NodeExecutionRequestContract,
+    ) -> NodeExecutionResultContract:
+        """Send the node's code to the runner and wrap its response.
+
+        Fails with ``code_config_missing`` when no code is configured,
+        ``code_runner_missing`` when no client is configured,
+        ``code_request_failed`` when the client raises
+        ``CodeRunnerClientError``, and ``code_execution_failed`` when the
+        runner reports an unsuccessful execution.
+        """
+        worker_key = request.worker_key or f"{self.executor_name}:{context.node_type}"
+
+        def failure(error_code: str, error_message: str) -> NodeExecutionResultContract:
+            return NodeExecutionResultContract(
+                status="failed",
+                worker_key=worker_key,
+                error_code=error_code,
+                error_message=error_message,
+            )
+
+        node_config_json = context.node_config_json
+        code = _read_string_value(node_config_json, "code")
+        if code is None:
+            return failure("code_config_missing", "code node config requires code")
+        runner = self.code_runner_client
+        if runner is None:
+            return failure("code_runner_missing", "code runner client is not configured")
+
+        template_context = _build_executor_template_context(context)
+        input_json = _render_json_dict(
+            _read_dict_value(node_config_json, "input_json"),
+            template_context,
+        )
+        # Language falls back to "python"; a missing or zero timeout to 10.
+        code_request = CodeExecutionRequestContract(
+            language=_read_string_value(node_config_json, "language") or "python",
+            code=code,
+            input_json=input_json,
+            timeout_seconds=_read_int_value(node_config_json, "timeout_seconds") or 10,
+        )
+
+        try:
+            run_result = runner.execute_code(code_request)
+        except CodeRunnerClientError as exc:
+            return failure("code_request_failed", str(exc))
+
+        if run_result.success:
+            return NodeExecutionResultContract(
+                status="completed",
+                worker_key=worker_key,
+                output_text=run_result.stdout,
+                output_json={
+                    "executor_name": self.executor_name,
+                    "stderr": run_result.stderr,
+                    "result_json": run_result.output_json,
+                },
+            )
+
+        # Unsuccessful run: stdout, stderr and any output stay in the result.
+        return NodeExecutionResultContract(
+            status="failed",
+            worker_key=worker_key,
+            error_code="code_execution_failed",
+            error_message=run_result.error_message or run_result.stderr,
+            output_text=run_result.stdout,
+            output_json={
+                "executor_name": self.executor_name,
+                "stderr": run_result.stderr,
+                "output_json": run_result.output_json,
+            },
+        )
+
+
+class AnswerNodeExecutor(CompletedNodeExecutor):
+    """Produce a workflow answer from the node's ``text`` or ``template`` config."""
+
+    def __init__(self) -> None:
+        super().__init__(
+            executor_name="answer-executor",
+            supported_node_types=frozenset({"answer"}),
+        )
+
+    def execute(
+        self,
+        context: NodeExecutionContextContract,
+        request: NodeExecutionRequestContract,
+    ) -> NodeExecutionResultContract:
+        """Render the configured answer and return it as ``output_text``.
+
+        A non-empty ``text`` wins over ``template``; an empty ``text`` falls back to a
+        non-empty ``template``. The result fails with ``answer_config_missing`` when
+        neither key holds a string. ``render_mode`` names the config key whose value
+        was actually rendered.
+        """
+        answer_text = _read_string_value(context.node_config_json, "text")
+        template = _read_string_value(context.node_config_json, "template")
+        worker_key = request.worker_key or f"{self.executor_name}:{context.node_type}"
+        if answer_text is None and template is None:
+            return NodeExecutionResultContract(
+                status="failed",
+                worker_key=worker_key,
+                error_code="answer_config_missing",
+                error_message="answer node config requires text or template",
+            )
+
+        # Pick the source and the reported mode together so they cannot disagree.
+        # Previously an empty "text" next to a real "template" rendered the template
+        # but still reported render_mode="text".
+        if answer_text:
+            source, render_mode = answer_text, "text"
+        elif template:
+            source, render_mode = template, "template"
+        else:
+            # Both candidates are empty or missing: render the empty string.
+            source = ""
+            render_mode = "text" if answer_text is not None else "template"
+
+        render_context = _build_executor_template_context(context)
+        rendered_text = render_template_string(source, render_context)
+        return NodeExecutionResultContract(
+            status="completed",
+            worker_key=worker_key,
+            output_text=rendered_text,
+            output_json={
+                "executor_name": self.executor_name,
+                "render_mode": render_mode,
+            },
+        )
+
+
+class ConditionNodeExecutor(CompletedNodeExecutor):
+    """Evaluate a boolean condition and publish the outcome as a ``route``."""
+
+    def __init__(self) -> None:
+        super().__init__(
+            executor_name="condition-executor",
+            supported_node_types=frozenset({"if-else", "condition"}),
+        )
+
+    def execute(
+        self,
+        context: NodeExecutionContextContract,
+        request: NodeExecutionRequestContract,
+    ) -> NodeExecutionResultContract:
+        """Evaluate ``expression`` (preferred) or a ``path`` comparison.
+
+        Fails with ``condition_config_missing`` when neither key is a string.
+        On success ``output_json`` carries the boolean result, the matching
+        ``"true"``/``"false"`` route and the expression or path that was evaluated.
+        """
+        config = context.node_config_json
+        worker_key = request.worker_key or f"{self.executor_name}:{context.node_type}"
+        render_context = _build_executor_template_context(context)
+
+        expression = _read_string_value(config, "expression")
+        path = _read_string_value(config, "path")
+
+        if expression is None and path is None:
+            return NodeExecutionResultContract(
+                status="failed",
+                worker_key=worker_key,
+                error_code="condition_config_missing",
+                error_message="condition node config requires expression or path",
+            )
+
+        if expression is not None:
+            evaluated_expression = expression
+            condition_result = evaluate_condition_expression(expression, render_context)
+        else:
+            evaluated_expression = path
+            condition_result = _evaluate_path_condition(config, path, render_context)
+
+        return NodeExecutionResultContract(
+            status="completed",
+            worker_key=worker_key,
+            output_json={
+                "executor_name": self.executor_name,
+                "condition_result": condition_result,
+                "route": "true" if condition_result else "false",
+                "evaluated_expression": evaluated_expression,
+            },
+        )
+
+
+class AssignerNodeExecutor(CompletedNodeExecutor):
+    """Render the node's ``assignments`` and emit them as state updates."""
+
+    def __init__(self) -> None:
+        super().__init__(
+            executor_name="assigner-executor",
+            supported_node_types=frozenset({"assigner"}),
+        )
+
+    def execute(
+        self,
+        context: NodeExecutionContextContract,
+        request: NodeExecutionRequestContract,
+    ) -> NodeExecutionResultContract:
+        """Render ``assignments`` against the template context.
+
+        Fails with ``assignments_missing`` when the config has no non-empty
+        ``assignments`` dict. The rendered values are reported under both
+        ``assigned_values`` and ``state_updates``.
+        """
+        worker_key = request.worker_key or f"{self.executor_name}:{context.node_type}"
+        raw_assignments = _read_dict_value(context.node_config_json, "assignments")
+
+        if raw_assignments:
+            rendered = _render_json_dict(
+                raw_assignments,
+                _build_executor_template_context(context),
+            )
+            return NodeExecutionResultContract(
+                status="completed",
+                worker_key=worker_key,
+                output_json={
+                    "executor_name": self.executor_name,
+                    "assigned_values": rendered,
+                    "state_updates": rendered,
+                },
+            )
+
+        return NodeExecutionResultContract(
+            status="failed",
+            worker_key=worker_key,
+            error_code="assignments_missing",
+            error_message="assigner node config requires assignments",
+        )
+
+
+class RetrieverNodeExecutor(CompletedNodeExecutor):
+    """Rank inline and/or remotely fetched documents against a query."""
+
+    def execute(
+        self,
+        context: NodeExecutionContextContract,
+        request: NodeExecutionRequestContract,
+    ) -> NodeExecutionResultContract:
+        """Resolve the query, gather documents and return the best ``top_k`` matches.
+
+        Documents come from the ``documents`` config entry and, when ``source_url``
+        is set, from an HTTP GET of that templated URL. Possible failure codes are
+        ``retriever_query_missing``, ``retriever_source_request_failed``,
+        ``retriever_source_invalid`` and ``retriever_documents_missing``.
+        """
+        worker_key = request.worker_key or f"{self.executor_name}:{context.node_type}"
+        render_context = _build_executor_template_context(context)
+        query = _resolve_retriever_query(context.node_config_json, render_context)
+        documents = _read_retriever_documents(context.node_config_json, render_context)
+        source_url = _read_string_value(context.node_config_json, "source_url")
+        top_k = _read_int_value(context.node_config_json, "top_k") or 3
+
+        if query is None:
+            return NodeExecutionResultContract(
+                status="failed",
+                worker_key=worker_key,
+                error_code="retriever_query_missing",
+                error_message="retriever node config requires query or query_template",
+            )
+        if source_url is not None:
+            try:
+                documents.extend(
+                    _fetch_retriever_documents_from_url(
+                        source_url=render_template_string(source_url, render_context),
+                        timeout_ms=_read_int_value(context.node_config_json, "timeout_ms") or 10000,
+                        render_context=render_context,
+                    )
+                )
+            except httpx.HTTPError as exc:
+                return NodeExecutionResultContract(
+                    status="failed",
+                    worker_key=worker_key,
+                    error_code="retriever_source_request_failed",
+                    error_message=str(exc),
+                )
+            except (ValueError, httpx.InvalidURL) as exc:
+                # httpx.InvalidURL does not derive from httpx.HTTPError, so a
+                # malformed templated URL must be caught here or it would escape
+                # the executor as an unhandled exception.
+                return NodeExecutionResultContract(
+                    status="failed",
+                    worker_key=worker_key,
+                    error_code="retriever_source_invalid",
+                    error_message=str(exc),
+                )
+
+        if not documents:
+            return NodeExecutionResultContract(
+                status="failed",
+                worker_key=worker_key,
+                error_code="retriever_documents_missing",
+                error_message="retriever node config requires non-empty documents",
+            )
+
+        ranked_documents = rank_documents(query=query, documents=documents, top_k=top_k)
+        output_documents = [item.to_output_json() for item in ranked_documents]
+        output_text = "\n\n".join(item.text for item in ranked_documents)
+        return NodeExecutionResultContract(
+            status="completed",
+            worker_key=worker_key,
+            output_text=output_text,
+            output_json={
+                "executor_name": self.executor_name,
+                "query": query,
+                "top_k": top_k,
+                "retrieved_documents": output_documents,
+            },
+        )
+
+    def __init__(self) -> None:
+        super().__init__(
+            executor_name="retriever-executor",
+            supported_node_types=frozenset({"knowledge-retrieval", "retriever"}),
+        )
+
+
+class TemplateNodeExecutor(CompletedNodeExecutor):
+    """Render a text ``template`` and/or a ``template_json`` structure."""
+
+    def __init__(self) -> None:
+        super().__init__(
+            executor_name="template-executor",
+            supported_node_types=frozenset({"template-transform", "template"}),
+        )
+
+    def execute(
+        self,
+        context: NodeExecutionContextContract,
+        request: NodeExecutionRequestContract,
+    ) -> NodeExecutionResultContract:
+        """Render whichever of ``template`` / ``template_json`` is configured.
+
+        Fails with ``template_config_missing`` when neither is present. The text
+        rendering becomes ``output_text``; a rendered JSON structure is attached
+        to ``output_json`` under ``rendered_json``.
+        """
+        config = context.node_config_json
+        worker_key = request.worker_key or f"{self.executor_name}:{context.node_type}"
+        render_context = _build_executor_template_context(context)
+        template = _read_string_value(config, "template")
+        template_json = _read_dict_value(config, "template_json")
+
+        has_text = template is not None
+        if not has_text and not template_json:
+            return NodeExecutionResultContract(
+                status="failed",
+                worker_key=worker_key,
+                error_code="template_config_missing",
+                error_message="template node config requires template or template_json",
+            )
+
+        rendered_text = render_template_string(template, render_context) if has_text else None
+
+        output_json: dict[str, JSONValue] = {"executor_name": self.executor_name}
+        if template_json:
+            output_json["rendered_json"] = _render_json_dict(template_json, render_context)
+
+        return NodeExecutionResultContract(
+            status="completed",
+            worker_key=worker_key,
+            output_text=rendered_text,
+            output_json=output_json,
+        )
+
+
+class NodeExecutionDispatcher:
+    """Route a node execution to the first executor that supports its node type."""
+
+    def __init__(
+        self,
+        executors: list[NodeExecutor],
+        default_executor: NodeExecutor,
+    ) -> None:
+        self.executors = executors
+        self.default_executor = default_executor
+
+    def resolve_executor(self, node_type: str) -> NodeExecutor:
+        """Return the first registered executor for ``node_type``, else the default."""
+        matching = (
+            candidate
+            for candidate in self.executors
+            if node_type in candidate.supported_node_types
+        )
+        return next(matching, self.default_executor)
+
+    def execute(
+        self,
+        context: NodeExecutionContextContract,
+        request: NodeExecutionRequestContract,
+    ) -> tuple[NodeExecutionResultContract, str]:
+        """Run the node and return ``(result, name of the executor that ran it)``."""
+        chosen = self.resolve_executor(context.node_type)
+        return chosen.execute(context, request), chosen.executor_name
+
+
+def build_node_execution_dispatcher() -> NodeExecutionDispatcher:
+    """Build a dispatcher whose executors are all constructed with their defaults."""
+    return NodeExecutionDispatcher(
+        executors=[
+            LLMNodeExecutor(),
+            ToolNodeExecutor(),
+            CodeNodeExecutor(),
+            AnswerNodeExecutor(),
+            ConditionNodeExecutor(),
+            AssignerNodeExecutor(),
+            RetrieverNodeExecutor(),
+            TemplateNodeExecutor(),
+        ],
+        default_executor=DefaultNodeExecutor(),
+    )
+
+
+def build_node_execution_dispatcher_with_clients(
+    *,
+    code_runner_client: CodeRunnerClient | None = None,
+    model_gateway_client: ModelGatewayClient | None = None,
+    tool_client: ToolServiceClient | None = None,
+) -> NodeExecutionDispatcher:
+    """Build a dispatcher, handing the given clients to the LLM, tool and code executors."""
+    llm_executor = LLMNodeExecutor(model_gateway_client=model_gateway_client)
+    tool_executor = ToolNodeExecutor(tool_client=tool_client)
+    code_executor = CodeNodeExecutor(code_runner_client=code_runner_client)
+    return NodeExecutionDispatcher(
+        executors=[
+            llm_executor,
+            tool_executor,
+            code_executor,
+            AnswerNodeExecutor(),
+            ConditionNodeExecutor(),
+            AssignerNodeExecutor(),
+            RetrieverNodeExecutor(),
+            TemplateNodeExecutor(),
+        ],
+        default_executor=DefaultNodeExecutor(),
+    )
+
+
+def _read_string_value(payload: dict[str, JSONValue], key: str) -> str | None:
+    """Return ``payload[key]`` when it is a string, otherwise ``None``."""
+    candidate = payload.get(key)
+    return candidate if isinstance(candidate, str) else None
+
+
+def _read_dict_value(payload: dict[str, JSONValue], key: str) -> dict[str, JSONValue]:
+    """Return a shallow copy of ``payload[key]`` with string keys, or ``{}`` if not a dict."""
+    raw = payload.get(key)
+    if not isinstance(raw, dict):
+        return {}
+    return {str(name): entry for name, entry in raw.items()}
+
+
+def _merge_json_dicts(*items: dict[str, JSONValue]) -> dict[str, JSONValue]:
+    """Shallow-merge ``items`` left to right; later dicts override earlier keys."""
+    combined: dict[str, JSONValue] = {}
+    for layer in items:
+        combined |= layer
+    return combined
+
+
+def _render_json_dict(
+    payload: dict[str, JSONValue],
+    context: dict[str, JSONValue],
+) -> dict[str, JSONValue]:
+    """Render ``payload`` with ``render_json_value``; return ``{}`` if the result is not a dict."""
+    result = render_json_value(payload, context)
+    if not isinstance(result, dict):
+        return {}
+    return {str(name): entry for name, entry in result.items()}
+
+
+def _render_config_json(
+    payload: dict[str, JSONValue],
+    context: dict[str, JSONValue],
+) -> dict[str, JSONValue]:
+    """Render a node config dict; a thin alias of ``_render_json_dict``."""
+    return _render_json_dict(payload=payload, context=context)
+
+
+def _resolve_http_url(*, url: str | None, base_url: str | None, path: str | None) -> str | None:
+    """Return ``url`` if given, else ``base_url`` joined to ``path`` with a single slash.
+
+    Returns ``None`` when ``url`` is absent and either ``base_url`` or ``path`` is missing.
+    """
+    if url is not None:
+        return url
+    if base_url is not None and path is not None:
+        return "/".join((base_url.rstrip("/"), path.lstrip("/")))
+    return None
+
+
+def _coerce_http_headers(payload: dict[str, JSONValue]) -> dict[str, str]:
+    """Stringify scalar header values; drop ``None`` and nested (list/dict) values."""
+    return {
+        name: str(raw)
+        for name, raw in payload.items()
+        if isinstance(raw, (str, int, float, bool))
+    }
+
+
+def _coerce_http_params(payload: dict[str, JSONValue]) -> dict[str, str]:
+    """Stringify scalar query parameters; drop ``None`` and nested (list/dict) values."""
+    scalar_types = (str, int, float, bool)
+    return {name: str(raw) for name, raw in payload.items() if isinstance(raw, scalar_types)}
+
+
+def _try_parse_json_response(response: httpx.Response) -> JSONValue | None:
+    """Return the decoded JSON body, or ``None`` when the response is not usable JSON.
+
+    ``None`` is returned when the ``content-type`` header does not mention ``json``,
+    when decoding raises ``ValueError``, or when the decoded value is not a JSON type.
+    """
+    declared_type = response.headers.get("content-type", "").lower()
+    if "json" not in declared_type:
+        return None
+    try:
+        decoded = response.json()
+    except ValueError:
+        return None
+    if decoded is None or isinstance(decoded, (dict, list, str, int, float, bool)):
+        return decoded
+    return None
+
+
+def _build_chat_completion_request(
+    payload: dict[str, JSONValue],
+) -> ChatCompletionRequestContract | None:
+    """Build a chat-completion request from a rendered node config.
+
+    Uses the explicit ``messages`` list when it yields at least one message;
+    otherwise synthesises messages from ``system_prompt`` and ``prompt``.
+    Returns ``None`` when no message can be produced.
+    """
+    messages = _read_message_list(payload, "messages")
+    if not messages:
+        # Fallback order matters: the system message must precede the user message.
+        for role, config_key in (("system", "system_prompt"), ("user", "prompt")):
+            content = _read_string_value(payload, config_key)
+            if content is not None:
+                messages.append(ChatMessageContract(role=role, content=content))
+
+    if not messages:
+        return None
+
+    return ChatCompletionRequestContract(
+        model=_read_string_value(payload, "model"),
+        messages=messages,
+        temperature=_read_float_value(payload, "temperature"),
+        max_tokens=_read_int_value(payload, "max_tokens"),
+    )
+
+
+def _read_message_list(
+    payload: dict[str, JSONValue],
+    key: str,
+) -> list[ChatMessageContract]:
+    """Parse ``payload[key]`` into chat messages, skipping malformed entries.
+
+    An entry is kept only when it is a dict whose ``role`` and ``content`` are
+    strings; ``name`` is carried over only when it is a string.
+    """
+    raw_entries = payload.get(key)
+    if not isinstance(raw_entries, list):
+        return []
+
+    parsed: list[ChatMessageContract] = []
+    for entry in raw_entries:
+        if not isinstance(entry, dict):
+            continue
+        role, content, name = entry.get("role"), entry.get("content"), entry.get("name")
+        if not (isinstance(role, str) and isinstance(content, str)):
+            continue
+        parsed.append(
+            ChatMessageContract(
+                role=role,
+                content=content,
+                name=name if isinstance(name, str) else None,
+            )
+        )
+    return parsed
+
+
+def _read_float_value(payload: dict[str, JSONValue], key: str) -> float | None:
+    """Return ``payload[key]`` as a float when it is an int or float (never a bool)."""
+    raw = payload.get(key)
+    # bool is an int subclass, so it has to be rejected before the numeric check.
+    if isinstance(raw, bool) or not isinstance(raw, (int, float)):
+        return None
+    return float(raw)
+
+
+def _read_int_value(payload: dict[str, JSONValue], key: str) -> int | None:
+    """Return ``payload[key]`` when it is an int (never a bool), otherwise ``None``."""
+    raw = payload.get(key)
+    # bool is an int subclass, so it has to be rejected before the int check.
+    if isinstance(raw, bool) or not isinstance(raw, int):
+        return None
+    return raw
+
+
+def _build_executor_template_context(context: NodeExecutionContextContract) -> dict[str, JSONValue]:
+    """Build the template rendering context from a node execution context."""
+    context_fields = {
+        "node_id": context.node_id,
+        "node_type": context.node_type,
+        "run_state_json": context.run_state_json,
+        "node_output_json_by_node_id": context.node_output_json_by_node_id,
+        "node_output_text_by_node_id": context.node_output_text_by_node_id,
+    }
+    return build_template_context(**context_fields)
+
+
+def _evaluate_path_condition(
+    payload: dict[str, JSONValue],
+    path: str,
+    render_context: dict[str, JSONValue],
+) -> bool:
+    """Compare the value found at ``path`` against the first comparator in ``payload``.
+
+    Comparators are checked in a fixed priority order: ``equals``, ``not_equals``,
+    ``gt``, ``gte``, ``lt``, ``lte``, ``exists``. With no usable comparator the
+    resolved value is coerced to a boolean.
+    """
+    actual = resolve_expression(render_context, path)
+
+    if "equals" in payload:
+        return actual == render_json_value(payload["equals"], render_context)
+    if "not_equals" in payload:
+        return actual != render_json_value(payload["not_equals"], render_context)
+
+    numeric_comparators = (("gt", ">"), ("gte", ">="), ("lt", "<"), ("lte", "<="))
+    for config_key, symbol in numeric_comparators:
+        if config_key in payload:
+            expected = render_json_value(payload[config_key], render_context)
+            return _compare_numeric(actual, expected, symbol)
+
+    # A non-boolean "exists" is ignored and falls through to truthiness coercion.
+    if "exists" in payload and isinstance(payload["exists"], bool):
+        return (actual is not None) is payload["exists"]
+    return coerce_bool(actual)
+
+
+def _compare_numeric(left: JSONValue, right: JSONValue, operator: str) -> bool:
+    """Compare two JSON values numerically using ``operator``.
+
+    Returns ``False`` when either operand is not a number or when ``operator`` is
+    not one of ``>``, ``>=``, ``<``, ``<=``.
+
+    Booleans are rejected explicitly: ``bool`` is an ``int`` subclass, so without
+    the guard ``True > 0`` would count as a numeric match, unlike
+    ``_read_int_value`` and ``_read_float_value``, which never treat bools as numbers.
+    """
+    for operand in (left, right):
+        if isinstance(operand, bool) or not isinstance(operand, (int, float)):
+            return False
+    if operator == ">":
+        return left > right
+    if operator == ">=":
+        return left >= right
+    if operator == "<":
+        return left < right
+    if operator == "<=":
+        return left <= right
+    return False
+
+
+@dataclass(frozen=True)
+class RetrieverDocument:
+    """Immutable candidate document that ``rank_documents`` scores against a query."""
+
+    # Identifier from the source payload, or a generated "doc-N" fallback.
+    document_id: str
+    # Optional title; tokenized together with the text when scoring.
+    title: str | None
+    # Document body.
+    text: str
+    # Free-form metadata passed through to the ranked output unchanged.
+    metadata: dict[str, JSONValue]
+
+
+@dataclass(frozen=True)
+class RankedRetrieverDocument:
+    """A retriever document paired with its relevance score."""
+
+    document_id: str
+    title: str | None
+    text: str
+    metadata: dict[str, JSONValue]
+    score: float
+
+    def to_output_json(self) -> dict[str, JSONValue]:
+        """Return the document as a plain JSON-compatible dict."""
+        return dict(
+            document_id=self.document_id,
+            title=self.title,
+            text=self.text,
+            metadata=self.metadata,
+            score=self.score,
+        )
+
+
+def _resolve_retriever_query(
+    payload: dict[str, JSONValue],
+    render_context: dict[str, JSONValue],
+) -> str | None:
+    """Render the retriever query, preferring ``query`` over ``query_template``.
+
+    Returns ``None`` when neither key is a string or the rendered query is blank.
+    """
+    query = _read_string_value(payload, "query")
+    query_template = _read_string_value(payload, "query_template")
+
+    source = query if query is not None else query_template
+    if source is None:
+        return None
+
+    cleaned = render_template_string(source, render_context).strip()
+    return cleaned or None
+
+
+def _read_retriever_documents(
+    payload: dict[str, JSONValue],
+    render_context: dict[str, JSONValue],
+) -> list[RetrieverDocument]:
+    """Parse the inline ``documents`` list of a retriever config, dropping unusable entries."""
+    raw_documents = payload.get("documents")
+    if not isinstance(raw_documents, list):
+        return []
+
+    # The position in the raw list is kept as the index so fallback ids stay stable
+    # even when earlier entries are dropped.
+    parsed = (
+        _parse_retriever_document(entry, index=position, render_context=render_context)
+        for position, entry in enumerate(raw_documents)
+    )
+    return [document for document in parsed if document is not None]
+
+
+def _fetch_retriever_documents_from_url(
+    *,
+    source_url: str,
+    timeout_ms: int,
+    render_context: dict[str, JSONValue],
+) -> list[RetrieverDocument]:
+    """Fetch documents over HTTP GET from ``source_url``.
+
+    The response must be a JSON list, or a JSON object with a ``documents`` list.
+    A blank URL yields an empty list.
+
+    Raises:
+        httpx.HTTPError: on transport errors or a non-success status code.
+        ValueError: when the body is not JSON or has an unexpected shape.
+    """
+    if not source_url.strip():
+        return []
+
+    with httpx.Client(timeout=timeout_ms / 1000) as client:
+        response = client.get(source_url)
+        response.raise_for_status()
+
+    body = response.json()
+    raw_documents = body.get("documents") if isinstance(body, dict) else body
+    if not isinstance(raw_documents, list):
+        raise ValueError("retriever source must return a JSON list or object.documents list")
+
+    parsed = (
+        _parse_retriever_document(entry, index=position, render_context=render_context)
+        for position, entry in enumerate(raw_documents)
+        if _is_json_value(entry)
+    )
+    return [document for document in parsed if document is not None]
+
+
+def _parse_retriever_document(
+    value: JSONValue,
+    *,
+    index: int,
+    render_context: dict[str, JSONValue],
+) -> RetrieverDocument | None:
+    """Turn one raw document entry into a ``RetrieverDocument``.
+
+    Accepts either a plain string (used as the text) or a dict with ``text`` or
+    ``content`` plus optional ``id``/``document_id``, ``title`` and ``metadata``.
+    Returns ``None`` for any other type or when the rendered text is blank.
+    """
+    fallback_id = f"doc-{index + 1}"
+
+    if isinstance(value, str):
+        plain_text = render_template_string(value, render_context).strip()
+        if plain_text:
+            return RetrieverDocument(
+                document_id=fallback_id,
+                title=None,
+                text=plain_text,
+                metadata={},
+            )
+        return None
+
+    if not isinstance(value, dict):
+        return None
+
+    fields = _render_json_dict({str(key): item for key, item in value.items()}, render_context)
+    body = fields.get("text") or fields.get("content")
+    if not isinstance(body, str) or not body.strip():
+        return None
+
+    raw_id = fields.get("id") or fields.get("document_id")
+    raw_title = fields.get("title")
+    raw_metadata = fields.get("metadata")
+    return RetrieverDocument(
+        document_id=fallback_id if raw_id is None else str(raw_id),
+        title=raw_title if isinstance(raw_title, str) else None,
+        text=body.strip(),
+        metadata=raw_metadata if isinstance(raw_metadata, dict) else {},
+    )
+
+
+def rank_documents(
+    *,
+    query: str,
+    documents: list[RetrieverDocument],
+    top_k: int,
+) -> list[RankedRetrieverDocument]:
+    """Score ``documents`` by keyword overlap with ``query`` and return the best ``top_k``.
+
+    ``top_k`` is clamped to at least 1. Documents with equal scores keep their
+    input order because the sort is stable.
+    """
+    limit = max(top_k, 1)
+    query_tokens = tokenize_text(query)
+
+    scored = [
+        RankedRetrieverDocument(
+            document_id=candidate.document_id,
+            title=candidate.title,
+            text=candidate.text,
+            metadata=candidate.metadata,
+            score=calculate_keyword_score(
+                query_tokens=query_tokens,
+                document_tokens=tokenize_text(
+                    " ".join(filter(None, [candidate.title, candidate.text]))
+                ),
+            ),
+        )
+        for candidate in documents
+    ]
+    return sorted(scored, key=lambda item: item.score, reverse=True)[:limit]
+
+
+def calculate_keyword_score(
+    *,
+    query_tokens: set[str],
+    document_tokens: set[str],
+) -> float:
+    """Return the fraction of query tokens present in the document, rounded to 4 places.
+
+    The score is ``0.0`` when either token set is empty or nothing overlaps.
+    """
+    if not (query_tokens and document_tokens):
+        return 0.0
+    shared = query_tokens.intersection(document_tokens)
+    if not shared:
+        return 0.0
+    return round(len(shared) / len(query_tokens), 4)
+
+
+def tokenize_text(value: str) -> set[str]:
+    """Split ``value`` into a set of lower-cased word tokens.
+
+    A token is a maximal run of word characters, so the regex can never yield an
+    empty match and no extra filtering is needed.
+    """
+    return {match.group().lower() for match in re.finditer(r"[\w\u4e00-\u9fff]+", value)}
+
+
+def _is_json_value(value: object) -> bool:
+    """Return whether ``value`` is built only from JSON types.
+
+    Lists and dicts are checked recursively; dict keys must be strings.
+    """
+    if isinstance(value, list):
+        return all(map(_is_json_value, value))
+    if isinstance(value, dict):
+        for key, item in value.items():
+            if not isinstance(key, str) or not _is_json_value(item):
+                return False
+        return True
+    return value is None or isinstance(value, (str, int, float, bool))

+ 28 - 0
services/runtime-service/app/infrastructure/model_gateway_client.py

@@ -0,0 +1,28 @@
+import httpx
+
+from core_domain import ChatCompletionRequestContract, ChatCompletionResponseContract
+
+
+class ModelGatewayClientError(Exception):
+    """Raised by ``ModelGatewayClient`` when a model-gateway-service request fails."""
+
+    pass
+
+
+class ModelGatewayClient:
+    """Synchronous HTTP client for the model-gateway-service."""
+
+    def __init__(self, base_url: str, timeout_seconds: float = 60.0) -> None:
+        # Trailing slashes are stripped so endpoint paths can be appended directly.
+        self.base_url = base_url.rstrip("/")
+        self.timeout_seconds = timeout_seconds
+
+    def create_chat_completion(
+        self,
+        payload: ChatCompletionRequestContract,
+    ) -> ChatCompletionResponseContract:
+        """POST ``payload`` to ``/models/chat-completions`` and parse the response.
+
+        Raises:
+            ModelGatewayClientError: when the request fails (transport error,
+                non-success status, malformed URL) or the response body is not
+                valid JSON matching the response contract.
+        """
+        try:
+            with httpx.Client(timeout=self.timeout_seconds) as client:
+                response = client.post(
+                    f"{self.base_url}/models/chat-completions",
+                    json=payload.model_dump(mode="json"),
+                )
+                response.raise_for_status()
+            return ChatCompletionResponseContract.model_validate(response.json())
+        except (httpx.HTTPError, httpx.InvalidURL) as exc:
+            # httpx.InvalidURL is not an httpx.HTTPError, so it is listed explicitly.
+            raise ModelGatewayClientError(f"model-gateway-service request failed: {exc}") from exc
+        except ValueError as exc:
+            # Covers JSON decode errors and pydantic validation errors, both of
+            # which are ValueError subclasses; callers only handle the client error.
+            raise ModelGatewayClientError(
+                f"model-gateway-service returned an invalid response: {exc}"
+            ) from exc

+ 96 - 71
services/runtime-service/app/infrastructure/planner.py

@@ -1,104 +1,129 @@
 from core_domain import InitialNodeContract, WorkflowVersionContract
+from core_dsl import (
+    EdgeDefinition,
+    get_initial_node_definition,
+    get_node_definition,
+    parse_workflow_definition,
+)
 from core_shared import JSONValue
 
+from .context import build_template_context, evaluate_condition_expression
 
-def derive_initial_node(workflow_version: WorkflowVersionContract) -> InitialNodeContract | None:
-    dsl = workflow_version.dsl_json
-    if not isinstance(dsl, dict):
-        return None
 
-    nodes_value = dsl.get("nodes")
-    if not isinstance(nodes_value, list):
+def derive_initial_node(workflow_version: WorkflowVersionContract) -> InitialNodeContract | None:
+    workflow = parse_workflow_definition(workflow_version.dsl_json)
+    if workflow is None:
         return None
 
-    nodes: list[dict[str, JSONValue]] = [
-        item for item in nodes_value if isinstance(item, dict)
-    ]
-    if not nodes:
+    node = get_initial_node_definition(workflow)
+    if node is None:
         return None
-
-    edges_value = dsl.get("edges")
-    incoming_targets = _collect_incoming_targets(edges_value)
-
-    for node in nodes:
-        node_id = node.get("id")
-        node_type = node.get("type")
-        if isinstance(node_id, str) and isinstance(node_type, str) and node_id not in incoming_targets:
-            return InitialNodeContract(node_id=node_id, node_type=node_type, status="queued")
-
-    first = nodes[0]
-    first_id = first.get("id")
-    first_type = first.get("type")
-    if isinstance(first_id, str) and isinstance(first_type, str):
-        return InitialNodeContract(node_id=first_id, node_type=first_type, status="queued")
-
-    return None
+    return InitialNodeContract(node_id=node.id, node_type=node.type, status="queued")
 
 
 def derive_successor_nodes(
     workflow_version: WorkflowVersionContract,
     current_node_id: str,
+    current_output_json: dict[str, JSONValue] | None = None,
+    run_state_json: dict[str, JSONValue] | None = None,
+    node_output_json_by_node_id: dict[str, dict[str, JSONValue]] | None = None,
+    node_output_text_by_node_id: dict[str, str] | None = None,
 ) -> list[InitialNodeContract]:
-    dsl = workflow_version.dsl_json
-    if not isinstance(dsl, dict):
-        return []
-
-    nodes_value = dsl.get("nodes")
-    edges_value = dsl.get("edges")
-    if not isinstance(nodes_value, list) or not isinstance(edges_value, list):
+    workflow = parse_workflow_definition(workflow_version.dsl_json)
+    if workflow is None:
         return []
 
-    node_type_map = _build_node_type_map(nodes_value)
-    successor_ids = _collect_successor_ids(edges_value, current_node_id)
+    node_map = {node.id: node for node in workflow.nodes}
+    template_context = build_template_context(
+        node_id=current_node_id,
+        node_type=node_map.get(current_node_id).type if current_node_id in node_map else "unknown",
+        run_state_json=run_state_json or {},
+        node_output_json_by_node_id=node_output_json_by_node_id or {},
+        node_output_text_by_node_id=node_output_text_by_node_id or {},
+    )
+    edge_context: dict[str, JSONValue] = {
+        **template_context,
+        "output": current_output_json or {},
+        "route": _read_string_value(current_output_json or {}, "route"),
+        "condition_result": _read_bool_value(current_output_json or {}, "condition_result"),
+    }
 
     successors: list[InitialNodeContract] = []
-    for successor_id in successor_ids:
-        node_type = node_type_map.get(successor_id)
-        if node_type is None:
+    for edge in _get_matching_edges(
+        workflow.edges,
+        current_node_id=current_node_id,
+        edge_context=edge_context,
+    ):
+        successor = node_map.get(edge.target)
+        if successor is None:
             continue
         successors.append(
             InitialNodeContract(
-                node_id=successor_id,
-                node_type=node_type,
+                node_id=successor.id,
+                node_type=successor.type,
                 status="queued",
             )
         )
     return successors
 
 
-def _collect_incoming_targets(edges_value: JSONValue | None) -> set[str]:
-    if not isinstance(edges_value, list):
-        return set()
+def derive_node_config(
+    workflow_version: WorkflowVersionContract,
+    node_id: str,
+) -> dict[str, JSONValue]:
+    workflow = parse_workflow_definition(workflow_version.dsl_json)
+    if workflow is None:
+        return {}
 
-    incoming_targets: set[str] = set()
-    for item in edges_value:
-        if not isinstance(item, dict):
-            continue
-        target = item.get("target")
-        if isinstance(target, str):
-            incoming_targets.add(target)
-    return incoming_targets
+    node = get_node_definition(workflow, node_id)
+    if node is None:
+        return {}
+    return dict(node.config)
 
 
-def _build_node_type_map(nodes_value: list[JSONValue]) -> dict[str, str]:
-    node_type_map: dict[str, str] = {}
-    for item in nodes_value:
-        if not isinstance(item, dict):
+def _get_matching_edges(
+    edges: list[EdgeDefinition],
+    *,
+    current_node_id: str,
+    edge_context: dict[str, JSONValue],
+) -> list[EdgeDefinition]:
+    matching_edges: list[EdgeDefinition] = []
+    for edge in edges:
+        if edge.source != current_node_id:
             continue
-        node_id = item.get("id")
-        node_type = item.get("type")
-        if isinstance(node_id, str) and isinstance(node_type, str):
-            node_type_map[node_id] = node_type
-    return node_type_map
+        if _matches_edge_condition(edge.condition, edge_context):
+            matching_edges.append(edge)
+    return matching_edges
 
 
-def _collect_successor_ids(edges_value: list[JSONValue], current_node_id: str) -> list[str]:
-    successor_ids: list[str] = []
-    for item in edges_value:
-        if not isinstance(item, dict):
-            continue
-        source = item.get("source")
-        target = item.get("target")
-        if isinstance(source, str) and isinstance(target, str) and source == current_node_id:
-            successor_ids.append(target)
-    return successor_ids
+def _matches_edge_condition(
+    condition: str | None,
+    context: dict[str, JSONValue],
+) -> bool:
+    """Decide whether an edge with ``condition`` should be followed.
+
+    A missing or blank condition always matches. Otherwise the condition matches
+    when it equals the string ``route`` in ``context``; when it is ``true``/``false``
+    (case-insensitive) it is compared with a boolean ``condition_result``; any
+    other condition is evaluated as an expression against ``context``.
+    """
+    expression = (condition or "").strip()
+    if not expression:
+        return True
+
+    route = context.get("route")
+    if isinstance(route, str) and route == expression:
+        return True
+
+    lowered = expression.lower()
+    condition_result = context.get("condition_result")
+    if isinstance(condition_result, bool) and lowered in {"true", "false"}:
+        return condition_result is (lowered == "true")
+
+    return evaluate_condition_expression(expression, context)
+
+
+def _read_string_value(payload: dict[str, JSONValue], key: str) -> str | None:
+    """Return ``payload[key]`` when it is a string, otherwise ``None``."""
+    found = payload.get(key)
+    if not isinstance(found, str):
+        return None
+    return found
+
+
+def _read_bool_value(payload: dict[str, JSONValue], key: str) -> bool | None:
+    """Return ``payload[key]`` when it is a bool, otherwise ``None``."""
+    found = payload.get(key)
+    return found if isinstance(found, bool) else None

+ 30 - 0
services/runtime-service/app/infrastructure/tool_client.py

@@ -0,0 +1,30 @@
+import httpx
+
+from core_domain import ToolBindingDetailContract
+
+
+class ToolServiceClientError(Exception):
+    """Raised when a request to the tool-service fails."""
+
+
+class ToolServiceClient:
+    """Blocking HTTP client for the tool-service binding endpoint.
+
+    A new ``httpx.Client`` is opened for each call and closed when the call ends.
+    """
+
+    def __init__(self, base_url: str, timeout_seconds: float = 10.0) -> None:
+        # Trailing slashes are dropped so request paths can be appended with "/".
+        self.base_url = base_url.rstrip("/")
+        # Passed to ``httpx.Client(timeout=...)`` on every request.
+        self.timeout_seconds = timeout_seconds
+
+    def get_tool_binding_detail(
+        self,
+        *,
+        tenant_id: str,
+        binding_id: str,
+    ) -> ToolBindingDetailContract:
+        """Fetch one tool binding and validate it as ``ToolBindingDetailContract``.
+
+        Sends ``GET {base_url}/tools/bindings/{binding_id}`` with ``tenant_id`` as a
+        query parameter.
+
+        Raises:
+            ToolServiceClientError: if the request fails, the service answers with
+                an error status, or the response body is not valid JSON matching
+                the contract.
+        """
+        try:
+            with httpx.Client(timeout=self.timeout_seconds) as client:
+                response = client.get(
+                    f"{self.base_url}/tools/bindings/{binding_id}",
+                    params={"tenant_id": tenant_id},
+                )
+                response.raise_for_status()
+        except httpx.HTTPError as exc:
+            raise ToolServiceClientError(f"tool-service request failed: {exc}") from exc
+
+        try:
+            return ToolBindingDetailContract.model_validate(response.json())
+        except ValueError as exc:
+            # JSON decode errors and pydantic's ValidationError both derive from
+            # ValueError; without this they would bypass the handler above and
+            # reach callers as a raw exception.
+            raise ToolServiceClientError(
+                f"tool-service returned an invalid response: {exc}"
+            ) from exc

+ 85 - 1
services/runtime-service/app/schemas/run.py

@@ -1,17 +1,23 @@
+from datetime import datetime
 from typing import TYPE_CHECKING
 
+from pydantic import BaseModel
+
 from core_domain import (
     InitialNodeContract,
+    NodeExecutionRequestContract,
     NodeRunContract,
     NodeRunStatusUpdateContract,
     RunBootstrapContract,
     RunCreateContract,
+    RunExecutionRequestContract,
     WorkflowRunStatusUpdateContract,
     WorkflowRunContract,
 )
+from core_shared import JSONValue
 
 if TYPE_CHECKING:
-    from app.db.models import NodeRun, WorkflowRun
+    from app.db.models import ExecutionLog, NodeArtifact, NodeRun, TraceSpan, WorkflowRun
 
 
 class InitialNodeCreateRequest(InitialNodeContract):
@@ -47,3 +53,81 @@ class WorkflowRunStatusUpdateRequest(WorkflowRunStatusUpdateContract):
 
 # Reuses NodeRunStatusUpdateContract as-is; no fields or validators are added here.
 class NodeRunStatusUpdateRequest(NodeRunStatusUpdateContract):
     pass
+
+
+# Reuses NodeExecutionRequestContract as-is; no fields or validators are added here.
+class NodeRunExecuteRequest(NodeExecutionRequestContract):
+    pass
+
+
+# Bundles a workflow run, one of its node runs, and an executor name in one payload.
+class NodeRunExecuteResponse(BaseModel):
+    run: WorkflowRunResponse
+    node_run: NodeRunResponse
+    # NOTE(review): presumably the executor that handled ``node_run`` — confirm with the producer.
+    executor_name: str
+
+
+# Reuses RunExecutionRequestContract as-is; no fields or validators are added here.
+class RunExecuteRequest(RunExecutionRequestContract):
+    pass
+
+
+# Bundles a workflow run with a list of node runs and a list of executor names.
+class RunExecuteResponse(BaseModel):
+    run: WorkflowRunResponse
+    node_runs: list[NodeRunResponse]
+    # NOTE(review): presumably index-aligned with ``node_runs`` — confirm with the producer.
+    executor_names: list[str]
+
+
+# Pydantic view of an ExecutionLog entity. Fields are filled from the entity's
+# attributes of the same names (see ``from_entity``).
+class ExecutionLogResponse(BaseModel):
+    id: str
+    tenant_id: str
+    run_id: str
+    node_run_id: str | None = None
+    event_type: str
+    level: str
+    message: str
+    detail_json: dict[str, JSONValue] | None = None
+    created_time: datetime
+
+    @classmethod
+    def from_entity(cls, entity: "ExecutionLog") -> "ExecutionLogResponse":
+        """Build the response by reading the matching attributes off ``entity``."""
+        return cls.model_validate(entity, from_attributes=True)
+
+
+# Pydantic view of a NodeArtifact entity. Fields are filled from the entity's
+# attributes of the same names (see ``from_entity``).
+class NodeArtifactResponse(BaseModel):
+    id: str
+    tenant_id: str
+    run_id: str
+    node_run_id: str
+    node_id: str
+    artifact_type: str
+    name: str
+    mime_type: str | None = None
+    # NOTE(review): content_text, content_json and storage_uri are all optional; which
+    # of them is populated for a given artifact_type is not visible here — confirm.
+    content_text: str | None = None
+    content_json: dict[str, JSONValue] | None = None
+    storage_uri: str | None = None
+    size_bytes: int | None = None
+    created_time: datetime
+
+    @classmethod
+    def from_entity(cls, entity: "NodeArtifact") -> "NodeArtifactResponse":
+        """Build the response by reading the matching attributes off ``entity``."""
+        return cls.model_validate(entity, from_attributes=True)
+
+
+# Pydantic view of a TraceSpan entity. Fields are filled from the entity's
+# attributes of the same names (see ``from_entity``).
+class TraceSpanResponse(BaseModel):
+    id: str
+    tenant_id: str
+    run_id: str
+    node_run_id: str | None = None
+    parent_span_id: str | None = None
+    span_type: str
+    name: str
+    status: str
+    started_time: datetime
+    # NOTE(review): ended_time and duration_ms are optional, presumably unset while
+    # a span is still open — confirm against the code that writes spans.
+    ended_time: datetime | None = None
+    duration_ms: int | None = None
+    attributes_json: dict[str, JSONValue] | None = None
+    error_code: str | None = None
+    error_message: str | None = None
+    created_time: datetime
+
+    @classmethod
+    def from_entity(cls, entity: "TraceSpan") -> "TraceSpanResponse":
+        """Build the response by reading the matching attributes off ``entity``."""
+        return cls.model_validate(entity, from_attributes=True)

+ 2 - 0
services/runtime-service/pyproject.toml

@@ -15,6 +15,8 @@ dependencies = [
   "pydantic>=2.7,<3.0",
   "sqlalchemy>=2.0,<3.0",
   "core-db",
+  "core-domain",
+  "core-dsl",
   "core-events",
   "core-shared",
 ]

+ 20 - 1
services/tool-service/app/api/routes.py

@@ -1,4 +1,4 @@
-from fastapi import APIRouter, Depends, Query
+from fastapi import APIRouter, Depends, HTTPException, Query
 from sqlalchemy import text
 from sqlalchemy.orm import Session
 
@@ -8,6 +8,7 @@ from app.db.session import get_db
 from app.domain.repositories import ToolBindingRepository, ToolDefinitionRepository, ToolVersionRepository
 from app.schemas.tool import (
     ToolBindingCreateRequest,
+    ToolBindingDetailResponse,
     ToolBindingResponse,
     ToolCreateRequest,
     ToolResponse,
@@ -89,3 +90,21 @@ def list_tool_bindings(
         ToolBindingResponse.from_entity(item)
         for item in service.list_tool_bindings(tenant_id=tenant_id, app_id=app_id)
     ]
+
+
+@router.get("/bindings/{binding_id}", response_model=ToolBindingDetailResponse)
+def get_tool_binding_detail(
+    binding_id: str,
+    tenant_id: str = Query(...),
+    service: ToolApplicationService = Depends(get_tool_application_service),
+) -> ToolBindingDetailResponse:
+    # Returns a binding together with its tool version and tool definition.
+    # The service yields None when it cannot produce the full triple, which is
+    # reported to the client as HTTP 404.
+    detail = service.get_tool_binding_detail(tenant_id=tenant_id, binding_id=binding_id)
+    if detail is None:
+        raise HTTPException(status_code=404, detail=f"tool_binding not found: {binding_id}")
+
+    binding_entity, version_entity, definition_entity = detail
+    binding_view = ToolBindingResponse.from_entity(binding_entity)
+    version_view = ToolVersionResponse.from_entity(version_entity)
+    definition_view = ToolResponse.from_entity(definition_entity)
+    return ToolBindingDetailResponse(
+        binding=binding_view,
+        tool_version=version_view,
+        tool_definition=definition_view,
+    )

+ 25 - 0
services/tool-service/app/application/services.py

@@ -59,3 +59,28 @@ class ToolApplicationService:
     def list_tool_bindings(self, tenant_id: str, app_id: str | None = None) -> list[ToolBinding]:
         """Return the bindings the repository lists for ``tenant_id`` and optional ``app_id``."""
         return self.tool_binding_repository.list_by_scope(tenant_id=tenant_id, app_id=app_id)
 
+    def get_tool_binding_detail(
+        self,
+        *,
+        tenant_id: str,
+        binding_id: str,
+    ) -> tuple[ToolBinding, ToolVersion, ToolDefinition] | None:
+        """Load a binding plus the tool version and tool definition it points to.
+
+        All three lookups are scoped to ``tenant_id``. Returns ``None`` as soon as
+        any of the three records is missing; otherwise returns
+        ``(binding, tool_version, tool_definition)``.
+        """
+        found_binding = self.tool_binding_repository.get_by_id(
+            tenant_id=tenant_id,
+            binding_id=binding_id,
+        )
+        if found_binding is None:
+            return None
+
+        found_version = self.tool_version_repository.get_by_id(
+            tenant_id=tenant_id,
+            tool_version_id=found_binding.tool_version_id,
+        )
+        if found_version is None:
+            return None
+
+        found_definition = self.tool_definition_repository.get_by_id(
+            tenant_id=tenant_id,
+            tool_id=found_version.tool_id,
+        )
+        if found_definition is None:
+            return None
+        return found_binding, found_version, found_definition

+ 23 - 0
services/tool-service/app/domain/repositories.py

@@ -40,6 +40,14 @@ class ToolDefinitionRepository:
         )
         return list(self.db.scalars(stmt))
 
+    def get_by_id(self, *, tenant_id: str, tool_id: str) -> ToolDefinition | None:
+        """Fetch the tool definition with id ``tool_id`` owned by ``tenant_id``, or ``None``."""
+        query = select(ToolDefinition).where(
+            ToolDefinition.tenant_id == tenant_id,
+            ToolDefinition.id == tool_id,
+        )
+        return self.db.scalar(query)
+
 
 class ToolVersionRepository:
     def __init__(self, db: Session) -> None:
@@ -80,6 +88,14 @@ class ToolVersionRepository:
         )
         return list(self.db.scalars(stmt))
 
+    def get_by_id(self, *, tenant_id: str, tool_version_id: str) -> ToolVersion | None:
+        """Fetch the tool version with id ``tool_version_id`` owned by ``tenant_id``, or ``None``."""
+        query = select(ToolVersion).where(
+            ToolVersion.tenant_id == tenant_id,
+            ToolVersion.id == tool_version_id,
+        )
+        return self.db.scalar(query)
+
     def _next_version_no(self, tool_id: str) -> int:
         stmt = select(func.max(ToolVersion.version_no)).where(ToolVersion.tool_id == tool_id)
         current_max = self.db.scalar(stmt)
@@ -122,3 +138,10 @@ class ToolBindingRepository:
         stmt = stmt.order_by(ToolBinding.created_time.desc())
         return list(self.db.scalars(stmt))
 
+    def get_by_id(self, *, tenant_id: str, binding_id: str) -> ToolBinding | None:
+        """Fetch the tool binding with id ``binding_id`` owned by ``tenant_id``, or ``None``."""
+        query = select(ToolBinding).where(
+            ToolBinding.tenant_id == tenant_id,
+            ToolBinding.id == binding_id,
+        )
+        return self.db.scalar(query)

+ 14 - 31
services/tool-service/app/schemas/tool.py

@@ -1,8 +1,13 @@
-from datetime import datetime
 from typing import TYPE_CHECKING
 
 from pydantic import BaseModel, Field
 
+from core_domain import (
+    ToolBindingContract,
+    ToolBindingDetailContract,
+    ToolDefinitionContract,
+    ToolVersionContract,
+)
 from core_shared import JSONValue
 
 if TYPE_CHECKING:
@@ -18,15 +23,7 @@ class ToolCreateRequest(BaseModel):
     description: str | None = None
 
 
-class ToolResponse(BaseModel):
-    id: str
-    tenant_id: str
-    plugin_id: str | None = None
-    code: str
-    name: str
-    tool_type: str
-    description: str | None = None
-    created_time: datetime
+class ToolResponse(ToolDefinitionContract):
 
     @classmethod
     def from_entity(cls, entity: "ToolDefinition") -> "ToolResponse":
@@ -43,17 +40,7 @@ class ToolVersionCreateRequest(BaseModel):
     retry_policy_json: dict[str, JSONValue] = Field(default_factory=dict)
 
 
-class ToolVersionResponse(BaseModel):
-    id: str
-    tenant_id: str
-    tool_id: str
-    version_no: int
-    input_schema_json: dict[str, JSONValue] | None = None
-    output_schema_json: dict[str, JSONValue] | None = None
-    invoke_config_json: dict[str, JSONValue] | None = None
-    timeout_ms: int | None = None
-    retry_policy_json: dict[str, JSONValue] | None = None
-    created_time: datetime
+class ToolVersionResponse(ToolVersionContract):
 
     @classmethod
     def from_entity(cls, entity: "ToolVersion") -> "ToolVersionResponse":
@@ -70,18 +57,14 @@ class ToolBindingCreateRequest(BaseModel):
     config_json: dict[str, JSONValue] = Field(default_factory=dict)
 
 
-class ToolBindingResponse(BaseModel):
-    id: str
-    tenant_id: str
-    app_id: str
-    tool_version_id: str
-    credential_id: str | None = None
-    binding_scope: str
-    enabled: bool
-    config_json: dict[str, JSONValue] | None = None
-    created_time: datetime
+class ToolBindingResponse(ToolBindingContract):
 
     @classmethod
     def from_entity(cls, entity: "ToolBinding") -> "ToolBindingResponse":
         return cls.model_validate(entity, from_attributes=True)
 
+
+# Declares the three nested parts with this service's own response classes.
+# NOTE(review): presumably these re-declare fields of the same names on
+# ToolBindingDetailContract with narrower types — confirm against core_domain.
+class ToolBindingDetailResponse(ToolBindingDetailContract):
+    binding: ToolBindingResponse
+    tool_version: ToolVersionResponse
+    tool_definition: ToolResponse

+ 1 - 1
services/tool-service/pyproject.toml

@@ -14,6 +14,7 @@ dependencies = [
   "sqlalchemy>=2.0,<3.0",
   "uvicorn[standard]>=0.30,<1.0",
   "core-db",
+  "core-domain",
   "core-shared",
 ]
 
@@ -22,4 +23,3 @@ package-dir = {"" = "."}
 
 [tool.setuptools.packages.find]
 where = ["."]
-

+ 4 - 1
services/workflow-service/app/api/routes.py

@@ -110,7 +110,10 @@ def create_workflow_version(
     payload: WorkflowVersionCreateRequest,
     service: WorkflowApplicationService = Depends(get_workflow_application_service),
 ) -> WorkflowVersionResponse:
-    entity = service.create_workflow_version(payload)
+    try:
+        entity = service.create_workflow_version(payload)
+    except ValueError as exc:
+        raise HTTPException(status_code=422, detail=str(exc)) from exc
     return WorkflowVersionResponse.from_entity(entity)
 
 

+ 22 - 1
services/workflow-service/app/application/services.py

@@ -1,3 +1,7 @@
+from pydantic import ValidationError
+
+from core_dsl import parse_workflow_definition
+from core_shared import JSONValue
 from app.db.models import AppDefinition, AppVersion, WorkflowDefinitionModel, WorkflowVersion
 from app.domain.repositories import (
     AppDefinitionRepository,
@@ -48,10 +52,11 @@ class WorkflowApplicationService:
         return self.workflow_repository.list_by_scope(tenant_id=tenant_id, app_id=app_id)
 
     def create_workflow_version(self, payload: WorkflowVersionCreateRequest) -> WorkflowVersion:
+        dsl_json = self._validate_workflow_dsl(payload.dsl_json)
         return self.workflow_version_repository.create(
             tenant_id=payload.tenant_id,
             workflow_id=payload.workflow_id,
-            dsl_json=payload.dsl_json,
+            dsl_json=dsl_json,
             compiled_plan_json=payload.compiled_plan_json,
             schema_version=payload.schema_version,
             checksum=payload.checksum,
@@ -82,3 +87,19 @@ class WorkflowApplicationService:
 
     def list_app_versions(self, tenant_id: str, app_id: str) -> list[AppVersion]:
         """Return the app versions the repository lists for ``tenant_id`` and ``app_id``."""
         return self.app_version_repository.list_by_app(tenant_id=tenant_id, app_id=app_id)
+
+    def _validate_workflow_dsl(
+        self,
+        dsl_json: dict[str, JSONValue] | None,
+    ) -> dict[str, JSONValue] | None:
+        """Validate a workflow DSL payload and return its re-serialised JSON form.
+
+        ``None`` input is passed through as ``None``. Otherwise the payload is parsed
+        with ``parse_workflow_definition``; a pydantic ``ValidationError`` is re-raised
+        as ``ValueError`` prefixed with ``invalid workflow dsl``. A parsed model is
+        dumped with ``model_dump(mode="json")``; a ``None`` parse result yields ``None``.
+        """
+        if dsl_json is None:
+            return None
+
+        try:
+            parsed = parse_workflow_definition(dsl_json)
+        except ValidationError as exc:
+            raise ValueError(f"invalid workflow dsl: {exc}") from exc
+
+        return None if parsed is None else parsed.model_dump(mode="json")

+ 1 - 0
services/workflow-service/pyproject.toml

@@ -14,6 +14,7 @@ dependencies = [
   "pydantic>=2.7,<3.0",
   "sqlalchemy>=2.0,<3.0",
   "core-db",
+  "core-domain",
   "core-dsl",
   "core-shared",
 ]