|
|
@@ -2,11 +2,13 @@ from datetime import datetime, timedelta
|
|
|
|
|
|
from sqlalchemy.orm import Session
|
|
|
|
|
|
+from core_dsl import parse_workflow_definition
|
|
|
from core_domain import (
|
|
|
InitialNodeContract,
|
|
|
NodeExecutionContextContract,
|
|
|
NodeExecutionResultContract,
|
|
|
NodeRunStatus,
|
|
|
+ WorkflowVersionContract,
|
|
|
WorkflowRunStatus,
|
|
|
)
|
|
|
|
|
|
@@ -18,10 +20,17 @@ from app.domain.repositories import (
|
|
|
TraceSpanRepository,
|
|
|
WorkflowRunRepository,
|
|
|
)
|
|
|
-from app.infrastructure.executors import NodeExecutionDispatcher, build_node_execution_dispatcher_with_clients
|
|
|
+from app.infrastructure.executors import (
|
|
|
+ NodeExecutionDispatcher,
|
|
|
+ build_node_execution_dispatcher_with_clients,
|
|
|
+)
|
|
|
from app.infrastructure.code_runner_client import CodeRunnerClient
|
|
|
from app.infrastructure.model_gateway_client import ModelGatewayClient
|
|
|
-from app.infrastructure.planner import derive_initial_node, derive_node_config, derive_successor_nodes
|
|
|
+from app.infrastructure.planner import (
|
|
|
+ derive_initial_node,
|
|
|
+ derive_node_config,
|
|
|
+ derive_successor_nodes,
|
|
|
+)
|
|
|
from app.infrastructure.tool_client import ToolServiceClient
|
|
|
from app.infrastructure.workflow_client import WorkflowServiceClient
|
|
|
from app.bootstrap.settings import RuntimeServiceSettings
|
|
|
@@ -76,12 +85,20 @@ class RuntimeApplicationService:
|
|
|
run_id=workflow_run.id,
|
|
|
current_node_count=1,
|
|
|
)
|
|
|
+ initial_config = self._resolve_node_config(
|
|
|
+ tenant_id=payload.tenant_id,
|
|
|
+ workflow_version_id=payload.workflow_version_id,
|
|
|
+ node_id=initial_node.node_id,
|
|
|
+ )
|
|
|
+ scheduled_time, timeout_time = self._build_node_timing(initial_config)
|
|
|
node_run = self.node_run_repository.create(
|
|
|
tenant_id=payload.tenant_id,
|
|
|
run_id=workflow_run.id,
|
|
|
node_id=initial_node.node_id,
|
|
|
node_type=initial_node.node_type,
|
|
|
status=initial_node.status,
|
|
|
+ scheduled_time=scheduled_time,
|
|
|
+ timeout_time=timeout_time,
|
|
|
)
|
|
|
self._log_event(
|
|
|
tenant_id=payload.tenant_id,
|
|
|
@@ -112,7 +129,10 @@ class RuntimeApplicationService:
|
|
|
return workflow_run, node_run
|
|
|
|
|
|
def list_runs(self, tenant_id: str, session_id: str | None = None) -> list[WorkflowRun]:
    """Return the workflow runs found for a tenant, optionally narrowed by session.

    Args:
        tenant_id: Tenant whose runs are requested.
        session_id: Optional session identifier forwarded to the repository as-is.

    Returns:
        Whatever ``workflow_run_repository.list_by_scope`` yields for this scope.
    """
    scope = {"tenant_id": tenant_id, "session_id": session_id}
    return self.workflow_run_repository.list_by_scope(**scope)
|
|
|
|
|
|
def list_node_runs(self, tenant_id: str, run_id: str) -> list[NodeRun]:
    """Return the node runs recorded for one workflow run of a tenant.

    Args:
        tenant_id: Tenant owning the run.
        run_id: Identifier of the workflow run whose node runs are listed.

    Returns:
        The result of ``node_run_repository.list_by_run`` for that tenant and run.
    """
    repository = self.node_run_repository
    return repository.list_by_run(tenant_id=tenant_id, run_id=run_id)
|
|
|
@@ -202,6 +222,15 @@ class RuntimeApplicationService:
|
|
|
|
|
|
if payload.status == "completed":
|
|
|
self._schedule_successor_nodes(node_run)
|
|
|
+ if payload.status == "failed":
|
|
|
+ workflow_run = self.workflow_run_repository.get_by_id(node_run.run_id)
|
|
|
+ if workflow_run is not None:
|
|
|
+ node_config = self._resolve_node_config(
|
|
|
+ tenant_id=node_run.tenant_id,
|
|
|
+ workflow_version_id=workflow_run.workflow_version_id,
|
|
|
+ node_id=node_run.node_id,
|
|
|
+ )
|
|
|
+ self._schedule_compensation_node(node_run=node_run, node_config=node_config)
|
|
|
|
|
|
self._sync_workflow_run_status_from_nodes(
|
|
|
tenant_id=node_run.tenant_id,
|
|
|
@@ -223,9 +252,38 @@ class RuntimeApplicationService:
|
|
|
return None
|
|
|
|
|
|
if node_run.status in {"completed", "failed", "skipped"}:
|
|
|
- executor_name = self.execution_dispatcher.resolve_executor(node_run.node_type).executor_name
|
|
|
+ executor_name = self.execution_dispatcher.resolve_executor(
|
|
|
+ node_run.node_type
|
|
|
+ ).executor_name
|
|
|
return workflow_run, node_run, executor_name
|
|
|
|
|
|
+ node_config = self._resolve_node_config(
|
|
|
+ tenant_id=node_run.tenant_id,
|
|
|
+ workflow_version_id=workflow_run.workflow_version_id,
|
|
|
+ node_id=node_run.node_id,
|
|
|
+ )
|
|
|
+ if self._node_has_timed_out(node_run):
|
|
|
+ timed_out_node_run = self.update_node_run_status(
|
|
|
+ node_run_id=node_run.id,
|
|
|
+ payload=NodeRunStatusUpdateRequest(
|
|
|
+ status="failed",
|
|
|
+ worker_key=payload.worker_key,
|
|
|
+ error_code="node_timeout",
|
|
|
+ error_message=f"node timed out: {node_run.node_id}",
|
|
|
+ output_json={
|
|
|
+ "timeout_time": node_run.timeout_time.isoformat()
|
|
|
+ if node_run.timeout_time is not None
|
|
|
+ else None,
|
|
|
+ },
|
|
|
+ ),
|
|
|
+ )
|
|
|
+ if timed_out_node_run is None:
|
|
|
+ return None
|
|
|
+ executor_name = self.execution_dispatcher.resolve_executor(
|
|
|
+ node_run.node_type
|
|
|
+ ).executor_name
|
|
|
+ return workflow_run, timed_out_node_run, executor_name
|
|
|
+
|
|
|
running_node_run = self.node_run_repository.update_status(
|
|
|
node_run_id=node_run_id,
|
|
|
status="running",
|
|
|
@@ -251,6 +309,7 @@ class RuntimeApplicationService:
|
|
|
workflow_run=workflow_run,
|
|
|
node_run=running_node_run,
|
|
|
worker_key=payload.worker_key,
|
|
|
+ node_config_json=node_config,
|
|
|
)
|
|
|
executor_name = self.execution_dispatcher.resolve_executor(
|
|
|
running_node_run.node_type
|
|
|
@@ -271,7 +330,10 @@ class RuntimeApplicationService:
|
|
|
)
|
|
|
|
|
|
try:
|
|
|
- result, executor_name = self.execution_dispatcher.execute(context=context, request=payload)
|
|
|
+ result, executor_name = self.execution_dispatcher.execute(
|
|
|
+ context=context,
|
|
|
+ request=payload,
|
|
|
+ )
|
|
|
except Exception as exc:
|
|
|
result = NodeExecutionResultContract(
|
|
|
status="failed",
|
|
|
@@ -280,6 +342,60 @@ class RuntimeApplicationService:
|
|
|
error_message=str(exc),
|
|
|
)
|
|
|
|
|
|
+ if result.status == "failed" and self._should_retry_node(
|
|
|
+ node_run=running_node_run,
|
|
|
+ node_config_json=context.node_config_json,
|
|
|
+ ):
|
|
|
+ retry_time, retry_timeout_time = self._build_retry_timing(context.node_config_json)
|
|
|
+ retried_node_run = self.node_run_repository.requeue_for_retry(
|
|
|
+ node_run_id=running_node_run.id,
|
|
|
+ scheduled_time=retry_time,
|
|
|
+ timeout_time=retry_timeout_time,
|
|
|
+ error_code=result.error_code,
|
|
|
+ error_message=result.error_message,
|
|
|
+ output_text=result.output_text,
|
|
|
+ output_json={
|
|
|
+ **(result.output_json or {}),
|
|
|
+ "retry_scheduled_time": retry_time.isoformat(),
|
|
|
+ "retry_reason": result.error_code or "node_failed",
|
|
|
+ },
|
|
|
+ )
|
|
|
+ if retried_node_run is None:
|
|
|
+ return None
|
|
|
+ self.trace_span_repository.finish(
|
|
|
+ span_id=trace_span.id,
|
|
|
+ status="error",
|
|
|
+ error_code=result.error_code,
|
|
|
+ error_message=result.error_message,
|
|
|
+ attributes_json={
|
|
|
+ "node_status": "queued",
|
|
|
+ "executor_name": executor_name,
|
|
|
+ "retry_scheduled": True,
|
|
|
+ "attempt_no": retried_node_run.attempt_no,
|
|
|
+ },
|
|
|
+ )
|
|
|
+ self._log_event(
|
|
|
+ tenant_id=retried_node_run.tenant_id,
|
|
|
+ run_id=retried_node_run.run_id,
|
|
|
+ node_run_id=retried_node_run.id,
|
|
|
+ event_type="node_retry_scheduled",
|
|
|
+ message=f"node retry scheduled: {retried_node_run.node_id}",
|
|
|
+ detail_json={
|
|
|
+ "node_id": retried_node_run.node_id,
|
|
|
+ "attempt_no": retried_node_run.attempt_no,
|
|
|
+ "scheduled_time": retry_time.isoformat(),
|
|
|
+ "error_code": result.error_code,
|
|
|
+ },
|
|
|
+ )
|
|
|
+ self._sync_workflow_run_status_from_nodes(
|
|
|
+ tenant_id=retried_node_run.tenant_id,
|
|
|
+ run_id=retried_node_run.run_id,
|
|
|
+ )
|
|
|
+ workflow_run = self.workflow_run_repository.get_by_id(retried_node_run.run_id)
|
|
|
+ if workflow_run is None:
|
|
|
+ return None
|
|
|
+ return workflow_run, retried_node_run, executor_name
|
|
|
+
|
|
|
final_node_run = self.update_node_run_status(
|
|
|
node_run_id=running_node_run.id,
|
|
|
payload=NodeRunStatusUpdateRequest(
|
|
|
@@ -477,17 +593,52 @@ class RuntimeApplicationService:
|
|
|
run_id=node_run.run_id,
|
|
|
node_ids=[item.node_id for item in successor_nodes],
|
|
|
)
|
|
|
- existing_node_ids = {item.node_id for item in existing_nodes}
|
|
|
+ existing_node_counts: dict[str, int] = {}
|
|
|
+ for item in existing_nodes:
|
|
|
+ existing_node_counts[item.node_id] = existing_node_counts.get(item.node_id, 0) + 1
|
|
|
|
|
|
for successor in successor_nodes:
|
|
|
- if successor.node_id in existing_node_ids:
|
|
|
+ successor_config = derive_node_config(workflow_version, successor.node_id)
|
|
|
+ if not self._is_join_ready(
|
|
|
+ workflow_version=workflow_version,
|
|
|
+ run_node_runs=self.node_run_repository.list_by_run(
|
|
|
+ tenant_id=node_run.tenant_id,
|
|
|
+ run_id=node_run.run_id,
|
|
|
+ ),
|
|
|
+ successor_node_id=successor.node_id,
|
|
|
+ successor_node_type=successor.node_type,
|
|
|
+ successor_config=successor_config,
|
|
|
+ ):
|
|
|
+ self._log_event(
|
|
|
+ tenant_id=node_run.tenant_id,
|
|
|
+ run_id=node_run.run_id,
|
|
|
+ node_run_id=None,
|
|
|
+ event_type="join_waiting",
|
|
|
+ message=f"join node waiting for predecessors: {successor.node_id}",
|
|
|
+ detail_json={
|
|
|
+ "node_id": successor.node_id,
|
|
|
+ "source_node_id": node_run.node_id,
|
|
|
+ },
|
|
|
+ )
|
|
|
+ continue
|
|
|
+ if not self._can_schedule_repeated_node(
|
|
|
+ successor_config,
|
|
|
+ existing_count=existing_node_counts.get(successor.node_id, 0),
|
|
|
+ ):
|
|
|
continue
|
|
|
+ scheduled_time, timeout_time = self._build_node_timing(successor_config)
|
|
|
self.node_run_repository.create(
|
|
|
tenant_id=node_run.tenant_id,
|
|
|
run_id=node_run.run_id,
|
|
|
+ parent_node_run_id=node_run.id,
|
|
|
node_id=successor.node_id,
|
|
|
node_type=successor.node_type,
|
|
|
status=successor.status,
|
|
|
+ scheduled_time=scheduled_time,
|
|
|
+ timeout_time=timeout_time,
|
|
|
+ )
|
|
|
+ existing_node_counts[successor.node_id] = (
|
|
|
+ existing_node_counts.get(successor.node_id, 0) + 1
|
|
|
)
|
|
|
self._log_event(
|
|
|
tenant_id=node_run.tenant_id,
|
|
|
@@ -509,6 +660,7 @@ class RuntimeApplicationService:
|
|
|
workflow_run: WorkflowRun,
|
|
|
node_run: NodeRun,
|
|
|
worker_key: str | None,
|
|
|
+ node_config_json: dict[str, JSONValue] | None = None,
|
|
|
) -> NodeExecutionContextContract:
|
|
|
run_state_json, node_output_json_by_node_id, node_output_text_by_node_id = (
|
|
|
self._build_run_state_maps(
|
|
|
@@ -522,7 +674,9 @@ class RuntimeApplicationService:
|
|
|
node_run_id=node_run.id,
|
|
|
node_id=node_run.node_id,
|
|
|
node_type=node_run.node_type,
|
|
|
- node_config_json=self._resolve_node_config(
|
|
|
+ node_config_json=node_config_json
|
|
|
+ if node_config_json is not None
|
|
|
+ else self._resolve_node_config(
|
|
|
tenant_id=node_run.tenant_id,
|
|
|
workflow_version_id=workflow_run.workflow_version_id,
|
|
|
node_id=node_run.node_id,
|
|
|
@@ -562,6 +716,219 @@ class RuntimeApplicationService:
|
|
|
|
|
|
return run_state_json, node_output_json_by_node_id, node_output_text_by_node_id
|
|
|
|
|
|
def _build_node_timing(
    self,
    node_config_json: dict[str, JSONValue],
) -> tuple[datetime, datetime | None]:
    """Derive the start time and the deadline of a node from its config.

    ``delay_seconds`` and ``timeout_seconds`` are read through
    ``_read_int_value`` with a default of 0.

    Args:
        node_config_json: Configuration mapping of the node being scheduled.

    Returns:
        A ``(scheduled_time, timeout_time)`` pair. ``scheduled_time`` is
        ``datetime.utcnow()`` plus the delay, where a negative delay counts
        as zero. ``timeout_time`` is ``scheduled_time`` plus the timeout, or
        ``None`` when the timeout is not strictly positive.
    """
    delay = self._read_int_value(node_config_json, "delay_seconds", default=0)
    timeout = self._read_int_value(node_config_json, "timeout_seconds", default=0)

    # A negative delay must never move the start into the past.
    effective_delay = delay if delay > 0 else 0
    start_at = datetime.utcnow() + timedelta(seconds=effective_delay)

    if timeout > 0:
        deadline = start_at + timedelta(seconds=timeout)
    else:
        # No positive timeout configured: the node has no deadline.
        deadline = None
    return start_at, deadline
|
|
|
+
|
|
|
+ def _node_has_timed_out(self, node_run: NodeRun) -> bool:
|
|
|
+ return node_run.timeout_time is not None and node_run.timeout_time <= datetime.utcnow()
|
|
|
+
|
|
|
def _should_retry_node(
    self,
    *,
    node_run: NodeRun,
    node_config_json: dict[str, JSONValue],
) -> bool:
    """Decide whether a failed node run still has retry attempts left.

    Args:
        node_run: The node run that failed; its ``attempt_no`` is compared
            against the configured limit.
        node_config_json: Node configuration; ``retry_policy.max_attempts``
            is read from it with a default of 1.

    Returns:
        ``True`` when ``attempt_no`` is strictly below ``max_attempts``.
    """
    policy = self._read_dict_value(node_config_json, "retry_policy")
    attempt_limit = self._read_int_value(policy, "max_attempts", default=1)
    return node_run.attempt_no < attempt_limit
|
|
|
+
|
|
|
def _read_retry_delay_seconds(self, node_config_json: dict[str, JSONValue]) -> int:
    """Return ``retry_policy.retry_delay_seconds`` from the node config.

    The value is read through ``_read_int_value`` with a default of 0; the
    ``retry_policy`` section itself is read through ``_read_dict_value``.
    """
    return self._read_int_value(
        self._read_dict_value(node_config_json, "retry_policy"),
        "retry_delay_seconds",
        default=0,
    )
|
|
|
+
|
|
|
def _build_retry_timing(
    self,
    node_config_json: dict[str, JSONValue],
) -> tuple[datetime, datetime | None]:
    """Compute when a retried node should run again and when it times out.

    Args:
        node_config_json: Node configuration. The retry delay comes from
            ``_read_retry_delay_seconds`` and the timeout from the top-level
            ``timeout_seconds`` key (default 0).

    Returns:
        A ``(retry_time, timeout_time)`` pair. ``retry_time`` is
        ``datetime.utcnow()`` plus the retry delay. ``timeout_time`` is
        ``retry_time`` plus the timeout, or ``None`` when the timeout is not
        strictly positive.
    """
    # Clamp like the initial-schedule delay: a negative configured delay
    # would otherwise place the retry in the past and shorten the timeout
    # window derived from it.
    retry_delay = max(self._read_retry_delay_seconds(node_config_json), 0)
    retry_time = datetime.utcnow() + timedelta(seconds=retry_delay)

    timeout_seconds = self._read_int_value(node_config_json, "timeout_seconds", default=0)
    if timeout_seconds > 0:
        timeout_time = retry_time + timedelta(seconds=timeout_seconds)
    else:
        timeout_time = None
    return retry_time, timeout_time
|
|
|
+
|
|
|
def _is_join_ready(
    self,
    *,
    workflow_version: WorkflowVersionContract,
    run_node_runs: list[NodeRun],
    successor_node_id: str,
    successor_node_type: str,
    successor_config: dict[str, JSONValue],
) -> bool:
    """Check whether a successor node may be scheduled given its predecessors.

    Args:
        workflow_version: Workflow version whose parsed edges define the
            predecessors of the successor.
        run_node_runs: Node runs of the current workflow run.
        successor_node_id: Id of the node about to be scheduled.
        successor_node_type: Type of that node; ``"join"`` enables the check
            even when no ``join_policy`` is configured.
        successor_config: Config of the successor; ``join_policy`` is read
            from it.

    Returns:
        ``True`` when the node is not subject to a join check, when the
        workflow cannot be parsed, when it has no incoming edges, or when the
        policy is unrecognised. Otherwise the outcome of the policy:
        ``all_completed`` (also used when no policy is set) needs every
        predecessor to be completed or skipped, ``any_completed`` needs at
        least one.
    """
    policy = self._read_string_value(successor_config, "join_policy")
    is_join_node = successor_node_type == "join"
    if policy is None and not is_join_node:
        return True

    definition = self._parse_workflow(workflow_version)
    if definition is None:
        return True

    upstream_ids = [
        edge.source for edge in definition.edges if edge.target == successor_node_id
    ]
    if not upstream_ids:
        return True

    # Skipped predecessors count as finished for join purposes.
    finished_ids = set()
    for run in run_node_runs:
        if run.status in {"completed", "skipped"}:
            finished_ids.add(run.node_id)

    if policy is None or policy == "all_completed":
        quantifier = all
    elif policy == "any_completed":
        quantifier = any
    else:
        # Unknown policies do not block scheduling.
        return True
    return quantifier(upstream_id in finished_ids for upstream_id in upstream_ids)
|
|
|
+
|
|
|
def _can_schedule_repeated_node(
    self,
    node_config_json: dict[str, JSONValue],
    *,
    existing_count: int,
) -> bool:
    """Decide whether another run of an already-seen node may be created.

    Args:
        node_config_json: Node configuration; ``allow_loop`` (default
            ``False``) and ``max_iterations`` (default 1) are read from it.
        existing_count: Number of node runs that already exist for the node.

    Returns:
        ``True`` when no run exists yet, or when looping is allowed and
        ``existing_count`` is still below ``max_iterations``.
    """
    if existing_count == 0:
        # The first run of a node is always permitted.
        return True
    if not self._read_bool_value(node_config_json, "allow_loop", default=False):
        return False
    iteration_cap = self._read_int_value(node_config_json, "max_iterations", default=1)
    return existing_count < iteration_cap
|
|
|
+
|
|
|
def _schedule_compensation_node(
    self,
    *,
    node_run: NodeRun,
    node_config: dict[str, JSONValue],
) -> None:
    """Queue the compensation node configured for a node run, if there is one.

    The compensation target is taken from ``compensation_node_id`` or, when
    that is missing, from ``compensation.node_id``. Nothing happens when no
    target is configured, when the owning workflow run cannot be loaded, or
    when runs of the target already exist and
    ``_can_schedule_repeated_node`` refuses another one.

    Args:
        node_run: The node run the compensation is scheduled for; it becomes
            the parent of the created node run.
        node_config: Configuration of that node.
    """
    target_node_id = self._read_string_value(node_config, "compensation_node_id")
    if target_node_id is None:
        nested_section = self._read_dict_value(node_config, "compensation")
        target_node_id = self._read_string_value(nested_section, "node_id")
        if target_node_id is None:
            return

    owning_run = self.workflow_run_repository.get_by_id(node_run.run_id)
    if owning_run is None:
        return

    target_config = self._resolve_node_config(
        tenant_id=node_run.tenant_id,
        workflow_version_id=owning_run.workflow_version_id,
        node_id=target_node_id,
    )

    prior_runs = self.node_run_repository.list_by_run_and_node_ids(
        tenant_id=node_run.tenant_id,
        run_id=node_run.run_id,
        node_ids=[target_node_id],
    )
    if prior_runs:
        may_repeat = self._can_schedule_repeated_node(
            target_config,
            existing_count=len(prior_runs),
        )
        if not may_repeat:
            return

    resolved_type = self._resolve_workflow_node_type(
        tenant_id=node_run.tenant_id,
        workflow_version_id=owning_run.workflow_version_id,
        node_id=target_node_id,
    )
    # Fall back to a generic type when the workflow does not yield one.
    target_node_type = resolved_type or "compensation"

    start_at, deadline = self._build_node_timing(target_config)
    queued_run = self.node_run_repository.create(
        tenant_id=node_run.tenant_id,
        run_id=node_run.run_id,
        parent_node_run_id=node_run.id,
        node_id=target_node_id,
        node_type=target_node_type,
        status="queued",
        scheduled_time=start_at,
        timeout_time=deadline,
    )

    event_detail = {
        "failed_node_id": node_run.node_id,
        "compensation_node_id": target_node_id,
    }
    self._log_event(
        tenant_id=node_run.tenant_id,
        run_id=node_run.run_id,
        node_run_id=queued_run.id,
        event_type="compensation_queued",
        message=f"compensation node queued: {target_node_id}",
        detail_json=event_detail,
    )
|
|
|
+
|
|
|
def _parse_workflow(self, workflow_version: WorkflowVersionContract):
    """Parse the ``dsl_json`` of a workflow version with ``parse_workflow_definition``.

    Returns whatever the parser returns; callers in this class treat a
    ``None`` result as "workflow unavailable".
    """
    dsl_payload = workflow_version.dsl_json
    return parse_workflow_definition(dsl_payload)
|
|
|
+
|
|
|
+ def _resolve_workflow_node_type(
|
|
|
+ self,
|
|
|
+ *,
|
|
|
+ tenant_id: str,
|
|
|
+ workflow_version_id: str,
|
|
|
+ node_id: str,
|
|
|
+ ) -> str | None:
|
|
|
+ if self.workflow_client is None:
|
|
|
+ return None
|
|
|
+ workflow_version = self.workflow_client.get_workflow_version(
|
|
|
+ tenant_id=tenant_id,
|
|
|
+ workflow_version_id=workflow_version_id,
|
|
|
+ )
|
|
|
+ workflow = self._parse_workflow(workflow_version)
|
|
|
+ if workflow is None:
|
|
|
+ return None
|
|
|
+ for node in workflow.nodes:
|
|
|
+ if node.id == node_id:
|
|
|
+ return node.type
|
|
|
+ return None
|
|
|
+
|
|
|
+ def _read_string_value(self, payload: dict[str, JSONValue], key: str) -> str | None:
|
|
|
+ value = payload.get(key)
|
|
|
+ if isinstance(value, str) and value:
|
|
|
+ return value
|
|
|
+ return None
|
|
|
+
|
|
|
+ def _read_bool_value(
|
|
|
+ self,
|
|
|
+ payload: dict[str, JSONValue],
|
|
|
+ key: str,
|
|
|
+ *,
|
|
|
+ default: bool,
|
|
|
+ ) -> bool:
|
|
|
+ value = payload.get(key)
|
|
|
+ if isinstance(value, bool):
|
|
|
+ return value
|
|
|
+ return default
|
|
|
+
|
|
|
+ def _read_int_value(
|
|
|
+ self,
|
|
|
+ payload: dict[str, JSONValue],
|
|
|
+ key: str,
|
|
|
+ *,
|
|
|
+ default: int,
|
|
|
+ ) -> int:
|
|
|
+ value = payload.get(key)
|
|
|
+ if isinstance(value, int) and not isinstance(value, bool):
|
|
|
+ return value
|
|
|
+ return default
|
|
|
+
|
|
|
+ def _read_dict_value(
|
|
|
+ self,
|
|
|
+ payload: dict[str, JSONValue],
|
|
|
+ key: str,
|
|
|
+ ) -> dict[str, JSONValue]:
|
|
|
+ value = payload.get(key)
|
|
|
+ if isinstance(value, dict):
|
|
|
+ return {str(item_key): item_value for item_key, item_value in value.items()}
|
|
|
+ return {}
|
|
|
+
|
|
|
def _sync_workflow_run_status_from_nodes(self, *, tenant_id: str, run_id: str) -> None:
|
|
|
node_runs = self.node_run_repository.list_by_run(tenant_id=tenant_id, run_id=run_id)
|
|
|
if not node_runs:
|
|
|
@@ -597,15 +964,16 @@ class RuntimeApplicationService:
|
|
|
) -> tuple[WorkflowRunStatus, str | None, str | None]:
|
|
|
statuses = {node_run.status for node_run in node_runs}
|
|
|
|
|
|
+ active_statuses: set[NodeRunStatus] = {"pending", "queued", "running"}
|
|
|
+ if statuses.intersection(active_statuses):
|
|
|
+ return "running", None, None
|
|
|
+
|
|
|
if "failed" in statuses:
|
|
|
failed_node = next((item for item in node_runs if item.status == "failed"), None)
|
|
|
error_code = failed_node.error_code if failed_node is not None else None
|
|
|
error_message = failed_node.error_message if failed_node is not None else None
|
|
|
return "failed", error_code, error_message
|
|
|
|
|
|
- if "running" in statuses:
|
|
|
- return "running", None, None
|
|
|
-
|
|
|
terminal_statuses: set[NodeRunStatus] = {"completed", "skipped"}
|
|
|
if statuses and statuses.issubset(terminal_statuses):
|
|
|
return "completed", None, None
|