Преглед на файлове

feat: execute team agent runs

Jax Docker преди 1 месец
родител
ревизия
f259864eb5

+ 37 - 0
README.md

@@ -324,6 +324,36 @@ Invoke-RestMethod -Method Post `
 
 Through `api-gateway`, use `/gateway/teams/**`.
 
+Execute a team run. The first implementation creates and executes one agent run
+per member, then stores a team-level summary. `dry_run=true` lets this work
+without model API keys:
+
+```powershell
+Invoke-RestMethod -Method Post `
+  -Uri http://127.0.0.1:8009/teams/runs/team-run-id/execute `
+  -ContentType "application/json" `
+  -Body '{"tenant_id":"t1","worker_key":"team-worker-1","dry_run":true}'
+```
+
+Execute one queued team run through the worker claim API:
+
+```powershell
+Invoke-RestMethod -Method Post `
+  -Uri http://127.0.0.1:8009/teams/workers/execute-next `
+  -ContentType "application/json" `
+  -Body '{"worker_key":"team-worker-1","lease_seconds":300,"dry_run":true}'
+```
+
+Run a standalone team worker process:
+
+```powershell
+Push-Location .\services\team-service
+$env:AGENT_PLATFORM_DATABASE_URL="sqlite:///./team_service.db"
+$env:AGENT_PLATFORM_WORKER_DRY_RUN="true"
+..\..\.venv\Scripts\python -m app.worker
+Pop-Location
+```
+
 ## Skill Service APIs
 
 `skill-service` stores reusable skill definitions, versioned parameter/output schemas,
@@ -1052,6 +1082,12 @@ Scale agent workers:
 docker compose -f .\deployments\docker\docker-compose.yml up --build -d --scale agent-worker=3
 ```
 
+Scale team workers:
+
+```powershell
+docker compose -f .\deployments\docker\docker-compose.yml up --build -d --scale team-worker=3
+```
+
 Stop and remove containers:
 
 ```powershell
@@ -1064,6 +1100,7 @@ Important notes:
 - `agent-service` stores agent definitions, prompt/config versions, and agent run records under `/data`
 - `memory-service` stores scoped memories under `/data`; move it to PostgreSQL before enabling high-volume memory writes
 - `team-service` stores multi-agent team definitions, team versions, and team run records under `/data`
+- `team-worker` executes queued team runs by orchestrating member agent runs; it can be scaled independently
 - `skill-service` stores skill definitions, versions, marketplace-style installations, and skill execution runs under `/data`
 - `human-service` stores human approval, input, pause/resume, and takeover task records under `/data`
 - `knowledge-service` stores knowledge bases, documents, chunks, and local retrieval metadata under `/data`

+ 19 - 0
deployments/docker/docker-compose.yml

@@ -203,6 +203,25 @@ services:
       timeout: 5s
       retries: 5
 
+  # Background worker for team runs: same image build as team-service
+  # (SERVICE_PATH below) but started with the polling worker entry point.
+  team-worker:
+    build:
+      context: ../..
+      dockerfile: deployments/docker/python-service.Dockerfile
+      args:
+        SERVICE_PATH: services/team-service
+    command: ["python", "-m", "app.worker"]
+    environment:
+      # SQLite file on the team_service_data volume mounted at /data below.
+      # NOTE(review): SQLite has no row-level locking, so confirm that run
+      # claiming is safe before relying on `--scale team-worker=N`.
+      AGENT_PLATFORM_DATABASE_URL: sqlite:////data/team_service.db
+      AGENT_PLATFORM_AGENT_SERVICE_URL: http://agent-service:8007
+      AGENT_PLATFORM_WORKER_POLL_INTERVAL_SECONDS: ${AGENT_PLATFORM_WORKER_POLL_INTERVAL_SECONDS:-1}
+      AGENT_PLATFORM_WORKER_LEASE_SECONDS: ${AGENT_PLATFORM_WORKER_LEASE_SECONDS:-300}
+      # The host-side variable is team-specific; it defaults to dry-run mode.
+      AGENT_PLATFORM_WORKER_DRY_RUN: ${AGENT_PLATFORM_TEAM_WORKER_DRY_RUN:-true}
+    volumes:
+      - team_service_data:/data
+    depends_on:
+      # Only waits for the agent-service container to start, not for health.
+      agent-service:
+        condition: service_started
+
   skill-service:
     build:
       context: ../..

+ 61 - 3
services/team-service/app/api/routes.py

@@ -4,7 +4,8 @@ from sqlalchemy.orm import Session
 
 from core_domain import ServiceHealth
 
-from app.application.services import TeamApplicationService
+from app.application.services import TeamApplicationService, build_team_application_service
+from app.bootstrap.settings import TeamServiceSettings
 from app.db.session import get_db
 from app.domain.repositories import (
     TeamDefinitionRepository,
@@ -15,8 +16,12 @@ from app.schemas.team import (
     TeamCreateRequest,
     TeamResponse,
     TeamRunCreateRequest,
+    TeamRunExecuteRequest,
+    TeamRunExecuteResponse,
     TeamRunResponse,
     TeamRunStatusUpdateRequest,
+    TeamWorkerExecuteNextRequest,
+    TeamWorkerExecuteNextResponse,
     TeamStatusUpdateRequest,
     TeamVersionCreateRequest,
     TeamVersionResponse,
@@ -25,11 +30,19 @@ from app.schemas.team import (
 router = APIRouter()
 
 
-def get_team_application_service(db: Session = Depends(get_db)) -> TeamApplicationService:
-    return TeamApplicationService(
+def get_team_settings() -> TeamServiceSettings:
+    """Build a new ``TeamServiceSettings`` instance (used as a FastAPI dependency)."""
+    settings = TeamServiceSettings()
+    return settings
+
+
+def get_team_application_service(
+    db: Session = Depends(get_db),
+    settings: TeamServiceSettings = Depends(get_team_settings),
+) -> TeamApplicationService:
+    """Assemble the application service for one request.
+
+    Wraps the injected DB session in the three repositories and delegates to
+    ``build_team_application_service``, which also wires the agent-service
+    client from ``settings``.
+    """
+    return build_team_application_service(
         team_repository=TeamDefinitionRepository(db),
         team_version_repository=TeamVersionRepository(db),
         team_run_repository=TeamRunRepository(db),
+        settings=settings,
     )
 
 
@@ -131,3 +144,48 @@ def update_team_run_status(
     if entity is None:
         raise HTTPException(status_code=404, detail=f"team_run not found: {team_run_id}")
     return TeamRunResponse.from_entity(entity)
+
+
+@router.post("/runs/{team_run_id}/execute", response_model=TeamRunExecuteResponse)
+def execute_team_run(
+    team_run_id: str,
+    payload: TeamRunExecuteRequest,
+    service: TeamApplicationService = Depends(get_team_application_service),
+) -> TeamRunExecuteResponse:
+    """Execute one team run and report the resulting run plus summary fields.
+
+    Responds with 404 when the service returns no run. ``member_run_count``
+    and ``dry_run`` are read from the run's ``output_json``; when a value is
+    absent or of the wrong type, the count falls back to 0 and ``dry_run`` to
+    the value sent in the request.
+    """
+    executed = service.execute_team_run(team_run_id=team_run_id, payload=payload)
+    if executed is None:
+        raise HTTPException(status_code=404, detail=f"team_run not found: {team_run_id}")
+
+    summary = executed.output_json or {}
+    reported_count = summary.get("member_run_count")
+    if not isinstance(reported_count, int):
+        reported_count = 0
+    reported_dry_run = summary.get("dry_run")
+    if not isinstance(reported_dry_run, bool):
+        reported_dry_run = payload.dry_run
+
+    return TeamRunExecuteResponse(
+        run=TeamRunResponse.from_entity(executed),
+        member_run_count=reported_count,
+        dry_run=reported_dry_run,
+    )
+
+
+@router.post("/workers/execute-next", response_model=TeamWorkerExecuteNextResponse)
+def execute_next_worker_task(
+    payload: TeamWorkerExecuteNextRequest,
+    settings: TeamServiceSettings = Depends(get_team_settings),
+    service: TeamApplicationService = Depends(get_team_application_service),
+) -> TeamWorkerExecuteNextResponse:
+    """Claim and execute the next queued team run on behalf of a worker.
+
+    ``lease_seconds`` and ``dry_run`` fall back to the service settings when
+    the request omits them. Responds with 404 when the service returns no
+    result. Summary fields come from the run's ``output_json`` with the same
+    type-checked fallbacks (0 / settings default).
+    """
+    lease_seconds = payload.lease_seconds or settings.worker_lease_seconds
+    if payload.dry_run is None:
+        requested_dry_run = settings.worker_dry_run
+    else:
+        requested_dry_run = payload.dry_run
+
+    outcome = service.execute_next_claimed_team_run(
+        worker_key=payload.worker_key,
+        lease_seconds=lease_seconds,
+        dry_run=requested_dry_run,
+    )
+    if outcome is None:
+        raise HTTPException(status_code=404, detail="queued team_run not found")
+
+    executed, released_lease_count = outcome
+    summary = executed.output_json or {}
+    reported_count = summary.get("member_run_count")
+    if not isinstance(reported_count, int):
+        reported_count = 0
+    reported_dry_run = summary.get("dry_run")
+    if not isinstance(reported_dry_run, bool):
+        reported_dry_run = settings.worker_dry_run
+
+    return TeamWorkerExecuteNextResponse(
+        run=TeamRunResponse.from_entity(executed),
+        member_run_count=reported_count,
+        dry_run=reported_dry_run,
+        released_lease_count=released_lease_count,
+    )

+ 295 - 0
services/team-service/app/application/services.py

@@ -1,12 +1,20 @@
+from datetime import datetime, timedelta
+
+from core_domain import AgentRunContract, TeamMemberContract
+from core_shared import JSONValue
+
+from app.bootstrap.settings import TeamServiceSettings
 from app.db.models import TeamDefinition, TeamRun, TeamVersion
 from app.domain.repositories import (
     TeamDefinitionRepository,
     TeamRunRepository,
     TeamVersionRepository,
 )
+from app.infrastructure.agent_client import AgentServiceClient, AgentServiceClientError
 from app.schemas.team import (
     TeamCreateRequest,
     TeamRunCreateRequest,
+    TeamRunExecuteRequest,
     TeamRunStatusUpdateRequest,
     TeamStatusUpdateRequest,
     TeamVersionCreateRequest,
@@ -20,10 +28,12 @@ class TeamApplicationService:
         team_repository: TeamDefinitionRepository,
         team_version_repository: TeamVersionRepository,
         team_run_repository: TeamRunRepository,
+        agent_client: AgentServiceClient | None = None,
     ) -> None:
         self.team_repository = team_repository
         self.team_version_repository = team_version_repository
         self.team_run_repository = team_run_repository
+        self.agent_client = agent_client
 
     def create_team(self, payload: TeamCreateRequest) -> TeamDefinition:
         return self.team_repository.create(
@@ -124,6 +134,129 @@ class TeamApplicationService:
             error_message=payload.error_message,
         )
 
+    def execute_team_run(
+        self,
+        *,
+        team_run_id: str,
+        payload: TeamRunExecuteRequest,
+    ) -> TeamRun | None:
+        """Run every member of a team run and persist the aggregated outcome.
+
+        Returns ``None`` when the run cannot be found (or the transition to
+        "running" finds nothing to update). Otherwise returns the run after
+        its final status update: "failed" with an error code when the team
+        version is missing, no valid members exist, agent-service raises, or
+        any member run did not complete; "completed" otherwise.
+        """
+        tenant_id = payload.tenant_id
+        worker_key = payload.worker_key
+
+        team_run = self.team_run_repository.get_by_id(
+            tenant_id=tenant_id,
+            team_run_id=team_run_id,
+        )
+        if team_run is None:
+            return None
+
+        def set_status(status: str, **fields: object) -> TeamRun | None:
+            # Every transition targets the same run and records the same worker.
+            return self.team_run_repository.update_status(
+                team_run_id=team_run.id,
+                status=status,
+                worker_key=worker_key,
+                **fields,
+            )
+
+        team_version = self.team_version_repository.get_by_id(
+            tenant_id=tenant_id,
+            team_version_id=team_run.team_version_id,
+        )
+        if team_version is None:
+            return set_status(
+                "failed",
+                error_code="team_version_missing",
+                error_message=f"team version not found: {team_run.team_version_id}",
+            )
+
+        if set_status("running") is None:
+            return None
+
+        members = self._read_team_members(team_version)
+        if not members:
+            return set_status(
+                "failed",
+                error_code="team_members_missing",
+                error_message="team version has no valid members",
+            )
+
+        try:
+            member_results = self._execute_members(
+                team_run=team_run,
+                team_version=team_version,
+                members=members,
+                worker_key=worker_key,
+                dry_run=payload.dry_run,
+            )
+        except AgentServiceClientError as exc:
+            return set_status(
+                "failed",
+                error_code="agent_service_error",
+                error_message=str(exc),
+            )
+
+        unfinished = [run for run in member_results if run.status != "completed"]
+        output_text = self._build_team_output_text(
+            team_version=team_version,
+            member_results=member_results,
+        )
+        output_json: dict[str, JSONValue] = {
+            "dry_run": payload.dry_run,
+            "coordination_mode": team_version.coordination_mode,
+            "team_version_id": team_version.id,
+            "member_run_count": len(member_results),
+            "member_results": [self._member_result_to_json(run) for run in member_results],
+        }
+
+        # The summary is stored on both outcomes so failed runs stay inspectable.
+        if not unfinished:
+            return set_status("completed", output_text=output_text, output_json=output_json)
+        return set_status(
+            "failed",
+            output_text=output_text,
+            output_json=output_json,
+            error_code="member_run_failed",
+            error_message=f"{len(unfinished)} member run(s) failed",
+        )
+
+    def execute_next_claimed_team_run(
+        self,
+        *,
+        worker_key: str,
+        lease_seconds: int,
+        dry_run: bool,
+    ) -> tuple[TeamRun, int] | None:
+        """Release expired leases, claim the next queued run, and execute it.
+
+        Returns ``(executed_run, released_lease_count)``, or ``None`` when no
+        run could be claimed or the execution returned nothing.
+        """
+        repository = self.team_run_repository
+        released = repository.release_expired_leases(now_time=datetime.utcnow())
+
+        lease_deadline = datetime.utcnow() + timedelta(seconds=lease_seconds)
+        claimed = repository.claim_next_queued(
+            worker_key=worker_key,
+            lease_expire_time=lease_deadline,
+        )
+        if claimed is None:
+            return None
+
+        request = TeamRunExecuteRequest(
+            tenant_id=claimed.tenant_id,
+            worker_key=worker_key,
+            dry_run=dry_run,
+        )
+        executed = self.execute_team_run(team_run_id=claimed.id, payload=request)
+        return None if executed is None else (executed, released)
+
     def _resolve_team_version(
         self,
         *,
@@ -140,3 +273,165 @@ class TeamApplicationService:
             tenant_id=tenant_id,
             team_id=team_id,
         )
+
+    def _execute_members(
+        self,
+        *,
+        team_run: TeamRun,
+        team_version: TeamVersion,
+        members: list[TeamMemberContract],
+        worker_key: str | None,
+        dry_run: bool,
+    ) -> list[AgentRunContract]:
+        """Create and execute one agent run per member, in role order.
+
+        Each member receives the outputs of the members that ran before it
+        under ``prior_member_outputs``. Raises ``AgentServiceClientError``
+        when no agent client is configured or when the client raises it.
+        """
+        client = self.agent_client
+        if client is None:
+            raise AgentServiceClientError("agent service client is not configured")
+
+        executed_runs: list[AgentRunContract] = []
+        handoff: list[dict[str, JSONValue]] = []
+        for member in self._order_members(members):
+            input_json = self._build_member_input_json(
+                team_run=team_run,
+                team_version=team_version,
+                member=member,
+                prior_outputs=handoff,
+            )
+            input_text = self._build_member_input_text(
+                team_run=team_run,
+                team_version=team_version,
+                member=member,
+            )
+            created = client.create_agent_run(
+                tenant_id=team_run.tenant_id,
+                agent_id=member.agent_id,
+                agent_version_id=member.agent_version_id,
+                session_id=team_run.session_id,
+                input_text=input_text,
+                input_json=input_json,
+            )
+            finished = client.execute_agent_run(
+                tenant_id=team_run.tenant_id,
+                agent_run_id=created.id,
+                worker_key=worker_key,
+                dry_run=dry_run,
+            )
+            executed_runs.append(finished)
+
+            # Later members see this result whatever its status.
+            handoff_entry: dict[str, JSONValue] = {
+                "member_key": member.member_key,
+                "role": member.role,
+                "agent_run_id": finished.id,
+                "status": finished.status,
+                "output_text": finished.output_text,
+                "output_json": finished.output_json or {},
+            }
+            handoff.append(handoff_entry)
+        return executed_runs
+
+    def _read_team_members(self, team_version: TeamVersion) -> list[TeamMemberContract]:
+        """Parse the version's member references into validated contracts.
+
+        Entries that fail validation are skipped on purpose, so a single bad
+        reference does not block the rest of the team. A missing (null)
+        reference list is treated as empty; the caller then fails the run with
+        ``team_members_missing`` instead of crashing after the run has already
+        been marked "running".
+        """
+        members: list[TeamMemberContract] = []
+        for item in team_version.member_refs_json or []:
+            try:
+                members.append(TeamMemberContract.model_validate(item))
+            except ValueError:
+                # Validation errors surface as ValueError; best-effort skip.
+                continue
+        return members
+
+    def _order_members(self, members: list[TeamMemberContract]) -> list[TeamMemberContract]:
+        """Return members sorted by role stage; unknown roles run last.
+
+        The sort is stable, so members sharing a role keep their input order.
+        """
+        unknown_role_rank = 10
+        stage_by_role = dict(
+            planner=0,
+            supervisor=1,
+            specialist=2,
+            executor=3,
+            reviewer=4,
+        )
+
+        def stage(member: TeamMemberContract) -> int:
+            return stage_by_role.get(member.role, unknown_role_rank)
+
+        return sorted(members, key=stage)
+
+    def _build_member_input_text(
+        self,
+        *,
+        team_run: TeamRun,
+        team_version: TeamVersion,
+        member: TeamMemberContract,
+    ) -> str:
+        """Compose the newline-separated prompt text for one member.
+
+        The objective and role lines are always present; the responsibility
+        and user-task lines are added only when those values are non-empty.
+        """
+        responsibility_lines = (
+            [f"Responsibility: {member.responsibility}"] if member.responsibility else []
+        )
+        task_lines = [f"User task: {team_run.input_text}"] if team_run.input_text else []
+        return "\n".join(
+            [
+                f"Team objective: {team_version.objective or 'No objective provided.'}",
+                f"Member role: {member.role}",
+                *responsibility_lines,
+                *task_lines,
+            ]
+        )
+
+    def _build_member_input_json(
+        self,
+        *,
+        team_run: TeamRun,
+        team_version: TeamVersion,
+        member: TeamMemberContract,
+        prior_outputs: list[dict[str, JSONValue]],
+    ) -> dict[str, JSONValue]:
+        """Build the structured input for one member's agent run.
+
+        Layers, later ones overriding earlier ones: the team run's own
+        ``input_json``, the team/member context keys, then the member's
+        configured ``input_json`` overrides (only when that value is a dict).
+        """
+        input_json: dict[str, JSONValue] = dict(team_run.input_json or {})
+        input_json.update(
+            {
+                "team_id": team_run.team_id,
+                "team_run_id": team_run.id,
+                "team_version_id": team_version.id,
+                "team_objective": team_version.objective,
+                "member_key": member.member_key,
+                "member_role": member.role,
+                "member_responsibility": member.responsibility,
+                # Snapshot: the caller keeps appending to its list after each
+                # member, which must not leak into an already-built payload.
+                "prior_member_outputs": list(prior_outputs),
+            }
+        )
+        # Tolerate a member without configuration instead of raising.
+        configured_input = (member.config_json or {}).get("input_json")
+        if isinstance(configured_input, dict):
+            input_json.update(
+                {str(item_key): item_value for item_key, item_value in configured_input.items()}
+            )
+        return input_json
+
+    def _build_team_output_text(
+        self,
+        *,
+        team_version: TeamVersion,
+        member_results: list[AgentRunContract],
+    ) -> str:
+        """Render the team-level text summary: a header plus one numbered line per member run."""
+
+        def describe(index: int, result: AgentRunContract) -> str:
+            # Prefer the run's output; fall back to its error message, then to empty.
+            output_text = result.output_text or result.error_message or ""
+            return f"{index}. agent={result.agent_id} status={result.status}: {output_text}"
+
+        header = [
+            f"Team objective: {team_version.objective or 'No objective provided.'}",
+            f"Coordination mode: {team_version.coordination_mode}",
+            "Member results:",
+        ]
+        numbered = [
+            describe(position, run) for position, run in enumerate(member_results, start=1)
+        ]
+        return "\n".join(header + numbered)
+
+    def _member_result_to_json(self, result: AgentRunContract) -> dict[str, JSONValue]:
+        """Flatten one member's agent run into the JSON stored on the team run."""
+        summary: dict[str, JSONValue] = {"agent_run_id": result.id}
+        for field_name in ("agent_id", "agent_version_id", "status", "output_text"):
+            summary[field_name] = getattr(result, field_name)
+        # A missing output is stored as an empty object rather than null.
+        summary["output_json"] = result.output_json or {}
+        for field_name in ("error_code", "error_message"):
+            summary[field_name] = getattr(result, field_name)
+        return summary
+
+
+def build_team_application_service(
+    *,
+    team_repository: TeamDefinitionRepository,
+    team_version_repository: TeamVersionRepository,
+    team_run_repository: TeamRunRepository,
+    settings: TeamServiceSettings,
+) -> TeamApplicationService:
+    """Create a ``TeamApplicationService`` with an agent-service client built from ``settings``."""
+    agent_client = AgentServiceClient(
+        base_url=settings.agent_service_url,
+        timeout_seconds=settings.agent_service_timeout_seconds,
+    )
+    return TeamApplicationService(
+        team_repository=team_repository,
+        team_version_repository=team_version_repository,
+        team_run_repository=team_run_repository,
+        agent_client=agent_client,
+    )

+ 6 - 0
services/team-service/app/bootstrap/settings.py

@@ -5,3 +5,9 @@ class TeamServiceSettings(ServiceSettings):
     service_name: str = "team-service"
     service_port: int = 8009
     database_url: str = "sqlite:///./team_service.db"
+    agent_service_url: str = "http://127.0.0.1:8007"
+    agent_service_timeout_seconds: float = 30.0
+    worker_poll_interval_seconds: float = 1.0
+    worker_lease_seconds: int = 300
+    worker_max_idle_cycles: int | None = None
+    worker_dry_run: bool = True

+ 51 - 1
services/team-service/app/domain/repositories.py

@@ -189,6 +189,55 @@ class TeamRunRepository:
         )
         return self.db.scalar(stmt)
 
+    def claim_next_queued(
+        self,
+        *,
+        worker_key: str,
+        lease_expire_time: datetime,
+    ) -> TeamRun | None:
+        """Claim the oldest queued run for ``worker_key``.
+
+        Returns the claimed run (status "running", lease set), or ``None``
+        when nothing is queued or another worker claimed the candidate first.
+
+        The row lock requested by ``with_for_update(skip_locked=True)`` is not
+        emitted on every backend (SQLite renders no FOR UPDATE clause), so the
+        claim itself is a conditional UPDATE that only matches while the row
+        is still "queued". Exactly one concurrent worker can win it.
+        """
+        from sqlalchemy import update
+
+        stmt = (
+            select(TeamRun)
+            .where(TeamRun.status == "queued")
+            .order_by(TeamRun.created_time.asc())
+            .with_for_update(skip_locked=True)
+            .limit(1)
+        )
+        entity = self.db.scalar(stmt)
+        if entity is None:
+            return None
+
+        now = datetime.utcnow()
+        claim = (
+            update(TeamRun)
+            .where(TeamRun.id == entity.id)
+            .where(TeamRun.status == "queued")
+            .values(
+                status="running",
+                worker_key=worker_key,
+                started_time=entity.started_time or now,
+                lease_expire_time=lease_expire_time,
+            )
+        )
+        result = self.db.execute(claim)
+        if result.rowcount != 1:
+            # Lost the race: another worker moved the row out of "queued".
+            self.db.rollback()
+            return None
+
+        self.db.commit()
+        self.db.refresh(entity)
+        return entity
+
+    def release_expired_leases(self, *, now_time: datetime, max_items: int = 100) -> int:
+        """Re-queue running runs whose lease expired at or before ``now_time``.
+
+        Processes at most ``max_items`` runs, oldest lease first, clearing the
+        worker, lease and start/finish timestamps. Returns how many runs were
+        re-queued; commits only when there was at least one.
+        """
+        expired_query = (
+            select(TeamRun)
+            .where(
+                TeamRun.status == "running",
+                TeamRun.lease_expire_time.is_not(None),
+                TeamRun.lease_expire_time <= now_time,
+            )
+            .order_by(TeamRun.lease_expire_time.asc())
+            .limit(max_items)
+        )
+        expired_runs = self.db.scalars(expired_query).all()
+        if not expired_runs:
+            return 0
+
+        for run in expired_runs:
+            run.status = "queued"
+            run.queued_time = now_time
+            run.worker_key = None
+            run.lease_expire_time = None
+            run.started_time = None
+            run.finished_time = None
+
+        self.db.commit()
+        return len(expired_runs)
+
     def update_status(
         self,
         *,
@@ -213,9 +262,10 @@ class TeamRunRepository:
         entity.error_message = error_message
         if status == "running" and entity.started_time is None:
             entity.started_time = now
+        if status != "running":
+            entity.lease_expire_time = None
         if status in {"completed", "failed", "cancelled"}:
             entity.finished_time = now
-            entity.lease_expire_time = None
 
         self.db.commit()
         self.db.refresh(entity)

+ 75 - 0
services/team-service/app/infrastructure/agent_client.py

@@ -0,0 +1,75 @@
+import httpx
+
+from core_domain import AgentRunContract
+from core_shared import JSONValue
+
+
+class AgentServiceClientError(Exception):
+    """Raised when a call to agent-service cannot be completed or its response is unusable."""
+
+
+class AgentServiceClient:
+    """Small synchronous HTTP client for the agent-service run endpoints.
+
+    Every failure is reported as ``AgentServiceClientError``: transport
+    errors, non-2xx responses, bodies that are not valid JSON, and payloads
+    that do not validate as an agent run. Callers can therefore rely on a
+    single ``except`` clause to fail the surrounding team run cleanly.
+    """
+
+    def __init__(self, base_url: str, timeout_seconds: float = 30.0) -> None:
+        # Strip the trailing slash so path concatenation never doubles it.
+        self.base_url = base_url.rstrip("/")
+        self.timeout_seconds = timeout_seconds
+
+    def create_agent_run(
+        self,
+        *,
+        tenant_id: str,
+        agent_id: str,
+        agent_version_id: str | None,
+        session_id: str | None,
+        input_text: str | None,
+        input_json: dict[str, JSONValue] | None,
+    ) -> AgentRunContract:
+        """Create an agent run via ``POST /agents/runs`` and return it.
+
+        Optional fields are omitted from the request body when ``None``.
+
+        Raises:
+            AgentServiceClientError: on any transport, HTTP status, JSON or
+                validation failure.
+        """
+        payload: dict[str, JSONValue] = {
+            "tenant_id": tenant_id,
+            "agent_id": agent_id,
+        }
+        optional_fields: dict[str, JSONValue] = {
+            "agent_version_id": agent_version_id,
+            "session_id": session_id,
+            "input_text": input_text,
+            "input_json": input_json,
+        }
+        payload.update(
+            {key: value for key, value in optional_fields.items() if value is not None}
+        )
+
+        body = self._post_json("/agents/runs", payload, action="create run")
+        return self._parse_run(body, action="create run")
+
+    def execute_agent_run(
+        self,
+        *,
+        tenant_id: str,
+        agent_run_id: str,
+        worker_key: str | None,
+        dry_run: bool,
+    ) -> AgentRunContract:
+        """Execute an existing agent run and return its final state.
+
+        Posts to ``/agents/runs/{agent_run_id}/execute`` and reads the run
+        from the ``run`` key of the response object.
+
+        Raises:
+            AgentServiceClientError: on any transport, HTTP status, JSON or
+                validation failure, or when the response has no ``run`` object.
+        """
+        payload: dict[str, JSONValue] = {
+            "tenant_id": tenant_id,
+            "dry_run": dry_run,
+        }
+        if worker_key is not None:
+            payload["worker_key"] = worker_key
+
+        body = self._post_json(
+            f"/agents/runs/{agent_run_id}/execute",
+            payload,
+            action="execute run",
+        )
+        # A non-object body (list, string, null) has no "run" key either.
+        run_payload = body.get("run") if isinstance(body, dict) else None
+        if not isinstance(run_payload, dict):
+            raise AgentServiceClientError("agent-service execute response missing run")
+        return self._parse_run(run_payload, action="execute run")
+
+    def _post_json(self, path: str, payload: dict[str, JSONValue], *, action: str) -> object:
+        """POST ``payload`` as JSON to ``path`` and return the decoded response body."""
+        try:
+            with httpx.Client(timeout=self.timeout_seconds) as client:
+                response = client.post(f"{self.base_url}{path}", json=payload)
+                response.raise_for_status()
+                return response.json()
+        except httpx.HTTPError as exc:
+            raise AgentServiceClientError(f"agent-service {action} failed: {exc}") from exc
+        except ValueError as exc:
+            # JSON decoding errors are ValueError subclasses.
+            raise AgentServiceClientError(
+                f"agent-service {action} returned invalid JSON: {exc}"
+            ) from exc
+
+    @staticmethod
+    def _parse_run(run_payload: object, *, action: str) -> AgentRunContract:
+        """Validate a decoded payload as an agent run, wrapping validation errors."""
+        try:
+            return AgentRunContract.model_validate(run_payload)
+        except ValueError as exc:
+            raise AgentServiceClientError(
+                f"agent-service {action} returned an invalid run: {exc}"
+            ) from exc

+ 25 - 0
services/team-service/app/schemas/team.py

@@ -73,7 +73,32 @@ class TeamRunStatusUpdateRequest(BaseModel):
     error_message: str | None = None
 
 
+class TeamRunExecuteRequest(BaseModel):
+    """Request body for executing one specific team run."""
+    # Tenant used to look up the run and its team version.
+    tenant_id: str
+    # Optional worker identifier recorded on the run's status updates.
+    worker_key: str | None = None
+    # Forwarded to agent-service for every member run; defaults to dry-run.
+    dry_run: bool = True
+
+
 class TeamRunResponse(TeamRunContract):
+    """API representation of a team run, built from the ORM entity."""
     @classmethod
     def from_entity(cls, entity: "TeamRun") -> "TeamRunResponse":
+        # from_attributes=True lets pydantic read fields off the ORM object.
         return cls.model_validate(entity, from_attributes=True)
+
+
+class TeamRunExecuteResponse(BaseModel):
+    """Response for a direct team run execution."""
+    # The run after its final status update.
+    run: TeamRunResponse
+    # Number of member agent runs reported in the run's output summary.
+    member_run_count: int = 0
+    # Whether the execution ran in dry-run mode.
+    dry_run: bool = True
+
+
+class TeamWorkerExecuteNextRequest(BaseModel):
+    """Request body for claiming and executing the next queued team run."""
+    # Identifier of the worker claiming the run.
+    worker_key: str
+    # Lease length in seconds; must be positive when provided.
+    lease_seconds: int | None = Field(default=None, gt=0)
+    # None means "not specified" (distinct from an explicit False).
+    dry_run: bool | None = None
+
+
+class TeamWorkerExecuteNextResponse(BaseModel):
+    """Response for a worker-driven execution of the next queued team run."""
+    # The run after its final status update.
+    run: TeamRunResponse
+    # Number of member agent runs reported in the run's output summary.
+    member_run_count: int = 0
+    # Whether the execution ran in dry-run mode.
+    dry_run: bool = True
+    # How many expired leases were released before claiming.
+    released_lease_count: int = 0

+ 117 - 0
services/team-service/app/worker.py

@@ -0,0 +1,117 @@
+from __future__ import annotations
+
+import os
+import socket
+import time
+import traceback
+from dataclasses import dataclass
+from uuid import uuid4
+
+from sqlalchemy.orm import Session, sessionmaker
+
+from app.application.services import build_team_application_service
+from app.bootstrap.settings import TeamServiceSettings
+from app.db.session import build_session_factory
+from app.domain.repositories import (
+    TeamDefinitionRepository,
+    TeamRunRepository,
+    TeamVersionRepository,
+)
+
+
+@dataclass(frozen=True)
+class TeamWorkerStats:
+    """Immutable counters describing one worker loop, returned when it stops."""
+    # Identifier of the worker that produced these stats.
+    worker_key: str
+    # Polling cycles that executed a run.
+    executed_count: int = 0
+    # Consecutive cycles without an executed run at the moment the loop ended.
+    idle_count: int = 0
+    # Cycles whose execution attempt raised an exception.
+    error_count: int = 0
+
+
+class TeamWorker:
+    """Polling worker that claims and executes queued team runs one at a time."""
+
+    def __init__(
+        self,
+        *,
+        settings: TeamServiceSettings,
+        session_factory: sessionmaker[Session],
+        worker_key: str,
+    ) -> None:
+        self.worker_key = worker_key
+        self.session_factory = session_factory
+        self.settings = settings
+
+    def run_forever(self) -> TeamWorkerStats:
+        """Poll until the configured idle limit is reached, then return stats.
+
+        A cycle that raises is logged via ``traceback`` and treated like an
+        idle cycle. With ``worker_max_idle_cycles`` unset the loop never ends.
+        """
+        executed_total = 0
+        idle_streak = 0
+        error_total = 0
+
+        while True:
+            try:
+                did_work = self.run_once()
+            except Exception:
+                error_total += 1
+                traceback.print_exc()
+                did_work = False
+
+            if not did_work:
+                idle_streak += 1
+                # Back off only when there was nothing to do (or the cycle failed).
+                time.sleep(self.settings.worker_poll_interval_seconds)
+            else:
+                executed_total += 1
+                idle_streak = 0
+
+            idle_limit = self.settings.worker_max_idle_cycles
+            if idle_limit is not None and idle_streak >= idle_limit:
+                return TeamWorkerStats(
+                    worker_key=self.worker_key,
+                    executed_count=executed_total,
+                    idle_count=idle_streak,
+                    error_count=error_total,
+                )
+
+    def run_once(self) -> bool:
+        """Execute at most one queued run; return whether a run was executed."""
+        session = self.session_factory()
+        try:
+            service = build_team_application_service(
+                team_repository=TeamDefinitionRepository(session),
+                team_version_repository=TeamVersionRepository(session),
+                team_run_repository=TeamRunRepository(session),
+                settings=self.settings,
+            )
+            outcome = service.execute_next_claimed_team_run(
+                worker_key=self.worker_key,
+                lease_seconds=self.settings.worker_lease_seconds,
+                dry_run=self.settings.worker_dry_run,
+            )
+        finally:
+            # A fresh session per cycle; always released, even on errors.
+            session.close()
+        return outcome is not None
+
+
+def build_worker_key() -> str:
+    """Return the configured worker key, or a generated ``<hostname>-<8 hex chars>`` one.
+
+    An unset or empty ``AGENT_PLATFORM_WORKER_KEY`` triggers the generated key.
+    """
+    return os.getenv("AGENT_PLATFORM_WORKER_KEY") or f"{socket.gethostname()}-{uuid4().hex[:8]}"
+
+
+def main() -> None:
+    """Entry point: build a worker from settings, run it, and print its final stats."""
+    settings = TeamServiceSettings()
+    session_factory = build_session_factory(settings)
+    worker_key = build_worker_key()
+    stats = TeamWorker(
+        settings=settings,
+        session_factory=session_factory,
+        worker_key=worker_key,
+    ).run_forever()
+
+    summary = (
+        "team-worker stopped "
+        f"worker_key={stats.worker_key} "
+        f"executed_count={stats.executed_count} "
+        f"idle_count={stats.idle_count} "
+        f"error_count={stats.error_count}"
+    )
+    print(summary, flush=True)
+
+
+if __name__ == "__main__":
+    main()

+ 1 - 0
services/team-service/pyproject.toml

@@ -13,6 +13,7 @@ dependencies = [
   "uvicorn[standard]>=0.30,<1.0",
   "pydantic>=2.7,<3.0",
   "sqlalchemy>=2.0,<3.0",
+  "httpx>=0.27,<1.0",
   "core-db",
   "core-domain",
   "core-shared",