Przeglądaj źródła

feat: add app management features including listing, creating, and updating apps

Jax Docker 1 miesiąc temu
rodzic
commit
9a7ba815ea
44 zmienionych plików z 3291 dodań i 138 usunięć
  1. 13 0
      .claude/settings.json
  2. 6 0
      libs/core-domain/src/core_domain/__init__.py
  3. 17 0
      libs/core-domain/src/core_domain/model_contracts.py
  4. 90 0
      services/api-gateway/alembic/versions/20260514_0001_add_app_tables.py
  5. 516 2
      services/api-gateway/app/api/routes.py
  6. 11 1
      services/api-gateway/app/db/models/__init__.py
  7. 18 0
      services/api-gateway/app/db/models/app_api_key.py
  8. 16 0
      services/api-gateway/app/db/models/app_definition.py
  9. 21 0
      services/api-gateway/app/db/models/app_invocation_audit.py
  10. 190 1
      services/api-gateway/app/domain/repositories.py
  11. 2 0
      services/api-gateway/app/infrastructure/request_context.py
  12. 156 1
      services/api-gateway/app/schemas/gateway.py
  13. 34 0
      services/knowledge-service/alembic/versions/20260514_0003_resize_embedding_vector.py
  14. 237 0
      services/knowledge-service/app/application/chunking.py
  15. 1 5
      services/knowledge-service/app/application/document_parsers.py
  16. 93 4
      services/knowledge-service/app/application/embeddings.py
  17. 56 3
      services/knowledge-service/app/application/retrieval.py
  18. 28 10
      services/knowledge-service/app/application/services.py
  19. 5 3
      services/knowledge-service/app/bootstrap/settings.py
  20. 3 1
      services/knowledge-service/app/db/models/knowledge_chunk.py
  21. 12 0
      services/model-gateway-service/app/api/routes.py
  22. 33 1
      services/model-gateway-service/app/application/services.py
  23. 83 1
      services/model-gateway-service/app/infrastructure/provider.py
  24. 313 87
      services/team-service/app/application/services.py
  25. 6 1
      services/team-service/app/infrastructure/agent_client.py
  26. 205 1
      tests/test_team_service.py
  27. 3 0
      web/src/App.tsx
  28. 67 0
      web/src/api/apps.ts
  29. 1 0
      web/src/api/index.ts
  30. 2 2
      web/src/api/mock.ts
  31. 9 2
      web/src/api/teams.ts
  32. 2 2
      web/src/hooks/useApps.ts
  33. 4 0
      web/src/lib/constants.ts
  34. 58 0
      web/src/locales/en.json
  35. 58 0
      web/src/locales/zh.json
  36. 194 0
      web/src/pages/apps/AppsPage.tsx
  37. 173 0
      web/src/pages/apps/components/AppApiKeysPanel.tsx
  38. 69 0
      web/src/pages/apps/components/AppAuditsPanel.tsx
  39. 222 0
      web/src/pages/apps/components/AppDetail.tsx
  40. 174 0
      web/src/pages/apps/components/CreateAppDialog.tsx
  41. 2 2
      web/src/pages/sessions/components/CreateSessionDialog.tsx
  42. 4 3
      web/src/pages/teams/components/CreateTeamDialog.tsx
  43. 2 2
      web/src/pages/teams/components/TeamRuns.tsx
  44. 82 3
      web/src/types/app.ts

+ 13 - 0
.claude/settings.json

@@ -0,0 +1,13 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(python -c \"import ast; ast.parse\\(open\\('services/knowledge-service/app/application/chunking.py'\\).read\\(\\)\\); print\\('chunking OK'\\)\")",
+      "Bash(python -c \"import ast; ast.parse\\(open\\('services/knowledge-service/app/application/document_parsers.py'\\).read\\(\\)\\); print\\('parsers OK'\\)\")",
+      "Bash(python -c \"import ast; ast.parse\\(open\\('services/knowledge-service/app/application/services.py'\\).read\\(\\)\\); print\\('services OK'\\)\")",
+      "Bash(python -c ' *)",
+      "Bash(python -c \"import ast; ast.parse\\(open\\('services/team-service/app/application/services.py'\\).read\\(\\)\\); print\\('Syntax OK'\\)\")",
+      "Bash(python -c \"import ast; ast.parse\\(open\\('services/knowledge-service/app/application/services.py'\\).read\\(\\)\\); print\\('OK'\\)\")",
+      "Bash(python -c \"import ast; ast.parse\\(open\\('services/team-service/app/infrastructure/agent_client.py'\\).read\\(\\)\\); print\\('Syntax OK'\\)\")"
+    ]
+  }
+}

+ 6 - 0
libs/core-domain/src/core_domain/__init__.py

@@ -54,6 +54,9 @@ from .model_contracts import (
     ChatCompletionRequestContract,
     ChatCompletionResponseContract,
     ChatMessageContract,
+    EmbeddingDataItem,
+    EmbeddingRequestContract,
+    EmbeddingResponseContract,
 )
 from .scheduler_contracts import ScheduledJobContract, ScheduledJobStatus, ScheduledJobType
 from .service import ServiceDescriptor, ServiceHealth
@@ -108,6 +111,9 @@ __all__ = [
     "ChatCompletionRequestContract",
     "ChatCompletionResponseContract",
     "ChatMessageContract",
+    "EmbeddingDataItem",
+    "EmbeddingRequestContract",
+    "EmbeddingResponseContract",
     "HumanTaskContract",
     "HumanTaskCreateContract",
     "HumanTaskStatus",

+ 17 - 0
libs/core-domain/src/core_domain/model_contracts.py

@@ -25,3 +25,20 @@ class ChatCompletionResponseContract(BaseModel):
     tool_calls_json: list[dict[str, JSONValue]] = Field(default_factory=list)
     usage_json: dict[str, JSONValue] = Field(default_factory=dict)
     raw_response_json: dict[str, JSONValue] = Field(default_factory=dict)
+
+
+class EmbeddingRequestContract(BaseModel):
+    """Request contract for an embedding call.
+
+    ``input`` is required and accepts one string or a list of strings;
+    ``model`` and ``dimensions`` are optional and default to ``None``.
+    """
+
+    model: str | None = None
+    input: str | list[str]
+    # presumably the requested vector size — confirm against the provider call
+    dimensions: int | None = None
+
+
+class EmbeddingDataItem(BaseModel):
+    """One embedding vector together with an integer ``index``."""
+
+    embedding: list[float]
+    # presumably the position of the source text in the request input — confirm
+    index: int
+
+
+class EmbeddingResponseContract(BaseModel):
+    """Response contract for an embedding call.
+
+    ``data`` defaults to an empty list and ``usage_json`` to an empty dict;
+    ``model`` is optional.
+    """
+
+    model: str | None = None
+    data: list[EmbeddingDataItem] = Field(default_factory=list)
+    usage_json: dict[str, JSONValue] = Field(default_factory=dict)

+ 90 - 0
services/api-gateway/alembic/versions/20260514_0001_add_app_tables.py

@@ -0,0 +1,90 @@
+"""add app_definition, app_api_key, app_invocation_audit tables
+
+Revision ID: 20260514_0001
+Revises: 20260429_9001
+Create Date: 2026-05-14 10:00:00
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+
+revision: str = "20260514_0001"
+down_revision: str | None = "20260429_9001"
+branch_labels: Sequence[str] | None = None
+depends_on: Sequence[str] | None = None
+
+# Columns repeated on every table created in this migration: the primary-key
+# id, the created_by/updated_by user ids, and the created/updated/deleted
+# timestamps (only deleted_time and the user ids are nullable).
+# NOTE(review): the same Column objects are passed to all three create_table
+# calls below — confirm the installed Alembic copies columns that are already
+# attached to a table; otherwise build fresh columns per table.
+MIXIN_COLUMNS = [
+    sa.Column("id", sa.String(length=36), nullable=False),
+    sa.Column("created_by", sa.String(length=36), nullable=True),
+    sa.Column("updated_by", sa.String(length=36), nullable=True),
+    sa.Column("created_time", sa.DateTime(), nullable=False),
+    sa.Column("updated_time", sa.DateTime(), nullable=False),
+    sa.Column("deleted_time", sa.DateTime(), nullable=True),
+]
+
+
+def upgrade() -> None:
+    """Create the app_definition, app_api_key and app_invocation_audit tables.
+
+    Each table gets its own columns plus ``MIXIN_COLUMNS``, a primary key on
+    ``id``, and the indexes created right after it.
+    """
+    # App definitions: ``code`` is unique, ``status`` is indexed for filtering.
+    op.create_table(
+        "app_definition",
+        sa.Column("code", sa.String(length=64), nullable=False),
+        sa.Column("name", sa.String(length=128), nullable=False),
+        sa.Column("description", sa.Text(), nullable=True),
+        sa.Column("status", sa.String(length=32), nullable=False),
+        sa.Column("target_type", sa.String(length=32), nullable=False),
+        sa.Column("target_id", sa.String(length=36), nullable=False),
+        sa.Column("owner_user_id", sa.String(length=36), nullable=True),
+        sa.Column("settings_json", sa.Text(), nullable=True),
+        *MIXIN_COLUMNS,
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index("ix_app_definition_code", "app_definition", ["code"], unique=True)
+    op.create_index("ix_app_definition_status", "app_definition", ["status"], unique=False)
+
+    # API keys: only the prefix and hash columns exist; ``key_hash`` is unique.
+    # No foreign key to app_definition is declared for ``app_id``.
+    op.create_table(
+        "app_api_key",
+        sa.Column("app_id", sa.String(length=36), nullable=False),
+        sa.Column("name", sa.String(length=128), nullable=False),
+        sa.Column("key_prefix", sa.String(length=16), nullable=False),
+        sa.Column("key_hash", sa.String(length=128), nullable=False),
+        sa.Column("status", sa.String(length=32), nullable=False),
+        sa.Column("scopes", sa.Text(), nullable=True),
+        sa.Column("expires_time", sa.DateTime(), nullable=True),
+        sa.Column("last_used_time", sa.DateTime(), nullable=True),
+        *MIXIN_COLUMNS,
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index("ix_app_api_key_app_id", "app_api_key", ["app_id"], unique=False)
+    op.create_index("ix_app_api_key_key_prefix", "app_api_key", ["key_prefix"], unique=False)
+    op.create_index("ix_app_api_key_key_hash", "app_api_key", ["key_hash"], unique=True)
+    op.create_index("ix_app_api_key_status", "app_api_key", ["status"], unique=False)
+
+    # Invocation audit rows; all of their indexes are non-unique.
+    op.create_table(
+        "app_invocation_audit",
+        sa.Column("app_id", sa.String(length=36), nullable=False),
+        sa.Column("api_key_prefix", sa.String(length=16), nullable=True),
+        sa.Column("request_id", sa.String(length=64), nullable=False),
+        sa.Column("session_id", sa.String(length=36), nullable=True),
+        sa.Column("run_request_id", sa.String(length=36), nullable=True),
+        sa.Column("target_type", sa.String(length=32), nullable=False),
+        sa.Column("target_id", sa.String(length=36), nullable=False),
+        sa.Column("invoke_type", sa.String(length=16), nullable=False),
+        sa.Column("status", sa.String(length=32), nullable=False),
+        sa.Column("duration_ms", sa.Integer(), nullable=False),
+        sa.Column("error_code", sa.String(length=64), nullable=True),
+        sa.Column("error_message", sa.Text(), nullable=True),
+        sa.Column("client_metadata_json", sa.Text(), nullable=True),
+        *MIXIN_COLUMNS,
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index("ix_app_invocation_audit_app_id", "app_invocation_audit", ["app_id"], unique=False)
+    op.create_index("ix_app_invocation_audit_request_id", "app_invocation_audit", ["request_id"], unique=False)
+    op.create_index("ix_app_invocation_audit_session_id", "app_invocation_audit", ["session_id"], unique=False)
+    op.create_index("ix_app_invocation_audit_status", "app_invocation_audit", ["status"], unique=False)
+
+
+def downgrade() -> None:
+    """Drop the three app tables, in the reverse of their creation order."""
+    for table_name in ("app_invocation_audit", "app_api_key", "app_definition"):
+        op.drop_table(table_name)

+ 516 - 2
services/api-gateway/app/api/routes.py

@@ -1,6 +1,9 @@
 import asyncio
 import json
-from typing import Annotated
+from datetime import datetime
+from time import perf_counter
+from typing import Annotated, AsyncIterator
+from uuid import uuid4
 
 from core_domain import ServiceDescriptor, ServiceHealth
 import httpx
@@ -11,7 +14,13 @@ from pydantic import BaseModel
 
 from app.bootstrap.settings import ApiGatewaySettings
 from app.db.session import get_db
-from app.domain.repositories import ApiKeyRepository, GatewayRequestAuditRepository
+from app.domain.repositories import (
+    ApiKeyRepository,
+    AppApiKeyRepository,
+    AppDefinitionRepository,
+    AppInvocationAuditRepository,
+    GatewayRequestAuditRepository,
+)
 from app.infrastructure.api_keys import generate_api_key, get_api_key_prefix, hash_api_key
 from app.infrastructure.proxy import ProxyServiceName, ProxyTarget, ServiceProxy
 from core_shared.security import build_internal_service_headers
@@ -22,10 +31,25 @@ from app.schemas.gateway import (
     ApiKeyResponse,
     ApiKeyStatusPostRequest,
     ApiKeyStatusUpdateRequest,
+    AppApiKeyCreateRequest,
+    AppApiKeyCreateResponse,
+    AppApiKeyListRequest,
+    AppApiKeyResponse,
+    AppApiKeyStatusUpdateRequest,
+    AppAuditListRequest,
+    AppCreateRequest,
+    AppDetailRequest,
+    AppInvocationAuditResponse,
+    AppListRequest,
+    AppResponse,
+    AppStatusUpdateRequest,
+    AppUpdateRequest,
     GatewayAuditServiceStats,
     GatewayAuditStatsResponse,
     GatewayRequestAuditResponse,
     GatewayServicesHealthResponse,
+    OpenApiChatRequest,
+    OpenApiChatResponse,
 )
 
 router = APIRouter()
@@ -632,6 +656,496 @@ async def _stream_session_execute(
         await client.aclose()
 
 
+# ── Application Admin Routes ─────────────────────────────────────────────────
+
+
+@router.post("/gateway/apps", response_model=AppResponse)
+def create_app(payload: AppCreateRequest, db: DbSession) -> AppResponse:
+    """Register a new app definition.
+
+    Responds with HTTP 409 when an app with ``payload.code`` already exists.
+    """
+    if AppDefinitionRepository(db).get_by_code(code=payload.code) is not None:
+        raise HTTPException(status_code=409, detail=f"app code already exists: {payload.code}")
+
+    repository = AppDefinitionRepository(db)
+    app_fields = {
+        "code": payload.code,
+        "name": payload.name,
+        "description": payload.description,
+        "target_type": payload.target_type,
+        "target_id": payload.target_id,
+        "owner_user_id": payload.owner_user_id,
+        "settings_json": payload.settings_json,
+    }
+    created = repository.create(**app_fields)
+    return AppResponse.from_entity(created)
+
+
+@router.post("/gateway/apps/list", response_model=list[AppResponse])
+def list_apps(payload: AppListRequest, db: DbSession) -> list[AppResponse]:
+    """Return every app the repository lists, converted to ``AppResponse``.
+
+    ``payload`` is accepted but not read by this handler.
+    """
+    responses: list[AppResponse] = []
+    for app_entity in AppDefinitionRepository(db).list_all():
+        responses.append(AppResponse.from_entity(app_entity))
+    return responses
+
+
+@router.post("/gateway/apps/detail", response_model=AppResponse)
+def get_app_detail(payload: AppDetailRequest, db: DbSession) -> AppResponse:
+    """Look up one app by ``payload.app_id``; respond 404 when it is missing."""
+    found = AppDefinitionRepository(db).get_by_id(app_id=payload.app_id)
+    if found is not None:
+        return AppResponse.from_entity(found)
+    raise HTTPException(status_code=404, detail=f"app not found: {payload.app_id}")
+
+
+@router.post("/gateway/apps/update", response_model=AppResponse)
+def update_app(payload: AppUpdateRequest, db: DbSession) -> AppResponse:
+    """Apply the payload's editable fields to an app; respond 404 when the
+    repository returns no entity for ``payload.app_id``."""
+    repository = AppDefinitionRepository(db)
+    changes = {
+        "app_id": payload.app_id,
+        "name": payload.name,
+        "description": payload.description,
+        "target_type": payload.target_type,
+        "target_id": payload.target_id,
+        "settings_json": payload.settings_json,
+    }
+    updated = repository.update(**changes)
+    if updated is not None:
+        return AppResponse.from_entity(updated)
+    raise HTTPException(status_code=404, detail=f"app not found: {payload.app_id}")
+
+
+@router.post("/gateway/apps/status", response_model=AppResponse)
+def update_app_status(payload: AppStatusUpdateRequest, db: DbSession) -> AppResponse:
+    """Set an app's status; respond 404 when the repository returns no entity."""
+    repository = AppDefinitionRepository(db)
+    updated = repository.update_status(app_id=payload.app_id, status=payload.status)
+    if updated is not None:
+        return AppResponse.from_entity(updated)
+    raise HTTPException(status_code=404, detail=f"app not found: {payload.app_id}")
+
+
+@router.post("/gateway/apps/{app_id}/api-keys", response_model=AppApiKeyCreateResponse)
+def create_app_api_key(app_id: str, payload: AppApiKeyCreateRequest, db: DbSession) -> AppApiKeyCreateResponse:
+    """Issue a new API key for an existing app.
+
+    The plaintext key is returned in the response, while the repository
+    receives only its prefix and hash. Responds 404 when the app is missing.
+    """
+    if AppDefinitionRepository(db).get_by_id(app_id=app_id) is None:
+        raise HTTPException(status_code=404, detail=f"app not found: {app_id}")
+
+    plaintext_key = generate_api_key()
+    key_repository = AppApiKeyRepository(db)
+    stored_key = key_repository.create(
+        app_id=app_id,
+        name=payload.name,
+        key_prefix=get_api_key_prefix(plaintext_key),
+        key_hash=hash_api_key(plaintext_key),
+        scopes=payload.scopes,
+        expires_time=payload.expires_time)
+
+    response_fields = {
+        "id": stored_key.id,
+        "app_id": stored_key.app_id,
+        "name": stored_key.name,
+        "key_prefix": stored_key.key_prefix,
+        "api_key": plaintext_key,
+        "status": stored_key.status,
+        "scopes": stored_key.scopes,
+        "expires_time": stored_key.expires_time,
+        "created_time": stored_key.created_time,
+    }
+    return AppApiKeyCreateResponse(**response_fields)
+
+
+@router.post("/gateway/apps/{app_id}/api-keys/list", response_model=list[AppApiKeyResponse])
+def list_app_api_keys(app_id: str, payload: AppApiKeyListRequest, db: DbSession) -> list[AppApiKeyResponse]:
+    """Return the API keys the repository lists for ``app_id``.
+
+    ``payload`` is accepted but not read by this handler.
+    """
+    responses: list[AppApiKeyResponse] = []
+    for key_entity in AppApiKeyRepository(db).list_by_app(app_id=app_id):
+        responses.append(AppApiKeyResponse.from_entity(key_entity))
+    return responses
+
+
+@router.post("/gateway/apps/{app_id}/api-keys/status", response_model=AppApiKeyResponse)
+def update_app_api_key_status(app_id: str, payload: AppApiKeyStatusUpdateRequest, db: DbSession) -> AppApiKeyResponse:
+    """Set the status of the API key named by ``payload.api_key_id``.
+
+    Responds 404 when the repository returns no entity for that key id.
+    """
+    # NOTE(review): ``app_id`` from the path is not passed to the repository,
+    # so a key id belonging to a different app would still be updated —
+    # confirm whether the update should be scoped to this app.
+    entity = AppApiKeyRepository(db).update_status(api_key_id=payload.api_key_id, status=payload.status)
+    if entity is None:
+        raise HTTPException(status_code=404, detail=f"api key not found: {payload.api_key_id}")
+    return AppApiKeyResponse.from_entity(entity)
+
+
+@router.post("/gateway/apps/{app_id}/audits", response_model=list[AppInvocationAuditResponse])
+def list_app_audits(app_id: str, payload: AppAuditListRequest, db: DbSession) -> list[AppInvocationAuditResponse]:
+    """Return up to ``payload.limit`` invocation audit rows for ``app_id``."""
+    audit_rows = AppInvocationAuditRepository(db).list_by_app(app_id=app_id, limit=payload.limit)
+    responses: list[AppInvocationAuditResponse] = []
+    for audit_row in audit_rows:
+        responses.append(AppInvocationAuditResponse.from_entity(audit_row))
+    return responses
+
+
+# ── OpenAPI External Invocation ──────────────────────────────────────────────
+
+
+def _authenticate_app_api_key(request: Request, db: Session):
+    """Resolve the calling app from the API key carried by the request.
+
+    The token is read from an ``Authorization: Bearer ...`` header, falling
+    back to the header named by ``settings.api_key_header_name``.
+
+    Returns:
+        A ``(key_entity, app_entity)`` tuple for the active key and its app.
+
+    Raises:
+        HTTPException: 401 when the token is missing, unknown or expired, or
+            when the key's app no longer exists; 403 when the app's status is
+            not ``"published"``.
+    """
+    # Local import keeps this block self-contained.
+    from datetime import timezone
+
+    settings = ApiGatewaySettings()
+    token: str | None = None
+    authorization = request.headers.get("authorization")
+    if authorization:
+        scheme, _, t = authorization.partition(" ")
+        if scheme.lower() == "bearer" and t.strip():
+            token = t.strip()
+    if token is None:
+        token = request.headers.get(settings.api_key_header_name)
+    if not token:
+        raise HTTPException(status_code=401, detail="missing bearer token or api key")
+
+    key_hash = hash_api_key(token)
+    key_entity = AppApiKeyRepository(db).get_active_by_hash(key_hash=key_hash)
+    if key_entity is None:
+        raise HTTPException(status_code=401, detail="invalid api key")
+    if key_entity.expires_time is not None:
+        # ``expires_time`` is a timezone-naive DateTime column, so compare it
+        # with a naive UTC "now". Built from an aware datetime because
+        # ``datetime.utcnow()`` is deprecated.
+        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
+        if key_entity.expires_time <= now_utc:
+            raise HTTPException(status_code=401, detail="api key expired")
+
+    app_entity = AppDefinitionRepository(db).get_by_id(app_id=key_entity.app_id)
+    if app_entity is None:
+        raise HTTPException(status_code=401, detail="app not found")
+    if app_entity.status != "published":
+        raise HTTPException(status_code=403, detail=f"app is {app_entity.status}, not published")
+
+    # Record usage only after every check has passed.
+    AppApiKeyRepository(db).touch_last_used_time(api_key_id=key_entity.id)
+    return key_entity, app_entity
+
+
+@router.post("/gateway/openapi/apps/{app_code}/chat", response_model=OpenApiChatResponse)
+async def openapi_chat(app_code: str, payload: OpenApiChatRequest, request: Request, db: DbSession):
+    """Synchronous OpenAPI chat endpoint for an app.
+
+    Authenticates the caller's app API key, creates a session when the payload
+    carries no ``session_id``, records a run request and the user message via
+    the session service, executes the app's target through the agent service
+    (``target_type == "agent"``) or the team service (any other value), stores
+    the assistant reply when there is output, and finally writes an invocation
+    audit row with ``invoke_type="sync"``.
+
+    Raises:
+        HTTPException: 403 when the key's app code differs from ``app_code``;
+            authentication errors come from ``_authenticate_app_api_key``.
+    """
+    start = perf_counter()
+    request_id = str(uuid4())
+    key_entity, app_entity = _authenticate_app_api_key(request, db)
+    if app_entity.code != app_code:
+        raise HTTPException(status_code=403, detail="api key does not belong to this app")
+
+    targets = build_proxy_targets(ApiGatewaySettings())
+    session_target = targets["session-service"]
+    agent_target = targets["agent-service"]
+    team_target = targets["team-service"]
+    headers = _build_internal_headers(request, ApiGatewaySettings())
+
+    async with httpx_client(ApiGatewaySettings().proxy_timeout_seconds) as client:
+        # Reuse the caller's session when given, otherwise create one.
+        session_id = payload.session_id
+        if not session_id:
+            session_data = await _post_json(
+                client=client, target=session_target, path="",
+                payload={
+                    "app_id": app_entity.id,
+                    "user_id": payload.user_id or "openapi",
+                    "channel_type": "openapi",
+                    "runtime_target_type": app_entity.target_type,
+                    "runtime_target_id": app_entity.target_id,
+                }, headers=headers)
+            session_id = _get_string(session_data, "id")
+
+        # NOTE(review): the calls up to the ``try`` below are not guarded; if
+        # ``_post_json`` raises there, the error propagates and no audit row
+        # is written — confirm that is intended.
+        run_request_payload = {
+            "target_type": app_entity.target_type,
+            "target_id": app_entity.target_id,
+            "mode": "production",
+            "input_text": payload.message,
+        }
+        run_request = await _post_json(
+            client=client, target=session_target, path="run-requests",
+            payload={
+                "session_id": session_id,
+                "app_config_id": app_entity.target_id,
+                "workflow_config_id": app_entity.target_id,
+                "trigger_type": "chat",
+                "request_payload_json": run_request_payload,
+                "request_status": "accepted",
+            }, headers=headers)
+        run_request_id = _get_string(run_request, "id")
+
+        # The run request id doubles as the turn id of both chat messages.
+        user_message = await _post_json(
+            client=client, target=session_target, path="messages",
+            payload={
+                "session_id": session_id, "turn_id": run_request_id,
+                "role": "user", "content_type": "text",
+                "content_text": payload.message, "content_json": {},
+            }, headers=headers)
+
+        await _post_json(
+            client=client, target=session_target, path="run-requests/update",
+            payload={
+                "run_request_id": run_request_id, "request_status": "running",
+                "request_payload_json": {**run_request_payload, "user_message_id": _get_string(user_message, "id")},
+            }, headers=headers)
+
+        output_text: str | None = None
+        error_message: str | None = None
+        request_status = "completed"
+
+        try:
+            if app_entity.target_type == "agent":
+                # Agent target: create the run, then execute it by id.
+                agent_run = await _post_json(
+                    client=client, target=agent_target, path="runs",
+                    payload={
+                        "agent_id": app_entity.target_id,
+                        "session_id": session_id,
+                        "input_text": payload.message,
+                        "input_json": {"source": "openapi", "run_request_id": run_request_id},
+                    }, headers=headers)
+                agent_run_id = _get_string(agent_run, "id")
+                execute_result = await _post_json(
+                    client=client, target=agent_target, path="runs/execute",
+                    payload={"agent_run_id": agent_run_id, "dry_run": False}, headers=headers)
+                run_data = _get_dict(execute_result, "run")
+                output_text = _resolve_output_text(run_data)
+                error_message = _get_optional_string(run_data, "error_message")
+            else:
+                # Every non-"agent" target type is sent to the team service.
+                team_run = await _post_json(
+                    client=client, target=team_target, path="runs",
+                    payload={
+                        "team_id": app_entity.target_id,
+                        "session_id": session_id,
+                        "input_text": payload.message,
+                        "input_json": {"source": "openapi", "run_request_id": run_request_id},
+                        "enqueue": True,
+                    }, headers=headers)
+                team_run_id = _get_string(team_run, "id")
+                execute_result = await _post_json(
+                    client=client, target=team_target, path=f"runs/{team_run_id}/execute",
+                    payload={"dry_run": False}, headers=headers)
+                run_data = _get_dict(execute_result, "run")
+                output_text = _resolve_output_text(run_data)
+                error_message = _get_optional_string(run_data, "error_message")
+
+            if error_message:
+                request_status = "failed"
+
+            if output_text:
+                await _post_json(
+                    client=client, target=session_target, path="messages",
+                    payload={
+                        "session_id": session_id, "turn_id": run_request_id,
+                        "role": "assistant", "content_type": "text",
+                        "content_text": output_text, "content_json": {},
+                    }, headers=headers)
+        except HTTPException as exc:
+            # Execution errors are reported in the response body and the
+            # audit row instead of being re-raised.
+            request_status = "failed"
+            error_message = exc.detail if isinstance(exc.detail, str) else json.dumps(exc.detail, ensure_ascii=False)
+
+        # Persist the final status and result on the run request.
+        await _post_json(
+            client=client, target=session_target, path="run-requests/update",
+            payload={
+                "run_request_id": run_request_id, "request_status": request_status,
+                "request_payload_json": {
+                    **run_request_payload,
+                    "user_message_id": _get_string(user_message, "id"),
+                    "output_text": output_text, "error_message": error_message,
+                },
+            }, headers=headers)
+
+    duration_ms = int((perf_counter() - start) * 1000)
+    AppInvocationAuditRepository(db).create(
+        app_id=app_entity.id,
+        api_key_prefix=key_entity.key_prefix,
+        request_id=request_id,
+        session_id=session_id,
+        run_request_id=run_request_id,
+        target_type=app_entity.target_type,
+        target_id=app_entity.target_id,
+        invoke_type="sync",
+        status=request_status,
+        duration_ms=duration_ms,
+        error_message=error_message,
+        client_metadata_json=json.dumps(payload.metadata) if payload.metadata else None)
+
+    return OpenApiChatResponse(
+        request_id=request_id,
+        app_code=app_entity.code,
+        session_id=session_id,
+        run_request_id=run_request_id,
+        target_type=app_entity.target_type,
+        target_id=app_entity.target_id,
+        status=request_status,
+        output_text=output_text,
+        error=error_message)
+
+
+@router.post("/gateway/openapi/apps/{app_code}/chat/stream")
+async def openapi_chat_stream(app_code: str, payload: OpenApiChatRequest, request: Request):
+    """Streaming (SSE) OpenAPI chat endpoint for an app.
+
+    Authenticates the app API key on a short-lived database session, then
+    streams the chat through ``_stream_openapi_chat``. Authentication errors,
+    a key that belongs to another app, and non-agent targets are all reported
+    in-band as a single SSE ``failed`` event rather than as an HTTP error.
+    """
+    settings = ApiGatewaySettings()
+    session_factory = request.app.state.session_factory
+
+    def _failed_response(error_code: str, error_message: str) -> StreamingResponse:
+        # One-event SSE stream describing why the call was rejected.
+        return StreamingResponse(
+            _single_sse("failed", {"status": "failed", "error_code": error_code, "error_message": error_message}),
+            media_type="text/event-stream",
+            headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"})
+
+    auth_db = session_factory()
+    try:
+        key_entity, app_entity = _authenticate_app_api_key(request, auth_db)
+    except HTTPException as exc:
+        detail = exc.detail if isinstance(exc.detail, str) else json.dumps(exc.detail, ensure_ascii=False)
+        return _failed_response("auth_error", detail)
+    finally:
+        # Runs on both the success and the failure path, so the session is
+        # closed exactly once.
+        auth_db.close()
+
+    # NOTE(review): key_entity/app_entity are read after auth_db is closed —
+    # confirm the session factory leaves their attributes loaded on detach.
+    if app_entity.code != app_code:
+        return _failed_response("forbidden", "api key does not belong to this app")
+    if app_entity.target_type != "agent":
+        return _failed_response("unsupported", "streaming is only supported for agent targets in V0.1")
+
+    return StreamingResponse(
+        _stream_openapi_chat(app_code, payload, request, key_entity, app_entity, session_factory, settings),
+        media_type="text/event-stream",
+        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"})
+
+
+async def _single_sse(event: str, data: dict) -> AsyncIterator[str]:
+    """Async stream that yields exactly one SSE frame built by ``_sse``."""
+    yield _sse(event, data)
+
+
+async def _stream_openapi_chat(
+    app_code: str,
+    payload: OpenApiChatRequest,
+    request: Request,
+    key_entity,
+    app_entity,
+    session_factory,
+    settings: ApiGatewaySettings):
+    """Run one streaming chat turn for an agent-backed app and yield SSE frames.
+
+    Creates (or reuses) the session, records the run request and user message,
+    relays the agent service's ``execute-stream`` events to the caller, stores
+    the assistant reply, and updates the run request. An invocation audit row
+    with ``invoke_type="stream"`` is always attempted in ``finally``.
+
+    Yields:
+        SSE frames: ``started``, ``delta``, ``completed`` or ``failed``, plus
+        any other upstream event passed through unchanged.
+
+    Every failure path records its message in ``error_message`` and sets the
+    status to ``"failed"`` so the audit row matches what the client was told.
+    """
+    # Local import keeps this block self-contained.
+    import logging
+
+    start = perf_counter()
+    request_id = str(uuid4())
+
+    targets = build_proxy_targets(settings)
+    session_target = targets["session-service"]
+    agent_target = targets["agent-service"]
+    headers = _build_internal_headers(request, settings)
+    client = httpx.AsyncClient(timeout=settings.proxy_timeout_seconds)
+
+    output_text = ""
+    error_message: str | None = None
+    session_id: str | None = None
+    run_request_id: str | None = None
+    # Pessimistic default: only a fully relayed stream flips this to completed.
+    request_status = "failed"
+
+    try:
+        session_id = payload.session_id
+        if not session_id:
+            session_data = await _post_json(
+                client=client, target=session_target, path="",
+                payload={
+                    "app_id": app_entity.id,
+                    "user_id": payload.user_id or "openapi",
+                    "channel_type": "openapi",
+                    "runtime_target_type": app_entity.target_type,
+                    "runtime_target_id": app_entity.target_id,
+                }, headers=headers)
+            session_id = _get_string(session_data, "id")
+
+        run_request_payload = {
+            "target_type": app_entity.target_type,
+            "target_id": app_entity.target_id,
+            "mode": "production",
+            "input_text": payload.message,
+        }
+        run_request = await _post_json(
+            client=client, target=session_target, path="run-requests",
+            payload={
+                "session_id": session_id,
+                "app_config_id": app_entity.target_id,
+                "workflow_config_id": app_entity.target_id,
+                "trigger_type": "chat",
+                "request_payload_json": run_request_payload,
+                "request_status": "accepted",
+            }, headers=headers)
+        run_request_id = _get_string(run_request, "id")
+
+        user_message = await _post_json(
+            client=client, target=session_target, path="messages",
+            payload={
+                "session_id": session_id, "turn_id": run_request_id,
+                "role": "user", "content_type": "text",
+                "content_text": payload.message, "content_json": {},
+            }, headers=headers)
+        user_message_id = _get_string(user_message, "id")
+
+        await _post_json(
+            client=client, target=session_target, path="run-requests/update",
+            payload={
+                "run_request_id": run_request_id, "request_status": "running",
+                "request_payload_json": {**run_request_payload, "user_message_id": user_message_id},
+            }, headers=headers)
+
+        yield _sse("started", {
+            "request_id": request_id,
+            "session_id": session_id,
+            "run_request_id": run_request_id})
+
+        agent_run = await _post_json(
+            client=client, target=agent_target, path="runs",
+            payload={
+                "agent_id": app_entity.target_id,
+                "session_id": session_id,
+                "input_text": payload.message,
+                "input_json": {"source": "openapi", "run_request_id": run_request_id},
+            }, headers=headers)
+        agent_run_id = _get_string(agent_run, "id")
+
+        stream_url = _target_url(agent_target, f"runs/{agent_run_id}/execute-stream")
+        async with client.stream("POST", stream_url, headers=headers, json={"dry_run": False}) as resp:
+            if not resp.is_success:
+                error_message = await _read_stream_error(resp)
+                # Emit a terminal event so the client is not left with a
+                # stream that simply ends after "started".
+                yield _sse("failed", {"status": "failed", "error_code": "agent_error", "error_message": error_message})
+            else:
+                async for ev_name, ev_data in _parse_sse(resp):
+                    data = json.loads(ev_data)
+                    if ev_name == "agent.run.delta":
+                        text_chunk = data.get("text", "")
+                        yield _sse("delta", {"text": text_chunk})
+                        if isinstance(text_chunk, str):
+                            output_text += text_chunk
+                    elif ev_name == "agent.run.completed":
+                        run_data = data.get("run", data)
+                        final_text = _get_optional_string(run_data, "output_text")
+                        # Fall back to the final text when no deltas arrived.
+                        if not output_text and final_text:
+                            output_text = final_text
+                        yield _sse("completed", {"status": "completed", "output_text": output_text})
+                    elif ev_name == "agent.run.failed":
+                        msg = data.get("error_message", "Agent execution failed")
+                        if not isinstance(msg, str):
+                            msg = "Agent execution failed"
+                        error_message = msg
+                        yield _sse("failed", {"status": "failed", "error_code": "agent_error", "error_message": msg})
+                    else:
+                        yield _sse(ev_name, data)
+
+        request_status = "failed" if error_message else "completed"
+
+        if output_text:
+            await _post_json(
+                client=client, target=session_target, path="messages",
+                payload={
+                    "session_id": session_id, "turn_id": run_request_id,
+                    "role": "assistant", "content_type": "text",
+                    "content_text": output_text, "content_json": {},
+                }, headers=headers)
+
+        await _post_json(
+            client=client, target=session_target, path="run-requests/update",
+            payload={
+                "run_request_id": run_request_id, "request_status": request_status,
+                "request_payload_json": {
+                    **run_request_payload, "user_message_id": user_message_id,
+                    "output_text": output_text, "error_message": error_message,
+                },
+            }, headers=headers)
+
+    except HTTPException as exc:
+        detail = exc.detail if isinstance(exc.detail, str) else json.dumps(exc.detail, ensure_ascii=False)
+        # Keep the audit row consistent with the event sent to the client.
+        request_status = "failed"
+        error_message = detail
+        yield _sse("failed", {"status": "failed", "error_code": "gateway_error", "error_message": detail})
+    except Exception as exc:
+        request_status = "failed"
+        error_message = str(exc)
+        yield _sse("failed", {"status": "failed", "error_code": "internal_error", "error_message": error_message})
+    finally:
+        await client.aclose()
+        duration_ms = int((perf_counter() - start) * 1000)
+        audit_db = session_factory()
+        try:
+            AppInvocationAuditRepository(audit_db).create(
+                app_id=app_entity.id,
+                api_key_prefix=key_entity.key_prefix,
+                request_id=request_id,
+                session_id=session_id,
+                run_request_id=run_request_id,
+                target_type=app_entity.target_type,
+                target_id=app_entity.target_id,
+                invoke_type="stream",
+                status=request_status,
+                duration_ms=duration_ms,
+                error_message=error_message,
+                client_metadata_json=json.dumps(payload.metadata) if payload.metadata else None)
+        except Exception:
+            # Auditing stays best-effort, but a failed write is logged
+            # instead of disappearing silently.
+            logging.getLogger(__name__).exception(
+                "failed to write app invocation audit for request %s", request_id)
+        finally:
+            audit_db.close()
+
+
 @router.api_route(
     "/gateway/sessions",
     methods=["GET", "POST", "PUT", "PATCH", "DELETE"])

+ 11 - 1
services/api-gateway/app/db/models/__init__.py

@@ -1,6 +1,16 @@
 from core_db import Base
 
 from .api_key import ApiKey
+from .app_api_key import AppApiKey
+from .app_definition import AppDefinition
+from .app_invocation_audit import AppInvocationAudit
 from .gateway_request_audit import GatewayRequestAudit
 
-__all__ = ["ApiKey", "Base", "GatewayRequestAudit"]
+__all__ = [
+    "ApiKey",
+    "AppApiKey",
+    "AppDefinition",
+    "AppInvocationAudit",
+    "Base",
+    "GatewayRequestAudit",
+]

+ 18 - 0
services/api-gateway/app/db/models/app_api_key.py

@@ -0,0 +1,18 @@
+from datetime import datetime
+
+from core_db import AuditMixin, Base, EntityMixin
+from sqlalchemy import DateTime, String, Text
+from sqlalchemy.orm import Mapped, mapped_column
+
+
+class AppApiKey(EntityMixin, AuditMixin, Base):
+    """ORM model for the ``app_api_key`` table: one API key record per row."""
+
+    __tablename__ = "app_api_key"
+
+    # Presumably the id of the owning AppDefinition; no foreign key is declared here.
+    app_id: Mapped[str] = mapped_column(String(36), index=True)
+    name: Mapped[str] = mapped_column(String(128))
+    # Short, indexed, non-unique prefix; also copied into invocation audit rows.
+    key_prefix: Mapped[str] = mapped_column(String(16), index=True)
+    # Unique hash used by AppApiKeyRepository.get_active_by_hash for lookups.
+    key_hash: Mapped[str] = mapped_column(String(128), unique=True, index=True)
+    # Defaults to "active"; the schema layer allows "active", "disabled", "revoked".
+    status: Mapped[str] = mapped_column(String(32), default="active", index=True)
+    scopes: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # Timezone-naive DateTime columns.
+    expires_time: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
+    last_used_time: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)

+ 16 - 0
services/api-gateway/app/db/models/app_definition.py

@@ -0,0 +1,16 @@
+from core_db import AuditMixin, Base, EntityMixin
+from sqlalchemy import String, Text
+from sqlalchemy.orm import Mapped, mapped_column
+
+
+class AppDefinition(EntityMixin, AuditMixin, Base):
+    """ORM model for the ``app_definition`` table: an app that points at a target."""
+
+    __tablename__ = "app_definition"
+
+    # Unique, indexed code; AppDefinitionRepository.get_by_code looks rows up by it.
+    code: Mapped[str] = mapped_column(String(64), unique=True, index=True)
+    name: Mapped[str] = mapped_column(String(128))
+    description: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # Defaults to "draft"; the schema layer allows "draft", "published", "disabled".
+    status: Mapped[str] = mapped_column(String(32), default="draft", index=True)
+    # The schema layer restricts target_type to "agent" or "team".
+    target_type: Mapped[str] = mapped_column(String(32))
+    target_id: Mapped[str] = mapped_column(String(36))
+    owner_user_id: Mapped[str | None] = mapped_column(String(36), nullable=True)
+    # Free-form text column; presumably a JSON document — confirm against callers.
+    settings_json: Mapped[str | None] = mapped_column(Text, nullable=True)

+ 21 - 0
services/api-gateway/app/db/models/app_invocation_audit.py

@@ -0,0 +1,21 @@
+from core_db import AuditMixin, Base, EntityMixin
+from sqlalchemy import Integer, String, Text
+from sqlalchemy.orm import Mapped, mapped_column
+
+
+class AppInvocationAudit(EntityMixin, AuditMixin, Base):
+    """ORM model for the ``app_invocation_audit`` table: one row per app invocation."""
+
+    __tablename__ = "app_invocation_audit"
+
+    app_id: Mapped[str] = mapped_column(String(36), index=True)
+    # Only the key prefix is recorded, not the key itself.
+    api_key_prefix: Mapped[str | None] = mapped_column(String(16), nullable=True)
+    request_id: Mapped[str] = mapped_column(String(64), index=True)
+    session_id: Mapped[str | None] = mapped_column(String(36), nullable=True, index=True)
+    run_request_id: Mapped[str | None] = mapped_column(String(36), nullable=True)
+    # Target type and id are stored as plain copies on the audit row.
+    target_type: Mapped[str] = mapped_column(String(32))
+    target_id: Mapped[str] = mapped_column(String(36))
+    invoke_type: Mapped[str] = mapped_column(String(16))
+    status: Mapped[str] = mapped_column(String(32), index=True)
+    duration_ms: Mapped[int] = mapped_column(Integer)
+    error_code: Mapped[str | None] = mapped_column(String(64), nullable=True)
+    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # Text column; presumably holds JSON-encoded client metadata — confirm against callers.
+    client_metadata_json: Mapped[str | None] = mapped_column(Text, nullable=True)

+ 190 - 1
services/api-gateway/app/domain/repositories.py

@@ -3,7 +3,7 @@ from datetime import datetime
 from sqlalchemy import case, func, select
 from sqlalchemy.orm import Session
 
-from app.db.models import ApiKey, GatewayRequestAudit
+from app.db.models import ApiKey, AppApiKey, AppDefinition, AppInvocationAudit, GatewayRequestAudit
 
 
 class GatewayRequestAuditRepository:
@@ -153,3 +153,192 @@ class ApiKeyRepository:
         self.db.commit()
         self.db.refresh(entity)
         return entity
+
+
+class AppDefinitionRepository:
+    """Create, read and update ``AppDefinition`` rows through one SQLAlchemy session.
+
+    Every mutating method commits immediately and refreshes the entity it returns.
+    """
+
+    def __init__(self, db: Session) -> None:
+        self.db = db
+
+    def _commit_and_refresh(self, record: AppDefinition) -> AppDefinition:
+        """Commit the session and reload ``record`` from the database."""
+        self.db.commit()
+        self.db.refresh(record)
+        return record
+
+    def create(
+        self,
+        *,
+        code: str,
+        name: str,
+        target_type: str,
+        target_id: str,
+        description: str | None = None,
+        owner_user_id: str | None = None,
+        settings_json: str | None = None) -> AppDefinition:
+        """Insert a new app definition; its status always starts as ``"draft"``."""
+        record = AppDefinition(
+            code=code,
+            name=name,
+            description=description,
+            status="draft",
+            target_type=target_type,
+            target_id=target_id,
+            owner_user_id=owner_user_id,
+            settings_json=settings_json)
+        self.db.add(record)
+        return self._commit_and_refresh(record)
+
+    def get_by_id(self, *, app_id: str) -> AppDefinition | None:
+        """Return the app whose id equals ``app_id``, or ``None``."""
+        return self.db.scalar(
+            select(AppDefinition).where(AppDefinition.id == app_id).limit(1)
+        )
+
+    def get_by_code(self, *, code: str) -> AppDefinition | None:
+        """Return the app whose code equals ``code``, or ``None``."""
+        return self.db.scalar(
+            select(AppDefinition).where(AppDefinition.code == code).limit(1)
+        )
+
+    def list_all(self) -> list[AppDefinition]:
+        """Return every app definition, newest ``created_time`` first."""
+        newest_first = select(AppDefinition).order_by(AppDefinition.created_time.desc())
+        return list(self.db.scalars(newest_first))
+
+    def update(
+        self,
+        *,
+        app_id: str,
+        name: str | None = None,
+        description: str | None = None,
+        target_type: str | None = None,
+        target_id: str | None = None,
+        settings_json: str | None = None) -> AppDefinition | None:
+        """Apply the non-``None`` arguments to the app and persist it.
+
+        Returns ``None`` when no app has the given id. Because ``None`` means
+        "leave unchanged", a nullable column cannot be reset to NULL here.
+        """
+        record = self.get_by_id(app_id=app_id)
+        if record is None:
+            return None
+        requested_changes = {
+            "name": name,
+            "description": description,
+            "target_type": target_type,
+            "target_id": target_id,
+            "settings_json": settings_json,
+        }
+        for attribute, new_value in requested_changes.items():
+            if new_value is not None:
+                setattr(record, attribute, new_value)
+        return self._commit_and_refresh(record)
+
+    def update_status(self, *, app_id: str, status: str) -> AppDefinition | None:
+        """Set the app's status; returns ``None`` when the app does not exist."""
+        record = self.get_by_id(app_id=app_id)
+        if record is None:
+            return None
+        record.status = status
+        return self._commit_and_refresh(record)
+
+
+class AppApiKeyRepository:
+    """Create, read and update ``AppApiKey`` rows through one SQLAlchemy session."""
+
+    def __init__(self, db: Session) -> None:
+        self.db = db
+
+    def create(
+        self,
+        *,
+        app_id: str,
+        name: str,
+        key_prefix: str,
+        key_hash: str,
+        scopes: str | None,
+        expires_time: datetime | None) -> AppApiKey:
+        """Insert a new key row with status ``"active"`` and return it refreshed."""
+        record = AppApiKey(
+            app_id=app_id,
+            name=name,
+            key_prefix=key_prefix,
+            key_hash=key_hash,
+            status="active",
+            scopes=scopes,
+            expires_time=expires_time)
+        self.db.add(record)
+        self.db.commit()
+        self.db.refresh(record)
+        return record
+
+    def list_by_app(self, *, app_id: str) -> list[AppApiKey]:
+        """Return the keys whose ``app_id`` matches, newest ``created_time`` first."""
+        query = select(AppApiKey).where(AppApiKey.app_id == app_id)
+        query = query.order_by(AppApiKey.created_time.desc())
+        return list(self.db.scalars(query))
+
+    def get_by_id(self, *, api_key_id: str) -> AppApiKey | None:
+        """Return the key with the given id, or ``None``."""
+        return self.db.scalar(
+            select(AppApiKey).where(AppApiKey.id == api_key_id).limit(1)
+        )
+
+    def get_active_by_hash(self, *, key_hash: str) -> AppApiKey | None:
+        """Return the key matching ``key_hash`` whose status is ``"active"``.
+
+        ``expires_time`` is not checked by this query.
+        """
+        query = select(AppApiKey).where(AppApiKey.key_hash == key_hash)
+        query = query.where(AppApiKey.status == "active").limit(1)
+        return self.db.scalar(query)
+
+    def touch_last_used_time(self, *, api_key_id: str) -> None:
+        """Stamp ``last_used_time`` with the current naive UTC time; no-op if missing."""
+        record = self.db.get(AppApiKey, api_key_id)
+        if record is not None:
+            # NOTE(review): datetime.utcnow() is deprecated since Python 3.12.
+            record.last_used_time = datetime.utcnow()
+            self.db.commit()
+
+    def update_status(self, *, api_key_id: str, status: str) -> AppApiKey | None:
+        """Set the key's status; returns ``None`` when the key does not exist."""
+        record = self.get_by_id(api_key_id=api_key_id)
+        if record is None:
+            return None
+        record.status = status
+        self.db.commit()
+        self.db.refresh(record)
+        return record
+
+
+class AppInvocationAuditRepository:
+    """Write and list ``AppInvocationAudit`` rows through one SQLAlchemy session."""
+
+    def __init__(self, db: Session) -> None:
+        self.db = db
+
+    def create(
+        self,
+        *,
+        app_id: str,
+        request_id: str,
+        target_type: str,
+        target_id: str,
+        invoke_type: str,
+        status: str,
+        duration_ms: int,
+        api_key_prefix: str | None = None,
+        session_id: str | None = None,
+        run_request_id: str | None = None,
+        error_code: str | None = None,
+        error_message: str | None = None,
+        client_metadata_json: str | None = None) -> AppInvocationAudit:
+        """Insert one audit row, commit, and return the refreshed entity."""
+        column_values = {
+            "app_id": app_id,
+            "api_key_prefix": api_key_prefix,
+            "request_id": request_id,
+            "session_id": session_id,
+            "run_request_id": run_request_id,
+            "target_type": target_type,
+            "target_id": target_id,
+            "invoke_type": invoke_type,
+            "status": status,
+            "duration_ms": duration_ms,
+            "error_code": error_code,
+            "error_message": error_message,
+            "client_metadata_json": client_metadata_json,
+        }
+        audit_row = AppInvocationAudit(**column_values)
+        self.db.add(audit_row)
+        self.db.commit()
+        self.db.refresh(audit_row)
+        return audit_row
+
+    def list_by_app(self, *, app_id: str, limit: int = 100) -> list[AppInvocationAudit]:
+        """Return up to ``limit`` audit rows for ``app_id``, newest first."""
+        query = select(AppInvocationAudit).where(AppInvocationAudit.app_id == app_id)
+        query = query.order_by(AppInvocationAudit.created_time.desc()).limit(limit)
+        return list(self.db.scalars(query))

+ 2 - 0
services/api-gateway/app/infrastructure/request_context.py

@@ -111,6 +111,8 @@ def authenticate_gateway_request(request: Request) -> Response | None:
         return None
     if request.url.path in {"/gateway/services/health"}:
         return None
+    if request.url.path.startswith("/gateway/openapi/"):
+        return None
     if is_auth_login_request(request):
         return None
 

+ 156 - 1
services/api-gateway/app/schemas/gateway.py

@@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Literal
 from pydantic import BaseModel
 
 if TYPE_CHECKING:
-    from app.db.models import ApiKey, GatewayRequestAudit
+    from app.db.models import ApiKey, AppApiKey, AppDefinition, AppInvocationAudit, GatewayRequestAudit
 
 
 class DownstreamServiceHealth(BaseModel):
@@ -99,3 +99,158 @@ class ApiKeyStatusUpdateRequest(BaseModel):
 
 class ApiKeyStatusPostRequest(ApiKeyStatusUpdateRequest):
     api_key_id: str
+
+
+# ── Application ──────────────────────────────────────────────────────────────
+
+AppStatus = Literal["draft", "published", "disabled"]
+AppTargetType = Literal["agent", "team"]
+
+
+class AppCreateRequest(BaseModel):
+    """Payload for creating an app definition.
+
+    ``code``, ``name``, ``target_type`` and ``target_id`` are required;
+    ``target_type`` must be one of the ``AppTargetType`` literals.
+    """
+
+    code: str
+    name: str
+    description: str | None = None
+    target_type: AppTargetType
+    target_id: str
+    owner_user_id: str | None = None
+    settings_json: str | None = None
+
+
+class AppListRequest(BaseModel):
+    """Payload for listing apps; it declares no fields."""
+
+    pass
+
+
+class AppDetailRequest(BaseModel):
+    """Payload identifying a single app by ``app_id``."""
+
+    app_id: str
+
+
+class AppUpdateRequest(BaseModel):
+    """Payload for updating an app; every field except ``app_id`` is optional.
+
+    Optional fields default to ``None``. AppDefinitionRepository.update treats
+    ``None`` as "leave unchanged".
+    """
+
+    app_id: str
+    name: str | None = None
+    description: str | None = None
+    target_type: AppTargetType | None = None
+    target_id: str | None = None
+    settings_json: str | None = None
+
+
+class AppStatusUpdateRequest(BaseModel):
+    """Payload for changing an app's status to one of the ``AppStatus`` literals."""
+
+    app_id: str
+    status: AppStatus
+
+
+class AppResponse(BaseModel):
+    """Serialized view of an ``AppDefinition`` entity."""
+
+    id: str
+    code: str
+    name: str
+    description: str | None = None
+    status: str
+    target_type: str
+    target_id: str
+    owner_user_id: str | None = None
+    settings_json: str | None = None
+    created_time: datetime
+    updated_time: datetime
+
+    @classmethod
+    def from_entity(cls, entity: "AppDefinition") -> "AppResponse":
+        """Build the response by reading same-named attributes from ``entity``."""
+        return cls.model_validate(entity, from_attributes=True)
+
+
+# ── App API Key ──────────────────────────────────────────────────────────────
+
+AppApiKeyStatus = Literal["active", "disabled", "revoked"]
+
+
+class AppApiKeyCreateRequest(BaseModel):
+    """Payload for creating an app API key; only ``name`` is required."""
+
+    name: str
+    scopes: str | None = None
+    expires_time: datetime | None = None
+
+
+class AppApiKeyCreateResponse(BaseModel):
+    """Response returned after creating an app API key.
+
+    Unlike ``AppApiKeyResponse`` it carries an ``api_key`` field.
+    """
+
+    id: str
+    app_id: str
+    name: str
+    key_prefix: str
+    # Presumably the plaintext key, shown only at creation time — confirm against the route.
+    api_key: str
+    status: str
+    scopes: str | None = None
+    expires_time: datetime | None = None
+    created_time: datetime
+
+
+class AppApiKeyListRequest(BaseModel):
+    """Payload for listing an app's API keys; it declares no fields."""
+
+    pass
+
+
+class AppApiKeyResponse(BaseModel):
+    """Serialized view of an ``AppApiKey`` entity; exposes the prefix but not the hash."""
+
+    id: str
+    app_id: str
+    name: str
+    key_prefix: str
+    status: str
+    scopes: str | None = None
+    expires_time: datetime | None = None
+    last_used_time: datetime | None = None
+    created_time: datetime
+
+    @classmethod
+    def from_entity(cls, entity: "AppApiKey") -> "AppApiKeyResponse":
+        """Build the response by reading same-named attributes from ``entity``."""
+        return cls.model_validate(entity, from_attributes=True)
+
+
+class AppApiKeyStatusUpdateRequest(BaseModel):
+    """Payload for changing an app API key's status to an ``AppApiKeyStatus`` literal."""
+
+    api_key_id: str
+    status: AppApiKeyStatus
+
+
+# ── App Invocation Audit ─────────────────────────────────────────────────────
+
+class AppAuditListRequest(BaseModel):
+    """Payload for listing invocation audit rows."""
+
+    # Defaults to 100; no lower or upper bound is validated here.
+    limit: int = 100
+
+
+class AppInvocationAuditResponse(BaseModel):
+    """Serialized view of an ``AppInvocationAudit`` entity."""
+
+    id: str
+    app_id: str
+    api_key_prefix: str | None = None
+    request_id: str
+    session_id: str | None = None
+    run_request_id: str | None = None
+    target_type: str
+    target_id: str
+    invoke_type: str
+    status: str
+    duration_ms: int
+    error_code: str | None = None
+    error_message: str | None = None
+    client_metadata_json: str | None = None
+    created_time: datetime
+
+    @classmethod
+    def from_entity(cls, entity: "AppInvocationAudit") -> "AppInvocationAuditResponse":
+        """Build the response by reading same-named attributes from ``entity``."""
+        return cls.model_validate(entity, from_attributes=True)
+
+
+# ── OpenAPI External Invocation ──────────────────────────────────────────────
+
+class OpenApiChatRequest(BaseModel):
+    """Chat invocation payload; only ``message`` is required."""
+
+    user_id: str | None = None
+    session_id: str | None = None
+    message: str
+    inputs: dict[str, object] | None = None
+    metadata: dict[str, object] | None = None
+
+
+class OpenApiChatResponse(BaseModel):
+    """Result of a chat invocation, including identifiers, status and optional output or error."""
+
+    request_id: str
+    app_code: str
+    session_id: str
+    run_request_id: str
+    target_type: str
+    target_id: str
+    status: str
+    output_text: str | None = None
+    output_json: dict[str, object] | None = None
+    error: str | None = None

+ 34 - 0
services/knowledge-service/alembic/versions/20260514_0003_resize_embedding_vector.py

@@ -0,0 +1,34 @@
+"""resize embedding vector from 32 to 1536 dimensions
+
+Revision ID: 20260514_0003_embedding
+Revises: 20260429_9001_remove_version_columns
+Create Date: 2026-05-14 00:00:00.000000
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+
+revision: str = "20260514_0003_embedding"
+down_revision: str | None = "20260429_9001_remove_version_columns"
+branch_labels: Sequence[str] | None = None
+depends_on: Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Change ``knowledge_chunk.embedding_vector`` to ``vector(1536)`` and recreate its HNSW cosine index."""
+    # NOTE(review): pgvector enforces the declared dimension, so this ALTER presumably
+    # fails if rows still hold 32-dimension vectors — confirm whether existing
+    # embeddings must be cleared or re-embedded before running this migration.
+    op.execute("ALTER TABLE knowledge_chunk ALTER COLUMN embedding_vector TYPE vector(1536)")
+    op.execute("DROP INDEX IF EXISTS ix_knowledge_chunk_embedding_vector")
+    op.execute(
+        "CREATE INDEX ix_knowledge_chunk_embedding_vector "
+        "ON knowledge_chunk USING hnsw (embedding_vector vector_cosine_ops)"
+    )
+
+
+def downgrade() -> None:
+    """Change ``knowledge_chunk.embedding_vector`` back to ``vector(32)`` and recreate its HNSW cosine index."""
+    # The index is dropped before the column type changes and rebuilt afterwards.
+    op.execute("DROP INDEX IF EXISTS ix_knowledge_chunk_embedding_vector")
+    # NOTE(review): presumably fails if rows hold 1536-dimension vectors — confirm.
+    op.execute("ALTER TABLE knowledge_chunk ALTER COLUMN embedding_vector TYPE vector(32)")
+    op.execute(
+        "CREATE INDEX ix_knowledge_chunk_embedding_vector "
+        "ON knowledge_chunk USING hnsw (embedding_vector vector_cosine_ops)"
+    )

+ 237 - 0
services/knowledge-service/app/application/chunking.py

@@ -0,0 +1,237 @@
+"""Structure-aware document chunking."""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+
+from core_shared import JSONValue
+
+from app.application.retrieval import tokenize
+
+# Heading line: one to six '#' characters, whitespace, then the captured title.
+_HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
+# Fenced code block between triple backticks; non-greedy and may span lines.
+_CODE_BLOCK_RE = re.compile(r"```[\s\S]*?```")
+# Whitespace that follows sentence-ending punctuation (ASCII and full-width forms).
+_SENTENCE_SPLIT_RE = re.compile(r"(?<=[.!?。!?])\s+")
+# Two or more consecutive newlines, used as the paragraph separator.
+_PARAGRAPH_SPLIT_RE = re.compile(r"\n{2,}")
+
+
+@dataclass(frozen=True)
+class ChunkPayload:
+    """Immutable description of one chunk produced by the chunkers in this module."""
+
+    # Position of the chunk within its document, starting at 0.
+    chunk_index: int
+    content_text: str
+    # Number of tokens returned by ``tokenize`` for ``content_text``.
+    token_count: int
+    # Chunker-specific metadata such as ``chunk_type``, ``heading_path`` or ``key_path``.
+    metadata_json: dict[str, JSONValue]
+
+
+def chunk_document(
+    *,
+    content_text: str,
+    source_type: str,
+    chunk_size: int,
+    chunk_overlap: int,
+    raw_content: str | None = None,
+) -> list[dict[str, JSONValue]]:
+    """Split a document into chunk dictionaries using a source-type-specific chunker.
+
+    Args:
+        content_text: Document text; used for JSON and plain-text chunking, and
+            for markdown when ``raw_content`` is not supplied.
+        source_type: Source kind, compared case-insensitively after stripping.
+            ``"markdown"``/``"md"`` and ``"json"`` have dedicated chunkers;
+            anything else is chunked as plain text.
+        chunk_size: Size threshold, in characters, handed to the chunker.
+        chunk_overlap: Number of trailing characters carried into the next
+            chunk. The JSON chunker does not take an overlap.
+        raw_content: Optional alternative text for markdown sources. When truthy
+            it is chunked instead of ``content_text``, since the markdown
+            chunker relies on heading and code-fence markup.
+
+    Returns:
+        One dict per chunk with the keys ``chunk_index``, ``content_text``,
+        ``token_count`` and ``metadata_json``.
+    """
+    normalized = source_type.strip().lower()
+    if normalized in {"markdown", "md"}:
+        # Previously ``raw_content`` was read without being defined anywhere,
+        # which raised NameError on every call; it is now an optional parameter.
+        text_for_chunking = raw_content or content_text
+        chunks = _chunk_markdown(text_for_chunking, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+    elif normalized == "json":
+        chunks = _chunk_json(content_text, chunk_size=chunk_size)
+    else:
+        chunks = _chunk_plain_text(content_text, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+    return [
+        {
+            "chunk_index": c.chunk_index,
+            "content_text": c.content_text,
+            "token_count": c.token_count,
+            "metadata_json": c.metadata_json,
+        }
+        for c in chunks
+    ]
+
+
+def _chunk_markdown(content: str, *, chunk_size: int, chunk_overlap: int) -> list[ChunkPayload]:
+    """Chunk markdown by heading section, splitting oversized sections into parts.
+
+    A section no longer than ``chunk_size`` characters becomes a single
+    ``heading_section`` chunk. A longer section is split into paragraph and
+    code-block parts, which are packed into a buffer until adding the next part
+    would exceed ``chunk_size``. Every chunk records its ``heading_path``.
+    """
+    sections = _split_markdown_by_headings(content)
+    chunks: list[ChunkPayload] = []
+    index = 0
+
+    for heading_path, section_text in sections:
+        section_text = section_text.strip()
+        if not section_text:
+            continue
+
+        if len(section_text) <= chunk_size:
+            chunks.append(_make_chunk(index, section_text, {"heading_path": heading_path, "chunk_type": "heading_section"}))
+            index += 1
+            continue
+
+        sub_parts = _split_markdown_section(section_text)
+        buffer = ""
+        for part_text, part_type in sub_parts:
+            # The "+ 1" accounts for the newline that joins the buffer and the part.
+            if len(buffer) + len(part_text) + 1 > chunk_size and buffer:
+                # NOTE(review): the flushed chunk is labelled with the type of the
+                # incoming part, not of the parts already in the buffer — confirm intent.
+                chunks.append(_make_chunk(index, buffer.strip(), {"heading_path": heading_path, "chunk_type": part_type}))
+                index += 1
+                # Seed the next buffer with the tail of the chunk just emitted.
+                overlap_text = buffer[-chunk_overlap:] if chunk_overlap > 0 else ""
+                buffer = overlap_text + "\n" + part_text
+            else:
+                buffer = buffer + "\n" + part_text if buffer else part_text
+        # A single part longer than chunk_size is not split further here.
+        if buffer.strip():
+            chunks.append(_make_chunk(index, buffer.strip(), {"heading_path": heading_path, "chunk_type": "paragraph"}))
+            index += 1
+
+    return chunks
+
+
+def _split_markdown_by_headings(content: str) -> list[tuple[list[str], str]]:
+    """Split markdown into ``(heading_path, section_text)`` tuples.
+
+    ``heading_path`` lists the titles of the active headings from the
+    shallowest to the deepest level; ``section_text`` is the text under the
+    heading with the heading line removed. Text before the first heading is
+    returned with an empty path, and sections with no text are omitted. When
+    the document has no headings, the whole content is returned as one section.
+
+    Lines starting with ``#`` inside a closed fenced code block (for example
+    shell or Python comments) are not treated as headings, so code blocks are
+    kept intact inside their section.
+    """
+    code_spans = [(fence.start(), fence.end()) for fence in _CODE_BLOCK_RE.finditer(content)]
+
+    # Each entry is (heading start, heading end, level, title).
+    positions: list[tuple[int, int, int, str]] = []
+    for match in _HEADING_RE.finditer(content):
+        start = match.start()
+        if any(span_start <= start < span_end for span_start, span_end in code_spans):
+            continue
+        positions.append((start, match.end(), len(match.group(1)), match.group(2).strip()))
+
+    if not positions:
+        return [([], content)]
+
+    sections: list[tuple[list[str], str]] = []
+    active_headings: dict[int, str] = {}
+
+    preamble = content[:positions[0][0]].strip()
+    if preamble:
+        sections.append(([], preamble))
+
+    for i, (_, body_start, level, title) in enumerate(positions):
+        active_headings[level] = title
+        # A new heading closes every deeper heading that was still open.
+        for deeper in [lvl for lvl in active_headings if lvl > level]:
+            del active_headings[deeper]
+        path = [active_headings[lvl] for lvl in sorted(active_headings)]
+        end = positions[i + 1][0] if i + 1 < len(positions) else len(content)
+        # Slicing from the end of the heading match drops the heading line itself.
+        section_text = content[body_start:end].strip()
+        if section_text:
+            sections.append((path, section_text))
+
+    return sections
+
+
+def _split_markdown_section(text: str) -> list[tuple[str, str]]:
+    """Break a markdown section into ordered ``(text, chunk_type)`` parts.
+
+    Fenced code blocks are emitted whole as ``"code_block"`` parts; the prose
+    between them is split on blank lines into ``"paragraph"`` parts.
+    """
+
+    def _paragraph_parts(fragment: str) -> list[tuple[str, str]]:
+        # Empty fragments and empty paragraphs produce no parts.
+        prose = fragment.strip()
+        if not prose:
+            return []
+        return [
+            (piece.strip(), "paragraph")
+            for piece in _PARAGRAPH_SPLIT_RE.split(prose)
+            if piece.strip()
+        ]
+
+    segments: list[tuple[str, str]] = []
+    cursor = 0
+    for fence in _CODE_BLOCK_RE.finditer(text):
+        if fence.start() > cursor:
+            segments.extend(_paragraph_parts(text[cursor:fence.start()]))
+        segments.append((fence.group(), "code_block"))
+        cursor = fence.end()
+
+    if cursor < len(text):
+        segments.extend(_paragraph_parts(text[cursor:]))
+
+    return segments
+
+
+def _chunk_plain_text(content: str, *, chunk_size: int, chunk_overlap: int) -> list[ChunkPayload]:
+    """Chunk plain text by packing paragraphs, splitting oversized ones by sentence.
+
+    Paragraphs (separated by blank lines) are accumulated in a buffer until the
+    next one would push it past ``chunk_size`` characters. A paragraph that is
+    itself longer than ``chunk_size`` is split into sentences, which are packed
+    the same way. Returns an empty list when the content has no paragraphs.
+    """
+    paragraphs = _PARAGRAPH_SPLIT_RE.split(content.strip())
+    paragraphs = [p.strip() for p in paragraphs if p.strip()]
+    if not paragraphs:
+        return []
+
+    chunks: list[ChunkPayload] = []
+    buffer = ""
+    index = 0
+
+    for para in paragraphs:
+        if len(para) > chunk_size:
+            # Flush pending paragraphs first so the long paragraph starts a fresh chunk.
+            if buffer:
+                chunks.append(_make_chunk(index, buffer.strip(), {"chunk_type": "paragraph"}))
+                index += 1
+                buffer = ""
+            sentences = _split_sentences(para)
+            sent_buffer = ""
+            for sentence in sentences:
+                if len(sent_buffer) + len(sentence) + 1 > chunk_size and sent_buffer:
+                    chunks.append(_make_chunk(index, sent_buffer.strip(), {"chunk_type": "sentence"}))
+                    index += 1
+                    overlap_text = sent_buffer[-chunk_overlap:] if chunk_overlap > 0 else ""
+                    sent_buffer = overlap_text + " " + sentence
+                else:
+                    sent_buffer = sent_buffer + " " + sentence if sent_buffer else sentence
+            # The leftover sentences become the paragraph buffer and are later
+            # emitted with chunk_type "paragraph".
+            if sent_buffer.strip():
+                buffer = sent_buffer.strip()
+        # The "+ 2" accounts for the blank-line separator between paragraphs.
+        elif len(buffer) + len(para) + 2 > chunk_size and buffer:
+            chunks.append(_make_chunk(index, buffer.strip(), {"chunk_type": "paragraph"}))
+            index += 1
+            overlap_text = buffer[-chunk_overlap:] if chunk_overlap > 0 else ""
+            buffer = overlap_text + "\n\n" + para
+        else:
+            buffer = buffer + "\n\n" + para if buffer else para
+
+    if buffer.strip():
+        chunks.append(_make_chunk(index, buffer.strip(), {"chunk_type": "paragraph"}))
+
+    return chunks
+
+
+def _chunk_json(content: str, *, chunk_size: int) -> list[ChunkPayload]:
+    """Chunk JSON text by top-level structure.
+
+    A top-level object is rendered as ``key: value`` lines that are packed into
+    chunks of at most ``chunk_size`` characters where possible; each chunk
+    records the keys it contains under ``key_path``. A top-level array is
+    rendered one item per line and chunked as plain text. Invalid JSON and
+    scalar documents are chunked as plain text without overlap.
+    """
+    import json as json_lib
+    try:
+        data = json_lib.loads(content)
+    except json_lib.JSONDecodeError:
+        return _chunk_plain_text(content, chunk_size=chunk_size, chunk_overlap=0)
+
+    if isinstance(data, dict):
+        parts: list[tuple[str, str]] = []
+        for key, value in data.items():
+            # String values are written as-is; everything else is re-serialized as JSON.
+            line = f"{key}: {json_lib.dumps(value, ensure_ascii=False) if not isinstance(value, str) else value}"
+            parts.append((key, line))
+        chunks: list[ChunkPayload] = []
+        buffer = ""
+        buffer_keys: list[str] = []
+        index = 0
+        for key, line in parts:
+            if len(buffer) + len(line) + 1 > chunk_size and buffer:
+                chunks.append(_make_chunk(index, buffer.strip(), {"chunk_type": "json_keys", "key_path": buffer_keys}))
+                index += 1
+                buffer = line
+                # Rebind instead of clearing: the emitted chunk keeps its own key list.
+                buffer_keys = [key]
+            else:
+                buffer = buffer + "\n" + line if buffer else line
+                buffer_keys.append(key)
+        if buffer.strip():
+            chunks.append(_make_chunk(index, buffer.strip(), {"chunk_type": "json_keys", "key_path": buffer_keys}))
+        return chunks
+
+    if isinstance(data, list):
+        items_text = "\n".join(json_lib.dumps(item, ensure_ascii=False) for item in data)
+        return _chunk_plain_text(items_text, chunk_size=chunk_size, chunk_overlap=0)
+
+    return _chunk_plain_text(content, chunk_size=chunk_size, chunk_overlap=0)
+
+
+def _split_sentences(text: str) -> list[str]:
+    """Return the non-empty, stripped sentences of ``text`` in order."""
+    sentences: list[str] = []
+    for fragment in _SENTENCE_SPLIT_RE.split(text):
+        cleaned = fragment.strip()
+        if cleaned:
+            sentences.append(cleaned)
+    return sentences
+
+
+def _make_chunk(index: int, text: str, metadata: dict[str, JSONValue]) -> ChunkPayload:
+    """Build a ``ChunkPayload`` whose token count comes from ``tokenize(text)``."""
+    tokens = tokenize(text)
+    return ChunkPayload(
+        chunk_index=index,
+        content_text=text,
+        token_count=len(tokens),
+        metadata_json=metadata,
+    )

+ 1 - 5
services/knowledge-service/app/application/document_parsers.py

@@ -113,13 +113,9 @@ def normalize_source_type(*, source_type: str, source_uri: str | None = None) ->
 
 
 def parse_markdown(content: str) -> str:
-    text = re.sub(r"```[\s\S]*?```", " ", content)
-    text = re.sub(r"`([^`]+)`", r"\1", text)
+    text = re.sub(r"`([^`]+)`", r"\1", content)
     text = re.sub(r"!\[[^\]]*\]\([^)]+\)", " ", text)
     text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
-    text = re.sub(r"^\s{0,3}#{1,6}\s*", "", text, flags=re.MULTILINE)
-    text = re.sub(r"^\s{0,3}>\s?", "", text, flags=re.MULTILINE)
-    text = re.sub(r"^\s*[-*+]\s+", "", text, flags=re.MULTILINE)
     return normalize_text(text)
 
 

+ 93 - 4
services/knowledge-service/app/application/embeddings.py

@@ -1,3 +1,4 @@
+import logging
 from dataclasses import dataclass
 
 import httpx
@@ -5,6 +6,8 @@ import httpx
 from app.application.retrieval import build_hash_embedding
 from app.bootstrap.settings import KnowledgeServiceSettings
 
+logger = logging.getLogger(__name__)
+
 
 class EmbeddingProviderError(Exception):
     pass
@@ -22,14 +25,36 @@ class EmbeddingService:
         self.settings = settings
 
     def embed_text(self, text: str) -> EmbeddingResult:
+        """Embed one text with the configured provider.
+
+        The ``model_gateway`` and ``http`` providers call their remote backend.
+        If that raises ``EmbeddingProviderError`` and
+        ``embedding_fallback_to_local`` is enabled, a warning is logged and the
+        local-hash embedding is returned; otherwise the error propagates. Any
+        other provider value uses the local-hash embedding directly.
+        """
-        if self.settings.embedding_provider == "http":
+        provider = self.settings.embedding_provider
+        if provider == "model_gateway":
+            try:
+                return self._embed_with_model_gateway(text)
+            except EmbeddingProviderError:
+                if not self.settings.embedding_fallback_to_local:
+                    raise
+                logger.warning("model_gateway embedding failed, falling back to local-hash")
+        elif provider == "http":
             try:
                 return self._embed_with_http(text)
             except EmbeddingProviderError:
                 if not self.settings.embedding_fallback_to_local:
                     raise
+                logger.warning("http embedding failed, falling back to local-hash")
         return self._embed_with_local_hash(text)
 
+    def embed_texts(self, texts: list[str]) -> list[EmbeddingResult]:
+        """Embed several texts with the configured provider, preserving order.
+
+        Provider selection mirrors ``embed_text`` so that batch and single
+        embeddings come from the same backend: ``model_gateway`` sends one
+        batched request, ``http`` embeds each text through the HTTP provider,
+        and any other value uses the local-hash embedding.
+
+        If the remote provider raises ``EmbeddingProviderError`` and
+        ``embedding_fallback_to_local`` is enabled, a warning is logged and the
+        whole batch is embedded with local-hash, so one batch never mixes
+        providers.
+
+        Returns:
+            One ``EmbeddingResult`` per input text; an empty list for empty input.
+
+        Raises:
+            EmbeddingProviderError: The remote provider failed and fallback is disabled.
+        """
+        if not texts:
+            return []
+        provider = self.settings.embedding_provider
+        if provider == "model_gateway":
+            try:
+                return self._embed_batch_with_model_gateway(texts)
+            except EmbeddingProviderError:
+                if not self.settings.embedding_fallback_to_local:
+                    raise
+                logger.warning("model_gateway batch embedding failed, falling back to local-hash")
+        elif provider == "http":
+            # Previously the http provider was skipped here and silently replaced
+            # by local-hash, unlike embed_text.
+            try:
+                return [self._embed_with_http(t) for t in texts]
+            except EmbeddingProviderError:
+                if not self.settings.embedding_fallback_to_local:
+                    raise
+                logger.warning("http batch embedding failed, falling back to local-hash")
+        return [self._embed_with_local_hash(t) for t in texts]
+
     def _embed_with_local_hash(self, text: str) -> EmbeddingResult:
         return EmbeddingResult(
             embedding=build_hash_embedding(
@@ -38,6 +63,51 @@ class EmbeddingService:
             model=self.settings.embedding_model,
             provider="local-hash")
 
+    def _embed_with_model_gateway(self, text: str) -> EmbeddingResult:
+        """Request one embedding from the model gateway's ``/models/embeddings`` endpoint.
+
+        Raises:
+            EmbeddingProviderError: The request failed, the body was not valid
+                JSON, or the response held no usable ``data[0].embedding``.
+        """
+        settings = self.settings
+        endpoint = f"{settings.model_gateway_service_url.rstrip('/')}/models/embeddings"
+        try:
+            with httpx.Client(timeout=settings.model_gateway_timeout_seconds) as client:
+                request_body = {
+                    "model": settings.embedding_model,
+                    "input": text,
+                    # A falsy dimension setting is sent as null.
+                    "dimensions": settings.embedding_dimensions or None,
+                }
+                response = client.post(endpoint, json=request_body)
+                response.raise_for_status()
+                payload = response.json()
+        except (httpx.HTTPError, ValueError) as exc:
+            raise EmbeddingProviderError(f"model_gateway embedding failed: {exc}") from exc
+
+        vector = _read_openai_embedding(payload)
+        if vector is None:
+            raise EmbeddingProviderError("model_gateway response missing data[0].embedding")
+        return EmbeddingResult(
+            embedding=vector,
+            model=settings.embedding_model,
+            provider="model_gateway")
+
+    def _embed_batch_with_model_gateway(self, texts: list[str]) -> list[EmbeddingResult]:
+        """Request embeddings for all ``texts`` in one model-gateway call.
+
+        Raises:
+            EmbeddingProviderError: The request failed, the body was not valid
+                JSON, or the number of usable embeddings differs from the
+                number of input texts.
+        """
+        settings = self.settings
+        endpoint = f"{settings.model_gateway_service_url.rstrip('/')}/models/embeddings"
+        try:
+            with httpx.Client(timeout=settings.model_gateway_timeout_seconds) as client:
+                request_body = {
+                    "model": settings.embedding_model,
+                    "input": texts,
+                    # A falsy dimension setting is sent as null.
+                    "dimensions": settings.embedding_dimensions or None,
+                }
+                response = client.post(endpoint, json=request_body)
+                response.raise_for_status()
+                payload = response.json()
+        except (httpx.HTTPError, ValueError) as exc:
+            raise EmbeddingProviderError(f"model_gateway batch embedding failed: {exc}") from exc
+
+        vectors = _read_openai_embedding_batch(payload)
+        if len(vectors) != len(texts):
+            raise EmbeddingProviderError(
+                f"model_gateway returned {len(vectors)} embeddings, expected {len(texts)}")
+        results: list[EmbeddingResult] = []
+        for vector in vectors:
+            results.append(
+                EmbeddingResult(embedding=vector, model=settings.embedding_model, provider="model_gateway")
+            )
+        return results
+
     def _embed_with_http(self, text: str) -> EmbeddingResult:
         if not self.settings.embedding_base_url:
             raise EmbeddingProviderError("embedding_base_url is required for http provider")
@@ -75,11 +145,30 @@ def _read_openai_embedding(payload: object) -> list[float] | None:
     first_item = data[0]
     if not isinstance(first_item, dict):
         return None
-    embedding = first_item.get("embedding")
-    if not isinstance(embedding, list):
+    return _extract_embedding_list(first_item.get("embedding"))
+
+
+def _read_openai_embedding_batch(payload: object) -> list[list[float]]:
+    """Extract the embeddings from a ``{"data": [...]}`` payload, ordered by ``index``.
+
+    Returns an empty list when the payload is not a dict or ``data`` is not a
+    list. Entries that are not dicts, or whose embedding is not a valid list
+    of numbers, are dropped from the result.
+    """
+    data = payload.get("data") if isinstance(payload, dict) else None
+    if not isinstance(data, list):
+        return []
+
+    def _position(entry: object) -> object:
+        # Entries with no "index" key, and non-dict entries, sort as index 0.
+        return entry.get("index", 0) if isinstance(entry, dict) else 0
+
+    candidates = (
+        _extract_embedding_list(entry.get("embedding"))
+        for entry in sorted(data, key=_position)
+        if isinstance(entry, dict)
+    )
+    return [vector for vector in candidates if vector is not None]
+
+
+def _extract_embedding_list(raw: object) -> list[float] | None:
+    if not isinstance(raw, list):
         return None
     values: list[float] = []
-    for item in embedding:
+    for item in raw:
         if not isinstance(item, (int, float)) or isinstance(item, bool):
             return None
         values.append(float(item))

+ 56 - 3
services/knowledge-service/app/application/retrieval.py

@@ -7,6 +7,9 @@ from core_shared import JSONValue
 
 TOKEN_PATTERN = re.compile(r"[\w\u4e00-\u9fff]+", re.UNICODE)
 
+_K1 = 1.5
+_B = 0.75
+
 
 def split_text(text: str, *, chunk_size: int, chunk_overlap: int) -> list[str]:
     normalized_text = text.strip()
@@ -55,15 +58,65 @@ def cosine_similarity(left: list[float] | None, right: list[float] | None) -> fl
 
 
 def keyword_score(query: str, text: str) -> float:
+    """Score ``text`` against ``query`` with BM25 when no corpus stats exist.
+
+    The text is treated as a one-document collection: the average document
+    length is the text's own token count and every token it contains has a
+    document frequency of 1. Returns 0.0 when the query or the text yields
+    no tokens.
+    """
+    if not tokenize(query):
+        return 0.0
+    tokens = tokenize(text)
+    if not tokens:
+        return 0.0
+    # One entry per distinct token, each seen in exactly one document.
+    single_doc_df: dict[str, int] = dict.fromkeys(tokens, 1)
+    return bm25_score(
+        query,
+        text,
+        avg_doc_length=float(len(tokens)) or 1.0,
+        doc_count=1,
+        df=single_doc_df)
+
+
+def bm25_score(
+    query: str,
+    text: str,
+    *,
+    avg_doc_length: float,
+    doc_count: int,
+    df: dict[str, int],
+) -> float:
+    """Score ``text`` against ``query`` with BM25 (k1=1.5, b=0.75).
+
+    Args:
+        query: Search query; split into tokens with ``tokenize``.
+        text: Document text; split into tokens with ``tokenize``.
+        avg_doc_length: Average document length of the collection; values
+            below 1.0 are treated as 1.0 in the length normalization.
+        doc_count: Number of documents in the collection.
+        df: Per-token document frequency; tokens missing from the map count
+            as 0.
+
+    Returns:
+        The sum of the per-token scores over the distinct query tokens that
+        occur in ``text``, never below 0.0. Returns 0.0 when either the query
+        or the text has no tokens.
+    """
     query_tokens = tokenize(query)
     if not query_tokens:
         return 0.0
     text_counts = Counter(tokenize(text))
+    # Document length is the total number of token occurrences in the text.
+    doc_length = sum(text_counts.values())
     if not text_counts:
         return 0.0
-    matched = sum(1 for token in query_tokens if token in text_counts)
-    frequency = sum(text_counts.get(token, 0) for token in query_tokens)
-    return matched / len(set(query_tokens)) + min(frequency / 20.0, 1.0)
+    score = 0.0
+    # Each distinct query token contributes once, however often it is repeated in the query.
+    for token in set(query_tokens):
+        tf = text_counts.get(token, 0)
+        if tf == 0:
+            continue
+        idf_numerator = doc_count - df.get(token, 0) + 0.5
+        idf_denominator = df.get(token, 0) + 0.5
+        if idf_denominator <= 0:
+            continue
+        # The +1.0 inside the log keeps the idf term positive while df does not exceed doc_count.
+        idf = math.log((idf_numerator / idf_denominator) + 1.0)
+        tf_norm = (tf * (_K1 + 1)) / (tf + _K1 * (1 - _B + _B * doc_length / max(avg_doc_length, 1.0)))
+        score += idf * tf_norm
+    return max(score, 0.0)
+
+
+def compute_bm25_stats(chunk_texts: list[str]) -> tuple[float, int, dict[str, int]]:
+    """Compute the collection statistics consumed by ``bm25_score``.
+
+    Args:
+        chunk_texts: The texts that make up the collection.
+
+    Returns:
+        A tuple ``(avg_doc_length, doc_count, df)`` where ``avg_doc_length``
+        is the mean number of token occurrences per text, ``doc_count`` is the
+        number of texts, and ``df`` maps each token to the number of texts
+        that contain it. An empty collection yields ``(0.0, 0, {})``.
+    """
+    if not chunk_texts:
+        return 0.0, 0, {}
+    total_length = 0
+    df: dict[str, int] = {}
+    for text in chunk_texts:
+        tokens = tokenize(text)
+        # Length counts every occurrence, the same measure bm25_score uses for
+        # a single document (sum of term counts); counting distinct tokens
+        # here would understate the average and skew length normalization.
+        total_length += len(tokens)
+        # Document frequency counts each distinct token once per text.
+        for token in set(tokens):
+            df[token] = df.get(token, 0) + 1
+    avg_doc_length = total_length / len(chunk_texts)
+    return avg_doc_length, len(chunk_texts), df
 
 
 def rerank_score(*, query: str, chunk_text: str, document_title: str | None = None) -> float:

+ 28 - 10
services/knowledge-service/app/application/services.py

@@ -18,9 +18,12 @@ from app.application.document_parsers import (
     normalize_source_type,
     parse_document_content,
     read_document_content_bytes)
+from app.application.chunking import chunk_document
 from app.application.embeddings import EmbeddingService
 from app.application.retrieval import (
+    bm25_score,
     build_chunk_payloads,
+    compute_bm25_stats,
     cosine_similarity,
     keyword_score,
     rerank_score,
@@ -774,6 +777,15 @@ class KnowledgeApplicationService:
                 knowledge_base_id=payload.knowledge_base_id)
             vector_scores_by_chunk_id = {}
             retrieval_mode = "hybrid"
+        # Resolve per-base retrieval weights with global fallback
+        kb = self.base_repository.get_by_id(knowledge_base_id=payload.knowledge_base_id)
+        retrieval_config = (kb.metadata_json or {}).get("retrieval_config", {}) if kb else {}
+        keyword_weight = float(retrieval_config.get("keyword_weight", self.settings.retrieval_keyword_weight))
+        vector_weight = float(retrieval_config.get("vector_weight", self.settings.retrieval_vector_weight))
+        rerank_weight = float(retrieval_config.get("rerank_weight", self.settings.retrieval_rerank_weight))
+        # Pre-compute BM25 collection stats from candidate chunks
+        chunk_texts = [chunk.content_text for chunk in chunks]
+        avg_doc_length, doc_count, df_map = compute_bm25_stats(chunk_texts)
         scored: list[tuple[KnowledgeChunk, KnowledgeDocument, float, dict[str, JSONValue]]] = []
         for chunk in chunks:
             document = document_cache.get(chunk.document_id)
@@ -785,7 +797,9 @@ class KnowledgeApplicationService:
                 document_cache[chunk.document_id] = document
             if not self._matches_filters(document=document, filters_json=payload.filters_json):
                 continue
-            keyword = keyword_score(payload.query, chunk.content_text)
+            keyword = bm25_score(
+                payload.query, chunk.content_text,
+                avg_doc_length=avg_doc_length, doc_count=doc_count, df=df_map)
             vector = vector_scores_by_chunk_id.get(chunk.id)
             if vector is None:
                 vector = cosine_similarity(query_embedding_result.embedding, chunk.embedding_json)
@@ -798,9 +812,9 @@ class KnowledgeApplicationService:
                 else 0.0
             )
             score = round(
-                keyword * self.settings.retrieval_keyword_weight
-                + vector * self.settings.retrieval_vector_weight
-                + rerank * self.settings.retrieval_rerank_weight,
+                keyword * keyword_weight
+                + vector * vector_weight
+                + rerank * rerank_weight,
                 6)
             scored.append(
                 (
@@ -816,9 +830,9 @@ class KnowledgeApplicationService:
                         "rerank_enabled": self.settings.retrieval_rerank_enabled,
                         "candidate_limit": candidate_limit,
                         "weights": {
-                            "keyword": self.settings.retrieval_keyword_weight,
-                            "vector": self.settings.retrieval_vector_weight,
-                            "rerank": self.settings.retrieval_rerank_weight,
+                            "keyword": keyword_weight,
+                            "vector": vector_weight,
+                            "rerank": rerank_weight,
                         },
                         "embedding_provider": query_embedding_result.provider,
                         "embedding_model": query_embedding_result.model,
@@ -841,10 +855,14 @@ class KnowledgeApplicationService:
         content_text: str,
         chunk_size: int | None,
         chunk_overlap: int | None) -> list[KnowledgeChunk]:
-        chunk_payloads = build_chunk_payloads(
+        source_type = document.source_type or "text"
+        resolved_size = chunk_size or self.settings.default_chunk_size
+        resolved_overlap = chunk_overlap or self.settings.default_chunk_overlap
+        chunk_payloads = chunk_document(
             content_text=content_text,
-            chunk_size=chunk_size or self.settings.default_chunk_size,
-            chunk_overlap=chunk_overlap or self.settings.default_chunk_overlap)
+            source_type=source_type,
+            chunk_size=resolved_size,
+            chunk_overlap=resolved_overlap)
         for chunk_payload in chunk_payloads:
             content_text = self._read_chunk_content(chunk_payload)
             embedding_result = self.embedding_service.embed_text(content_text)

+ 5 - 3
services/knowledge-service/app/bootstrap/settings.py

@@ -6,9 +6,11 @@ class KnowledgeServiceSettings(ServiceSettings):
     service_port: int = 8012
     default_chunk_size: int = 800
     default_chunk_overlap: int = 120
-    embedding_dimensions: int = 32
-    embedding_model: str = "local-hash-v1"
-    embedding_provider: str = "local"
+    embedding_dimensions: int = 1536
+    embedding_model: str = "text-embedding-3-small"
+    embedding_provider: str = "model_gateway"
+    model_gateway_service_url: str = "http://127.0.0.1:8005"
+    model_gateway_timeout_seconds: float = 30.0
     embedding_base_url: str | None = None
     embedding_api_key: str | None = None
     embedding_timeout_seconds: float = 30.0

+ 3 - 1
services/knowledge-service/app/db/models/knowledge_chunk.py

@@ -6,6 +6,8 @@ from sqlalchemy.orm import Mapped, mapped_column
 from sqlalchemy.sql.expression import ColumnElement
 from sqlalchemy.types import UserDefinedType
 
+EMBEDDING_DIMENSIONS = 1536
+
 
 class PgVector(UserDefinedType[str]):
     cache_ok = True
@@ -30,5 +32,5 @@ class KnowledgeChunk(EntityMixin, AuditMixin, Base):
     token_count: Mapped[int] = mapped_column(Integer, default=0)
     embedding_model: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True)
     embedding_json: Mapped[list[float] | None] = mapped_column(JSON, nullable=True)
-    embedding_vector: Mapped[str | None] = mapped_column(PgVector(32), nullable=True)
+    embedding_vector: Mapped[str | None] = mapped_column(PgVector(EMBEDDING_DIMENSIONS), nullable=True)
     metadata_json: Mapped[dict[str, JSONValue] | None] = mapped_column(JSON, nullable=True)

+ 12 - 0
services/model-gateway-service/app/api/routes.py

@@ -5,6 +5,8 @@ from typing import Annotated, TypeVar
 from core_domain import (
     ChatCompletionRequestContract,
     ChatCompletionResponseContract,
+    EmbeddingRequestContract,
+    EmbeddingResponseContract,
     ServiceHealth,
 )
 from fastapi import APIRouter, Depends, HTTPException
@@ -196,6 +198,16 @@ def stream_chat_completion(
         })
 
 
+@router.post("/embeddings", response_model=EmbeddingResponseContract)
+def create_embedding(
+    payload: EmbeddingRequestContract,
+    service: ModelServiceDep) -> EmbeddingResponseContract:
+    # Delegate to the application service; a provider client failure is
+    # reported to the caller as 502 with the error text as detail.
+    try:
+        response = service.create_embedding(payload)
+    except ModelProviderClientError as exc:
+        raise HTTPException(status_code=502, detail=str(exc)) from exc
+    return response
+
+
 @router.post("/list", response_model=ApiResponse[PageResult[ModelDto]])
 def list_models_contract(
     payload: PageRequest,

+ 33 - 1
services/model-gateway-service/app/application/services.py

@@ -1,4 +1,9 @@
-from core_domain import ChatCompletionRequestContract, ChatCompletionResponseContract
+from core_domain import (
+    ChatCompletionRequestContract,
+    ChatCompletionResponseContract,
+    EmbeddingRequestContract,
+    EmbeddingResponseContract,
+)
 from collections.abc import Iterator
 
 from app.bootstrap.settings import ModelGatewayServiceSettings
@@ -210,6 +215,33 @@ class ModelGatewayApplicationService:
             resolved_payload,
             provider_type=self.settings.provider_type)
 
+    def create_embedding(
+        self,
+        payload: EmbeddingRequestContract) -> EmbeddingResponseContract:
+        """Create embeddings, routing through a registered model when one matches.
+
+        When ``payload.model`` is set and the model repository returns an
+        active model for it, the request is sent with that model's name,
+        provider settings and timeout. Otherwise the request falls back to
+        the service-level provider type, using ``payload.model`` or the
+        configured default model.
+        """
+        registered_model = (
+            self.model_repository.get_active_for_request(payload.model)
+            if payload.model
+            else None
+        )
+
+        if registered_model is None:
+            fallback_name = payload.model or self.settings.default_model
+            return self.provider_client.create_embedding(
+                payload.model_copy(update={"model": fallback_name}),
+                provider_type=self.settings.provider_type)
+
+        provider = self._resolve_model_provider(registered_model)
+        routed_payload = payload.model_copy(
+            update={"model": registered_model.model_name}
+        )
+        return self.provider_client.create_embedding(
+            routed_payload,
+            provider_type=provider.provider_type,
+            provider_base_url=provider.provider_base_url,
+            provider_api_key=provider.provider_api_key,
+            timeout_seconds=registered_model.timeout_seconds,
+        )
+
     def stream_chat_completion(
         self,
         payload: ChatCompletionRequestContract) -> Iterator[str]:

+ 83 - 1
services/model-gateway-service/app/infrastructure/provider.py

@@ -2,7 +2,13 @@ import json
 from collections.abc import Iterator
 
 import httpx
-from core_domain import ChatCompletionRequestContract, ChatCompletionResponseContract
+from core_domain import (
+    ChatCompletionRequestContract,
+    ChatCompletionResponseContract,
+    EmbeddingDataItem,
+    EmbeddingRequestContract,
+    EmbeddingResponseContract,
+)
 from core_shared import JSONValue
 
 from app.bootstrap.settings import ModelGatewayServiceSettings
@@ -69,6 +75,63 @@ class ModelProviderClient:
             provider_api_key=provider_api_key,
             timeout_seconds=timeout_seconds)
 
+    def create_embedding(
+        self,
+        payload: EmbeddingRequestContract,
+        *,
+        provider_type: str | None = None,
+        provider_base_url: str | None = None,
+        provider_api_key: str | None = None,
+        timeout_seconds: float = 60.0,
+    ) -> EmbeddingResponseContract:
+        """Create embeddings through the OpenAI-compatible embeddings endpoint.
+
+        Args:
+            payload: The embedding request to forward.
+            provider_type: Accepted so callers can pass it, but not consulted;
+                every request is sent in the OpenAI-compatible format.
+            provider_base_url: Overrides the configured provider base URL.
+            provider_api_key: Overrides the configured provider API key.
+            timeout_seconds: HTTP timeout for the provider call.
+
+        Raises:
+            ModelProviderClientError: When the provider request fails.
+        """
+        return self._create_openai_compatible_embedding(
+            payload,
+            provider_base_url=provider_base_url,
+            provider_api_key=provider_api_key,
+            timeout_seconds=timeout_seconds)
+
+    def _create_openai_compatible_embedding(
+        self,
+        payload: EmbeddingRequestContract,
+        *,
+        provider_base_url: str | None,
+        provider_api_key: str | None,
+        timeout_seconds: float) -> EmbeddingResponseContract:
+        """POST the request to ``<base_url>/embeddings`` and parse the reply.
+
+        ``dimensions`` is only sent when the payload sets it. The bearer token
+        is ``provider_api_key`` when given (even if empty), otherwise the
+        configured key; no authorization header is sent for an empty key.
+
+        Raises:
+            ModelProviderClientError: On a non-success HTTP status, any other
+                httpx transport error, or a response body that is not JSON.
+        """
+        request_payload: dict[str, JSONValue] = {
+            "model": payload.model or "",
+            "input": payload.input,
+        }
+        if payload.dimensions is not None:
+            request_payload["dimensions"] = payload.dimensions
+
+        request_headers: dict[str, str] = {"content-type": "application/json"}
+        api_key = (
+            provider_api_key
+            if provider_api_key is not None
+            else self.settings.provider_api_key
+        )
+        if api_key:
+            request_headers["authorization"] = f"Bearer {api_key}"
+
+        try:
+            base_url = provider_base_url or self.settings.provider_base_url
+            with httpx.Client(timeout=timeout_seconds) as client:
+                response = client.post(
+                    _join_url(base_url, "embeddings"),
+                    json=request_payload,
+                    headers=request_headers)
+                response.raise_for_status()
+        except httpx.HTTPStatusError as exc:
+            detail = exc.response.text[:1000]
+            raise ModelProviderClientError(
+                f"embedding request failed: {exc.response.status_code} {detail}") from exc
+        except httpx.HTTPError as exc:
+            raise ModelProviderClientError(f"embedding request failed: {exc}") from exc
+
+        # A 2xx reply with a non-JSON body raises ValueError from the decoder;
+        # surface it as a provider error so callers handle it like any other
+        # upstream failure instead of an unexpected exception.
+        try:
+            decoded = response.json()
+        except ValueError as exc:
+            raise ModelProviderClientError(
+                f"embedding response is not valid JSON: {exc}") from exc
+
+        return _parse_embedding_response(_coerce_json_dict(decoded))
+
     def list_models(
         self,
         *,
@@ -579,3 +642,22 @@ def _extract_usage_json(payload: dict[str, JSONValue]) -> dict[str, JSONValue]:
     if isinstance(usage, dict):
         return {str(key): value for key, value in usage.items()}
     return {}
+
+
+def _parse_embedding_response(payload: dict[str, JSONValue]) -> EmbeddingResponseContract:
+    """Build an ``EmbeddingResponseContract`` from a provider response dict.
+
+    ``usage`` is copied with string keys when it is a dict, otherwise it is
+    empty. Each entry of ``data`` becomes one item; entries that are not
+    dicts, have no ``embedding`` list, or whose list holds a non-numeric
+    element are skipped whole. An entry without an integer ``index`` takes
+    its position in ``data``.
+    """
+    model = _read_string(payload, "model")
+    usage = payload.get("usage")
+    usage_json = {str(k): v for k, v in usage.items()} if isinstance(usage, dict) else {}
+    data_items: list[EmbeddingDataItem] = []
+    data = payload.get("data")
+    if isinstance(data, list):
+        for idx, item in enumerate(data):
+            if not isinstance(item, dict):
+                continue
+            embedding_raw = item.get("embedding")
+            if not isinstance(embedding_raw, list):
+                continue
+            # Filtering out bad elements would return a shorter vector of the
+            # wrong dimension; reject the whole entry instead.
+            if not all(
+                isinstance(v, (int, float)) and not isinstance(v, bool)
+                for v in embedding_raw
+            ):
+                continue
+            embedding = [float(v) for v in embedding_raw]
+            index = item.get("index")
+            # bool is a subclass of int, so exclude it explicitly.
+            has_index = isinstance(index, int) and not isinstance(index, bool)
+            data_items.append(EmbeddingDataItem(embedding=embedding, index=index if has_index else idx))
+    return EmbeddingResponseContract(model=model, data=data_items, usage_json=usage_json)

+ 313 - 87
services/team-service/app/application/services.py

@@ -139,9 +139,6 @@ class TeamApplicationService:
                 member_refs=self._normalize_member_refs(payload.memberRefs),
                 policy_json=payload.policy))
 
-    def list_team_configs(self, *, team_id: str) -> list[TeamConfig]:
-        return self.team_config_repository.list_by_team(team_id=team_id)
-
     def list_team_configs(self, *, team_id: str | None = None) -> list[TeamConfig]:
         if team_id is not None:
             return self.team_config_repository.list_by_team(team_id=team_id)
@@ -330,7 +327,8 @@ class TeamApplicationService:
                 self._member_result_to_json(item) for item in member_results
             ],
         }
-        if failed_results:
+        failure_mode = self._read_failure_mode(team_config)
+        if failed_results and failure_mode != "continue_with_warning":
             failed_run = self.team_run_repository.update_status(
                 team_run_id=team_run.id,
                 status="failed",
@@ -416,65 +414,16 @@ class TeamApplicationService:
             return
 
         stream_members = self._select_stream_members(team_config=team_config, members=members)
-        member_results: list[TeamMemberRunResult] = []
-        prior_outputs: list[dict[str, JSONValue]] = []
-        for member in stream_members:
-            member_input_json = self._build_member_input_json(
-                team_run=team_run,
-                team_config=team_config,
-                member=member,
-                prior_outputs=prior_outputs)
-            created_run = self.agent_client.create_agent_run(
-                agent_id=member.agent_id,
-                agent_config_id=member.agent_config_id,
-                session_id=team_run.session_id,
-                input_text=self._build_member_input_text(
-                    team_run=team_run,
-                    team_config=team_config,
-                    member=member),
-                input_json=member_input_json)
-            yield {
-                "event": "team.member.started",
-                "member": self._member_to_json(member),
-                "agent_run": created_run.model_dump(mode="json"),
-            }
+        mode = team_config.coordination_mode
 
-            final_agent_run = created_run
-            try:
-                for event_name, data in self.agent_client.execute_agent_run_stream(
-                    agent_run_id=created_run.id,
-                    worker_key=payload.worker_key,
-                    dry_run=payload.dry_run):
-                    if event_name == "agent.run.delta":
-                        delta = data.get("delta")
-                        if isinstance(delta, str):
-                            yield {
-                                "event": "team.member.delta",
-                                "member": self._member_to_json(member),
-                                "agent_run_id": created_run.id,
-                                "delta": delta,
-                            }
-                    elif event_name in {"agent.run.completed", "agent.run.failed"}:
-                        run_payload = data.get("run")
-                        if isinstance(run_payload, dict):
-                            final_agent_run = AgentRunContract.model_validate(run_payload)
-            except AgentServiceClientError as exc:
-                failed_agent_run = created_run.model_copy(
-                    update={
-                        "status": "failed",
-                        "error_code": "agent_service_error",
-                        "error_message": str(exc),
-                    })
-                final_agent_run = failed_agent_run
-
-            result = TeamMemberRunResult(member=member, run=final_agent_run)
-            member_results.append(result)
-            prior_outputs.append(self._compact_prior_output(result))
-            yield {
-                "event": "team.member.completed",
-                "member": self._member_to_json(member),
-                "agent_run": final_agent_run.model_dump(mode="json"),
-            }
+        if mode == "debate":
+            member_results = yield from self._stream_members_debate(
+                team_run=team_run, team_config=team_config, members=stream_members,
+                payload=payload)
+        else:
+            member_results = yield from self._stream_members_sequential(
+                team_run=team_run, team_config=team_config, members=stream_members,
+                payload=payload)
 
         failed_results = [item for item in member_results if item.run.status != "completed"]
         output_text = self._build_team_output_text(
@@ -491,7 +440,8 @@ class TeamApplicationService:
             "streamed": True,
             "response_mode": self._read_response_mode(team_config),
         }
-        if failed_results:
+        failure_mode = self._read_failure_mode(team_config)
+        if failed_results and failure_mode != "continue_with_warning":
             failed_run = self.team_run_repository.update_status(
                 team_run_id=team_run.id,
                 status="failed",
@@ -587,34 +537,187 @@ class TeamApplicationService:
             raise AgentServiceClientError("agent service client is not configured")
 
         ordered_members = self._order_members(members)
-        if self._should_execute_members_in_parallel(team_config):
-            return self._execute_members_in_parallel(
-                team_run=team_run,
-                team_config=team_config,
-                members=ordered_members,
-                worker_key=worker_key,
-                dry_run=dry_run)
+        handoff = team_config.policy_json.get("handoff")
+        mode = team_config.coordination_mode
+
+        if mode == "parallel" or handoff == "parallel_merge":
+            return self._execute_members_parallel(
+                team_run=team_run, team_config=team_config,
+                members=ordered_members, worker_key=worker_key, dry_run=dry_run)
+        if mode == "pipeline":
+            return self._execute_members_pipeline(
+                team_run=team_run, team_config=team_config,
+                members=ordered_members, worker_key=worker_key, dry_run=dry_run)
+        if mode == "debate":
+            return self._execute_members_debate(
+                team_run=team_run, team_config=team_config,
+                members=ordered_members, worker_key=worker_key, dry_run=dry_run)
+        return self._execute_members_supervisor(
+            team_run=team_run, team_config=team_config,
+            members=ordered_members, worker_key=worker_key, dry_run=dry_run)
+
+    def _execute_members_supervisor(
+        self,
+        *,
+        team_run: TeamRun,
+        team_config: TeamConfig,
+        members: list[TeamMemberContract],
+        worker_key: str | None,
+        dry_run: bool) -> list[TeamMemberRunResult]:
+        """Run the team with a lead member first, then the rest, then a synthesis.
+
+        The lead is the first member whose role is ``supervisor`` or
+        ``planner``. Without a lead, all members run sequentially. With a
+        lead: it runs with no prior outputs; the remaining members run
+        sequentially with the lead's output as initial context; and, unless
+        the ``supervisor_synthesis`` policy is falsy or the lead did not
+        complete, the lead runs once more with every output as context.
+
+        Returns:
+            The lead result, the other members' results and, when it ran,
+            the synthesis result, in that order. Only the lead result is
+            returned when the lead fails under ``stop_on_critical``.
+        """
+        lead = next((m for m in members if m.role in {"supervisor", "planner"}), None)
+        followers = [m for m in members if m is not lead] if lead else members
+        failure_mode = self._read_failure_mode(team_config)
+
+        if lead is None:
+            return self._execute_members_sequential(
+                team_run=team_run, team_config=team_config, members=members,
+                worker_key=worker_key, dry_run=dry_run, failure_mode=failure_mode)
+
+        # Phase 1: the lead runs on its own, with no prior outputs.
+        opening_input = self._build_member_input_json(
+            team_run=team_run, team_config=team_config, member=lead, prior_outputs=[])
+        lead_result = self._execute_single_member(
+            team_run=team_run, team_config=team_config, member=lead,
+            member_input_json=opening_input, worker_key=worker_key, dry_run=dry_run)
+
+        lead_failed = lead_result.run.status != "completed"
+        if lead_failed and failure_mode == "stop_on_critical":
+            return [lead_result]
+
+        # Phase 2: everyone else runs with the lead's output as context.
+        lead_output = self._compact_prior_output(lead_result)
+        follower_results = self._execute_members_sequential(
+            team_run=team_run, team_config=team_config, members=followers,
+            worker_key=worker_key, dry_run=dry_run, failure_mode=failure_mode,
+            initial_prior_outputs=[lead_output])
+
+        combined = [lead_result] + follower_results
+
+        # Phase 3: optional synthesis pass by the lead over all outputs.
+        wants_synthesis = team_config.policy_json.get("supervisor_synthesis", True)
+        if not (wants_synthesis and lead_result.run.status == "completed"):
+            return combined
+
+        gathered_outputs = [lead_output]
+        gathered_outputs.extend(self._compact_prior_output(r) for r in follower_results)
+        synthesis_input = self._build_member_input_json(
+            team_run=team_run, team_config=team_config, member=lead,
+            prior_outputs=gathered_outputs)
+        synthesis_result = self._execute_single_member(
+            team_run=team_run, team_config=team_config, member=lead,
+            member_input_json=synthesis_input, worker_key=worker_key, dry_run=dry_run)
+        return combined + [synthesis_result]
+
+    def _execute_members_pipeline(
+        self,
+        *,
+        team_run: TeamRun,
+        team_config: TeamConfig,
+        members: list[TeamMemberContract],
+        worker_key: str | None,
+        dry_run: bool) -> list[TeamMemberRunResult]:
+        """Run members as a pipeline where each sees only the last good output.
+
+        Every member receives at most one prior output: that of the most
+        recent member that completed. Failure handling follows the team's
+        failure mode: ``stop_on_critical`` ends the pipeline at the first
+        failed member; ``retry_once`` reruns a failed member once with the
+        same input and keeps the retry's result in place of the first
+        attempt; any other mode moves on to the next member.
+
+        Returns:
+            One result per member that was run, in execution order.
+        """
+        failure_mode = self._read_failure_mode(team_config)
+        member_results: list[TeamMemberRunResult] = []
+        prev_output: dict[str, JSONValue] | None = None
+
+        for member in members:
+            prior = [prev_output] if prev_output is not None else []
+            member_input_json = self._build_member_input_json(
+                team_run=team_run, team_config=team_config, member=member, prior_outputs=prior)
+            result = self._execute_single_member(
+                team_run=team_run, team_config=team_config, member=member,
+                member_input_json=member_input_json, worker_key=worker_key, dry_run=dry_run)
+
+            if result.run.status != "completed" and failure_mode == "retry_once":
+                # One more attempt with the same input. The pipeline carries
+                # on even if the retry fails too: the mode is "retry_once"
+                # here, so a "stop_on_critical" check could never match.
+                result = self._execute_single_member(
+                    team_run=team_run, team_config=team_config, member=member,
+                    member_input_json=member_input_json, worker_key=worker_key, dry_run=dry_run)
+
+            member_results.append(result)
+
+            if result.run.status == "completed":
+                prev_output = self._compact_prior_output(result)
+            elif failure_mode == "stop_on_critical":
+                break
+
+        return member_results
+
+    def _execute_members_debate(
+        self,
+        *,
+        team_run: TeamRun,
+        team_config: TeamConfig,
+        members: list[TeamMemberContract],
+        worker_key: str | None,
+        dry_run: bool) -> list[TeamMemberRunResult]:
+        """Run the members for up to the configured number of debate rounds.
+
+        In every round each member runs in order and receives the whole
+        debate history so far, which accumulates across rounds and includes
+        failed outputs. Under ``stop_on_critical`` a failed member ends both
+        the round and the debate; under ``retry_once`` a failed member is run
+        once more with the same input and the retry replaces its entry.
+
+        Returns:
+            The results of the last round that was started, or an empty list
+            when no round ran.
+        """
+        max_rounds = self._read_max_rounds(team_config)
+        failure_mode = self._read_failure_mode(team_config)
+        stop_on_failure = failure_mode == "stop_on_critical"
+        debate_history: list[dict[str, JSONValue]] = []
+        final_results: list[TeamMemberRunResult] = []
+
+        for _ in range(1, max_rounds + 1):
+            round_results: list[TeamMemberRunResult] = []
+            for member in members:
+                member_input_json = self._build_member_input_json(
+                    team_run=team_run, team_config=team_config, member=member,
+                    prior_outputs=debate_history)
+                attempt = self._execute_single_member(
+                    team_run=team_run, team_config=team_config, member=member,
+                    member_input_json=member_input_json, worker_key=worker_key, dry_run=dry_run)
+                round_results.append(attempt)
+                debate_history.append(self._compact_prior_output(attempt))
+
+                if attempt.run.status == "completed":
+                    continue
+                if stop_on_failure:
+                    break
+                if failure_mode == "retry_once":
+                    second_attempt = self._execute_single_member(
+                        team_run=team_run, team_config=team_config, member=member,
+                        member_input_json=member_input_json, worker_key=worker_key, dry_run=dry_run)
+                    round_results[-1] = second_attempt
+                    debate_history[-1] = self._compact_prior_output(second_attempt)
+
+            final_results = round_results
+            if stop_on_failure and any(r.run.status != "completed" for r in round_results):
+                break
+
+        return final_results
+
+    def _execute_members_sequential(
+        self,
+        *,
+        team_run: TeamRun,
+        team_config: TeamConfig,
+        members: list[TeamMemberContract],
+        worker_key: str | None,
+        dry_run: bool,
+        failure_mode: str = "stop_on_critical",
+        initial_prior_outputs: list[dict[str, JSONValue]] | None = None) -> list[TeamMemberRunResult]:
+        """Run ``members`` one after another, feeding each the earlier outputs.
+
+        Each member receives ``initial_prior_outputs`` (copied, so the
+        caller's list is not modified) plus the compacted output of every
+        member run before it, failed ones included. With ``failure_mode``
+        ``stop_on_critical`` the loop ends at the first member that does not
+        complete; with ``retry_once`` that member is run once more with the
+        same input and the retry replaces its result and prior output; any
+        other mode continues with the next member.
+
+        Returns:
+            One result per member that was run, in execution order.
+        """
         member_results: list[TeamMemberRunResult] = []
-        prior_outputs: list[dict[str, JSONValue]] = []
-        for member in ordered_members:
+        prior_outputs = list(initial_prior_outputs or [])
+
+        for member in members:
             member_input_json = self._build_member_input_json(
-                team_run=team_run,
-                team_config=team_config,
-                member=member,
-                prior_outputs=prior_outputs)
+                team_run=team_run, team_config=team_config, member=member, prior_outputs=prior_outputs)
             result = self._execute_single_member(
-                team_run=team_run,
-                team_config=team_config,
-                member=member,
-                member_input_json=member_input_json,
-                worker_key=worker_key,
-                dry_run=dry_run)
+                team_run=team_run, team_config=team_config, member=member,
+                member_input_json=member_input_json, worker_key=worker_key, dry_run=dry_run)
             member_results.append(result)
             prior_outputs.append(self._compact_prior_output(result))
+
+            if result.run.status != "completed":
+                if failure_mode == "stop_on_critical":
+                    break
+                if failure_mode == "retry_once":
+                    # The retry's outcome is kept either way and the loop
+                    # moves on: the mode is "retry_once" here, so a
+                    # "stop_on_critical" check could never match.
+                    retry = self._execute_single_member(
+                        team_run=team_run, team_config=team_config, member=member,
+                        member_input_json=member_input_json, worker_key=worker_key, dry_run=dry_run)
+                    member_results[-1] = retry
+                    prior_outputs[-1] = self._compact_prior_output(retry)
+
         return member_results
 
-    def _execute_members_in_parallel(
+    def _execute_members_parallel(
         self,
         *,
         team_run: TeamRun,
@@ -670,9 +773,120 @@ class TeamApplicationService:
             dry_run=dry_run)
         return TeamMemberRunResult(member=member, run=executed_run)
 
-    def _should_execute_members_in_parallel(self, team_config: TeamConfig) -> bool:
-        handoff = team_config.policy_json.get("handoff")
-        return team_config.coordination_mode == "parallel" or handoff == "parallel_merge"
+    def _stream_single_member(
+        self,
+        *,
+        team_run: TeamRun,
+        team_config: TeamConfig,
+        member: TeamMemberContract,
+        prior_outputs: list[dict[str, JSONValue]],
+        payload: TeamRunExecuteRequest) -> Iterator[tuple[dict[str, JSONValue], TeamMemberRunResult | None]]:
+        """Create one member's agent run and stream it as ``(event, result)`` pairs.
+
+        Yields, in order:
+          * ``team.member.started`` once the agent run has been created;
+          * ``team.member.delta`` for every ``agent.run.delta`` whose ``delta``
+            is a string;
+          * ``team.member.completed`` exactly once, as the last item.
+
+        The second tuple element is ``None`` for every item except the last,
+        which carries the ``TeamMemberRunResult``. Consumers must forward the
+        last event themselves if they want it on the stream.
+
+        An ``AgentServiceClientError`` raised while streaming is converted into
+        a failed copy of the created run. ``create_agent_run`` is called
+        outside the ``try`` block, so an error from it propagates to the caller.
+        """
+        member_input_json = self._build_member_input_json(
+            team_run=team_run, team_config=team_config, member=member, prior_outputs=prior_outputs)
+        created_run = self.agent_client.create_agent_run(
+            agent_id=member.agent_id,
+            agent_config_id=member.agent_config_id,
+            session_id=team_run.session_id,
+            input_text=self._build_member_input_text(
+                team_run=team_run, team_config=team_config, member=member),
+            input_json=member_input_json)
+        yield {
+            "event": "team.member.started",
+            "member": self._member_to_json(member),
+            "agent_run": created_run.model_dump(mode="json"),
+        }, None
+
+        # Fallback when the stream ends without a terminal event: the run is
+        # reported exactly as it was created.
+        final_agent_run = created_run
+        try:
+            for event_name, data in self.agent_client.execute_agent_run_stream(
+                agent_run_id=created_run.id,
+                worker_key=payload.worker_key,
+                dry_run=payload.dry_run):
+                if event_name == "agent.run.delta":
+                    delta = data.get("delta")
+                    # Non-string deltas are silently skipped.
+                    if isinstance(delta, str):
+                        yield {
+                            "event": "team.member.delta",
+                            "member": self._member_to_json(member),
+                            "agent_run_id": created_run.id,
+                            "delta": delta,
+                        }, None
+                elif event_name in {"agent.run.completed", "agent.run.failed"}:
+                    run_payload = data.get("run")
+                    # Only a dict payload replaces the fallback run.
+                    if isinstance(run_payload, dict):
+                        final_agent_run = AgentRunContract.model_validate(run_payload)
+        except AgentServiceClientError as exc:
+            # Surface transport/service errors as a failed run instead of
+            # aborting the whole team stream.
+            final_agent_run = created_run.model_copy(update={
+                "status": "failed",
+                "error_code": "agent_service_error",
+                "error_message": str(exc),
+            })
+
+        result = TeamMemberRunResult(member=member, run=final_agent_run)
+        yield {
+            "event": "team.member.completed",
+            "member": self._member_to_json(member),
+            "agent_run": final_agent_run.model_dump(mode="json"),
+        }, result
+
+    def _stream_members_sequential(
+        self,
+        *,
+        team_run: TeamRun,
+        team_config: TeamConfig,
+        members: list[TeamMemberContract],
+        payload: TeamRunExecuteRequest) -> Iterator[dict[str, JSONValue]]:
+        """Stream members one after another, feeding each the earlier outputs.
+
+        Yields every event dict produced by ``_stream_single_member``,
+        including the terminal ``team.member.completed`` event. That event
+        arrives together with the member result and was previously dropped,
+        so stream consumers never saw a member finish.
+
+        Returns (via ``StopIteration.value`` / ``yield from``) the list of
+        ``TeamMemberRunResult`` objects in execution order.
+        """
+        member_results: list[TeamMemberRunResult] = []
+        prior_outputs: list[dict[str, JSONValue]] = []
+
+        for member in members:
+            for event, result in self._stream_single_member(
+                team_run=team_run, team_config=team_config, member=member,
+                prior_outputs=prior_outputs, payload=payload):
+                if result is not None:
+                    # Record the result first so later members see this
+                    # member's compacted output.
+                    member_results.append(result)
+                    prior_outputs.append(self._compact_prior_output(result))
+                # Forward every event; the one paired with a result is the
+                # member's completion event.
+                yield event
+
+        return member_results
+
+    def _stream_members_debate(
+        self,
+        *,
+        team_run: TeamRun,
+        team_config: TeamConfig,
+        members: list[TeamMemberContract],
+        payload: TeamRunExecuteRequest) -> Iterator[dict[str, JSONValue]]:
+        """Stream a multi-round debate in which every member speaks each round.
+
+        The number of rounds comes from ``_read_max_rounds``. Each round is
+        bracketed by ``team.debate.round_started`` and
+        ``team.debate.round_completed`` events. All member events are
+        forwarded, including the terminal ``team.member.completed`` event,
+        which arrives together with the member result and was previously
+        dropped.
+
+        Every member receives the running debate history (all compacted
+        outputs so far, across rounds) as its prior outputs.
+
+        Returns (via ``StopIteration.value`` / ``yield from``) the results of
+        the last round only.
+        """
+        max_rounds = self._read_max_rounds(team_config)
+        debate_history: list[dict[str, JSONValue]] = []
+        final_results: list[TeamMemberRunResult] = []
+
+        for round_num in range(1, max_rounds + 1):
+            yield {
+                "event": "team.debate.round_started",
+                "round": round_num,
+                "max_rounds": max_rounds,
+            }
+            round_results: list[TeamMemberRunResult] = []
+            for member in members:
+                for event, result in self._stream_single_member(
+                    team_run=team_run, team_config=team_config, member=member,
+                    prior_outputs=debate_history, payload=payload):
+                    if result is not None:
+                        round_results.append(result)
+                        debate_history.append(self._compact_prior_output(result))
+                    # Forward every event; the one paired with a result is
+                    # the member's completion event.
+                    yield event
+            # Only the most recent round's results are returned to the caller.
+            final_results = round_results
+            yield {
+                "event": "team.debate.round_completed",
+                "round": round_num,
+                "max_rounds": max_rounds,
+                "member_count": len(round_results),
+            }
+
+        return final_results
 
     def _read_team_members(self, team_config: TeamConfig) -> list[TeamMemberContract]:
         members: list[TeamMemberContract] = []
@@ -712,6 +926,18 @@ class TeamApplicationService:
             return value
         return "single_responder"
 
+    def _read_max_rounds(self, team_config: TeamConfig) -> int:
+        """Return the debate round limit from ``policy_json["max_rounds"]``.
+
+        Numeric values are truncated to ``int`` and clamped to ``1..20``.
+        Anything else falls back to the default of 3: a missing key, a
+        non-numeric value, a boolean, or a NaN/infinite float.
+        """
+        value = team_config.policy_json.get("max_rounds")
+        # bool is a subclass of int; `true` is not a meaningful round count.
+        if isinstance(value, bool) or not isinstance(value, (int, float)):
+            return 3
+        try:
+            rounds = int(value)
+        except (ValueError, OverflowError):
+            # int(nan) raises ValueError, int(inf) raises OverflowError.
+            return 3
+        return max(1, min(rounds, 20))
+
+    def _read_failure_mode(self, team_config: TeamConfig) -> str:
+        """Return the configured failure mode, or ``"stop_on_critical"``.
+
+        Only the three known mode names are accepted; any other value
+        (including a non-string) yields the default.
+        """
+        allowed_modes = ("stop_on_critical", "continue_with_warning", "retry_once")
+        configured = team_config.policy_json.get("failure_mode")
+        if not isinstance(configured, str) or configured not in allowed_modes:
+            return "stop_on_critical"
+        return configured
+
     def _build_member_input_text(
         self,
         *,
@@ -894,7 +1120,7 @@ class TeamApplicationService:
         members: list[TeamMemberContract] = []
         for index, item in enumerate(member_refs, start=1):
             role = item.get("role")
-            normalized_role = "executor" if role == "worker" else role
+            normalized_role = "specialist" if role == "worker" else role
             member = {
                 **item,
                 "member_key": item.get("member_key") or item.get("memberKey") or f"member_{index}",

+ 6 - 1
services/team-service/app/infrastructure/agent_client.py

@@ -118,7 +118,12 @@ class AgentServiceClient:
             payload["worker_key"] = worker_key
 
         try:
-            with httpx.Client(timeout=self.timeout_seconds) as client:
+            timeout = httpx.Timeout(
+                connect=self.timeout_seconds,
+                read=self.timeout_seconds,
+                write=self.timeout_seconds,
+                pool=self.timeout_seconds)
+            with httpx.Client(timeout=timeout) as client:
                 with client.stream(
                     "POST",
                     f"{self.base_url}/agents/runs/{agent_run_id}/execute-stream",

+ 205 - 1
tests/test_team_service.py

@@ -67,7 +67,7 @@ def test_team_service_post_contract_supports_team_configs_and_runs(
     assert config_response.status_code == 200
     config_payload = config_response.json()["data"]
     assert config_payload["teamId"] == team_payload["id"]
-    assert config_payload["memberRefs"][0]["role"] == "executor"
+    assert config_payload["memberRefs"][0]["role"] == "specialist"
     assert config_payload["memberRefs"][0]["member_key"] == "member_1"
 
     list_response = client.post(
@@ -192,3 +192,207 @@ def test_team_service_compacts_member_context_between_agent_calls() -> None:
     }
     assert "messages" not in member_json["output_json"]
     assert "raw_response_json" not in member_json["output_json"]
+
+
+def _build_service_with_mock_agent() -> tuple:
+    """Build a ``TeamApplicationService`` wired to a mocked agent client.
+
+    Returns a 3-tuple ``(service, call_log, track_execute)``:
+      * ``service`` has all repositories set to ``None`` and a ``MagicMock``
+        agent client;
+      * ``call_log`` is the list that ``track_execute`` appends to;
+      * ``track_execute`` is meant to be patched in for
+        ``_execute_single_member``. It logs
+        ``"<member_key>:<role>:prior=<n>"`` and returns a completed result.
+    """
+    # Service modules are only importable after the service path is prepared.
+    prepare_known_service_import("team-service")
+    from unittest.mock import MagicMock
+    from app.application.services import TeamApplicationService, TeamMemberRunResult
+    from core_domain import AgentRunContract, TeamMemberContract
+
+    call_log: list[str] = []
+
+    def make_member_result(member: TeamMemberContract, text: str) -> TeamMemberRunResult:
+        # A successful run whose id is derived from the member key.
+        return TeamMemberRunResult(
+            member=member,
+            run=AgentRunContract(
+                id=f"run_{member.member_key}",
+                agent_id=member.agent_id,
+                agent_config_id=member.agent_config_id,
+                output_text=text,
+                output_json={},
+                status="completed",
+                created_time=datetime.utcnow()))
+
+    mock_client = MagicMock()
+    # create_agent_run always reports a freshly created run with a fixed id.
+    mock_client.create_agent_run = MagicMock(
+        side_effect=lambda **kw: AgentRunContract(
+            id="run_mock", agent_id=kw.get("agent_id", "a"),
+            status="created", created_time=datetime.utcnow()))
+    # execute_agent_run always reports a completed run with output "ok".
+    mock_client.execute_agent_run = MagicMock(
+        side_effect=lambda **kw: AgentRunContract(
+            id=kw.get("agent_run_id", "run_mock"),
+            agent_id="a", output_text="ok",
+            output_json={}, status="completed",
+            created_time=datetime.utcnow()))
+
+    def track_execute(team_run, team_config, member, member_input_json, worker_key, dry_run):
+        # Record how many prior outputs this member was given.
+        prior = member_input_json.get("prior_member_outputs", [])
+        call_log.append(f"{member.member_key}:{member.role}:prior={len(prior)}")
+        return make_member_result(member, f"output_{member.member_key}")
+
+    service = TeamApplicationService(
+        team_repository=None,
+        team_config_repository=None,
+        team_run_repository=None,
+        agent_client=mock_client)
+    return service, call_log, track_execute
+
+
+def test_supervisor_mode_executes_lead_first_then_others() -> None:
+    """Supervisor mode runs the lead first, then the workers, then a synthesis pass."""
+    service, call_log, track_execute = _build_service_with_mock_agent()
+    # MagicMock is used for team_run below, so import it here as well;
+    # the original imported only patch in this function.
+    from unittest.mock import MagicMock, patch
+    from core_domain import TeamMemberContract
+
+    members = [
+        TeamMemberContract(member_key="worker_1", agent_id="a1", role="executor"),
+        TeamMemberContract(member_key="lead_1", agent_id="a2", role="supervisor"),
+        TeamMemberContract(member_key="worker_2", agent_id="a3", role="reviewer"),
+    ]
+
+    team_config = type("C", (), {
+        "coordination_mode": "supervisor",
+        "objective": "test",
+        "policy_json": {"supervisor_synthesis": True},
+    })()
+
+    with patch.object(service, "_execute_single_member", side_effect=track_execute):
+        results = service._execute_members(
+            team_run=MagicMock(), team_config=team_config,
+            members=members, worker_key=None, dry_run=False)
+
+    # lead runs first, then workers, then synthesis = 4 executions
+    assert len(results) == 4
+    keys = [r.member.member_key for r in results]
+    assert keys[0] == "lead_1"  # supervisor first
+    assert "worker_1" in keys[1:3]
+    assert "worker_2" in keys[1:3]
+    assert keys[3] == "lead_1"  # synthesis pass
+
+
+def test_pipeline_mode_chains_single_prior_output() -> None:
+    """Pipeline mode hands each member only the immediately preceding output."""
+    service, call_log, track_execute = _build_service_with_mock_agent()
+    # MagicMock is used for team_run below, so import it here as well;
+    # the original imported only patch in this function.
+    from unittest.mock import MagicMock, patch
+    from core_domain import TeamMemberContract
+
+    members = [
+        TeamMemberContract(member_key="m1", agent_id="a1", role="planner"),
+        TeamMemberContract(member_key="m2", agent_id="a2", role="executor"),
+        TeamMemberContract(member_key="m3", agent_id="a3", role="reviewer"),
+    ]
+
+    team_config = type("C", (), {
+        "coordination_mode": "pipeline",
+        "objective": "test",
+        "policy_json": {},
+    })()
+
+    with patch.object(service, "_execute_single_member", side_effect=track_execute):
+        results = service._execute_members(
+            team_run=MagicMock(), team_config=team_config,
+            members=members, worker_key=None, dry_run=False)
+
+    assert len(results) == 3
+    # m1: no prior, m2: 1 prior, m3: 1 prior (only previous, not all)
+    assert call_log[0] == "m1:planner:prior=0"
+    assert call_log[1] == "m2:executor:prior=1"
+    assert call_log[2] == "m3:reviewer:prior=1"
+
+
+def test_debate_mode_executes_multiple_rounds() -> None:
+    """Debate mode runs every member once per round and accumulates history."""
+    service, call_log, track_execute = _build_service_with_mock_agent()
+    # MagicMock is used for team_run below, so import it here as well;
+    # the original imported only patch in this function.
+    from unittest.mock import MagicMock, patch
+    from core_domain import TeamMemberContract
+
+    members = [
+        TeamMemberContract(member_key="m1", agent_id="a1", role="executor"),
+        TeamMemberContract(member_key="m2", agent_id="a2", role="reviewer"),
+    ]
+
+    team_config = type("C", (), {
+        "coordination_mode": "debate",
+        "objective": "test",
+        "policy_json": {"max_rounds": 3},
+    })()
+
+    with patch.object(service, "_execute_single_member", side_effect=track_execute):
+        results = service._execute_members(
+            team_run=MagicMock(), team_config=team_config,
+            members=members, worker_key=None, dry_run=False)
+
+    # 2 members x 3 rounds = 6 executions, final_results = last round
+    assert len(results) == 2
+    assert len(call_log) == 6
+    # Round 1: prior=0 for first, prior=1 for second
+    assert call_log[0] == "m1:executor:prior=0"
+    assert call_log[1] == "m2:reviewer:prior=1"
+    # Round 2: prior=2 (history from round 1)
+    assert call_log[2] == "m1:executor:prior=2"
+    assert call_log[3] == "m2:reviewer:prior=3"
+    # Round 3: prior=4
+    assert call_log[4] == "m1:executor:prior=4"
+
+
+def test_failure_mode_continue_allows_partial_failure() -> None:
+    """With ``continue_with_warning`` a failed member does not stop the others."""
+    service, _, _ = _build_service_with_mock_agent()
+    from unittest.mock import patch, MagicMock
+    from core_domain import TeamMemberContract, AgentRunContract
+    # TeamMemberRunResult is built below but was never imported into this
+    # function's scope. The import is safe here because the helper above has
+    # already prepared the team-service import path.
+    from app.application.services import TeamMemberRunResult
+
+    members = [
+        TeamMemberContract(member_key="m1", agent_id="a1", role="executor"),
+        TeamMemberContract(member_key="m2", agent_id="a2", role="executor"),
+    ]
+
+    team_config = type("C", (), {
+        "coordination_mode": "supervisor",
+        "objective": "test",
+        "policy_json": {"failure_mode": "continue_with_warning"},
+    })()
+
+    call_count = 0
+
+    def make_member_result(member, text):
+        return TeamMemberRunResult(
+            member=member,
+            run=AgentRunContract(
+                id=f"run_{member.member_key}", agent_id=member.agent_id,
+                output_text=text, output_json={},
+                status="completed", created_time=datetime.utcnow()))
+
+    def track_with_failure(team_run, team_config, member, member_input_json, worker_key, dry_run):
+        # m1 always fails; every other member succeeds.
+        nonlocal call_count
+        call_count += 1
+        if member.member_key == "m1":
+            return TeamMemberRunResult(
+                member=member,
+                run=AgentRunContract(
+                    id="run_fail", agent_id="a1",
+                    status="failed", error_code="test_error",
+                    error_message="boom",
+                    created_time=datetime.utcnow()))
+        return make_member_result(member, f"output_{member.member_key}")
+
+    with patch.object(service, "_execute_single_member", side_effect=track_with_failure):
+        results = service._execute_members(
+            team_run=MagicMock(), team_config=team_config,
+            members=members, worker_key=None, dry_run=False)
+
+    # Both members executed despite first failing
+    assert call_count == 2
+    assert len(results) == 2
+
+
+def test_read_max_rounds_and_failure_mode_helpers() -> None:
+    """Policy readers return defaults, honour custom values and clamp rounds."""
+    service, _, _ = _build_service_with_mock_agent()
+
+    def config_with(policy):
+        # Minimal stand-in exposing only the attribute the readers use.
+        return type("C", (), {"policy_json": policy})()
+
+    defaults = config_with({})
+    assert service._read_max_rounds(defaults) == 3
+    assert service._read_failure_mode(defaults) == "stop_on_critical"
+
+    customised = config_with({"max_rounds": 5, "failure_mode": "continue_with_warning"})
+    assert service._read_max_rounds(customised) == 5
+    assert service._read_failure_mode(customised) == "continue_with_warning"
+
+    oversized = config_with({"max_rounds": 50})
+    assert service._read_max_rounds(oversized) == 20

+ 3 - 0
web/src/App.tsx

@@ -19,6 +19,7 @@ const ModelsPage = lazy(() => import("@/pages/models/ModelsPage").then((module)
 const KnowledgePage = lazy(() => import("@/pages/knowledge/KnowledgePage").then((module) => ({ default: module.KnowledgePage })));
 const TeamsPage = lazy(() => import("@/pages/teams/TeamsPage").then((module) => ({ default: module.TeamsPage })));
 const SkillsPage = lazy(() => import("@/pages/skills/SkillsPage").then((module) => ({ default: module.SkillsPage })));
+const AppsPage = lazy(() => import("@/pages/apps/AppsPage").then((module) => ({ default: module.AppsPage })));
 const SettingsPage = lazy(() => import("@/pages/settings/SettingsPage").then((module) => ({ default: module.SettingsPage })));
 
 export default function App() {
@@ -51,6 +52,7 @@ export default function App() {
               <Route path="/knowledge/:section" element={<KnowledgePage />} />
               <Route path="/teams" element={<TeamsPage />} />
               <Route path="/skills" element={<SkillsPage />} />
+              <Route path="/apps" element={<AppsPage />} />
               <Route path="/settings" element={<SettingsPage />} />
             </Route>
             <Route path="*" element={<Navigate to={defaultRoute || "/dashboard"} replace />} />
@@ -83,6 +85,7 @@ function RoutePreloader() {
         import("@/pages/knowledge/KnowledgePage"),
         import("@/pages/teams/TeamsPage"),
         import("@/pages/skills/SkillsPage"),
+        import("@/pages/apps/AppsPage"),
         import("@/pages/settings/SettingsPage"),
       ]);
     };

+ 67 - 0
web/src/api/apps.ts

@@ -0,0 +1,67 @@
+import { apiClient } from "./client";
+import type {
+  AppApiKeyCreateResponse,
+  AppApiKeyResponse,
+  AppCreateRequest,
+  AppDefinition,
+  AppInvocationAuditResponse,
+  AppStatus,
+  AppUpdateRequest,
+} from "@/types";
+
+/** POSTs an empty body to `/apps/list` and returns the response's `data`. */
+export async function listApps() {
+  const response = await apiClient.post<AppDefinition[]>("/apps/list", {});
+  return response.data;
+}
+
+/** POSTs `{ app_id }` to `/apps/detail` and returns the response's `data`. */
+export async function getApp(appId: string) {
+  const response = await apiClient.post<AppDefinition>("/apps/detail", { app_id: appId });
+  return response.data;
+}
+
+/** POSTs the creation payload to `/apps` and returns the response's `data`. */
+export async function createApp(payload: AppCreateRequest) {
+  const response = await apiClient.post<AppDefinition>("/apps", payload);
+  return response.data;
+}
+
+/** POSTs `app_id` plus the update fields to `/apps/update` and returns the response's `data`. */
+export async function updateApp(appId: string, payload: Omit<AppUpdateRequest, "app_id">) {
+  // app_id goes first so the key order of the original request body is kept.
+  const body = { app_id: appId, ...payload };
+  const response = await apiClient.post<AppDefinition>("/apps/update", body);
+  return response.data;
+}
+
+/** POSTs `{ app_id, status }` to `/apps/status` and returns the response's `data`. */
+export async function updateAppStatus(appId: string, status: AppStatus) {
+  const body = { app_id: appId, status };
+  const response = await apiClient.post<AppDefinition>("/apps/status", body);
+  return response.data;
+}
+
+/**
+ * POSTs an empty body to `/apps/{appId}/api-keys/list` and returns the response's `data`.
+ * The id is percent-encoded so characters such as `/`, `?` or `#` cannot change the request path.
+ */
+export async function listAppApiKeys(appId: string) {
+  const { data } = await apiClient.post<AppApiKeyResponse[]>(
+    `/apps/${encodeURIComponent(appId)}/api-keys/list`,
+    {},
+  );
+  return data;
+}
+
+/**
+ * POSTs the key definition to `/apps/{appId}/api-keys` and returns the response's `data`.
+ * The id is percent-encoded so characters such as `/`, `?` or `#` cannot change the request path.
+ */
+export async function createAppApiKey(
+  appId: string,
+  payload: { name: string; scopes?: string | null; expires_time?: string | null },
+) {
+  const { data } = await apiClient.post<AppApiKeyCreateResponse>(
+    `/apps/${encodeURIComponent(appId)}/api-keys`,
+    payload,
+  );
+  return data;
+}
+
+/**
+ * POSTs `{ api_key_id, status }` to `/apps/{appId}/api-keys/status` and returns the response's `data`.
+ * The app id is percent-encoded so characters such as `/`, `?` or `#` cannot change the request path.
+ */
+export async function updateAppApiKeyStatus(appId: string, apiKeyId: string, status: string) {
+  const { data } = await apiClient.post<AppApiKeyResponse>(
+    `/apps/${encodeURIComponent(appId)}/api-keys/status`,
+    {
+      api_key_id: apiKeyId,
+      status,
+    },
+  );
+  return data;
+}
+
+/**
+ * POSTs `{ limit }` (default 100) to `/apps/{appId}/audits` and returns the response's `data`.
+ * The id is percent-encoded so characters such as `/`, `?` or `#` cannot change the request path.
+ */
+export async function listAppAudits(appId: string, limit = 100) {
+  const { data } = await apiClient.post<AppInvocationAuditResponse[]>(
+    `/apps/${encodeURIComponent(appId)}/audits`,
+    { limit },
+  );
+  return data;
+}

+ 1 - 0
web/src/api/index.ts

@@ -3,6 +3,7 @@ export * from "./health";
 export * from "./auth";
 export * from "./api-keys";
 export * from "./agents";
+export * from "./apps";
 export * from "./sessions";
 export * from "./tools";
 export * from "./skills";

+ 2 - 2
web/src/api/mock.ts

@@ -6,7 +6,7 @@ import type {
   ApiResponse,
   ApiKeyCreateResponse,
   ApiKeyResponse,
-  AppResponse,
+  SessionAppResponse,
   AuthMeData,
   DiscoverModelsResponse,
   DownstreamServiceHealth,
@@ -100,7 +100,7 @@ function toLegacyModelItems(models: Record<string, unknown>[]) {
   }));
 }
 
-const apps: AppResponse[] = [
+const apps: SessionAppResponse[] = [
   {
     id: "app_customer_ops",
     name: "Customer Ops",

+ 9 - 2
web/src/api/teams.ts

@@ -264,6 +264,8 @@ export type TeamRunStreamEvent =
   | { type: "team.member.started"; member?: JSONObject; agent_run?: JSONObject }
   | { type: "team.member.delta"; member?: JSONObject; agent_run_id?: string; delta: string }
   | { type: "team.member.completed"; member?: JSONObject; agent_run?: JSONObject }
+  | { type: "team.debate.round_started"; round: number; max_rounds: number }
+  | { type: "team.debate.round_completed"; round: number; max_rounds: number; member_count: number }
   | { type: "team.run.completed"; run?: TeamRun }
   | { type: "team.run.failed"; run?: TeamRun }
   | { type: string; [key: string]: unknown };
@@ -330,7 +332,12 @@ function parseSseEvent(raw: string): TeamRunStreamEvent | undefined {
     if (line.startsWith("data:")) dataLines.push(line.slice("data:".length).trim());
   });
   if (!dataLines.length) return undefined;
-  const payload = JSON.parse(dataLines.join("\n")) as Record<string, unknown>;
+  let payload: Record<string, unknown>;
+  try {
+    payload = JSON.parse(dataLines.join("\n")) as Record<string, unknown>;
+  } catch {
+    return undefined;
+  }
   return { type, ...normalizeStreamPayload(payload) } as TeamRunStreamEvent;
 }
 
@@ -348,7 +355,7 @@ function isRecord(value: unknown): value is Record<string, unknown> {
 
 function normalizeMemberRef(member: JSONObject): JSONObject {
   const rawRole = member.role;
-  const role = rawRole === "worker" ? "executor" : rawRole;
+  const role = rawRole === "worker" ? "specialist" : rawRole;
   if (typeof role !== "string") return member;
   return {
     ...member,

+ 2 - 2
web/src/hooks/useApps.ts

@@ -1,9 +1,9 @@
 import { useApi } from "./useApi";
 import { apiClient } from "@/api/client";
-import type { AppResponse } from "@/types";
+import type { SessionAppResponse } from "@/types";
 
 async function listApps() {
-  const { data } = await apiClient.get<AppResponse[]>("/apps");
+  const { data } = await apiClient.get<SessionAppResponse[]>("/apps");
   return data;
 }
 

+ 4 - 0
web/src/lib/constants.ts

@@ -5,6 +5,7 @@ import {
   BrainCircuit,
   LayoutDashboard,
   MessageSquare,
+  Package,
   Puzzle,
   Users,
   Wrench,
@@ -22,6 +23,7 @@ export const ROUTE_PATHS = {
   teams: "/teams",
   skills: "/skills",
   models: "/models",
+  apps: "/apps",
   settings: "/settings",
 } as const;
 
@@ -34,6 +36,7 @@ export const NAV_ITEMS: Array<{ labelKey: string; path: string; icon: LucideIcon
   { labelKey: "nav.knowledge", path: ROUTE_PATHS.knowledge, icon: BookOpen },
   { labelKey: "nav.teams", path: ROUTE_PATHS.teams, icon: Users },
   { labelKey: "nav.skills", path: ROUTE_PATHS.skills, icon: Puzzle },
+  { labelKey: "nav.apps", path: ROUTE_PATHS.apps, icon: Package },
   { labelKey: "nav.models", path: ROUTE_PATHS.models, icon: BrainCircuit },
 ];
 
@@ -53,6 +56,7 @@ export const STATUS_COLOR_MAP: Record<string, string> = {
   completed: "border-emerald-500/35 bg-emerald-500/10 text-emerald-800 dark:text-emerald-300",
   active: "border-emerald-500/35 bg-emerald-500/10 text-emerald-800 dark:text-emerald-300",
   ok: "border-emerald-500/35 bg-emerald-500/10 text-emerald-800 dark:text-emerald-300",
+  published: "border-emerald-500/35 bg-emerald-500/10 text-emerald-800 dark:text-emerald-300",
   failed: "border-red-500/35 bg-red-500/10 text-red-800 dark:text-red-300",
   disabled: "border-zinc-500/35 bg-zinc-500/10 text-zinc-700 dark:text-zinc-300",
   draft: "border-zinc-500/35 bg-zinc-500/10 text-zinc-700 dark:text-zinc-300",

+ 58 - 0
web/src/locales/en.json

@@ -106,6 +106,7 @@
     "teams": "Teams",
     "skills": "Skills",
     "models": "Model Providers",
+    "apps": "Apps",
     "settings": "Settings",
     "collapse": "Collapse",
     "skipToContent": "Skip to main content",
@@ -1224,6 +1225,63 @@
     "checkGatewayConnection": "Check the gateway connection and credentials.",
     "somethingBroke": "Something broke"
   },
+  "apps": {
+    "title": "Apps",
+    "description": "Configure, publish, and manage application open capabilities.",
+    "newApp": "New App",
+    "appDirectory": "App Directory",
+    "searchPlaceholder": "Search apps...",
+    "filterByStatus": "Filter by status",
+    "allStatuses": "All statuses",
+    "published": "Published",
+    "disabled": "Disabled",
+    "noMatchingApps": "No matching apps",
+    "adjustFilters": "Adjust search or filters to find a matching app.",
+    "noApps": "No apps",
+    "createAppStart": "Create an app to expose agent capabilities as APIs.",
+    "selectApp": "Select an app to view details.",
+    "noDescription": "No description",
+    "createApp": "Create App",
+    "createAppDescription": "Bind an agent or team to an application, generate API keys, and expose it externally.",
+    "namePlaceholder": "Customer Support",
+    "codePlaceholder": "customer_support",
+    "code": "Code",
+    "targetType": "Target Type",
+    "targetId": "Target ID",
+    "selectTarget": "Select a target...",
+    "appCreated": "App Created",
+    "copyKeyNow": "Copy this key now. The full secret is only shown once.",
+    "keyCopied": "API key copied",
+    "config": "Configuration",
+    "apiKeys": "API Keys",
+    "audits": "Audit Logs",
+    "apiUsage": "API Usage",
+    "publish": "Publish",
+    "disable": "Disable",
+    "republish": "Republish",
+    "publishedSuccess": "App published",
+    "disabledSuccess": "App disabled",
+    "updateSuccess": "App updated",
+    "basicConfig": "Basic Configuration",
+    "syncEndpoint": "Sync Endpoint",
+    "streamEndpoint": "Stream Endpoint",
+    "createKey": "Create Key",
+    "keyNamePlaceholder": "Production key",
+    "scopes": "Scopes",
+    "keyDisabled": "API key disabled",
+    "disableKey": "Disable",
+    "noKeys": "No API keys",
+    "createKeyStart": "Create an API key to allow external access.",
+    "apiKeyCreated": "API Key Created",
+    "noAudits": "No audit records",
+    "noAuditsDesc": "Audit records will appear here after the first API call.",
+    "records": "records",
+    "invokeType": "Type",
+    "duration": "Duration",
+    "keyPrefix": "Key",
+    "error": "Error",
+    "lastUsed": "Last used"
+  },
   "models": {
     "title": "Models",
     "description": "Manage configured model endpoints.",

+ 58 - 0
web/src/locales/zh.json

@@ -106,6 +106,7 @@
     "teams": "团队",
     "skills": "技能",
     "models": "模型",
+    "apps": "应用",
     "settings": "设置",
     "collapse": "收起",
     "skipToContent": "跳到主内容",
@@ -1224,6 +1225,63 @@
     "checkGatewayConnection": "请检查网关连接和凭据配置。",
     "somethingBroke": "页面发生错误"
   },
+  "apps": {
+    "title": "应用",
+    "description": "配置、发布和管理应用开放能力。",
+    "newApp": "新建应用",
+    "appDirectory": "应用目录",
+    "searchPlaceholder": "搜索应用...",
+    "filterByStatus": "按状态筛选",
+    "allStatuses": "全部状态",
+    "published": "已发布",
+    "disabled": "已停用",
+    "noMatchingApps": "没有匹配的应用",
+    "adjustFilters": "调整搜索或筛选条件。",
+    "noApps": "暂无应用",
+    "createAppStart": "创建应用以将智能体能力开放为 API。",
+    "selectApp": "选择一个应用查看详情。",
+    "noDescription": "暂无描述",
+    "createApp": "创建应用",
+    "createAppDescription": "绑定 Agent 或 Team 到应用,生成 API Key 并对外开放调用。",
+    "namePlaceholder": "客户支持",
+    "codePlaceholder": "customer_support",
+    "code": "编码",
+    "targetType": "目标类型",
+    "targetId": "目标 ID",
+    "selectTarget": "选择目标...",
+    "appCreated": "应用已创建",
+    "copyKeyNow": "请立即复制此密钥,明文仅显示一次。",
+    "keyCopied": "API Key 已复制",
+    "config": "配置",
+    "apiKeys": "API Key",
+    "audits": "调用审计",
+    "apiUsage": "接口调用",
+    "publish": "发布",
+    "disable": "停用",
+    "republish": "重新发布",
+    "publishedSuccess": "应用已发布",
+    "disabledSuccess": "应用已停用",
+    "updateSuccess": "应用已更新",
+    "basicConfig": "基础配置",
+    "syncEndpoint": "同步接口",
+    "streamEndpoint": "流式接口",
+    "createKey": "创建 Key",
+    "keyNamePlaceholder": "生产环境 Key",
+    "scopes": "权限范围",
+    "keyDisabled": "API Key 已禁用",
+    "disableKey": "禁用",
+    "noKeys": "暂无 API Key",
+    "createKeyStart": "创建 API Key 以允许外部调用。",
+    "apiKeyCreated": "API Key 已创建",
+    "noAudits": "暂无审计记录",
+    "noAuditsDesc": "首次 API 调用后审计记录将出现在此处。",
+    "records": "条记录",
+    "invokeType": "类型",
+    "duration": "耗时",
+    "keyPrefix": "Key",
+    "error": "错误",
+    "lastUsed": "最近使用"
+  },
   "models": {
     "title": "模型",
     "description": "管理已配置的模型接入。",

+ 194 - 0
web/src/pages/apps/AppsPage.tsx

@@ -0,0 +1,194 @@
+import * as React from "react";
+import { useTranslation } from "react-i18next";
+import {
+  Clock,
+  Package,
+  RefreshCw,
+  Search,
+  SlidersHorizontal,
+} from "lucide-react";
+import { listApps } from "@/api/apps";
+import { ApiErrorState } from "@/components/shared/ApiErrorState";
+import { EmptyState } from "@/components/shared/EmptyState";
+import { LoadingSpinner } from "@/components/shared/LoadingSpinner";
+import { PageHeader } from "@/components/shared/PageHeader";
+import { SearchInput } from "@/components/shared/SearchInput";
+import { StatusBadge } from "@/components/shared/StatusBadge";
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { Select } from "@/components/ui/select";
+import { cn, relativeTime } from "@/lib/utils";
+import type { AppDefinition, AppStatus } from "@/types";
+import { AppDetail } from "./components/AppDetail";
+import { CreateAppDialog } from "./components/CreateAppDialog";
+
+type StatusFilter = "all" | AppStatus;
+
+/**
+ * Apps management page.
+ *
+ * Shows a searchable, status-filterable directory of apps on the left, the
+ * detail view of the selected app on the right, and hosts the dialog used to
+ * create a new app.
+ */
+export function AppsPage() {
+  const { t } = useTranslation();
+  const [apps, setApps] = React.useState<AppDefinition[]>([]);
+  const [selectedAppId, setSelectedAppId] = React.useState<string>();
+  const [search, setSearch] = React.useState("");
+  const [statusFilter, setStatusFilter] = React.useState<StatusFilter>("all");
+  const [loading, setLoading] = React.useState(true);
+  const [error, setError] = React.useState<string>();
+  const [createOpen, setCreateOpen] = React.useState(false);
+
+  const selectedApp = apps.find((app) => app.id === selectedAppId);
+
+  // Case-insensitive substring match over name, code and description.
+  const needle = search.toLowerCase();
+  const filtered = apps.filter((app) => {
+    const text = `${app.name} ${app.code} ${app.description ?? ""}`.toLowerCase();
+    const matchesSearch = text.includes(needle);
+    const matchesStatus = statusFilter === "all" || app.status === statusFilter;
+    return matchesSearch && matchesStatus;
+  });
+
+  const hasFilters = search.length > 0 || statusFilter !== "all";
+
+  // Full (re)load of the app list. While it runs the whole page is replaced by
+  // the spinner (see the early return below), so it must not be triggered
+  // while a dialog that holds unsaved or one-time state is open.
+  const load = React.useCallback(async () => {
+    setLoading(true);
+    setError(undefined);
+    try {
+      const data = await listApps();
+      setApps(data);
+      // Keep the current selection only while it still exists in the fresh
+      // list; otherwise fall back to the first app (or nothing).
+      setSelectedAppId((current) =>
+        current && data.some((app) => app.id === current) ? current : data[0]?.id,
+      );
+    } catch (err) {
+      setError(err instanceof Error ? err.message : t("errors.failedToLoad"));
+    } finally {
+      setLoading(false);
+    }
+  }, [t]);
+
+  React.useEffect(() => { void load(); }, [load]);
+  // Select the first app whenever nothing is selected yet.
+  React.useEffect(() => {
+    if (!selectedAppId && apps[0]) setSelectedAppId(apps[0].id);
+  }, [selectedAppId, apps]);
+
+  if (loading) return <LoadingSpinner label={t("common.loading")} />;
+  if (error) return <ApiErrorState message={error} onRetry={() => void load()} />;
+
+  return (
+    <div className="flex min-h-0 flex-col gap-6">
+      <PageHeader
+        title={t("apps.title")}
+        description={t("apps.description")}
+        actions={
+          <>
+            <Button variant="outline" onClick={() => void load()}>
+              <RefreshCw className="h-4 w-4" /> {t("common.refresh")}
+            </Button>
+            <Button onClick={() => setCreateOpen(true)}>
+              <Package className="h-4 w-4" /> {t("apps.newApp")}
+            </Button>
+          </>
+        }
+      />
+
+      <div className="grid h-[calc(100dvh-180px)] min-h-[620px] gap-5 xl:grid-cols-[320px_minmax(0,1fr)]">
+        <Card className="min-h-0 overflow-hidden">
+          <CardHeader className="p-4">
+            <div className="flex items-start justify-between gap-3">
+              <div>
+                <CardTitle>{t("apps.appDirectory")}</CardTitle>
+                <CardDescription>
+                  {filtered.length} / {apps.length}
+                </CardDescription>
+              </div>
+              <SlidersHorizontal className="mt-1 h-4 w-4 text-muted-foreground" />
+            </div>
+          </CardHeader>
+          <CardContent className="space-y-3 p-4 pt-0">
+            <SearchInput value={search} onChange={setSearch} placeholder={t("apps.searchPlaceholder")} />
+            <div className="grid gap-3">
+              <Select
+                aria-label={t("apps.filterByStatus")}
+                value={statusFilter}
+                onChange={(event) => setStatusFilter(event.target.value as StatusFilter)}
+                options={[
+                  { value: "all", label: t("apps.allStatuses") },
+                  { value: "draft", label: t("common.draft") },
+                  { value: "published", label: t("apps.published") },
+                  { value: "disabled", label: t("apps.disabled") },
+                ]}
+              />
+            </div>
+            {hasFilters ? (
+              <Button type="button" variant="ghost" size="sm" onClick={() => { setSearch(""); setStatusFilter("all"); }}>
+                {t("common.clearFilters")}
+              </Button>
+            ) : null}
+
+            {filtered.length ? (
+              <div className="max-h-[calc(100vh-390px)] space-y-2 overflow-auto pr-1">
+                {filtered.map((app) => (
+                  <button
+                    key={app.id}
+                    type="button"
+                    onClick={() => setSelectedAppId(app.id)}
+                    className={cn(
+                      "w-full rounded-md border border-border bg-muted/30 p-4 text-left transition hover:bg-muted/55 focus:outline-none focus-visible:ring-2 focus-visible:ring-primary",
+                      app.id === selectedAppId && "border-primary/45 bg-primary/10 shadow-glow",
+                    )}
+                  >
+                    <div className="flex items-start justify-between gap-3">
+                      <div className="min-w-0">
+                        <div className="flex items-center gap-2">
+                          <Package className="h-4 w-4 text-primary" />
+                          <p className="truncate text-sm font-semibold">{app.name}</p>
+                        </div>
+                        <p className="mt-1 truncate text-xs text-muted-foreground">{app.code}</p>
+                      </div>
+                      <StatusBadge status={app.status} />
+                    </div>
+                    <p className="mt-2 line-clamp-2 text-sm leading-6 text-muted-foreground">
+                      {app.description ?? t("apps.noDescription")}
+                    </p>
+                    <div className="mt-3 flex flex-wrap items-center gap-2">
+                      <Badge className="border-border bg-surface-elevated text-muted-foreground">{app.target_type}</Badge>
+                      <span className="inline-flex items-center gap-1 text-xs text-muted-foreground">
+                        <Clock className="h-3.5 w-3.5" /> {relativeTime(app.created_time)}
+                      </span>
+                    </div>
+                  </button>
+                ))}
+              </div>
+            ) : hasFilters ? (
+              <EmptyState icon={Search} title={t("apps.noMatchingApps")} description={t("apps.adjustFilters")} />
+            ) : (
+              // No filters are active, so the list is empty because no apps exist.
+              <EmptyState icon={Package} title={t("apps.noApps")} description={t("apps.createAppStart")} />
+            )}
+          </CardContent>
+        </Card>
+
+        <div className="min-h-0 min-w-0">
+          <Card className="flex h-full min-h-0 min-w-0 flex-col overflow-hidden">
+            {selectedApp ? (
+              <AppDetail app={selectedApp} onUpdated={(updated) => {
+                setApps((current) => current.map((a) => (a.id === updated.id ? updated : a)));
+              }} />
+            ) : (
+              <>
+                <CardHeader className="border-b border-border bg-muted/15 p-5">
+                  <CardTitle>{t("apps.title")}</CardTitle>
+                  <CardDescription>{t("apps.selectApp")}</CardDescription>
+                </CardHeader>
+                <CardContent className="p-6">
+                  <EmptyState icon={Package} title={t("apps.noApps")} description={t("apps.createAppStart")} actionLabel={t("apps.newApp")} onAction={() => setCreateOpen(true)} />
+                </CardContent>
+              </>
+            )}
+          </Card>
+        </div>
+      </div>
+
+      <CreateAppDialog
+        open={createOpen}
+        onOpenChange={setCreateOpen}
+        onCreated={(app) => {
+          // Insert the new app locally instead of calling load(): load() flips
+          // `loading`, which swaps this whole tree for the spinner and would
+          // unmount the dialog while it is still open.
+          // NOTE(review): the new app is prepended; confirm this matches the
+          // ordering returned by listApps.
+          setApps((current) =>
+            current.some((a) => a.id === app.id)
+              ? current.map((a) => (a.id === app.id ? app : a))
+              : [app, ...current],
+          );
+          setSelectedAppId(app.id);
+          setSearch("");
+        }}
+      />
+    </div>
+  );
+}

+ 173 - 0
web/src/pages/apps/components/AppApiKeysPanel.tsx

@@ -0,0 +1,173 @@
+import * as React from "react";
+import { useTranslation } from "react-i18next";
+import { Key } from "lucide-react";
+import { createAppApiKey, listAppApiKeys, updateAppApiKeyStatus } from "@/api/apps";
+import { Button } from "@/components/ui/button";
+import { Dialog } from "@/components/ui/dialog";
+import { Input } from "@/components/ui/input";
+import { EmptyState } from "@/components/shared/EmptyState";
+import { LoadingSpinner } from "@/components/shared/LoadingSpinner";
+import { StatusBadge } from "@/components/shared/StatusBadge";
+import { toast } from "@/components/ui/toaster";
+import type { AppApiKeyResponse } from "@/types";
+
+/**
+ * Lists the API keys of one app, lets the user disable active keys and create
+ * new ones, and shows a freshly created key in a reveal dialog.
+ *
+ * The panel may stay mounted while `appId` changes, so all per-app state is
+ * reset on that change and responses from superseded requests are ignored.
+ */
+export function AppApiKeysPanel({ appId }: { appId: string }) {
+  const { t } = useTranslation();
+  const [keys, setKeys] = React.useState<AppApiKeyResponse[]>([]);
+  const [loading, setLoading] = React.useState(true);
+  const [createOpen, setCreateOpen] = React.useState(false);
+  const [revealKey, setRevealKey] = React.useState<string>();
+  // Id of the most recent list request; older responses are discarded so a
+  // slow reply cannot overwrite newer data (e.g. after switching apps).
+  const latestRequest = React.useRef(0);
+
+  const loadKeys = React.useCallback(async () => {
+    const requestId = ++latestRequest.current;
+    try {
+      const data = await listAppApiKeys(appId);
+      if (requestId === latestRequest.current) setKeys(data);
+    } catch {
+      if (requestId === latestRequest.current) toast.error(t("errors.failedToLoad"));
+    } finally {
+      if (requestId === latestRequest.current) setLoading(false);
+    }
+  }, [appId, t]);
+
+  // Drop everything that belongs to the previously shown app. Declared before
+  // the load effect so the reset runs first. Kept separate from `loadKeys` so
+  // background refreshes (after create/disable) never bring back the spinner,
+  // which would unmount the dialogs rendered below.
+  React.useEffect(() => {
+    setKeys([]);
+    setLoading(true);
+    setCreateOpen(false);
+    setRevealKey(undefined);
+  }, [appId]);
+
+  React.useEffect(() => { void loadKeys(); }, [loadKeys]);
+
+  const handleDisable = React.useCallback(async (keyId: string) => {
+    try {
+      await updateAppApiKeyStatus(appId, keyId, "disabled");
+      toast.success(t("apps.keyDisabled"));
+      void loadKeys();
+    } catch {
+      toast.error(t("errors.failedToUpdate"));
+    }
+  }, [appId, loadKeys, t]);
+
+  if (loading) return <LoadingSpinner label={t("common.loading")} />;
+
+  return (
+    <div className="space-y-4">
+      <div className="flex items-center justify-between">
+        <h3 className="text-sm font-semibold">{t("apps.apiKeys")}</h3>
+        <Button size="sm" onClick={() => setCreateOpen(true)}>
+          {t("apps.createKey")}
+        </Button>
+      </div>
+
+      {keys.length ? (
+        <div className="space-y-2">
+          {keys.map((key) => (
+            <div key={key.id} className="flex items-center justify-between rounded-md border border-border p-3">
+              <div className="min-w-0">
+                <div className="flex items-center gap-2">
+                  <Key className="h-4 w-4 text-muted-foreground" />
+                  <span className="text-sm font-medium">{key.name}</span>
+                  <StatusBadge status={key.status} />
+                </div>
+                <div className="mt-1 flex gap-3 text-xs text-muted-foreground">
+                  <span>{key.key_prefix}...</span>
+                  {key.scopes && <span>{key.scopes}</span>}
+                  {key.last_used_time && <span>{t("apps.lastUsed")}: {new Date(key.last_used_time).toLocaleString()}</span>}
+                  <span>{t("common.created")}: {new Date(key.created_time).toLocaleString()}</span>
+                </div>
+              </div>
+              {key.status === "active" && (
+                <Button size="sm" variant="outline" onClick={() => void handleDisable(key.id)}>
+                  {t("apps.disableKey")}
+                </Button>
+              )}
+            </div>
+          ))}
+        </div>
+      ) : (
+        <EmptyState icon={Key} title={t("apps.noKeys")} description={t("apps.createKeyStart")} />
+      )}
+
+      <CreateAppApiKeyDialog
+        open={createOpen}
+        onOpenChange={setCreateOpen}
+        appId={appId}
+        onCreated={(keyText) => {
+          setRevealKey(keyText);
+          void loadKeys();
+        }}
+      />
+
+      <Dialog open={!!revealKey} onOpenChange={() => setRevealKey(undefined)}>
+        <div className="w-[480px] space-y-4">
+          <h2 className="text-lg font-semibold">{t("apps.apiKeyCreated")}</h2>
+          <p className="text-sm text-muted-foreground">{t("apps.copyKeyNow")}</p>
+          <div className="rounded-md border border-amber-500/30 bg-amber-500/5 p-4">
+            <code className="block break-all text-sm">{revealKey}</code>
+          </div>
+          <div className="flex justify-end">
+            <Button onClick={() => {
+              // Guard instead of a non-null assertion: nothing to copy once
+              // the key has been cleared.
+              if (!revealKey) return;
+              navigator.clipboard.writeText(revealKey).then(() => toast.success(t("apps.keyCopied")));
+            }}>
+              {t("common.copy")}
+            </Button>
+          </div>
+        </div>
+      </Dialog>
+    </div>
+  );
+}
+
+// Scopes pre-filled in the create-key form.
+const DEFAULT_KEY_SCOPES = "app:invoke app:stream";
+
+/**
+ * Dialog that creates an API key for `appId`.
+ *
+ * On success `onCreated` receives the `api_key` value from the create
+ * response and the dialog closes itself. The form is reset every time the
+ * dialog opens.
+ */
+function CreateAppApiKeyDialog({
+  open,
+  onOpenChange,
+  appId,
+  onCreated,
+}: {
+  open: boolean;
+  onOpenChange: (open: boolean) => void;
+  appId: string;
+  onCreated: (apiKey: string) => void;
+}) {
+  const { t } = useTranslation();
+  const [name, setName] = React.useState("");
+  const [scopes, setScopes] = React.useState(DEFAULT_KEY_SCOPES);
+  const [saving, setSaving] = React.useState(false);
+
+  // Start from a clean form on every open so input abandoned via Cancel does
+  // not leak into the next key.
+  React.useEffect(() => {
+    if (open) {
+      setName("");
+      setScopes(DEFAULT_KEY_SCOPES);
+    }
+  }, [open]);
+
+  const handleSubmit = React.useCallback(async () => {
+    const trimmedName = name.trim();
+    if (!trimmedName) return;
+    setSaving(true);
+    try {
+      const resp = await createAppApiKey(appId, {
+        name: trimmedName,
+        // Whitespace-only input counts as "no scopes".
+        scopes: scopes.trim() || null,
+      });
+      onCreated(resp.api_key);
+      onOpenChange(false);
+    } catch {
+      toast.error(t("errors.failedToCreate"));
+    } finally {
+      setSaving(false);
+    }
+  }, [appId, name, scopes, onCreated, onOpenChange, t]);
+
+  return (
+    <Dialog open={open} onOpenChange={onOpenChange}>
+      <div className="w-[420px] space-y-4">
+        <h2 className="text-lg font-semibold">{t("apps.createKey")}</h2>
+        <div className="space-y-3">
+          <div>
+            <label className="mb-1 block text-xs font-medium text-muted-foreground">{t("common.name")} *</label>
+            <Input value={name} onChange={(e) => setName(e.target.value)} placeholder={t("apps.keyNamePlaceholder")} />
+          </div>
+          <div>
+            <label className="mb-1 block text-xs font-medium text-muted-foreground">{t("apps.scopes")}</label>
+            <Input value={scopes} onChange={(e) => setScopes(e.target.value)} placeholder="app:invoke app:stream" />
+          </div>
+        </div>
+        <div className="flex justify-end gap-2">
+          <Button variant="outline" onClick={() => onOpenChange(false)}>
+            {t("common.cancel")}
+          </Button>
+          <Button disabled={saving || !name.trim()} onClick={() => void handleSubmit()}>
+            {saving ? t("common.creating") : t("common.create")}
+          </Button>
+        </div>
+      </div>
+    </Dialog>
+  );
+}

+ 69 - 0
web/src/pages/apps/components/AppAuditsPanel.tsx

@@ -0,0 +1,69 @@
+import * as React from "react";
+import { useTranslation } from "react-i18next";
+import { FileText } from "lucide-react";
+import { listAppAudits } from "@/api/apps";
+import { EmptyState } from "@/components/shared/EmptyState";
+import { LoadingSpinner } from "@/components/shared/LoadingSpinner";
+import { StatusBadge } from "@/components/shared/StatusBadge";
+import { toast } from "@/components/ui/toaster";
+import type { AppInvocationAuditResponse } from "@/types";
+
+/**
+ * Read-only table of invocation audit records for one app (requested with a
+ * limit of 50).
+ *
+ * The panel may stay mounted while `appId` changes, so the rows are reset on
+ * that change and responses from superseded requests are ignored.
+ */
+export function AppAuditsPanel({ appId }: { appId: string }) {
+  const { t } = useTranslation();
+  const [audits, setAudits] = React.useState<AppInvocationAuditResponse[]>([]);
+  const [loading, setLoading] = React.useState(true);
+  // Id of the most recent request; older responses are discarded so a slow
+  // reply for a previous app cannot overwrite the current rows.
+  const latestRequest = React.useRef(0);
+
+  const loadAudits = React.useCallback(async () => {
+    const requestId = ++latestRequest.current;
+    try {
+      const data = await listAppAudits(appId, 50);
+      if (requestId === latestRequest.current) setAudits(data);
+    } catch {
+      if (requestId === latestRequest.current) toast.error(t("errors.failedToLoad"));
+    } finally {
+      if (requestId === latestRequest.current) setLoading(false);
+    }
+  }, [appId, t]);
+
+  // Clear the previous app's rows and show the spinner while the new app's
+  // records load. Declared before the load effect so the reset runs first.
+  React.useEffect(() => {
+    setAudits([]);
+    setLoading(true);
+  }, [appId]);
+
+  React.useEffect(() => { void loadAudits(); }, [loadAudits]);
+
+  if (loading) return <LoadingSpinner label={t("common.loading")} />;
+
+  if (!audits.length) {
+    return <EmptyState icon={FileText} title={t("apps.noAudits")} description={t("apps.noAuditsDesc")} />;
+  }
+
+  return (
+    <div className="space-y-4">
+      <div className="flex items-center justify-between">
+        <h3 className="text-sm font-semibold">{t("apps.audits")}</h3>
+        <span className="text-xs text-muted-foreground">{audits.length} {t("apps.records")}</span>
+      </div>
+      <div className="overflow-auto">
+        <table className="w-full text-sm">
+          <thead>
+            <tr className="border-b border-border text-left text-xs text-muted-foreground">
+              <th className="pb-2 pr-3 font-medium">{t("common.created")}</th>
+              <th className="pb-2 pr-3 font-medium">{t("common.status")}</th>
+              <th className="pb-2 pr-3 font-medium">{t("apps.invokeType")}</th>
+              <th className="pb-2 pr-3 font-medium">{t("apps.duration")}</th>
+              <th className="pb-2 pr-3 font-medium">{t("apps.keyPrefix")}</th>
+              <th className="pb-2 font-medium">{t("apps.error")}</th>
+            </tr>
+          </thead>
+          <tbody>
+            {audits.map((audit) => (
+              <tr key={audit.id} className="border-b border-border/50">
+                <td className="py-2 pr-3 text-xs">{new Date(audit.created_time).toLocaleString()}</td>
+                <td className="py-2 pr-3"><StatusBadge status={audit.status} /></td>
+                <td className="py-2 pr-3 text-xs">{audit.invoke_type}</td>
+                <td className="py-2 pr-3 text-xs">{audit.duration_ms}ms</td>
+                <td className="py-2 pr-3 text-xs font-mono">{audit.api_key_prefix ?? "-"}</td>
+                <td className="py-2 text-xs text-red-500">{audit.error_message ?? "-"}</td>
+              </tr>
+            ))}
+          </tbody>
+        </table>
+      </div>
+    </div>
+  );
+}

+ 222 - 0
web/src/pages/apps/components/AppDetail.tsx

@@ -0,0 +1,222 @@
+import * as React from "react";
+import { useTranslation } from "react-i18next";
+import { Pencil } from "lucide-react";
+import { updateApp, updateAppStatus } from "@/api/apps";
+import { Button } from "@/components/ui/button";
+import { CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { Tabs } from "@/components/ui/tabs";
+import { toast } from "@/components/ui/toaster";
+import { StatusBadge } from "@/components/shared/StatusBadge";
+import type { AppDefinition } from "@/types";
+import { AppApiKeysPanel } from "./AppApiKeysPanel";
+import { AppAuditsPanel } from "./AppAuditsPanel";
+
+type DetailTab = "config" | "apiKeys" | "audits" | "usage";
+
+/**
+ * Detail view of a single app: header with the status actions
+ * (publish / disable / republish) and tabs for config, API keys, audits and
+ * usage examples.
+ *
+ * `onUpdated` is called with the app returned by the status update so the
+ * parent can replace its copy.
+ */
+export function AppDetail({ app, onUpdated }: { app: AppDefinition; onUpdated: (app: AppDefinition) => void }) {
+  const { t } = useTranslation();
+  const [tab, setTab] = React.useState<DetailTab>("config");
+  // True while a status change is in flight; disables the action buttons so a
+  // double click cannot send the same request twice.
+  const [pending, setPending] = React.useState(false);
+
+  // Shared implementation behind the publish, disable and republish buttons.
+  const changeStatus = React.useCallback(async (status: "published" | "disabled") => {
+    setPending(true);
+    try {
+      const updated = await updateAppStatus(app.id, status);
+      onUpdated(updated);
+      toast.success(t(status === "published" ? "apps.publishedSuccess" : "apps.disabledSuccess"));
+    } catch {
+      toast.error(t("errors.failedToUpdate"));
+    } finally {
+      setPending(false);
+    }
+  }, [app.id, onUpdated, t]);
+
+  return (
+    <>
+      <CardHeader className="border-b border-border bg-muted/15 p-5">
+        <div className="flex flex-col gap-4 lg:flex-row lg:items-start lg:justify-between">
+          <div className="min-w-0">
+            <div className="flex min-w-0 flex-wrap items-center gap-2">
+              <CardTitle className="truncate text-lg">{app.name}</CardTitle>
+              <StatusBadge status={app.status} />
+            </div>
+            <CardDescription className="mt-1">{app.code} &middot; {app.target_type}/{app.target_id}</CardDescription>
+          </div>
+          <div className="flex flex-wrap items-center gap-2">
+            {app.status === "draft" && (
+              <Button size="sm" disabled={pending} onClick={() => void changeStatus("published")}>
+                {t("apps.publish")}
+              </Button>
+            )}
+            {app.status === "published" && (
+              <Button size="sm" variant="outline" disabled={pending} onClick={() => void changeStatus("disabled")}>
+                {t("apps.disable")}
+              </Button>
+            )}
+            {app.status === "disabled" && (
+              <Button size="sm" disabled={pending} onClick={() => void changeStatus("published")}>
+                {t("apps.republish")}
+              </Button>
+            )}
+          </div>
+        </div>
+      </CardHeader>
+      <CardContent className="min-h-0 flex-1 p-5">
+        <Tabs
+          value={tab}
+          onChange={(value) => setTab(value as DetailTab)}
+          tabs={[
+            {
+              value: "config",
+              label: t("apps.config"),
+              content: <AppConfigPanel app={app} onUpdated={onUpdated} />,
+            },
+            {
+              value: "apiKeys",
+              label: t("apps.apiKeys"),
+              content: <AppApiKeysPanel appId={app.id} />,
+            },
+            {
+              value: "audits",
+              label: t("apps.audits"),
+              content: <AppAuditsPanel appId={app.id} />,
+            },
+            {
+              value: "usage",
+              label: t("apps.apiUsage"),
+              content: <AppUsagePanel app={app} />,
+            },
+          ]}
+        />
+      </CardContent>
+    </>
+  );
+}
+
+/**
+ * "Config" tab: read-only field list for the app with an inline editor for
+ * its name and description.
+ *
+ * `onUpdated` receives the app returned by `updateApp` after a successful
+ * save.
+ */
+function AppConfigPanel({ app, onUpdated }: { app: AppDefinition; onUpdated: (app: AppDefinition) => void }) {
+  const { t } = useTranslation();
+  const [editing, setEditing] = React.useState(false);
+  const [name, setName] = React.useState(app.name);
+  const [description, setDescription] = React.useState(app.description ?? "");
+  const [saving, setSaving] = React.useState(false);
+
+  // This panel can stay mounted while a different app is passed in. Leave edit
+  // mode then, so a draft typed for the previous app can never be saved onto
+  // the newly selected one.
+  React.useEffect(() => {
+    setEditing(false);
+  }, [app.id]);
+
+  // Seed the drafts from the current app each time editing starts; useState
+  // initialisers only run on mount, so without this the form would show
+  // values from an earlier app or from a cancelled edit.
+  const startEditing = () => {
+    setName(app.name);
+    setDescription(app.description ?? "");
+    setEditing(true);
+  };
+
+  const handleSave = React.useCallback(async () => {
+    const trimmedName = name.trim();
+    if (!trimmedName) return;
+    setSaving(true);
+    try {
+      const updated = await updateApp(app.id, { name: trimmedName, description: description.trim() || null });
+      onUpdated(updated);
+      setEditing(false);
+      toast.success(t("apps.updateSuccess"));
+    } catch {
+      toast.error(t("errors.failedToUpdate"));
+    } finally {
+      setSaving(false);
+    }
+  }, [app.id, name, description, onUpdated, t]);
+
+  // Only rendered in read-only mode.
+  const fields = [
+    { label: t("common.name"), value: app.name },
+    { label: t("apps.code"), value: app.code },
+    { label: t("common.description"), value: app.description ?? "-" },
+    { label: t("apps.targetType"), value: app.target_type },
+    { label: t("apps.targetId"), value: app.target_id },
+    { label: t("common.status"), value: app.status },
+    { label: t("common.created"), value: new Date(app.created_time).toLocaleString() },
+    { label: t("common.updated"), value: new Date(app.updated_time).toLocaleString() },
+  ];
+
+  return (
+    <div className="space-y-4">
+      <div className="flex items-center justify-between">
+        <h3 className="text-sm font-semibold">{t("apps.basicConfig")}</h3>
+        {!editing && (
+          <Button size="sm" variant="outline" onClick={startEditing}>
+            <Pencil className="h-4 w-4" /> {t("common.edit")}
+          </Button>
+        )}
+      </div>
+
+      {editing ? (
+        <div className="space-y-3">
+          <div>
+            <label className="mb-1 block text-xs font-medium text-muted-foreground">{t("common.name")}</label>
+            <input
+              className="w-full rounded-md border border-border bg-transparent px-3 py-2 text-sm outline-none focus:ring-2 focus:ring-primary"
+              value={name}
+              onChange={(e) => setName(e.target.value)}
+            />
+          </div>
+          <div>
+            <label className="mb-1 block text-xs font-medium text-muted-foreground">{t("common.description")}</label>
+            <textarea
+              className="w-full rounded-md border border-border bg-transparent px-3 py-2 text-sm outline-none focus:ring-2 focus:ring-primary"
+              rows={3}
+              value={description}
+              onChange={(e) => setDescription(e.target.value)}
+            />
+          </div>
+          <div className="flex gap-2">
+            {/* NOTE(review): the in-progress label reuses "common.creating"; switch to a dedicated saving label if one exists. */}
+            <Button size="sm" disabled={saving || !name.trim()} onClick={() => void handleSave()}>
+              {saving ? t("common.creating") : t("common.save")}
+            </Button>
+            <Button size="sm" variant="outline" onClick={() => setEditing(false)}>
+              {t("common.cancel")}
+            </Button>
+          </div>
+        </div>
+      ) : (
+        <div className="grid gap-3 text-sm">
+          {fields.map((field) => (
+            <div key={field.label} className="grid grid-cols-[140px_minmax(0,1fr)] gap-2">
+              <span className="text-muted-foreground">{field.label}</span>
+              <span className="font-medium">{field.value}</span>
+            </div>
+          ))}
+        </div>
+      )}
+    </div>
+  );
+}
+
+/**
+ * "Usage" tab: shows the synchronous and streaming chat endpoint URLs of the
+ * app together with a ready-to-copy curl example for each.
+ */
+function AppUsagePanel({ app }: { app: AppDefinition }) {
+  const { t } = useTranslation();
+  const baseUrl = `${window.location.origin}/gateway/openapi/apps/${app.code}`;
+
+  // Each entry renders as one titled section: endpoint URL, then curl sample.
+  const endpoints = [
+    {
+      id: "sync",
+      title: t("apps.syncEndpoint"),
+      url: `${baseUrl}/chat`,
+      example: `curl -X POST "${baseUrl}/chat" \\
+  -H "Content-Type: application/json" \\
+  -H "Authorization: Bearer agp_YOUR_API_KEY" \\
+  -d '{
+    "message": "Hello",
+    "user_id": "user-001"
+  }'`,
+    },
+    {
+      id: "stream",
+      title: t("apps.streamEndpoint"),
+      url: `${baseUrl}/chat/stream`,
+      example: `curl -X POST "${baseUrl}/chat/stream" \\
+  -H "Content-Type: application/json" \\
+  -H "Accept: text/event-stream" \\
+  -H "Authorization: Bearer agp_YOUR_API_KEY" \\
+  -d '{
+    "message": "Hello",
+    "user_id": "user-001"
+  }'`,
+    },
+  ];
+
+  return (
+    <div className="space-y-6">
+      {endpoints.map((endpoint) => (
+        <div key={endpoint.id}>
+          <h3 className="mb-2 text-sm font-semibold">{endpoint.title}</h3>
+          <code className="block rounded-md bg-muted p-3 text-xs">{endpoint.url}</code>
+          <pre className="mt-2 max-h-60 overflow-auto rounded-md bg-muted p-3 text-xs">{endpoint.example}</pre>
+        </div>
+      ))}
+    </div>
+  );
+}

+ 174 - 0
web/src/pages/apps/components/CreateAppDialog.tsx

@@ -0,0 +1,174 @@
+import * as React from "react";
+import { useTranslation } from "react-i18next";
+import { listAgents } from "@/api/agents";
+import { listTeams } from "@/api";
+import { createApp, createAppApiKey } from "@/api/apps";
+import { Button } from "@/components/ui/button";
+import { Dialog } from "@/components/ui/dialog";
+import { Input } from "@/components/ui/input";
+import { Select } from "@/components/ui/select";
+import { toast } from "@/components/ui/toaster";
+import type { AgentDefinition, AppDefinition, AppTargetType, TeamDefinition } from "@/types";
+
+/**
+ * Derive a suggested app code from a display name: lower-case, every run of
+ * characters outside [a-z0-9_] collapsed to one underscore, at most 64
+ * characters, no leading or trailing underscore.
+ */
+function suggestAppCode(name: string): string {
+  return name
+    .toLowerCase()
+    .replace(/[^a-z0-9_]/g, "_")
+    .replace(/_+/g, "_")
+    .slice(0, 64)
+    .replace(/^_+|_+$/g, "");
+}
+
+/**
+ * Two-step dialog for creating an app.
+ *
+ * Step "form" collects name, code, description and the agent/team target,
+ * creates the app and then a "default" API key for it. Step "key" displays
+ * the `api_key` value from the key response so the user can copy it.
+ *
+ * `onCreated` is called with the new app as soon as the app exists, even if
+ * creating the default key failed.
+ */
+export function CreateAppDialog({
+  open,
+  onOpenChange,
+  onCreated,
+}: {
+  open: boolean;
+  onOpenChange: (open: boolean) => void;
+  onCreated: (app: AppDefinition) => void;
+}) {
+  const { t } = useTranslation();
+  const [step, setStep] = React.useState<"form" | "key">("form");
+  const [saving, setSaving] = React.useState(false);
+  const [name, setName] = React.useState("");
+  const [code, setCode] = React.useState("");
+  // True while the code field holds text typed by the user; the code then no
+  // longer follows the name.
+  const [codeEdited, setCodeEdited] = React.useState(false);
+  const [description, setDescription] = React.useState("");
+  const [targetType, setTargetType] = React.useState<AppTargetType>("agent");
+  const [targetId, setTargetId] = React.useState("");
+  const [agents, setAgents] = React.useState<AgentDefinition[]>([]);
+  const [teams, setTeams] = React.useState<TeamDefinition[]>([]);
+  const [apiKey, setApiKey] = React.useState("");
+
+  // Reset the form and fetch the selectable targets every time the dialog
+  // opens. A failed fetch degrades to an empty option list.
+  React.useEffect(() => {
+    if (!open) return;
+    let cancelled = false;
+    setStep("form");
+    setName("");
+    setCode("");
+    setCodeEdited(false);
+    setDescription("");
+    setTargetType("agent");
+    setTargetId("");
+    setApiKey("");
+    void Promise.all([
+      listAgents().catch(() => [] as AgentDefinition[]),
+      listTeams().catch(() => [] as TeamDefinition[]),
+    ]).then(([agentList, teamList]) => {
+      // Ignore the result if the dialog was closed or reopened meanwhile.
+      if (cancelled) return;
+      setAgents(agentList);
+      setTeams(teamList);
+    });
+    return () => {
+      cancelled = true;
+    };
+  }, [open]);
+
+  // Keep the suggested code in sync with the name until the user types a code
+  // of their own.
+  const handleNameChange = (value: string) => {
+    setName(value);
+    if (!codeEdited) setCode(suggestAppCode(value));
+  };
+
+  // Clearing the field hands control back to the name-derived suggestion.
+  const handleCodeChange = (value: string) => {
+    setCodeEdited(value !== "");
+    setCode(value);
+  };
+
+  const targets = targetType === "agent" ? agents : teams;
+
+  const handleSubmit = React.useCallback(async () => {
+    if (!name.trim() || !code.trim() || !targetId) return;
+    setSaving(true);
+    try {
+      const app = await createApp({
+        code: code.trim(),
+        name: name.trim(),
+        description: description.trim() || null,
+        target_type: targetType,
+        target_id: targetId,
+      });
+      try {
+        const keyResp = await createAppApiKey(app.id, {
+          name: "default",
+          scopes: "app:invoke app:stream",
+        });
+        setApiKey(keyResp.api_key);
+        setStep("key");
+      } catch (err) {
+        // The app already exists at this point. Report the key failure and
+        // close rather than leaving the form open, where a retry would try to
+        // create the same app a second time.
+        toast.error(err instanceof Error ? err.message : t("errors.failedToCreate"));
+        onOpenChange(false);
+      }
+      onCreated(app);
+    } catch (err) {
+      toast.error(err instanceof Error ? err.message : t("errors.failedToCreate"));
+    } finally {
+      setSaving(false);
+    }
+  }, [name, code, description, targetType, targetId, onCreated, onOpenChange, t]);
+
+  const handleCopyKey = React.useCallback(() => {
+    navigator.clipboard.writeText(apiKey).then(() => toast.success(t("apps.keyCopied")));
+  }, [apiKey, t]);
+
+  const handleClose = React.useCallback(() => {
+    onOpenChange(false);
+  }, [onOpenChange]);
+
+  return (
+    <Dialog open={open} onOpenChange={onOpenChange}>
+      <div className="w-[520px] space-y-5">
+        {step === "form" ? (
+          <>
+            <h2 className="text-lg font-semibold">{t("apps.createApp")}</h2>
+            <p className="text-sm text-muted-foreground">{t("apps.createAppDescription")}</p>
+            <div className="space-y-3">
+              <div>
+                <label className="mb-1 block text-xs font-medium text-muted-foreground">{t("common.name")} *</label>
+                <Input value={name} onChange={(e) => handleNameChange(e.target.value)} placeholder={t("apps.namePlaceholder")} />
+              </div>
+              <div>
+                <label className="mb-1 block text-xs font-medium text-muted-foreground">{t("apps.code")} *</label>
+                <Input value={code} onChange={(e) => handleCodeChange(e.target.value)} placeholder={t("apps.codePlaceholder")} />
+              </div>
+              <div>
+                <label className="mb-1 block text-xs font-medium text-muted-foreground">{t("common.description")}</label>
+                <textarea
+                  className="w-full rounded-md border border-border bg-transparent px-3 py-2 text-sm outline-none focus:ring-2 focus:ring-primary"
+                  rows={2}
+                  value={description}
+                  onChange={(e) => setDescription(e.target.value)}
+                />
+              </div>
+              <div>
+                <label className="mb-1 block text-xs font-medium text-muted-foreground">{t("apps.targetType")} *</label>
+                <Select
+                  value={targetType}
+                  onChange={(e) => { setTargetType(e.target.value as AppTargetType); setTargetId(""); }}
+                  options={[
+                    { value: "agent", label: "Agent" },
+                    { value: "team", label: "Team" },
+                  ]}
+                />
+              </div>
+              <div>
+                <label className="mb-1 block text-xs font-medium text-muted-foreground">{t("apps.targetId")} *</label>
+                <Select
+                  value={targetId}
+                  onChange={(e) => setTargetId(e.target.value)}
+                  options={[
+                    { value: "", label: t("apps.selectTarget") },
+                    ...targets.map((target) => ({ value: target.id, label: target.name })),
+                  ]}
+                />
+              </div>
+            </div>
+            <div className="flex justify-end gap-2">
+              <Button variant="outline" onClick={handleClose}>
+                {t("common.cancel")}
+              </Button>
+              <Button disabled={saving || !name.trim() || !code.trim() || !targetId} onClick={() => void handleSubmit()}>
+                {saving ? t("common.creating") : t("common.create")}
+              </Button>
+            </div>
+          </>
+        ) : (
+          <>
+            <h2 className="text-lg font-semibold">{t("apps.appCreated")}</h2>
+            <p className="text-sm text-muted-foreground">{t("apps.copyKeyNow")}</p>
+            <div className="rounded-md border border-amber-500/30 bg-amber-500/5 p-4">
+              <code className="block break-all text-sm">{apiKey}</code>
+            </div>
+            <div className="flex justify-end gap-2">
+              <Button variant="outline" onClick={handleCopyKey}>
+                {t("common.copy")}
+              </Button>
+              <Button onClick={handleClose}>
+                {t("common.close")}
+              </Button>
+            </div>
+          </>
+        )}
+      </div>
+    </Dialog>
+  );
+}

+ 2 - 2
web/src/pages/sessions/components/CreateSessionDialog.tsx

@@ -7,7 +7,7 @@ import { Input } from "@/components/ui/input";
 import { Select } from "@/components/ui/select";
 import { toast } from "@/components/ui/toaster";
 import { useAuthStore } from "@/stores/auth";
-import type { AgentConfig, AppResponse, Session, TeamConfig, TeamDefinition } from "@/types";
+import type { AgentConfig, SessionAppResponse, Session, TeamConfig, TeamDefinition } from "@/types";
 
 export function CreateSessionDialog({
   open,
@@ -17,7 +17,7 @@ export function CreateSessionDialog({
 }: {
   open: boolean;
   onOpenChange: (open: boolean) => void;
-  apps: AppResponse[];
+  apps: SessionAppResponse[];
   onCreated: (session: Session) => void;
 }) {
   const { t } = useTranslation();

+ 4 - 3
web/src/pages/teams/components/CreateTeamDialog.tsx

@@ -30,7 +30,7 @@ const DEFAULT_POLICY: PolicyDraft = {
 };
 
 function createDefaultMember(): MemberDraft {
-  return { role: "executor", agent_id: "", responsibility: "" };
+  return { role: "specialist", agent_id: "", responsibility: "" };
 }
 
 export function CreateTeamDialog({
@@ -211,9 +211,10 @@ function MemberEditor({ members, onChange, agents }: { members: MemberDraft[]; o
                     onChange={(event) => update(index, { role: event.target.value })}
                     options={[
                       { value: "supervisor", label: t("teams.supervisor") },
+                      { value: "planner", label: t("teams.planner") },
+                      { value: "specialist", label: t("teams.specialist") },
                       { value: "executor", label: t("teams.executor") },
                       { value: "reviewer", label: t("teams.reviewer") },
-                      { value: "planner", label: t("teams.planner") },
                     ]}
                   />
                 </Field>
@@ -520,7 +521,7 @@ function buildTeamConfig(
 function readMemberDrafts(activeConfig?: TeamConfig): MemberDraft[] {
   if (!activeConfig?.member_refs_json.length) return [createDefaultMember()];
   return activeConfig.member_refs_json.map((member) => ({
-    role: readString(member, "role") ?? "executor",
+    role: readString(member, "role") ?? "specialist",
     agent_id: readString(member, "agent_id") ?? readString(member, "agentId") ?? "",
     responsibility: readString(member, "responsibility") ?? readString(member, "description") ?? "",
   }));

+ 2 - 2
web/src/pages/teams/components/TeamRuns.tsx

@@ -1000,9 +1000,9 @@ const TEAM_VALUE_LABEL_KEYS: Record<string, string> = {
   executor: "executor",
   planner: "planner",
   reviewer: "reviewer",
-  specialist: "executor",
+  specialist: "specialist",
   supervisor: "supervisor",
-  worker: "executor",
+  worker: "specialist",
 };
 
 function wait(ms: number) {

+ 82 - 3
web/src/types/app.ts

@@ -1,13 +1,15 @@
 import type { JSONObject } from "./common";
 
-export interface AppCreateRequest {
+// ── Session-level app config (session-service) ─────────────────────────────
+
+export interface SessionAppCreateRequest {
   name: string;
   description?: string | null;
   owner_user_id?: string | null;
   settings_json?: JSONObject;
 }
 
-export interface AppResponse {
+export interface SessionAppResponse {
   id: string;
   name: string;
   description?: string | null;
@@ -16,9 +18,86 @@ export interface AppResponse {
   created_time: string;
 }
 
-export interface AppConfigResponse {
+export interface SessionAppConfigResponse {
   id: string;
   app_id: string;
   workflow_config_id: string;
   created_time: string;
 }
+
+// ── Application definitions (api-gateway app module) ────────────────────────
+
+export type AppStatus = "draft" | "published" | "disabled"; // Allowed values for an app definition's `status` field.
+export type AppTargetType = "agent" | "team"; // Allowed values for an app definition's `target_type` field.
+
+export interface AppDefinition { // An application definition record (api-gateway app module).
+  id: string;
+  code: string; // Required in AppCreateRequest; not part of AppUpdateRequest.
+  name: string;
+  description: string | null;
+  status: AppStatus;
+  target_type: AppTargetType; // Kind of entity `target_id` points at ("agent" or "team").
+  target_id: string;
+  owner_user_id: string | null;
+  settings_json: string | null; // NOTE(review): typed as a string, not JSONObject; presumably serialized JSON, confirm against the API.
+  created_time: string;
+  updated_time: string;
+}
+
+export interface AppCreateRequest { // Payload for creating an app definition; code, name, target_type and target_id are required.
+  code: string;
+  name: string;
+  description?: string | null;
+  target_type: AppTargetType;
+  target_id: string;
+  owner_user_id?: string | null;
+  settings_json?: string | null; // NOTE(review): string-typed; presumably serialized JSON, confirm against the API.
+}
+
+export interface AppUpdateRequest { // Payload for updating an app; only app_id is required, all other fields are optional and nullable.
+  app_id: string;
+  name?: string | null; // NOTE(review): whether null clears the value or leaves it unchanged is not visible here; confirm with the backend.
+  description?: string | null;
+  target_type?: AppTargetType | null;
+  target_id?: string | null;
+  settings_json?: string | null;
+}
+
+export interface AppStatusUpdateRequest { // Payload for changing the status of the app identified by app_id.
+  app_id: string;
+  status: AppStatus;
+}
+
+export interface AppApiKeyResponse { // API key metadata for an app; carries key_prefix but not the full key value.
+  id: string;
+  app_id: string;
+  name: string;
+  key_prefix: string;
+  status: string; // Plain string here; the set of allowed values is not declared in this file.
+  scopes: string | null; // NOTE(review): how multiple scopes are encoded in this string is not visible here.
+  expires_time: string | null;
+  last_used_time: string | null;
+  created_time: string;
+}
+
+export interface AppApiKeyCreateResponse extends AppApiKeyResponse { // AppApiKeyResponse plus the full api_key string.
+  api_key: string; // NOTE(review): presumably returned only when the key is created; confirm with the backend.
+}
+
+export interface AppInvocationAuditResponse { // Audit record describing a single app invocation.
+  id: string;
+  app_id: string;
+  api_key_prefix: string | null;
+  request_id: string;
+  session_id: string | null;
+  run_request_id: string | null;
+  target_type: string; // Plain string here, unlike AppDefinition.target_type which uses AppTargetType.
+  target_id: string;
+  invoke_type: string;
+  status: string; // Plain string; not tied to the AppStatus union.
+  duration_ms: number; // Presumably milliseconds, per the field name.
+  error_code: string | null;
+  error_message: string | null;
+  client_metadata_json: string | null; // NOTE(review): string-typed; presumably serialized JSON, confirm against the API.
+  created_time: string;
+}