Jax Docker 1 месяц назад
Родитель
Commit
8f7ae87a61
33 измененных файлов с 1262 добавлено и 1 удалено
  1. 41 0
      README.md
  2. 27 0
      deployments/docker/docker-compose.yml
  3. 16 0
      libs/core-domain/src/core_domain/__init__.py
  4. 64 0
      libs/core-domain/src/core_domain/knowledge_contracts.py
  5. 1 0
      pyproject.toml
  6. 27 0
      services/api-gateway/app/api/routes.py
  7. 1 0
      services/api-gateway/app/bootstrap/settings.py
  8. 6 1
      services/api-gateway/app/infrastructure/proxy.py
  9. 36 0
      services/knowledge-service/alembic.ini
  10. 38 0
      services/knowledge-service/alembic/env.py
  11. 1 0
      services/knowledge-service/alembic/versions/.gitkeep
  12. 147 0
      services/knowledge-service/alembic/versions/20260425_0001_init_knowledge_models.py
  13. 1 0
      services/knowledge-service/app/__init__.py
  14. 1 0
      services/knowledge-service/app/api/__init__.py
  15. 132 0
      services/knowledge-service/app/api/routes.py
  16. 1 0
      services/knowledge-service/app/application/__init__.py
  17. 101 0
      services/knowledge-service/app/application/retrieval.py
  18. 178 0
      services/knowledge-service/app/application/services.py
  19. 1 0
      services/knowledge-service/app/bootstrap/__init__.py
  20. 14 0
      services/knowledge-service/app/bootstrap/app.py
  21. 11 0
      services/knowledge-service/app/bootstrap/settings.py
  22. 1 0
      services/knowledge-service/app/db/__init__.py
  23. 7 0
      services/knowledge-service/app/db/models/__init__.py
  24. 16 0
      services/knowledge-service/app/db/models/knowledge_base.py
  25. 19 0
      services/knowledge-service/app/db/models/knowledge_chunk.py
  26. 22 0
      services/knowledge-service/app/db/models/knowledge_document.py
  27. 30 0
      services/knowledge-service/app/db/session.py
  28. 1 0
      services/knowledge-service/app/domain/__init__.py
  29. 220 0
      services/knowledge-service/app/domain/repositories.py
  30. 3 0
      services/knowledge-service/app/main.py
  31. 1 0
      services/knowledge-service/app/schemas/__init__.py
  32. 72 0
      services/knowledge-service/app/schemas/knowledge.py
  33. 25 0
      services/knowledge-service/pyproject.toml

+ 41 - 0
README.md

@@ -21,6 +21,7 @@
 - `team-service`
 - `skill-service`
 - `human-service`
+- `knowledge-service`
 - `tool-service`
 
 每个服务都提供了最小 `FastAPI` 启动入口和健康检查接口,数据库相关服务也已经带上了 `SQLAlchemy` 模型骨架与 Alembic 目录。
@@ -55,6 +56,7 @@ pip install -e .\services\memory-service
 pip install -e .\services\team-service
 pip install -e .\services\skill-service
 pip install -e .\services\human-service
+pip install -e .\services\knowledge-service
 pip install -e .\services\tool-service
 ```
 
@@ -200,6 +202,7 @@ services/
   runtime-service/
   skill-service/
   human-service/
+  knowledge-service/
   tool-service/
 libs/
   core-domain/
@@ -393,6 +396,42 @@ Invoke-RestMethod -Method Post `
 
 Through `api-gateway`, use `/gateway/human/**`.
 
+## Knowledge Service APIs
+
+`knowledge-service` stores independent knowledge bases, documents, chunks, and
+retrieval results. The first version uses deterministic local hash embeddings plus
+keyword scoring, so it works without external API keys. Later, pgvector and
+provider-backed embeddings can be added behind the same search contract.
+
+Create a knowledge base:
+
+```powershell
+Invoke-RestMethod -Method Post `
+  -Uri http://127.0.0.1:8012/knowledge/bases `
+  -ContentType "application/json" `
+  -Body '{"tenant_id":"t1","code":"support_kb","name":"Support Knowledge Base"}'
+```
+
+Create and index a document:
+
+```powershell
+Invoke-RestMethod -Method Post `
+  -Uri http://127.0.0.1:8012/knowledge/documents `
+  -ContentType "application/json" `
+  -Body '{"tenant_id":"t1","knowledge_base_id":"kb-id","title":"Refund Policy","content_text":"Refunds are available within seven days for eligible orders.","source_type":"text"}'
+```
+
+Search the knowledge base:
+
+```powershell
+Invoke-RestMethod -Method Post `
+  -Uri http://127.0.0.1:8012/knowledge/search `
+  -ContentType "application/json" `
+  -Body '{"tenant_id":"t1","knowledge_base_id":"kb-id","query":"refund within seven days","top_k":3}'
+```
+
+Through `api-gateway`, use `/gateway/knowledge/**`.
+
 Execute an agent run without calling an external model:
 
 ```powershell
@@ -745,6 +784,7 @@ $env:AGENT_PLATFORM_SMOKE_RUNTIME_URL="http://127.0.0.1:8000/gateway/runtime"
 - `/gateway/teams/**` -> `team-service /teams/**`
 - `/gateway/skills/**` -> `skill-service /skills/**`
 - `/gateway/human/**` -> `human-service /human/**`
+- `/gateway/knowledge/**` -> `knowledge-service /knowledge/**`
 - `/gateway/tools/**` -> `tool-service /tools/**`
 - `/gateway/models/**` -> `model-gateway-service /models/**`
 - `/gateway/code/**` -> `code-runner-service /code/**`
@@ -981,6 +1021,7 @@ Important notes:
 - `team-service` stores multi-agent team definitions, team versions, and team run records under `/data`
 - `skill-service` stores skill definitions, versions, marketplace-style installations, and skill execution runs under `/data`
 - `human-service` stores human approval, input, pause/resume, and takeover task records under `/data`
+- `knowledge-service` stores knowledge bases, documents, chunks, and local retrieval metadata under `/data`
 - `agent-worker` has no exposed port and can be scaled independently; set `AGENT_PLATFORM_AGENT_WORKER_DRY_RUN=true` for no-key local smoke runs
 - `runtime-worker` has no exposed port and can be scaled independently; prefer PostgreSQL for real multi-worker write concurrency
 - `runtime-service` automatically resolves internal URLs to `workflow-service`, `tool-service`, `model-gateway-service`, and `code-runner-service`

+ 27 - 0
deployments/docker/docker-compose.yml

@@ -243,6 +243,26 @@ services:
       timeout: 5s
       retries: 5
 
+  knowledge-service:
+    build:
+      context: ../..
+      dockerfile: deployments/docker/python-service.Dockerfile
+      args:
+        SERVICE_PATH: services/knowledge-service
+    container_name: agent-platform-knowledge-service
+    command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8012"]
+    environment:
+      AGENT_PLATFORM_DATABASE_URL: sqlite:////data/knowledge_service.db
+    ports:
+      - "8012:8012"
+    volumes:
+      - knowledge_service_data:/data
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8012/knowledge/health').read()"]
+      interval: 15s
+      timeout: 5s
+      retries: 5
+
   runtime-service:
     build:
       context: ../..
@@ -262,6 +282,7 @@ services:
       AGENT_PLATFORM_TEAM_SERVICE_URL: http://team-service:8009
       AGENT_PLATFORM_SKILL_SERVICE_URL: http://skill-service:8010
       AGENT_PLATFORM_HUMAN_SERVICE_URL: http://human-service:8011
+      AGENT_PLATFORM_KNOWLEDGE_SERVICE_URL: http://knowledge-service:8012
     ports:
       - "8003:8003"
     volumes:
@@ -285,6 +306,8 @@ services:
         condition: service_started
       human-service:
         condition: service_started
+      knowledge-service:
+        condition: service_started
     healthcheck:
       test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8003/runtime/health').read()"]
       interval: 15s
@@ -339,6 +362,7 @@ services:
       AGENT_PLATFORM_TEAM_SERVICE_URL: http://team-service:8009
       AGENT_PLATFORM_SKILL_SERVICE_URL: http://skill-service:8010
       AGENT_PLATFORM_HUMAN_SERVICE_URL: http://human-service:8011
+      AGENT_PLATFORM_KNOWLEDGE_SERVICE_URL: http://knowledge-service:8012
       AGENT_PLATFORM_AUTH_REQUIRED: ${AGENT_PLATFORM_AUTH_REQUIRED:-false}
     ports:
       - "8000:8000"
@@ -367,6 +391,8 @@ services:
         condition: service_started
       human-service:
         condition: service_started
+      knowledge-service:
+        condition: service_started
     healthcheck:
       test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health').read()"]
       interval: 15s
@@ -380,6 +406,7 @@ volumes:
   team_service_data:
   skill_service_data:
   human_service_data:
+  knowledge_service_data:
   workflow_service_data:
   session_service_data:
   runtime_service_data:

+ 16 - 0
libs/core-domain/src/core_domain/__init__.py

@@ -22,6 +22,15 @@ from .human_contracts import (
     HumanTaskStatus,
     HumanTaskType,
 )
+from .knowledge_contracts import (
+    KnowledgeBaseContract,
+    KnowledgeBaseStatus,
+    KnowledgeChunkContract,
+    KnowledgeDocumentContract,
+    KnowledgeDocumentStatus,
+    KnowledgeSearchRequestContract,
+    KnowledgeSearchResultContract,
+)
 from .model_contracts import (
     ChatCompletionRequestContract,
     ChatCompletionResponseContract,
@@ -95,6 +104,13 @@ __all__ = [
     "HumanTaskStatus",
     "HumanTaskType",
     "InitialNodeContract",
+    "KnowledgeBaseContract",
+    "KnowledgeBaseStatus",
+    "KnowledgeChunkContract",
+    "KnowledgeDocumentContract",
+    "KnowledgeDocumentStatus",
+    "KnowledgeSearchRequestContract",
+    "KnowledgeSearchResultContract",
     "MemoryCreateContract",
     "MemoryItemContract",
     "MemoryScopeType",

+ 64 - 0
libs/core-domain/src/core_domain/knowledge_contracts.py

@@ -0,0 +1,64 @@
+from datetime import datetime
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+from core_shared import JSONValue
+
+
+# Allowed status values for a knowledge base.
+KnowledgeBaseStatus = Literal["active", "archived"]
+# Allowed status values for a knowledge document.
+KnowledgeDocumentStatus = Literal["draft", "indexed", "failed", "archived"]
+
+
+class KnowledgeBaseContract(BaseModel):
+    """Shared contract describing a single knowledge base record."""
+
+    id: str
+    tenant_id: str
+    code: str
+    name: str
+    description: str | None = None
+    # Restricted to the values listed in KnowledgeBaseStatus.
+    status: KnowledgeBaseStatus
+    # Optional free-form JSON object attached to the knowledge base.
+    metadata_json: dict[str, JSONValue] | None = None
+    created_time: datetime
+
+
+class KnowledgeDocumentContract(BaseModel):
+    """Shared contract describing a document that belongs to a knowledge base."""
+
+    id: str
+    tenant_id: str
+    knowledge_base_id: str
+    title: str
+    source_type: str
+    source_uri: str | None = None
+    # Restricted to the values listed in KnowledgeDocumentStatus.
+    status: KnowledgeDocumentStatus
+    content_hash: str | None = None
+    # Optional free-form JSON object attached to the document.
+    metadata_json: dict[str, JSONValue] | None = None
+    # None when no indexing timestamp has been recorded.
+    indexed_time: datetime | None = None
+    created_time: datetime
+
+
+class KnowledgeChunkContract(BaseModel):
+    """Shared contract describing one chunk of a knowledge document."""
+
+    id: str
+    tenant_id: str
+    knowledge_base_id: str
+    document_id: str
+    chunk_index: int
+    content_text: str
+    token_count: int
+    embedding_model: str | None = None
+    # Embedding vector as a plain list of floats; None when absent.
+    embedding_json: list[float] | None = None
+    # Optional free-form JSON object attached to the chunk.
+    metadata_json: dict[str, JSONValue] | None = None
+    created_time: datetime
+
+
+class KnowledgeSearchRequestContract(BaseModel):
+    """Shared contract for a search request against one knowledge base."""
+
+    tenant_id: str
+    knowledge_base_id: str
+    query: str
+    # Defaults to 5; no bounds are enforced by this contract.
+    top_k: int = 5
+    # Defaults to a fresh empty dict per instance.
+    filters_json: dict[str, JSONValue] = Field(default_factory=dict)
+
+
+class KnowledgeSearchResultContract(BaseModel):
+    """Shared contract for one search hit: a chunk, its document, and a score."""
+
+    chunk: KnowledgeChunkContract
+    document: KnowledgeDocumentContract
+    score: float
+    # Additional JSON score details; defaults to a fresh empty dict per instance.
+    score_json: dict[str, JSONValue] = Field(default_factory=dict)

+ 1 - 0
pyproject.toml

@@ -9,6 +9,7 @@ members = [
   "services/agent-service",
   "services/code-runner-service",
   "services/human-service",
+  "services/knowledge-service",
   "services/memory-service",
   "services/model-gateway-service",
   "services/session-service",

+ 27 - 0
services/api-gateway/app/api/routes.py

@@ -181,6 +181,12 @@ def build_proxy_targets(settings: ApiGatewaySettings) -> dict[ProxyServiceName,
             path_prefix="/human",
             health_path="/human/health",
         ),
+        "knowledge-service": ProxyTarget(
+            service_name="knowledge-service",
+            base_url=settings.knowledge_service_url,
+            path_prefix="/knowledge",
+            health_path="/knowledge/health",
+        ),
     }
 
 
@@ -368,6 +374,27 @@ async def proxy_human_service(
     )
 
 
+@router.api_route(
+    "/gateway/knowledge",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+@router.api_route(
+    "/gateway/knowledge/{path:path}",
+    methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
+)
+async def proxy_knowledge_service(
+    request: Request,
+    path: str = "",
+    settings: ApiGatewaySettings = Depends(get_gateway_settings),
+    proxy: ServiceProxy = Depends(get_service_proxy),
+) -> Response:
+    return await proxy.forward(
+        request=request,
+        target=build_proxy_targets(settings)["knowledge-service"],
+        path=path,
+    )
+
+
 @router.api_route(
     "/gateway/tools",
     methods=["GET", "POST", "PUT", "PATCH", "DELETE"],

+ 1 - 0
services/api-gateway/app/bootstrap/settings.py

@@ -16,6 +16,7 @@ class ApiGatewaySettings(ServiceSettings):
     team_service_url: str = "http://127.0.0.1:8009"
     skill_service_url: str = "http://127.0.0.1:8010"
     human_service_url: str = "http://127.0.0.1:8011"
+    knowledge_service_url: str = "http://127.0.0.1:8012"
     proxy_timeout_seconds: float = 30.0
     downstream_health_timeout_seconds: float = 2.0
     auth_required: bool = False

+ 6 - 1
services/api-gateway/app/infrastructure/proxy.py

@@ -5,7 +5,11 @@ import httpx
 from fastapi import Request, Response
 
 from app.infrastructure.audit import mark_gateway_target
-from app.infrastructure.request_context import REQUEST_ID_HEADER, TENANT_ID_HEADER, get_gateway_request_context
+from app.infrastructure.request_context import (
+    REQUEST_ID_HEADER,
+    TENANT_ID_HEADER,
+    get_gateway_request_context,
+)
 from app.schemas.gateway import DownstreamServiceHealth
 
 ProxyServiceName = Literal[
@@ -20,6 +24,7 @@ ProxyServiceName = Literal[
     "team-service",
     "skill-service",
     "human-service",
+    "knowledge-service",
 ]
 
 

+ 36 - 0
services/knowledge-service/alembic.ini

@@ -0,0 +1,36 @@
+[alembic]
+script_location = alembic
+prepend_sys_path = .
+sqlalchemy.url = sqlite:///./knowledge_service.db
+
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers = console
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s

+ 38 - 0
services/knowledge-service/alembic/env.py

@@ -0,0 +1,38 @@
+from logging.config import fileConfig
+
+from alembic import context
+from sqlalchemy import engine_from_config, pool
+
+from app.db.models import Base
+
+# Alembic configuration object for the current invocation.
+config = context.config
+
+# Configure Python logging from the config file when one was supplied.
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# Model metadata passed to context.configure() by both migration modes below.
+target_metadata = Base.metadata
+
+
+def run_migrations_offline() -> None:
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(url=url, target_metadata=target_metadata, literal_binds=True)
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    connectable = engine_from_config(
+        config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+    with connectable.connect() as connection:
+        context.configure(connection=connection, target_metadata=target_metadata)
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+# Dispatch on the mode Alembic reports for this invocation.
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()

+ 1 - 0
services/knowledge-service/alembic/versions/.gitkeep

@@ -0,0 +1 @@
+

+ 147 - 0
services/knowledge-service/alembic/versions/20260425_0001_init_knowledge_models.py

@@ -0,0 +1,147 @@
+"""init knowledge models
+
+Revision ID: 20260425_0001
+Revises:
+Create Date: 2026-04-25 17:10:00
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+import sqlalchemy as sa
+
+
+revision: str = "20260425_0001"
+down_revision: str | None = None
+branch_labels: Sequence[str] | None = None
+depends_on: Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Create the knowledge_base, knowledge_document and knowledge_chunk tables.
+
+    Each table gets a string primary key ``id`` plus non-unique lookup indexes.
+    No foreign-key constraints are declared: ``knowledge_base_id`` and
+    ``document_id`` are plain indexed string columns.
+    """
+    # --- knowledge_base -------------------------------------------------
+    op.create_table(
+        "knowledge_base",
+        sa.Column("code", sa.String(length=64), nullable=False),
+        sa.Column("name", sa.String(length=128), nullable=False),
+        sa.Column("description", sa.Text(), nullable=True),
+        sa.Column("status", sa.String(length=32), nullable=False),
+        sa.Column("metadata_json", sa.JSON(), nullable=True),
+        sa.Column("id", sa.String(length=36), nullable=False),
+        sa.Column("tenant_id", sa.String(length=36), nullable=False),
+        sa.Column("created_by", sa.String(length=36), nullable=True),
+        sa.Column("updated_by", sa.String(length=36), nullable=True),
+        sa.Column("created_time", sa.DateTime(), nullable=False),
+        sa.Column("updated_time", sa.DateTime(), nullable=False),
+        sa.Column("deleted_time", sa.DateTime(), nullable=True),
+        sa.Column("version", sa.Integer(), nullable=False),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    # NOTE: the index on ``code`` is non-unique, so this schema does not
+    # enforce code uniqueness at the database level.
+    op.create_index("ix_knowledge_base_code", "knowledge_base", ["code"], unique=False)
+    op.create_index("ix_knowledge_base_status", "knowledge_base", ["status"], unique=False)
+    op.create_index("ix_knowledge_base_tenant_id", "knowledge_base", ["tenant_id"], unique=False)
+
+    # --- knowledge_document ---------------------------------------------
+    op.create_table(
+        "knowledge_document",
+        sa.Column("knowledge_base_id", sa.String(length=36), nullable=False),
+        sa.Column("title", sa.String(length=256), nullable=False),
+        sa.Column("source_type", sa.String(length=32), nullable=False),
+        sa.Column("source_uri", sa.String(length=512), nullable=True),
+        sa.Column("status", sa.String(length=32), nullable=False),
+        sa.Column("content_text", sa.Text(), nullable=False),
+        sa.Column("content_hash", sa.String(length=64), nullable=True),
+        sa.Column("metadata_json", sa.JSON(), nullable=True),
+        sa.Column("indexed_time", sa.DateTime(), nullable=True),
+        sa.Column("id", sa.String(length=36), nullable=False),
+        sa.Column("tenant_id", sa.String(length=36), nullable=False),
+        sa.Column("created_by", sa.String(length=36), nullable=True),
+        sa.Column("updated_by", sa.String(length=36), nullable=True),
+        sa.Column("created_time", sa.DateTime(), nullable=False),
+        sa.Column("updated_time", sa.DateTime(), nullable=False),
+        sa.Column("deleted_time", sa.DateTime(), nullable=True),
+        sa.Column("version", sa.Integer(), nullable=False),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        "ix_knowledge_document_knowledge_base_id",
+        "knowledge_document",
+        ["knowledge_base_id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_knowledge_document_source_type",
+        "knowledge_document",
+        ["source_type"],
+        unique=False,
+    )
+    op.create_index("ix_knowledge_document_status", "knowledge_document", ["status"], unique=False)
+    op.create_index(
+        "ix_knowledge_document_content_hash",
+        "knowledge_document",
+        ["content_hash"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_knowledge_document_tenant_id",
+        "knowledge_document",
+        ["tenant_id"],
+        unique=False,
+    )
+
+    # --- knowledge_chunk ------------------------------------------------
+    op.create_table(
+        "knowledge_chunk",
+        sa.Column("knowledge_base_id", sa.String(length=36), nullable=False),
+        sa.Column("document_id", sa.String(length=36), nullable=False),
+        sa.Column("chunk_index", sa.Integer(), nullable=False),
+        sa.Column("content_text", sa.Text(), nullable=False),
+        sa.Column("token_count", sa.Integer(), nullable=False),
+        sa.Column("embedding_model", sa.String(length=64), nullable=True),
+        sa.Column("embedding_json", sa.JSON(), nullable=True),
+        sa.Column("metadata_json", sa.JSON(), nullable=True),
+        sa.Column("id", sa.String(length=36), nullable=False),
+        sa.Column("tenant_id", sa.String(length=36), nullable=False),
+        sa.Column("created_by", sa.String(length=36), nullable=True),
+        sa.Column("updated_by", sa.String(length=36), nullable=True),
+        sa.Column("created_time", sa.DateTime(), nullable=False),
+        sa.Column("updated_time", sa.DateTime(), nullable=False),
+        sa.Column("deleted_time", sa.DateTime(), nullable=True),
+        sa.Column("version", sa.Integer(), nullable=False),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        "ix_knowledge_chunk_knowledge_base_id",
+        "knowledge_chunk",
+        ["knowledge_base_id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_knowledge_chunk_document_id",
+        "knowledge_chunk",
+        ["document_id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_knowledge_chunk_embedding_model",
+        "knowledge_chunk",
+        ["embedding_model"],
+        unique=False,
+    )
+    op.create_index("ix_knowledge_chunk_tenant_id", "knowledge_chunk", ["tenant_id"], unique=False)
+
+
+def downgrade() -> None:
+    """Drop the three knowledge tables and their indexes.
+
+    Tables are removed in the reverse of the order ``upgrade`` creates them
+    (chunk, document, base); each table's indexes are dropped first.
+    """
+    op.drop_index("ix_knowledge_chunk_tenant_id", table_name="knowledge_chunk")
+    op.drop_index("ix_knowledge_chunk_embedding_model", table_name="knowledge_chunk")
+    op.drop_index("ix_knowledge_chunk_document_id", table_name="knowledge_chunk")
+    op.drop_index("ix_knowledge_chunk_knowledge_base_id", table_name="knowledge_chunk")
+    op.drop_table("knowledge_chunk")
+
+    op.drop_index("ix_knowledge_document_tenant_id", table_name="knowledge_document")
+    op.drop_index("ix_knowledge_document_content_hash", table_name="knowledge_document")
+    op.drop_index("ix_knowledge_document_status", table_name="knowledge_document")
+    op.drop_index("ix_knowledge_document_source_type", table_name="knowledge_document")
+    op.drop_index("ix_knowledge_document_knowledge_base_id", table_name="knowledge_document")
+    op.drop_table("knowledge_document")
+
+    op.drop_index("ix_knowledge_base_tenant_id", table_name="knowledge_base")
+    op.drop_index("ix_knowledge_base_status", table_name="knowledge_base")
+    op.drop_index("ix_knowledge_base_code", table_name="knowledge_base")
+    op.drop_table("knowledge_base")

+ 1 - 0
services/knowledge-service/app/__init__.py

@@ -0,0 +1 @@
+"""Knowledge service package."""

+ 1 - 0
services/knowledge-service/app/api/__init__.py

@@ -0,0 +1 @@
+"""API package."""

+ 132 - 0
services/knowledge-service/app/api/routes.py

@@ -0,0 +1,132 @@
+from fastapi import APIRouter, Depends, HTTPException, Query
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+
+from core_domain import ServiceHealth
+
+from app.application.services import KnowledgeApplicationService
+from app.bootstrap.settings import KnowledgeServiceSettings
+from app.db.session import get_db
+from app.domain.repositories import (
+    KnowledgeBaseRepository,
+    KnowledgeChunkRepository,
+    KnowledgeDocumentRepository,
+)
+from app.schemas.knowledge import (
+    KnowledgeBaseCreateRequest,
+    KnowledgeBaseResponse,
+    KnowledgeBaseStatusUpdateRequest,
+    KnowledgeChunkResponse,
+    KnowledgeDocumentCreateRequest,
+    KnowledgeDocumentIngestResponse,
+    KnowledgeDocumentResponse,
+    KnowledgeSearchRequest,
+    KnowledgeSearchResultResponse,
+)
+
+router = APIRouter()
+
+
+def get_knowledge_settings() -> KnowledgeServiceSettings:
+    return KnowledgeServiceSettings()
+
+
+def get_knowledge_application_service(
+    db: Session = Depends(get_db),
+    settings: KnowledgeServiceSettings = Depends(get_knowledge_settings),
+) -> KnowledgeApplicationService:
+    return KnowledgeApplicationService(
+        settings=settings,
+        base_repository=KnowledgeBaseRepository(db),
+        document_repository=KnowledgeDocumentRepository(db),
+        chunk_repository=KnowledgeChunkRepository(db),
+    )
+
+
+@router.get("/health", response_model=ServiceHealth)
+def health_check(db: Session = Depends(get_db)) -> ServiceHealth:
+    db.execute(text("SELECT 1"))
+    return ServiceHealth(service="knowledge-service", status="ok", database="ok")
+
+
+@router.post("/bases", response_model=KnowledgeBaseResponse)
+def create_base(
+    payload: KnowledgeBaseCreateRequest,
+    service: KnowledgeApplicationService = Depends(get_knowledge_application_service),
+) -> KnowledgeBaseResponse:
+    return KnowledgeBaseResponse.from_entity(service.create_base(payload))
+
+
+@router.get("/bases", response_model=list[KnowledgeBaseResponse])
+def list_bases(
+    tenant_id: str = Query(...),
+    service: KnowledgeApplicationService = Depends(get_knowledge_application_service),
+) -> list[KnowledgeBaseResponse]:
+    return [
+        KnowledgeBaseResponse.from_entity(item)
+        for item in service.list_bases(tenant_id=tenant_id)
+    ]
+
+
+@router.patch("/bases/{knowledge_base_id}/status", response_model=KnowledgeBaseResponse)
+def update_base_status(
+    knowledge_base_id: str,
+    payload: KnowledgeBaseStatusUpdateRequest,
+    service: KnowledgeApplicationService = Depends(get_knowledge_application_service),
+) -> KnowledgeBaseResponse:
+    entity = service.update_base_status(
+        knowledge_base_id=knowledge_base_id,
+        payload=payload,
+    )
+    if entity is None:
+        raise HTTPException(
+            status_code=404,
+            detail=f"knowledge base not found: {knowledge_base_id}",
+        )
+    return KnowledgeBaseResponse.from_entity(entity)
+
+
+@router.post("/documents", response_model=KnowledgeDocumentIngestResponse)
+def create_document(
+    payload: KnowledgeDocumentCreateRequest,
+    service: KnowledgeApplicationService = Depends(get_knowledge_application_service),
+) -> KnowledgeDocumentIngestResponse:
+    try:
+        document, chunks = service.create_document(payload)
+    except ValueError as exc:
+        raise HTTPException(status_code=422, detail=str(exc)) from exc
+    return KnowledgeDocumentIngestResponse(
+        document=KnowledgeDocumentResponse.from_entity(document),
+        chunks=[KnowledgeChunkResponse.from_entity(item) for item in chunks],
+    )
+
+
+@router.get("/documents", response_model=list[KnowledgeDocumentResponse])
+def list_documents(
+    tenant_id: str = Query(...),
+    knowledge_base_id: str = Query(...),
+    service: KnowledgeApplicationService = Depends(get_knowledge_application_service),
+) -> list[KnowledgeDocumentResponse]:
+    return [
+        KnowledgeDocumentResponse.from_entity(item)
+        for item in service.list_documents(
+            tenant_id=tenant_id,
+            knowledge_base_id=knowledge_base_id,
+        )
+    ]
+
+
+@router.post("/search", response_model=list[KnowledgeSearchResultResponse])
+def search(
+    payload: KnowledgeSearchRequest,
+    service: KnowledgeApplicationService = Depends(get_knowledge_application_service),
+) -> list[KnowledgeSearchResultResponse]:
+    return [
+        KnowledgeSearchResultResponse(
+            chunk=KnowledgeChunkResponse.from_entity(chunk),
+            document=KnowledgeDocumentResponse.from_entity(document),
+            score=score,
+            score_json=score_json,
+        )
+        for chunk, document, score, score_json in service.search(payload)
+    ]

+ 1 - 0
services/knowledge-service/app/application/__init__.py

@@ -0,0 +1 @@
+"""Application package."""

+ 101 - 0
services/knowledge-service/app/application/retrieval.py

@@ -0,0 +1,101 @@
+import hashlib
+import math
+import re
+from collections import Counter
+
+from core_shared import JSONValue
+
+# Matches maximal runs of word characters or code points in U+4E00..U+9FFF.
+TOKEN_PATTERN = re.compile(r"[\w\u4e00-\u9fff]+", re.UNICODE)
+
+
+def split_text(text: str, *, chunk_size: int, chunk_overlap: int) -> list[str]:
+    normalized_text = text.strip()
+    if not normalized_text:
+        return []
+    safe_overlap = min(chunk_overlap, max(chunk_size - 1, 0))
+    chunks: list[str] = []
+    start = 0
+    while start < len(normalized_text):
+        end = min(start + chunk_size, len(normalized_text))
+        chunks.append(normalized_text[start:end])
+        if end == len(normalized_text):
+            break
+        start = end - safe_overlap
+    return chunks
+
+
+def tokenize(text: str) -> list[str]:
+    return [item.lower() for item in TOKEN_PATTERN.findall(text)]
+
+
+def build_hash_embedding(text: str, *, dimensions: int) -> list[float]:
+    vector = [0.0 for _ in range(dimensions)]
+    tokens = tokenize(text)
+    if not tokens:
+        return vector
+    for token in tokens:
+        digest = hashlib.sha256(token.encode("utf-8")).digest()
+        index = int.from_bytes(digest[:4], "big") % dimensions
+        sign = 1.0 if digest[4] % 2 == 0 else -1.0
+        vector[index] += sign
+    norm = math.sqrt(sum(item * item for item in vector))
+    if norm == 0:
+        return vector
+    return [round(item / norm, 6) for item in vector]
+
+
+def cosine_similarity(left: list[float] | None, right: list[float] | None) -> float:
+    if not left or not right or len(left) != len(right):
+        return 0.0
+    left_norm = math.sqrt(sum(item * item for item in left))
+    right_norm = math.sqrt(sum(item * item for item in right))
+    if left_norm == 0 or right_norm == 0:
+        return 0.0
+    return sum(a * b for a, b in zip(left, right, strict=True)) / (left_norm * right_norm)
+
+
+def keyword_score(query: str, text: str) -> float:
+    query_tokens = tokenize(query)
+    if not query_tokens:
+        return 0.0
+    text_counts = Counter(tokenize(text))
+    if not text_counts:
+        return 0.0
+    matched = sum(1 for token in query_tokens if token in text_counts)
+    frequency = sum(text_counts.get(token, 0) for token in query_tokens)
+    return matched / len(set(query_tokens)) + min(frequency / 20.0, 1.0)
+
+
+def stable_content_hash(text: str) -> str:
+    return hashlib.sha256(text.encode("utf-8")).hexdigest()
+
+
+def build_chunk_payloads(
+    *,
+    content_text: str,
+    chunk_size: int,
+    chunk_overlap: int,
+    embedding_dimensions: int,
+    embedding_model: str,
+) -> list[dict[str, JSONValue]]:
+    chunks = split_text(
+        content_text,
+        chunk_size=chunk_size,
+        chunk_overlap=chunk_overlap,
+    )
+    payloads: list[dict[str, JSONValue]] = []
+    for index, chunk_text in enumerate(chunks):
+        payloads.append(
+            {
+                "chunk_index": index,
+                "content_text": chunk_text,
+                "token_count": len(tokenize(chunk_text)),
+                "embedding_model": embedding_model,
+                "embedding_json": build_hash_embedding(
+                    chunk_text,
+                    dimensions=embedding_dimensions,
+                ),
+                "metadata_json": {},
+            }
+        )
+    return payloads

+ 178 - 0
services/knowledge-service/app/application/services.py

@@ -0,0 +1,178 @@
+from core_shared import JSONValue
+
+from app.application.retrieval import (
+    build_chunk_payloads,
+    build_hash_embedding,
+    cosine_similarity,
+    keyword_score,
+    stable_content_hash,
+)
+from app.bootstrap.settings import KnowledgeServiceSettings
+from app.db.models import KnowledgeBase, KnowledgeChunk, KnowledgeDocument
+from app.domain.repositories import (
+    KnowledgeBaseRepository,
+    KnowledgeChunkRepository,
+    KnowledgeDocumentRepository,
+)
+from app.schemas.knowledge import (
+    KnowledgeBaseCreateRequest,
+    KnowledgeBaseStatusUpdateRequest,
+    KnowledgeDocumentCreateRequest,
+    KnowledgeSearchRequest,
+)
+
+
+class KnowledgeApplicationService:
+    """Application-level use cases for knowledge bases, documents, and search.
+
+    Coordinates the three repositories and the local retrieval helpers; the
+    only state it holds is the injected settings and repositories.
+    """
+
+    def __init__(
+        self,
+        *,
+        settings: KnowledgeServiceSettings,
+        base_repository: KnowledgeBaseRepository,
+        document_repository: KnowledgeDocumentRepository,
+        chunk_repository: KnowledgeChunkRepository,
+    ) -> None:
+        self.settings = settings
+        self.base_repository = base_repository
+        self.document_repository = document_repository
+        self.chunk_repository = chunk_repository
+
+    def create_base(self, payload: KnowledgeBaseCreateRequest) -> KnowledgeBase:
+        """Create a knowledge base from the request fields and return it."""
+        return self.base_repository.create(
+            tenant_id=payload.tenant_id,
+            code=payload.code,
+            name=payload.name,
+            description=payload.description,
+            metadata_json=payload.metadata_json,
+        )
+
+    def list_bases(self, *, tenant_id: str) -> list[KnowledgeBase]:
+        """Return the knowledge bases that belong to ``tenant_id``."""
+        return self.base_repository.list_by_tenant(tenant_id=tenant_id)
+
+    def update_base_status(
+        self,
+        *,
+        knowledge_base_id: str,
+        payload: KnowledgeBaseStatusUpdateRequest,
+    ) -> KnowledgeBase | None:
+        """Set a knowledge base's status; returns ``None`` if it is not found."""
+        return self.base_repository.update_status(
+            tenant_id=payload.tenant_id,
+            knowledge_base_id=knowledge_base_id,
+            status=payload.status,
+        )
+
+    def create_document(
+        self,
+        payload: KnowledgeDocumentCreateRequest,
+    ) -> tuple[KnowledgeDocument, list[KnowledgeChunk]]:
+        """Store a document, chunk and embed it, then mark it as indexed.
+
+        Returns the document together with the chunks created for it. If the
+        status update does not find the document, the originally created
+        entity is returned instead.
+
+        Raises:
+            ValueError: if the knowledge base does not exist for the tenant.
+        """
+        knowledge_base = self.base_repository.get_by_id(
+            tenant_id=payload.tenant_id,
+            knowledge_base_id=payload.knowledge_base_id,
+        )
+        if knowledge_base is None:
+            raise ValueError(f"knowledge base not found: {payload.knowledge_base_id}")
+
+        document = self.document_repository.create(
+            tenant_id=payload.tenant_id,
+            knowledge_base_id=payload.knowledge_base_id,
+            title=payload.title,
+            source_type=payload.source_type,
+            source_uri=payload.source_uri,
+            content_text=payload.content_text,
+            content_hash=stable_content_hash(payload.content_text),
+            metadata_json=payload.metadata_json,
+        )
+        chunks = self._index_document(document=document, payload=payload)
+        indexed_document = self.document_repository.update_status(
+            document_id=document.id,
+            status="indexed",
+        )
+        return indexed_document or document, chunks
+
+    def list_documents(
+        self,
+        *,
+        tenant_id: str,
+        knowledge_base_id: str,
+    ) -> list[KnowledgeDocument]:
+        """Return the documents of one knowledge base for ``tenant_id``."""
+        return self.document_repository.list_by_base(
+            tenant_id=tenant_id,
+            knowledge_base_id=knowledge_base_id,
+        )
+
+    def search(
+        self,
+        payload: KnowledgeSearchRequest,
+    ) -> list[tuple[KnowledgeChunk, KnowledgeDocument, float, dict[str, JSONValue]]]:
+        """Rank the chunks of a knowledge base against ``payload.query``.
+
+        Every chunk of the base is scored as
+        ``0.7 * keyword_score + 0.3 * cosine_similarity`` (rounded to six
+        decimals). Chunks whose document cannot be loaded for the tenant, or
+        whose document fails the request filters, are skipped. The best
+        ``payload.top_k`` results are returned, highest score first, each with
+        a dict breaking the score down into its parts.
+        """
+        chunks = self.chunk_repository.list_by_base(
+            tenant_id=payload.tenant_id,
+            knowledge_base_id=payload.knowledge_base_id,
+        )
+        # Many chunks share one document; cache lookups to avoid re-querying.
+        document_cache: dict[str, KnowledgeDocument] = {}
+        query_embedding = build_hash_embedding(
+            payload.query,
+            dimensions=self.settings.embedding_dimensions,
+        )
+        scored: list[tuple[KnowledgeChunk, KnowledgeDocument, float, dict[str, JSONValue]]] = []
+        for chunk in chunks:
+            document = document_cache.get(chunk.document_id)
+            if document is None:
+                document = self.document_repository.get_by_id(
+                    tenant_id=payload.tenant_id,
+                    document_id=chunk.document_id,
+                )
+                if document is None:
+                    continue
+                document_cache[chunk.document_id] = document
+            if not self._matches_filters(document=document, filters_json=payload.filters_json):
+                continue
+            keyword = keyword_score(payload.query, chunk.content_text)
+            vector = cosine_similarity(query_embedding, chunk.embedding_json)
+            score = round(keyword * 0.7 + vector * 0.3, 6)
+            scored.append(
+                (
+                    chunk,
+                    document,
+                    score,
+                    {
+                        "keyword_score": round(keyword, 6),
+                        "vector_score": round(vector, 6),
+                        "retrieval_mode": "hybrid-local",
+                    },
+                )
+            )
+        scored.sort(key=lambda item: item[2], reverse=True)
+        return scored[: payload.top_k]
+
+    def _index_document(
+        self,
+        *,
+        document: KnowledgeDocument,
+        payload: KnowledgeDocumentCreateRequest,
+    ) -> list[KnowledgeChunk]:
+        """Chunk and embed the document text, replacing any stored chunks."""
+        # Compare against None instead of using ``or``: an explicit
+        # ``chunk_overlap=0`` is a valid request and must not be swapped for
+        # the configured default. ``chunk_size`` keeps the ``or`` fallback
+        # because a size of 0 is never valid.
+        chunk_overlap = (
+            payload.chunk_overlap
+            if payload.chunk_overlap is not None
+            else self.settings.default_chunk_overlap
+        )
+        chunk_payloads = build_chunk_payloads(
+            content_text=payload.content_text,
+            chunk_size=payload.chunk_size or self.settings.default_chunk_size,
+            chunk_overlap=chunk_overlap,
+            embedding_dimensions=self.settings.embedding_dimensions,
+            embedding_model=self.settings.embedding_model,
+        )
+        return self.chunk_repository.replace_document_chunks(
+            tenant_id=document.tenant_id,
+            knowledge_base_id=document.knowledge_base_id,
+            document_id=document.id,
+            chunks=chunk_payloads,
+        )
+
+    def _matches_filters(
+        self,
+        *,
+        document: KnowledgeDocument,
+        filters_json: dict[str, JSONValue],
+    ) -> bool:
+        """Return whether ``document`` satisfies the supported search filters.
+
+        Only string-valued ``source_type`` and ``status`` filters are applied;
+        any other key, or a non-string value, is ignored.
+        """
+        source_type = filters_json.get("source_type")
+        if isinstance(source_type, str) and document.source_type != source_type:
+            return False
+        status = filters_json.get("status")
+        if isinstance(status, str) and document.status != status:
+            return False
+        return True

+ 1 - 0
services/knowledge-service/app/bootstrap/__init__.py

@@ -0,0 +1 @@
+"""Bootstrap package."""

+ 14 - 0
services/knowledge-service/app/bootstrap/app.py

@@ -0,0 +1,14 @@
+from fastapi import FastAPI
+
+from app.api.routes import router
+from app.bootstrap.settings import KnowledgeServiceSettings
+from app.db.session import build_session_factory
+
+
+def create_app() -> FastAPI:
+    """Build the knowledge-service FastAPI application.
+
+    Settings and the database session factory are stored on ``app.state`` and
+    the API router is mounted under ``/knowledge``.
+    """
+    service_settings = KnowledgeServiceSettings()
+    application = FastAPI(title="agent-platform knowledge-service", version="0.1.0")
+    application.state.settings = service_settings
+    application.state.session_factory = build_session_factory(service_settings)
+    application.include_router(router, prefix="/knowledge", tags=["knowledge"])
+    return application

+ 11 - 0
services/knowledge-service/app/bootstrap/settings.py

@@ -0,0 +1,11 @@
+from core_shared import ServiceSettings
+
+
+class KnowledgeServiceSettings(ServiceSettings):
+    """Configuration for the knowledge service, extending the shared ``ServiceSettings``."""
+
+    service_name: str = "knowledge-service"
+    service_port: int = 8012
+    # Defaults to a SQLite file in the current working directory.
+    database_url: str = "sqlite:///./knowledge_service.db"
+    # Fallback chunking parameters for ingest requests that do not set their own.
+    # NOTE(review): unit (characters vs. tokens) depends on the splitter; confirm.
+    default_chunk_size: int = 800
+    default_chunk_overlap: int = 120
+    # Length of the embedding vectors built for chunks and for search queries.
+    embedding_dimensions: int = 32
+    # Label stored alongside each chunk's embedding.
+    embedding_model: str = "local-hash-v1"

+ 1 - 0
services/knowledge-service/app/db/__init__.py

@@ -0,0 +1 @@
+"""Database package."""

+ 7 - 0
services/knowledge-service/app/db/models/__init__.py

@@ -0,0 +1,7 @@
+from core_db import Base
+
+from .knowledge_base import KnowledgeBase
+from .knowledge_chunk import KnowledgeChunk
+from .knowledge_document import KnowledgeDocument
+
+__all__ = ["Base", "KnowledgeBase", "KnowledgeChunk", "KnowledgeDocument"]

+ 16 - 0
services/knowledge-service/app/db/models/knowledge_base.py

@@ -0,0 +1,16 @@
+from sqlalchemy import String, Text
+from sqlalchemy.dialects.sqlite import JSON
+from sqlalchemy.orm import Mapped, mapped_column
+
+from core_db import AuditMixin, Base, TenantMixin, VersionMixin
+from core_shared import JSONValue
+
+
+class KnowledgeBase(TenantMixin, AuditMixin, VersionMixin, Base):
+    """ORM model for a knowledge base, stored in the ``knowledge_base`` table."""
+
+    __tablename__ = "knowledge_base"
+
+    # Short identifier, indexed for lookup (no uniqueness constraint is declared here).
+    code: Mapped[str] = mapped_column(String(64), index=True)
+    name: Mapped[str] = mapped_column(String(128))
+    description: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # New rows start as "active".
+    status: Mapped[str] = mapped_column(String(32), default="active", index=True)
+    # Free-form JSON metadata supplied by the caller.
+    metadata_json: Mapped[dict[str, JSONValue] | None] = mapped_column(JSON, nullable=True)

+ 19 - 0
services/knowledge-service/app/db/models/knowledge_chunk.py

@@ -0,0 +1,19 @@
+from sqlalchemy import Integer, String, Text
+from sqlalchemy.dialects.sqlite import JSON
+from sqlalchemy.orm import Mapped, mapped_column
+
+from core_db import AuditMixin, Base, TenantMixin, VersionMixin
+from core_shared import JSONValue
+
+
+class KnowledgeChunk(TenantMixin, AuditMixin, VersionMixin, Base):
+    """ORM model for one chunk of a document, stored in the ``knowledge_chunk`` table."""
+
+    __tablename__ = "knowledge_chunk"
+
+    # Plain string references to the owning base and document; no foreign
+    # keys are declared on these columns.
+    knowledge_base_id: Mapped[str] = mapped_column(String(36), index=True)
+    document_id: Mapped[str] = mapped_column(String(36), index=True)
+    # Position of the chunk within its document.
+    chunk_index: Mapped[int] = mapped_column(Integer)
+    content_text: Mapped[str] = mapped_column(Text)
+    token_count: Mapped[int] = mapped_column(Integer, default=0)
+    # Label of the model that produced ``embedding_json``.
+    embedding_model: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True)
+    # Embedding vector serialized as a JSON array of floats.
+    embedding_json: Mapped[list[float] | None] = mapped_column(JSON, nullable=True)
+    metadata_json: Mapped[dict[str, JSONValue] | None] = mapped_column(JSON, nullable=True)

+ 22 - 0
services/knowledge-service/app/db/models/knowledge_document.py

@@ -0,0 +1,22 @@
+from datetime import datetime
+
+from sqlalchemy import DateTime, String, Text
+from sqlalchemy.dialects.sqlite import JSON
+from sqlalchemy.orm import Mapped, mapped_column
+
+from core_db import AuditMixin, Base, TenantMixin, VersionMixin
+from core_shared import JSONValue
+
+
+class KnowledgeDocument(TenantMixin, AuditMixin, VersionMixin, Base):
+    """ORM model for an ingested document, stored in the ``knowledge_document`` table."""
+
+    __tablename__ = "knowledge_document"
+
+    # Plain string reference to the owning base; no foreign key is declared.
+    knowledge_base_id: Mapped[str] = mapped_column(String(36), index=True)
+    title: Mapped[str] = mapped_column(String(256))
+    source_type: Mapped[str] = mapped_column(String(32), default="text", index=True)
+    source_uri: Mapped[str | None] = mapped_column(String(512), nullable=True)
+    # New rows start as "draft".
+    status: Mapped[str] = mapped_column(String(32), default="draft", index=True)
+    content_text: Mapped[str] = mapped_column(Text)
+    # 64 characters fits a hex-encoded SHA-256 digest of the content.
+    content_hash: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True)
+    metadata_json: Mapped[dict[str, JSONValue] | None] = mapped_column(JSON, nullable=True)
+    # Timezone-naive timestamp; NULL until the document has been indexed.
+    indexed_time: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)

+ 30 - 0
services/knowledge-service/app/db/session.py

@@ -0,0 +1,30 @@
+from collections.abc import Generator
+
+from fastapi import Request
+from sqlalchemy.orm import Session, sessionmaker
+
+from core_db import DatabaseSettings, create_engine_from_settings, create_session_factory
+
+from app.bootstrap.settings import KnowledgeServiceSettings
+
+
+def build_session_factory(
+    settings: KnowledgeServiceSettings | None = None,
+) -> sessionmaker[Session]:
+    """Create a session factory bound to the configured database.
+
+    Falls back to a freshly constructed ``KnowledgeServiceSettings`` when no
+    settings are passed in.
+    """
+    if not settings:
+        settings = KnowledgeServiceSettings()
+    database_settings = DatabaseSettings(
+        database_url=settings.database_url,
+        echo_sql=settings.echo_sql,
+    )
+    return create_session_factory(create_engine_from_settings(database_settings))
+
+
+def get_db(request: Request) -> Generator[Session, None, None]:
+    """Yield a database session for one request and close it afterwards.
+
+    The session comes from the factory stored on ``request.app.state``.
+    """
+    make_session: sessionmaker[Session] = request.app.state.session_factory
+    # Leaving the ``with`` block closes the session, including on error.
+    with make_session() as db_session:
+        yield db_session

+ 1 - 0
services/knowledge-service/app/domain/__init__.py

@@ -0,0 +1 @@
+"""Domain package."""

+ 220 - 0
services/knowledge-service/app/domain/repositories.py

@@ -0,0 +1,220 @@
+from datetime import datetime
+
+from sqlalchemy import delete, select
+from sqlalchemy.orm import Session
+
+from core_domain import KnowledgeBaseStatus, KnowledgeDocumentStatus
+from core_shared import JSONValue
+
+from app.db.models import KnowledgeBase, KnowledgeChunk, KnowledgeDocument
+
+
+class KnowledgeBaseRepository:
+    """Persistence operations for ``KnowledgeBase`` rows, always scoped by tenant."""
+
+    def __init__(self, db: Session) -> None:
+        self.db = db
+
+    def create(
+        self,
+        *,
+        tenant_id: str,
+        code: str,
+        name: str,
+        description: str | None,
+        metadata_json: dict[str, JSONValue] | None,
+    ) -> KnowledgeBase:
+        """Insert a knowledge base, commit, and return the refreshed row."""
+        record = KnowledgeBase(
+            tenant_id=tenant_id,
+            code=code,
+            name=name,
+            description=description,
+            metadata_json=metadata_json,
+        )
+        self.db.add(record)
+        self.db.commit()
+        self.db.refresh(record)
+        return record
+
+    def list_by_tenant(self, *, tenant_id: str) -> list[KnowledgeBase]:
+        """Return the tenant's knowledge bases, newest first."""
+        newest_first = KnowledgeBase.created_time.desc()
+        query = select(KnowledgeBase).where(KnowledgeBase.tenant_id == tenant_id)
+        return list(self.db.scalars(query.order_by(newest_first)))
+
+    def get_by_id(self, *, tenant_id: str, knowledge_base_id: str) -> KnowledgeBase | None:
+        """Return the tenant's knowledge base with this id, or ``None``."""
+        query = select(KnowledgeBase).where(
+            KnowledgeBase.tenant_id == tenant_id,
+            KnowledgeBase.id == knowledge_base_id,
+        )
+        return self.db.scalar(query)
+
+    def update_status(
+        self,
+        *,
+        tenant_id: str,
+        knowledge_base_id: str,
+        status: KnowledgeBaseStatus,
+    ) -> KnowledgeBase | None:
+        """Set the status and commit; returns ``None`` when the row is missing."""
+        record = self.get_by_id(tenant_id=tenant_id, knowledge_base_id=knowledge_base_id)
+        if record is not None:
+            record.status = status
+            self.db.commit()
+            self.db.refresh(record)
+        return record
+
+
+class KnowledgeDocumentRepository:
+    """Persistence operations for ``KnowledgeDocument`` rows."""
+
+    def __init__(self, db: Session) -> None:
+        self.db = db
+
+    def create(
+        self,
+        *,
+        tenant_id: str,
+        knowledge_base_id: str,
+        title: str,
+        source_type: str,
+        source_uri: str | None,
+        content_text: str,
+        content_hash: str | None,
+        metadata_json: dict[str, JSONValue] | None,
+    ) -> KnowledgeDocument:
+        """Insert a document with status ``"draft"``, commit, and return it."""
+        entity = KnowledgeDocument(
+            tenant_id=tenant_id,
+            knowledge_base_id=knowledge_base_id,
+            title=title,
+            source_type=source_type,
+            source_uri=source_uri,
+            content_text=content_text,
+            content_hash=content_hash,
+            metadata_json=metadata_json,
+            status="draft",
+        )
+        self.db.add(entity)
+        self.db.commit()
+        self.db.refresh(entity)
+        return entity
+
+    def list_by_base(
+        self,
+        *,
+        tenant_id: str,
+        knowledge_base_id: str,
+    ) -> list[KnowledgeDocument]:
+        """Return the tenant's documents in one knowledge base, newest first."""
+        stmt = (
+            select(KnowledgeDocument)
+            .where(KnowledgeDocument.tenant_id == tenant_id)
+            .where(KnowledgeDocument.knowledge_base_id == knowledge_base_id)
+            .order_by(KnowledgeDocument.created_time.desc())
+        )
+        return list(self.db.scalars(stmt))
+
+    def get_by_id(self, *, tenant_id: str, document_id: str) -> KnowledgeDocument | None:
+        """Return the tenant's document with this id, or ``None``."""
+        stmt = (
+            select(KnowledgeDocument)
+            .where(KnowledgeDocument.tenant_id == tenant_id)
+            .where(KnowledgeDocument.id == document_id)
+        )
+        return self.db.scalar(stmt)
+
+    def update_status(
+        self,
+        *,
+        document_id: str,
+        status: KnowledgeDocumentStatus,
+    ) -> KnowledgeDocument | None:
+        """Set a document's status and commit; returns ``None`` if it is missing.
+
+        ``indexed_time`` is stamped with the current UTC time when the new
+        status is ``"indexed"`` and left untouched otherwise.
+
+        NOTE(review): the lookup is by primary key only and is not scoped to a
+        tenant, unlike ``get_by_id``.
+        """
+        from datetime import timezone
+
+        entity = self.db.get(KnowledgeDocument, document_id)
+        if entity is None:
+            return None
+        entity.status = status
+        if status == "indexed":
+            # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an
+            # aware UTC timestamp and drop tzinfo so the value written to the
+            # timezone-naive ``DateTime`` column stays the same.
+            entity.indexed_time = datetime.now(timezone.utc).replace(tzinfo=None)
+        self.db.commit()
+        self.db.refresh(entity)
+        return entity
+
+
+class KnowledgeChunkRepository:
+    """Persistence operations for ``KnowledgeChunk`` rows."""
+
+    def __init__(self, db: Session) -> None:
+        self.db = db
+
+    def replace_document_chunks(
+        self,
+        *,
+        tenant_id: str,
+        knowledge_base_id: str,
+        document_id: str,
+        chunks: list[dict[str, JSONValue]],
+    ) -> list[KnowledgeChunk]:
+        """Delete a document's existing chunks and insert ``chunks`` in their place.
+
+        The delete and the inserts are committed together; the refreshed new
+        rows are returned in input order.
+        """
+        stale_rows = delete(KnowledgeChunk).where(
+            KnowledgeChunk.tenant_id == tenant_id,
+            KnowledgeChunk.document_id == document_id,
+        )
+        self.db.execute(stale_rows)
+        created = [
+            KnowledgeChunk(
+                tenant_id=tenant_id,
+                knowledge_base_id=knowledge_base_id,
+                document_id=document_id,
+                chunk_index=_read_int(item, "chunk_index"),
+                content_text=_read_string(item, "content_text"),
+                token_count=_read_int(item, "token_count"),
+                embedding_model=_read_optional_string(item, "embedding_model"),
+                embedding_json=_read_float_list(item, "embedding_json"),
+                metadata_json=_read_optional_dict(item, "metadata_json"),
+            )
+            for item in chunks
+        ]
+        self.db.add_all(created)
+        self.db.commit()
+        for row in created:
+            self.db.refresh(row)
+        return created
+
+    def list_by_base(
+        self,
+        *,
+        tenant_id: str,
+        knowledge_base_id: str,
+    ) -> list[KnowledgeChunk]:
+        """Return the tenant's chunks in one knowledge base, oldest first."""
+        oldest_first = KnowledgeChunk.created_time.asc()
+        query = select(KnowledgeChunk).where(
+            KnowledgeChunk.tenant_id == tenant_id,
+            KnowledgeChunk.knowledge_base_id == knowledge_base_id,
+        )
+        return list(self.db.scalars(query.order_by(oldest_first)))
+
+
+def _read_string(payload: dict[str, JSONValue], key: str) -> str:
+    """Return ``payload[key]`` if it is a string, otherwise an empty string."""
+    candidate = payload.get(key)
+    if isinstance(candidate, str):
+        return candidate
+    return ""
+
+
+def _read_optional_string(payload: dict[str, JSONValue], key: str) -> str | None:
+    """Return ``payload[key]`` if it is a string, otherwise ``None``."""
+    candidate = payload.get(key)
+    if isinstance(candidate, str):
+        return candidate
+    return None
+
+
+def _read_int(payload: dict[str, JSONValue], key: str) -> int:
+    """Return ``payload[key]`` if it is a true integer, otherwise ``0``.
+
+    Booleans are rejected even though ``bool`` is a subclass of ``int``.
+    """
+    candidate = payload.get(key)
+    if isinstance(candidate, bool) or not isinstance(candidate, int):
+        return 0
+    return candidate
+
+
+def _read_float_list(payload: dict[str, JSONValue], key: str) -> list[float] | None:
+    """Return ``payload[key]`` as a list of floats, or ``None`` if it is not a list.
+
+    Entries that are not real numbers are dropped. Booleans are dropped too:
+    ``bool`` is a subclass of ``int``, so without the explicit check ``True``
+    would be stored as ``1.0``. This matches how ``_read_int`` treats booleans.
+    """
+    value = payload.get(key)
+    if not isinstance(value, list):
+        return None
+    return [
+        float(item)
+        for item in value
+        if isinstance(item, (int, float)) and not isinstance(item, bool)
+    ]
+
+
+def _read_optional_dict(
+    payload: dict[str, JSONValue],
+    key: str,
+) -> dict[str, JSONValue] | None:
+    """Return a copy of ``payload[key]`` with string keys, or ``None`` if it is not a dict."""
+    candidate = payload.get(key)
+    if not isinstance(candidate, dict):
+        return None
+    normalized: dict[str, JSONValue] = {}
+    for raw_key, raw_value in candidate.items():
+        normalized[str(raw_key)] = raw_value
+    return normalized

+ 3 - 0
services/knowledge-service/app/main.py

@@ -0,0 +1,3 @@
+from app.bootstrap.app import create_app
+
+# Module-level application instance, built once at import time.
+app = create_app()

+ 1 - 0
services/knowledge-service/app/schemas/__init__.py

@@ -0,0 +1 @@
+"""Schemas package."""

+ 72 - 0
services/knowledge-service/app/schemas/knowledge.py

@@ -0,0 +1,72 @@
+from typing import TYPE_CHECKING
+
+from pydantic import BaseModel, Field
+
+from core_domain import (
+    KnowledgeBaseContract,
+    KnowledgeBaseStatus,
+    KnowledgeChunkContract,
+    KnowledgeDocumentContract,
+    KnowledgeSearchRequestContract,
+    KnowledgeSearchResultContract,
+)
+from core_shared import JSONValue
+
+if TYPE_CHECKING:
+    from app.db.models import KnowledgeBase, KnowledgeChunk, KnowledgeDocument
+
+
+class KnowledgeBaseCreateRequest(BaseModel):
+    """Request body for creating a knowledge base."""
+
+    tenant_id: str
+    code: str
+    name: str
+    description: str | None = None
+    # Defaults to a fresh empty dict per request.
+    metadata_json: dict[str, JSONValue] = Field(default_factory=dict)
+
+
+class KnowledgeBaseStatusUpdateRequest(BaseModel):
+    """Request body for changing the status of a knowledge base."""
+
+    tenant_id: str
+    status: KnowledgeBaseStatus
+
+
+class KnowledgeBaseResponse(KnowledgeBaseContract):
+    """API response for a knowledge base, shaped by ``KnowledgeBaseContract``."""
+
+    @classmethod
+    def from_entity(cls, entity: "KnowledgeBase") -> "KnowledgeBaseResponse":
+        """Build the response by reading fields from the ORM entity's attributes."""
+        return cls.model_validate(entity, from_attributes=True)
+
+
+class KnowledgeDocumentCreateRequest(BaseModel):
+    """Request body for ingesting a document into a knowledge base."""
+
+    tenant_id: str
+    knowledge_base_id: str
+    title: str
+    content_text: str
+    source_type: str = "text"
+    source_uri: str | None = None
+    # Defaults to a fresh empty dict per request.
+    metadata_json: dict[str, JSONValue] = Field(default_factory=dict)
+    # Optional chunking overrides; None leaves the choice to the service.
+    # chunk_size must be > 0 and chunk_overlap must be >= 0.
+    # NOTE(review): nothing here enforces chunk_overlap < chunk_size; confirm
+    # the splitter tolerates an overlap that is not smaller than the size.
+    chunk_size: int | None = Field(default=None, gt=0)
+    chunk_overlap: int | None = Field(default=None, ge=0)
+
+
+class KnowledgeDocumentResponse(KnowledgeDocumentContract):
+    """API response for a document, shaped by ``KnowledgeDocumentContract``."""
+
+    @classmethod
+    def from_entity(cls, entity: "KnowledgeDocument") -> "KnowledgeDocumentResponse":
+        """Build the response by reading fields from the ORM entity's attributes."""
+        return cls.model_validate(entity, from_attributes=True)
+
+
+class KnowledgeChunkResponse(KnowledgeChunkContract):
+    """API response for a chunk, shaped by ``KnowledgeChunkContract``."""
+
+    @classmethod
+    def from_entity(cls, entity: "KnowledgeChunk") -> "KnowledgeChunkResponse":
+        """Build the response by reading fields from the ORM entity's attributes."""
+        return cls.model_validate(entity, from_attributes=True)
+
+
+class KnowledgeDocumentIngestResponse(BaseModel):
+    """Response for a document ingest: the document plus its chunks."""
+
+    document: KnowledgeDocumentResponse
+    chunks: list[KnowledgeChunkResponse]
+
+
+class KnowledgeSearchRequest(KnowledgeSearchRequestContract):
+    """Search request; adds nothing to ``KnowledgeSearchRequestContract``."""
+
+    pass
+
+
+class KnowledgeSearchResultResponse(KnowledgeSearchResultContract):
+    """Search result; adds nothing to ``KnowledgeSearchResultContract``."""
+
+    pass

+ 25 - 0
services/knowledge-service/pyproject.toml

@@ -0,0 +1,25 @@
+[build-system]
+requires = ["setuptools>=68"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "knowledge-service"
+version = "0.1.0"
+description = "Knowledge base and RAG retrieval service for agent platform."
+requires-python = ">=3.11"
+dependencies = [
+  "alembic>=1.13,<2.0",
+  "fastapi>=0.111,<1.0",
+  "uvicorn[standard]>=0.30,<1.0",
+  "pydantic>=2.7,<3.0",
+  "sqlalchemy>=2.0,<3.0",
+  "core-db",
+  "core-domain",
+  "core-shared",
+]
+
+[tool.setuptools]
+package-dir = {"" = "."}
+
+[tool.setuptools.packages.find]
+where = ["."]