| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458 |
- from datetime import datetime
- from typing import TYPE_CHECKING, Generic, Literal, TypeVar
- from core_domain import (
- KnowledgeBaseContract,
- KnowledgeBaseStatus,
- KnowledgeChunkContract,
- KnowledgeDocumentContract,
- KnowledgeDocumentStatus,
- KnowledgeSearchRequestContract,
- KnowledgeSearchResultContract,
- )
- from core_shared import JSONValue
- from pydantic import BaseModel, Field
- if TYPE_CHECKING:
- from app.db.models import KnowledgeBase, KnowledgeChunk, KnowledgeDocument
# Type parameter used by the generic models in this module
# (ApiResponse[T] and PageResult[T]).
T = TypeVar("T")
class ApiErrorResponse(BaseModel):
    """Error payload: a type tag, a message, and optional structured details."""

    errorType: str
    message: str
    # default_factory gives every instance its own empty dict when omitted.
    details: dict[str, JSONValue] = Field(default_factory=dict)
class ApiResponse(BaseModel, Generic[T]):
    """Generic response envelope wrapping a payload of type ``T``.

    ``data`` and ``error`` are both optional and default to ``None``; this
    model does not itself enforce any relationship between ``success``,
    ``data`` and ``error``.
    """

    success: bool = True
    data: T | None = None
    error: ApiErrorResponse | None = None
    # No defaults are declared for the two fields below, so both are required.
    requestId: str
    serverTime: datetime
class PageRequest(BaseModel):
    """Paging parameters; base class of the ``*ListRequestDto`` models.

    ``page`` must be at least 1 (default 1) and ``pageSize`` must lie in
    1..200 (default 20). ``keyword`` is an optional string.
    """

    page: int = Field(default=1, ge=1)
    pageSize: int = Field(default=20, ge=1, le=200)
    keyword: str | None = None

    @property
    def offset(self) -> int:
        """Return the number of items that precede the requested page."""
        pages_before = self.page - 1
        return pages_before * self.pageSize
class PageResult(BaseModel, Generic[T]):
    """One page of items of type ``T`` together with paging counters."""

    items: list[T]
    total: int
    page: int
    pageSize: int
    hasMore: bool

    @classmethod
    def from_items(
        cls,
        *,
        items: list[T],
        total: int,
        page: int,
        page_size: int,
    ) -> "PageResult[T]":
        """Build a page result, deriving ``hasMore`` from the paging numbers.

        All arguments are keyword-only.

        Args:
            items: The items placed in ``items``.
            total: The value stored in ``total``.
            page: The value stored in ``page`` (presumably 1-based, matching
                ``PageRequest.page`` -- confirm against callers).
            page_size: The value stored in ``pageSize``.

        Returns:
            A new instance whose ``hasMore`` is ``True`` exactly when
            ``page * page_size`` is smaller than ``total``.
        """
        # Items covered up to and including this page, compared with the total.
        covered = page * page_size
        has_more = covered < total
        return cls(
            items=items,
            total=total,
            page=page,
            pageSize=page_size,
            hasMore=has_more,
        )
class KnowledgeBaseCreateRequest(BaseModel):
    """Knowledge-base creation request using snake_case field names.

    ``code`` and ``name`` are required; ``description`` and ``metadata_json``
    are optional.
    """

    code: str
    name: str
    description: str | None = None
    # default_factory gives every instance its own empty dict when omitted.
    metadata_json: dict[str, JSONValue] = Field(default_factory=dict)
class KnowledgeBaseStatusUpdateRequest(BaseModel):
    """Request model carrying a single required ``KnowledgeBaseStatus``."""

    status: KnowledgeBaseStatus
class KnowledgeBaseResponse(KnowledgeBaseContract):
    """Response model that reuses the fields of ``KnowledgeBaseContract``."""

    @classmethod
    def from_entity(cls, entity: "KnowledgeBase") -> "KnowledgeBaseResponse":
        """Validate *entity* by attribute access and return the response."""
        response = cls.model_validate(entity, from_attributes=True)
        return response
class KnowledgeDocumentCreateRequest(BaseModel):
    """Document creation request using snake_case field names.

    ``knowledge_base_id`` and ``title`` are required. ``content_text`` and
    ``content_base64`` are both optional; this model does not require that
    either one is supplied.
    """

    knowledge_base_id: str
    title: str
    content_text: str | None = None
    content_base64: str | None = None
    source_type: str = "text"
    source_uri: str | None = None
    # default_factory gives every instance its own empty dict when omitted.
    metadata_json: dict[str, JSONValue] = Field(default_factory=dict)
    # When provided, chunk_size must be > 0 and chunk_overlap must be >= 0.
    # No cross-field check (e.g. overlap < size) is declared on this model.
    chunk_size: int | None = Field(default=None, gt=0)
    chunk_overlap: int | None = Field(default=None, ge=0)
class KnowledgeDocumentResponse(KnowledgeDocumentContract):
    """Response model that reuses the fields of ``KnowledgeDocumentContract``."""

    @classmethod
    def from_entity(cls, entity: "KnowledgeDocument") -> "KnowledgeDocumentResponse":
        """Validate *entity* by attribute access and return the response."""
        response = cls.model_validate(entity, from_attributes=True)
        return response
class KnowledgeChunkResponse(KnowledgeChunkContract):
    """Response model that reuses the fields of ``KnowledgeChunkContract``."""

    @classmethod
    def from_entity(cls, entity: "KnowledgeChunk") -> "KnowledgeChunkResponse":
        """Validate *entity* by attribute access and return the response."""
        response = cls.model_validate(entity, from_attributes=True)
        return response
class KnowledgeDocumentIngestResponse(BaseModel):
    """Pairs one document response with a list of chunk responses."""

    document: KnowledgeDocumentResponse
    chunks: list[KnowledgeChunkResponse]
class KnowledgeDocumentParseRequest(BaseModel):
    """Document parse request using snake_case field names.

    Every field is optional; ``source_type`` defaults to ``"auto"``.
    """

    source_type: str = "auto"
    source_uri: str | None = None
    content_text: str | None = None
    content_base64: str | None = None
class KnowledgeDocumentParseResponse(BaseModel):
    """Parse result using snake_case field names: text, source type, metadata."""

    content_text: str
    source_type: str
    # default_factory gives every instance its own empty dict when omitted.
    metadata_json: dict[str, JSONValue] = Field(default_factory=dict)
class KnowledgeSearchRequest(KnowledgeSearchRequestContract):
    """Search request; adds nothing to ``KnowledgeSearchRequestContract``."""

    pass
class KnowledgeSearchResultResponse(KnowledgeSearchResultContract):
    """Search result; adds nothing to ``KnowledgeSearchResultContract``."""

    pass
class KnowledgeBaseDto(BaseModel):
    """Knowledge-base view using camelCase field names."""

    id: str
    name: str
    description: str | None = None
    status: KnowledgeBaseStatus
    metadata: dict[str, JSONValue] | None = None
    createdTime: datetime

    @classmethod
    def from_entity(cls, entity: "KnowledgeBase") -> "KnowledgeBaseDto":
        """Copy the attributes of *entity* into a new DTO.

        ``metadata`` is taken from ``entity.metadata_json`` and
        ``createdTime`` from ``entity.created_time``; the other fields share
        their name with the entity attribute they are read from.
        """
        field_values = {
            "id": entity.id,
            "name": entity.name,
            "description": entity.description,
            "status": entity.status,
            "metadata": entity.metadata_json,
            "createdTime": entity.created_time,
        }
        return cls(**field_values)
class KnowledgeBaseListRequestDto(PageRequest):
    """Paging request extended with an optional ``status`` field."""

    # Presumably used to filter the listing by status -- confirm in the handler.
    status: KnowledgeBaseStatus | None = None
class KnowledgeBaseCreateRequestDto(BaseModel):
    """Knowledge-base creation request using camelCase-style field names.

    Only ``name`` is required.
    """

    name: str
    description: str | None = None
    # default_factory gives every instance its own empty dict when omitted.
    metadata: dict[str, JSONValue] = Field(default_factory=dict)
class KnowledgeBaseDetailRequestDto(BaseModel):
    """Request model identifying a knowledge base by ``knowledgeBaseId``."""

    knowledgeBaseId: str
class KnowledgeBaseUpdateRequestDto(BaseModel):
    """Knowledge-base update request.

    ``knowledgeBaseId`` is required; every other field is optional and
    defaults to ``None``.
    """

    knowledgeBaseId: str
    # NOTE(review): None presumably means "leave unchanged" -- confirm in the
    # service that applies the update.
    name: str | None = None
    description: str | None = None
    status: KnowledgeBaseStatus | None = None
    metadata: dict[str, JSONValue] | None = None
class KnowledgeBaseStatusRequestDto(BaseModel):
    """Request model pairing a ``knowledgeBaseId`` with a required status."""

    knowledgeBaseId: str
    status: KnowledgeBaseStatus
class KnowledgeBaseDeleteRequestDto(BaseModel):
    """Request model identifying a knowledge base by ``knowledgeBaseId``."""

    knowledgeBaseId: str
class KnowledgeDocumentDto(BaseModel):
    """Knowledge-document view using camelCase field names."""

    id: str
    knowledgeBaseId: str
    title: str
    sourceType: str
    sourceUri: str | None = None
    status: KnowledgeDocumentStatus
    contentHash: str | None = None
    objectStorage: dict[str, JSONValue] | None = None
    metadata: dict[str, JSONValue] | None = None
    indexedTime: datetime | None = None
    createdTime: datetime

    @classmethod
    def from_entity(cls, entity: "KnowledgeDocument") -> "KnowledgeDocumentDto":
        """Copy the attributes of *entity* into a new DTO.

        ``objectStorage`` is the ``"object_storage"`` entry of
        ``entity.metadata_json`` when that entry is a dict, otherwise
        ``None``. ``metadata`` receives ``entity.metadata_json`` unchanged.
        """
        # A falsy metadata_json (e.g. None) is treated as an empty mapping
        # for the lookup only.
        storage_info = (entity.metadata_json or {}).get("object_storage")
        if not isinstance(storage_info, dict):
            storage_info = None

        return cls(
            id=entity.id,
            knowledgeBaseId=entity.knowledge_base_id,
            title=entity.title,
            sourceType=entity.source_type,
            sourceUri=entity.source_uri,
            status=entity.status,
            contentHash=entity.content_hash,
            objectStorage=storage_info,
            metadata=entity.metadata_json,
            indexedTime=entity.indexed_time,
            createdTime=entity.created_time,
        )
class KnowledgeDocumentListRequestDto(PageRequest):
    """Paging request extended with three optional document fields."""

    # Presumably listing filters -- confirm in the handler.
    knowledgeBaseId: str | None = None
    status: KnowledgeDocumentStatus | None = None
    sourceType: str | None = None
class KnowledgeDocumentCreateRequestDto(BaseModel):
    """Document creation request using camelCase field names.

    ``knowledgeBaseId`` and ``title`` are required. ``contentText`` and
    ``contentBase64`` are both optional; this model does not require that
    either one is supplied.
    """

    knowledgeBaseId: str
    title: str
    contentText: str | None = None
    contentBase64: str | None = None
    sourceType: str = "text"
    sourceUri: str | None = None
    # default_factory gives every instance its own empty dict when omitted.
    metadata: dict[str, JSONValue] = Field(default_factory=dict)
    # When provided, chunkSize must be > 0 and chunkOverlap must be >= 0.
    # No cross-field check (e.g. overlap < size) is declared on this model.
    chunkSize: int | None = Field(default=None, gt=0)
    chunkOverlap: int | None = Field(default=None, ge=0)
    # Tri-state flag: None is distinct from an explicit True/False.
    asyncMode: bool | None = None
class KnowledgeDocumentDetailRequestDto(BaseModel):
    """Request model identifying a document by ``documentId``."""

    documentId: str
class KnowledgeDocumentUpdateRequestDto(BaseModel):
    """Document update request.

    ``documentId`` is required; every other field is optional and defaults
    to ``None``.
    """

    documentId: str
    # NOTE(review): None presumably means "leave unchanged" -- confirm in the
    # service that applies the update.
    title: str | None = None
    sourceUri: str | None = None
    status: KnowledgeDocumentStatus | None = None
    metadata: dict[str, JSONValue] | None = None
class KnowledgeDocumentStatusRequestDto(BaseModel):
    """Request model pairing a ``documentId`` with a required status."""

    documentId: str
    status: KnowledgeDocumentStatus
class KnowledgeDocumentDeleteRequestDto(BaseModel):
    """Request model identifying a document by ``documentId``."""

    documentId: str
class KnowledgeDocumentReindexRequestDto(BaseModel):
    """Document reindex request with optional chunking overrides."""

    documentId: str
    # When provided, chunkSize must be > 0 and chunkOverlap must be >= 0.
    chunkSize: int | None = Field(default=None, gt=0)
    chunkOverlap: int | None = Field(default=None, ge=0)
    # Tri-state flag: None is distinct from an explicit True/False.
    asyncMode: bool | None = None
class KnowledgeDocumentContentRequestDto(BaseModel):
    """Document content request with two inclusion flags.

    ``includeText`` defaults to ``True`` and ``includeBase64`` to ``False``.
    """

    documentId: str
    includeText: bool = True
    includeBase64: bool = False
class KnowledgeDocumentContentData(BaseModel):
    """Document content payload.

    ``documentId``, ``title``, ``sourceType`` and ``sizeBytes`` are required;
    the content and storage fields are optional.
    """

    documentId: str
    title: str
    sourceType: str
    contentType: str | None = None
    sizeBytes: int
    contentText: str | None = None
    contentBase64: str | None = None
    objectStorage: dict[str, JSONValue] | None = None
class KnowledgeDocumentStorageStatusRequestDto(BaseModel):
    """Request model identifying a document by ``documentId``."""

    documentId: str
class KnowledgeDocumentStorageStatusData(BaseModel):
    """Storage status payload for one document.

    ``documentId``, ``exists`` and ``checkedTime`` are required; all other
    fields are optional and default to ``None``.
    """

    documentId: str
    exists: bool
    objectStorage: dict[str, JSONValue] | None = None
    contentType: str | None = None
    sizeBytes: int | None = None
    etag: str | None = None
    errorMessage: str | None = None
    checkedTime: datetime
class KnowledgeDocumentParseRequestDto(BaseModel):
    """Document parse request using camelCase field names.

    Every field is optional; ``sourceType`` defaults to ``"auto"``.
    """

    sourceType: str = "auto"
    sourceUri: str | None = None
    contentText: str | None = None
    contentBase64: str | None = None
class KnowledgeDocumentParseData(BaseModel):
    """Parse result using camelCase field names: text, source type, metadata."""

    contentText: str
    sourceType: str
    # default_factory gives every instance its own empty dict when omitted.
    metadata: dict[str, JSONValue] = Field(default_factory=dict)
class KnowledgeChunkDto(BaseModel):
    """Knowledge-chunk view using camelCase field names."""

    id: str
    knowledgeBaseId: str
    documentId: str
    chunkIndex: int
    contentText: str
    tokenCount: int
    embeddingModel: str | None = None
    embedding: list[float] | None = None
    metadata: dict[str, JSONValue] | None = None
    createdTime: datetime

    @classmethod
    def from_entity(cls, entity: "KnowledgeChunk") -> "KnowledgeChunkDto":
        """Copy the attributes of *entity* into a new DTO.

        ``embedding`` is read from ``entity.embedding_json`` and ``metadata``
        from ``entity.metadata_json``; the remaining fields map each
        camelCase name to the matching snake_case entity attribute.
        """
        field_values = {
            "id": entity.id,
            "knowledgeBaseId": entity.knowledge_base_id,
            "documentId": entity.document_id,
            "chunkIndex": entity.chunk_index,
            "contentText": entity.content_text,
            "tokenCount": entity.token_count,
            "embeddingModel": entity.embedding_model,
            "embedding": entity.embedding_json,
            "metadata": entity.metadata_json,
            "createdTime": entity.created_time,
        }
        return cls(**field_values)
class KnowledgeChunkListRequestDto(PageRequest):
    """Paging request extended with two optional id fields."""

    # Presumably listing filters -- confirm in the handler.
    knowledgeBaseId: str | None = None
    documentId: str | None = None
class KnowledgeChunkDetailRequestDto(BaseModel):
    """Request model identifying a chunk by ``chunkId``."""

    chunkId: str
class KnowledgeChunkDeleteRequestDto(BaseModel):
    """Request model identifying a chunk by ``chunkId``."""

    chunkId: str
class KnowledgeSearchRequestDto(BaseModel):
    """Search request using camelCase field names.

    ``knowledgeBaseId`` and ``query`` are required.
    """

    knowledgeBaseId: str
    query: str
    # Must lie in 1..50; defaults to 5.
    topK: int = Field(default=5, ge=1, le=50)
    # default_factory gives every instance its own empty dict when omitted.
    filters: dict[str, JSONValue] = Field(default_factory=dict)
class KnowledgeSearchResultDto(BaseModel):
    """One search hit: a chunk, its document, a score and score details."""

    chunk: KnowledgeChunkDto
    document: KnowledgeDocumentDto
    score: float
    # default_factory gives every instance its own empty dict when omitted.
    scoreDetails: dict[str, JSONValue] = Field(default_factory=dict)
class KnowledgeDocumentIngestData(BaseModel):
    """Ingest result: the document, its chunks, and optional job information.

    ``queued`` defaults to ``False`` and ``job`` to ``None``.
    """

    document: KnowledgeDocumentDto
    chunks: list[KnowledgeChunkDto]
    queued: bool = False
    # String annotation: KnowledgeIndexJobData is defined later in this module.
    job: "KnowledgeIndexJobData | None" = None
# Allowed values for KnowledgeIndexJobData.status and for the optional
# status field of KnowledgeIndexJobListRequestDto.
KnowledgeIndexJobStatus = Literal["queued", "running", "completed", "failed", "skipped"]
# Allowed values for KnowledgeIndexJobData.action.
KnowledgeIndexJobAction = Literal["index", "reindex"]
class KnowledgeIndexJobData(BaseModel):
    """Index job payload.

    ``jobId``, ``documentId``, ``action`` and ``status`` are required; all
    other fields are optional.
    """

    jobId: str
    documentId: str
    knowledgeBaseId: str | None = None
    documentTitle: str | None = None
    action: KnowledgeIndexJobAction
    status: KnowledgeIndexJobStatus
    # Constrained to 0..100, default 0 (presumably a percentage -- confirm).
    progress: int = Field(default=0, ge=0, le=100)
    queueName: str | None = None
    workerKey: str | None = None
    errorMessage: str | None = None
    # Unlike the request models, no gt/ge constraints are declared here.
    chunkSize: int | None = None
    chunkOverlap: int | None = None
    queuedTime: datetime | None = None
    startedTime: datetime | None = None
    completedTime: datetime | None = None
class KnowledgeIndexJobListRequestDto(PageRequest):
    """Paging request extended with three optional job fields."""

    # Presumably listing filters -- confirm in the handler.
    knowledgeBaseId: str | None = None
    documentId: str | None = None
    status: KnowledgeIndexJobStatus | None = None
class KnowledgeIndexJobDetailRequestDto(BaseModel):
    """Index-job detail request; keyed by ``documentId``, not a job id."""

    documentId: str
class KnowledgeIndexJobRetryRequestDto(BaseModel):
    """Index-job retry request, keyed by ``documentId``, with chunking overrides."""

    documentId: str
    # When provided, chunkSize must be > 0 and chunkOverlap must be >= 0.
    chunkSize: int | None = Field(default=None, gt=0)
    chunkOverlap: int | None = Field(default=None, ge=0)
class KnowledgeBaseReindexRequestDto(BaseModel):
    """Knowledge-base reindex request with optional chunking overrides."""

    knowledgeBaseId: str
    # When provided, chunkSize must be > 0 and chunkOverlap must be >= 0.
    chunkSize: int | None = Field(default=None, gt=0)
    chunkOverlap: int | None = Field(default=None, ge=0)
class KnowledgeBaseReindexData(BaseModel):
    """Knowledge-base reindex result: a queued count and a list of jobs."""

    knowledgeBaseId: str
    # Not derived from ``jobs`` by this model; both are supplied by the caller.
    queuedCount: int
    jobs: list[KnowledgeIndexJobData]
class KnowledgeStorageHealthRequestDto(BaseModel):
    """Empty request model; it declares no fields."""

    pass
class KnowledgeStorageHealthData(BaseModel):
    """Storage health payload.

    ``backend``, ``bucket``, ``available`` and ``checkedTime`` are required;
    ``message`` is optional.
    """

    backend: str
    bucket: str
    available: bool
    message: str | None = None
    checkedTime: datetime
class KnowledgeSettingsDto(BaseModel):
    """Knowledge settings; every field has a default.

    No range or choice constraints are declared on any field of this model.
    """

    knowledgeBaseId: str | None = None
    retrievalMode: str = "hybrid"
    embeddingModelId: str = "auto"
    rerankModelId: str = "auto"
    chunkSize: int = 800
    chunkOverlap: int = 120
    topK: int = 5
    minScore: float = 0.0
    maxCandidates: int = 50
    # The three default weights below sum to 1.0; this model does not enforce
    # that for caller-supplied values.
    keywordWeight: float = 0.55
    vectorWeight: float = 0.30
    rerankWeight: float = 0.15
    queryRewrite: bool = False
    requireCitations: bool = True
class KnowledgeSettingsUpdateRequestDto(KnowledgeSettingsDto):
    """Settings update request; inherits every field of ``KnowledgeSettingsDto``.

    NOTE(review): all inherited fields carry defaults, so a request that omits
    a field validates with that field's default value -- confirm how the
    handler tells "omitted" apart from "set to the default".
    """

    # Redeclared with the same annotation and default as in the base class.
    knowledgeBaseId: str | None = None
class DeleteData(BaseModel):
    """Deletion result: a required ``deleted`` flag plus optional identifiers."""

    deleted: bool
    knowledgeBaseId: str | None = None
    documentId: str | None = None
    chunkId: str | None = None
    # Tri-state: None is distinct from an explicit True/False.
    objectDeleted: bool | None = None
|