"""Pydantic schemas for the knowledge API surface.

The module holds two families of models:

* snake_case models, several of which extend the ``core_domain`` contracts;
* camelCase ``*Dto`` / ``*Data`` models, with ``from_entity`` constructors
  that copy attributes from the ORM entities field by field.

NOTE: models are documented with ``#`` comments instead of class docstrings
on purpose -- Pydantic publishes a model's docstring as ``description`` in
the generated JSON schema, which would alter the emitted schema.
"""

from datetime import datetime
from typing import TYPE_CHECKING, Generic, Literal, TypeVar

from core_domain import (
    KnowledgeBaseContract,
    KnowledgeBaseStatus,
    KnowledgeChunkContract,
    KnowledgeDocumentContract,
    KnowledgeDocumentStatus,
    KnowledgeSearchRequestContract,
    KnowledgeSearchResultContract,
)
from core_shared import JSONValue
from pydantic import BaseModel, Field

if TYPE_CHECKING:
    # Only needed for annotations; every use below is a string annotation.
    from app.db.models import KnowledgeBase, KnowledgeChunk, KnowledgeDocument

T = TypeVar("T")


# ---------------------------------------------------------------------------
# Generic envelope and paging models
# ---------------------------------------------------------------------------


# Error description: a type tag, a message and a free-form details mapping
# (empty dict by default).
class ApiErrorResponse(BaseModel):
    errorType: str
    message: str
    details: dict[str, JSONValue] = Field(default_factory=dict)


# Generic envelope around a payload of type ``T``. ``requestId`` and
# ``serverTime`` are required; ``data`` and ``error`` both default to None.
class ApiResponse(BaseModel, Generic[T]):
    success: bool = True
    data: T | None = None
    error: ApiErrorResponse | None = None
    requestId: str
    serverTime: datetime


# Paging parameters: ``page`` is 1-based (ge=1), ``pageSize`` is limited to
# the range 1..200, and ``keyword`` is optional.
class PageRequest(BaseModel):
    page: int = Field(default=1, ge=1)
    pageSize: int = Field(default=20, ge=1, le=200)
    keyword: str | None = None

    @property
    def offset(self) -> int:
        """Return the number of items that precede the requested page."""
        pages_before = self.page - 1
        return pages_before * self.pageSize


# One page of items of type ``T`` plus the paging bookkeeping.
class PageResult(BaseModel, Generic[T]):
    items: list[T]
    total: int
    page: int
    pageSize: int
    hasMore: bool

    @classmethod
    def from_items(
        cls,
        *,
        items: list[T],
        total: int,
        page: int,
        page_size: int,
    ) -> "PageResult[T]":
        """Build a page result, deriving ``hasMore`` from the totals.

        ``hasMore`` is true while ``page * page_size`` is still below
        ``total``.
        """
        has_more = page * page_size < total
        return cls(
            items=items,
            total=total,
            page=page,
            pageSize=page_size,
            hasMore=has_more,
        )


# ---------------------------------------------------------------------------
# snake_case models
# ---------------------------------------------------------------------------


# Fields supplied when creating a knowledge base (snake_case variant).
class KnowledgeBaseCreateRequest(BaseModel):
    code: str
    name: str
    description: str | None = None
    metadata_json: dict[str, JSONValue] = Field(default_factory=dict)


# Carries only the new status value.
class KnowledgeBaseStatusUpdateRequest(BaseModel):
    status: KnowledgeBaseStatus


# ``KnowledgeBaseContract`` extended with an attribute-based constructor.
class KnowledgeBaseResponse(KnowledgeBaseContract):
    @classmethod
    def from_entity(cls, entity: "KnowledgeBase") -> "KnowledgeBaseResponse":
        """Validate ``entity`` into this model by reading its attributes."""
        return cls.model_validate(entity, from_attributes=True)


# Fields supplied when creating a document (snake_case variant).
# ``chunk_size`` must be > 0 and ``chunk_overlap`` >= 0 when given.
class KnowledgeDocumentCreateRequest(BaseModel):
    knowledge_base_id: str
    title: str
    content_text: str | None = None
    content_base64: str | None = None
    source_type: str = "text"
    source_uri: str | None = None
    metadata_json: dict[str, JSONValue] = Field(default_factory=dict)
    chunk_size: int | None = Field(default=None, gt=0)
    chunk_overlap: int | None = Field(default=None, ge=0)


# ``KnowledgeDocumentContract`` extended with an attribute-based constructor.
class KnowledgeDocumentResponse(KnowledgeDocumentContract):
    @classmethod
    def from_entity(cls, entity: "KnowledgeDocument") -> "KnowledgeDocumentResponse":
        """Validate ``entity`` into this model by reading its attributes."""
        return cls.model_validate(entity, from_attributes=True)


# ``KnowledgeChunkContract`` extended with an attribute-based constructor.
class KnowledgeChunkResponse(KnowledgeChunkContract):
    @classmethod
    def from_entity(cls, entity: "KnowledgeChunk") -> "KnowledgeChunkResponse":
        """Validate ``entity`` into this model by reading its attributes."""
        return cls.model_validate(entity, from_attributes=True)


# A document together with its chunks.
class KnowledgeDocumentIngestResponse(BaseModel):
    document: KnowledgeDocumentResponse
    chunks: list[KnowledgeChunkResponse]


# Parse input (snake_case variant); ``source_type`` defaults to "auto".
class KnowledgeDocumentParseRequest(BaseModel):
    source_type: str = "auto"
    source_uri: str | None = None
    content_text: str | None = None
    content_base64: str | None = None


# Parse output (snake_case variant): text, source type and metadata.
class KnowledgeDocumentParseResponse(BaseModel):
    content_text: str
    source_type: str
    metadata_json: dict[str, JSONValue] = Field(default_factory=dict)


# Adds nothing to ``KnowledgeSearchRequestContract``.
class KnowledgeSearchRequest(KnowledgeSearchRequestContract):
    pass


# Adds nothing to ``KnowledgeSearchResultContract``.
class KnowledgeSearchResultResponse(KnowledgeSearchResultContract):
    pass


# ---------------------------------------------------------------------------
# camelCase DTOs: knowledge bases
# ---------------------------------------------------------------------------


# camelCase view of a knowledge base.
class KnowledgeBaseDto(BaseModel):
    id: str
    name: str
    description: str | None = None
    status: KnowledgeBaseStatus
    metadata: dict[str, JSONValue] | None = None
    createdTime: datetime

    @classmethod
    def from_entity(cls, entity: "KnowledgeBase") -> "KnowledgeBaseDto":
        """Copy the entity's attributes into the camelCase fields.

        ``metadata`` is taken from ``entity.metadata_json`` and
        ``createdTime`` from ``entity.created_time``.
        """
        return cls(
            id=entity.id,
            name=entity.name,
            description=entity.description,
            status=entity.status,
            metadata=entity.metadata_json,
            createdTime=entity.created_time,
        )


# Paging parameters plus an optional status filter.
class KnowledgeBaseListRequestDto(PageRequest):
    status: KnowledgeBaseStatus | None = None


# Fields supplied when creating a knowledge base (camelCase variant).
class KnowledgeBaseCreateRequestDto(BaseModel):
    name: str
    description: str | None = None
    metadata: dict[str, JSONValue] = Field(default_factory=dict)


# Identifies a knowledge base by id.
class KnowledgeBaseDetailRequestDto(BaseModel):
    knowledgeBaseId: str


# Knowledge base id plus optional fields; every updatable field defaults
# to None.
class KnowledgeBaseUpdateRequestDto(BaseModel):
    knowledgeBaseId: str
    name: str | None = None
    description: str | None = None
    status: KnowledgeBaseStatus | None = None
    metadata: dict[str, JSONValue] | None = None


# Knowledge base id plus a required status.
class KnowledgeBaseStatusRequestDto(BaseModel):
    knowledgeBaseId: str
    status: KnowledgeBaseStatus


# Identifies a knowledge base by id.
class KnowledgeBaseDeleteRequestDto(BaseModel):
    knowledgeBaseId: str


# ---------------------------------------------------------------------------
# camelCase DTOs: documents
# ---------------------------------------------------------------------------


# camelCase view of a document.
class KnowledgeDocumentDto(BaseModel):
    id: str
    knowledgeBaseId: str
    title: str
    sourceType: str
    sourceUri: str | None = None
    status: KnowledgeDocumentStatus
    contentHash: str | None = None
    objectStorage: dict[str, JSONValue] | None = None
    metadata: dict[str, JSONValue] | None = None
    indexedTime: datetime | None = None
    createdTime: datetime

    @classmethod
    def from_entity(cls, entity: "KnowledgeDocument") -> "KnowledgeDocumentDto":
        """Copy the entity's attributes into the camelCase fields.

        ``objectStorage`` is the ``"object_storage"`` entry of the entity's
        metadata when that entry is a dict, otherwise None. ``metadata``
        receives ``entity.metadata_json`` unchanged, which may be None.
        """
        # A falsy metadata_json is treated as an empty mapping for the lookup only.
        stored_metadata = entity.metadata_json or {}
        storage_info = stored_metadata.get("object_storage")
        if not isinstance(storage_info, dict):
            storage_info = None
        return cls(
            id=entity.id,
            knowledgeBaseId=entity.knowledge_base_id,
            title=entity.title,
            sourceType=entity.source_type,
            sourceUri=entity.source_uri,
            status=entity.status,
            contentHash=entity.content_hash,
            objectStorage=storage_info,
            metadata=entity.metadata_json,
            indexedTime=entity.indexed_time,
            createdTime=entity.created_time,
        )


# Paging parameters plus optional knowledge-base, status and source-type
# filters.
class KnowledgeDocumentListRequestDto(PageRequest):
    knowledgeBaseId: str | None = None
    status: KnowledgeDocumentStatus | None = None
    sourceType: str | None = None


# Fields supplied when creating a document (camelCase variant).
# ``chunkSize`` must be > 0 and ``chunkOverlap`` >= 0 when given.
class KnowledgeDocumentCreateRequestDto(BaseModel):
    knowledgeBaseId: str
    title: str
    contentText: str | None = None
    contentBase64: str | None = None
    sourceType: str = "text"
    sourceUri: str | None = None
    metadata: dict[str, JSONValue] = Field(default_factory=dict)
    chunkSize: int | None = Field(default=None, gt=0)
    chunkOverlap: int | None = Field(default=None, ge=0)
    asyncMode: bool | None = None


# Identifies a document by id.
class KnowledgeDocumentDetailRequestDto(BaseModel):
    documentId: str


# Document id plus optional fields; every updatable field defaults to None.
class KnowledgeDocumentUpdateRequestDto(BaseModel):
    documentId: str
    title: str | None = None
    sourceUri: str | None = None
    status: KnowledgeDocumentStatus | None = None
    metadata: dict[str, JSONValue] | None = None


# Document id plus a required status.
class KnowledgeDocumentStatusRequestDto(BaseModel):
    documentId: str
    status: KnowledgeDocumentStatus


# Identifies a document by id.
class KnowledgeDocumentDeleteRequestDto(BaseModel):
    documentId: str


# Document id plus optional chunking overrides (``chunkSize`` > 0,
# ``chunkOverlap`` >= 0) and an optional ``asyncMode`` flag.
class KnowledgeDocumentReindexRequestDto(BaseModel):
    documentId: str
    chunkSize: int | None = Field(default=None, gt=0)
    chunkOverlap: int | None = Field(default=None, ge=0)
    asyncMode: bool | None = None


# Document id plus two flags; text is included by default, base64 is not.
class KnowledgeDocumentContentRequestDto(BaseModel):
    documentId: str
    includeText: bool = True
    includeBase64: bool = False


# Document content payload; ``sizeBytes`` is required, both content fields
# are optional.
class KnowledgeDocumentContentData(BaseModel):
    documentId: str
    title: str
    sourceType: str
    contentType: str | None = None
    sizeBytes: int
    contentText: str | None = None
    contentBase64: str | None = None
    objectStorage: dict[str, JSONValue] | None = None


# Identifies a document by id.
class KnowledgeDocumentStorageStatusRequestDto(BaseModel):
    documentId: str


# Storage status payload; ``exists`` and ``checkedTime`` are required, the
# remaining details are optional.
class KnowledgeDocumentStorageStatusData(BaseModel):
    documentId: str
    exists: bool
    objectStorage: dict[str, JSONValue] | None = None
    contentType: str | None = None
    sizeBytes: int | None = None
    etag: str | None = None
    errorMessage: str | None = None
    checkedTime: datetime


# Parse input (camelCase variant); ``sourceType`` defaults to "auto".
class KnowledgeDocumentParseRequestDto(BaseModel):
    sourceType: str = "auto"
    sourceUri: str | None = None
    contentText: str | None = None
    contentBase64: str | None = None


# Parse output (camelCase variant): text, source type and metadata.
class KnowledgeDocumentParseData(BaseModel):
    contentText: str
    sourceType: str
    metadata: dict[str, JSONValue] = Field(default_factory=dict)


# ---------------------------------------------------------------------------
# camelCase DTOs: chunks and search
# ---------------------------------------------------------------------------


# camelCase view of a chunk.
class KnowledgeChunkDto(BaseModel):
    id: str
    knowledgeBaseId: str
    documentId: str
    chunkIndex: int
    contentText: str
    tokenCount: int
    embeddingModel: str | None = None
    embedding: list[float] | None = None
    metadata: dict[str, JSONValue] | None = None
    createdTime: datetime

    @classmethod
    def from_entity(cls, entity: "KnowledgeChunk") -> "KnowledgeChunkDto":
        """Copy the entity's attributes into the camelCase fields.

        ``embedding`` is taken from ``entity.embedding_json`` and
        ``metadata`` from ``entity.metadata_json``.
        """
        return cls(
            id=entity.id,
            knowledgeBaseId=entity.knowledge_base_id,
            documentId=entity.document_id,
            chunkIndex=entity.chunk_index,
            contentText=entity.content_text,
            tokenCount=entity.token_count,
            embeddingModel=entity.embedding_model,
            embedding=entity.embedding_json,
            metadata=entity.metadata_json,
            createdTime=entity.created_time,
        )


# Paging parameters plus optional knowledge-base and document filters.
class KnowledgeChunkListRequestDto(PageRequest):
    knowledgeBaseId: str | None = None
    documentId: str | None = None


# Identifies a chunk by id.
class KnowledgeChunkDetailRequestDto(BaseModel):
    chunkId: str


# Identifies a chunk by id.
class KnowledgeChunkDeleteRequestDto(BaseModel):
    chunkId: str


# Search input; ``topK`` defaults to 5 and is limited to the range 1..50.
class KnowledgeSearchRequestDto(BaseModel):
    knowledgeBaseId: str
    query: str
    topK: int = Field(default=5, ge=1, le=50)
    filters: dict[str, JSONValue] = Field(default_factory=dict)


# A single search hit: the chunk, its document, a score and score details.
class KnowledgeSearchResultDto(BaseModel):
    chunk: KnowledgeChunkDto
    document: KnowledgeDocumentDto
    score: float
    scoreDetails: dict[str, JSONValue] = Field(default_factory=dict)


# ---------------------------------------------------------------------------
# camelCase DTOs: ingest and index jobs
# ---------------------------------------------------------------------------


# Ingest payload. ``job`` is a string forward reference because
# ``KnowledgeIndexJobData`` is declared further down in this module.
class KnowledgeDocumentIngestData(BaseModel):
    document: KnowledgeDocumentDto
    chunks: list[KnowledgeChunkDto]
    queued: bool = False
    job: "KnowledgeIndexJobData | None" = None


# Closed sets of values accepted for an index job's status and action.
KnowledgeIndexJobStatus = Literal["queued", "running", "completed", "failed", "skipped"]
KnowledgeIndexJobAction = Literal["index", "reindex"]


# Index job description; ``progress`` is limited to the range 0..100.
class KnowledgeIndexJobData(BaseModel):
    jobId: str
    documentId: str
    knowledgeBaseId: str | None = None
    documentTitle: str | None = None
    action: KnowledgeIndexJobAction
    status: KnowledgeIndexJobStatus
    progress: int = Field(default=0, ge=0, le=100)
    queueName: str | None = None
    workerKey: str | None = None
    errorMessage: str | None = None
    chunkSize: int | None = None
    chunkOverlap: int | None = None
    queuedTime: datetime | None = None
    startedTime: datetime | None = None
    completedTime: datetime | None = None


# Paging parameters plus optional knowledge-base, document and status
# filters.
class KnowledgeIndexJobListRequestDto(PageRequest):
    knowledgeBaseId: str | None = None
    documentId: str | None = None
    status: KnowledgeIndexJobStatus | None = None


# Identifies the job through its document id.
class KnowledgeIndexJobDetailRequestDto(BaseModel):
    documentId: str


# Document id plus optional chunking overrides (``chunkSize`` > 0,
# ``chunkOverlap`` >= 0).
class KnowledgeIndexJobRetryRequestDto(BaseModel):
    documentId: str
    chunkSize: int | None = Field(default=None, gt=0)
    chunkOverlap: int | None = Field(default=None, ge=0)


# Knowledge base id plus optional chunking overrides (``chunkSize`` > 0,
# ``chunkOverlap`` >= 0).
class KnowledgeBaseReindexRequestDto(BaseModel):
    knowledgeBaseId: str
    chunkSize: int | None = Field(default=None, gt=0)
    chunkOverlap: int | None = Field(default=None, ge=0)


# Knowledge base id, a queued count and the list of jobs.
class KnowledgeBaseReindexData(BaseModel):
    knowledgeBaseId: str
    queuedCount: int
    jobs: list[KnowledgeIndexJobData]


# ---------------------------------------------------------------------------
# camelCase DTOs: storage health, settings, deletion
# ---------------------------------------------------------------------------


# Declares no fields.
class KnowledgeStorageHealthRequestDto(BaseModel):
    pass


# Storage health payload: backend, bucket, availability flag, optional
# message and the check time.
class KnowledgeStorageHealthData(BaseModel):
    backend: str
    bucket: str
    available: bool
    message: str | None = None
    checkedTime: datetime


# Settings with defaults for every field. The three weight defaults
# (0.55 + 0.30 + 0.15) add up to 1.0; nothing here enforces that for
# caller-supplied values.
class KnowledgeSettingsDto(BaseModel):
    knowledgeBaseId: str | None = None
    retrievalMode: str = "hybrid"
    embeddingModelId: str = "auto"
    rerankModelId: str = "auto"
    chunkSize: int = 800
    chunkOverlap: int = 120
    topK: int = 5
    minScore: float = 0.0
    maxCandidates: int = 50
    keywordWeight: float = 0.55
    vectorWeight: float = 0.30
    rerankWeight: float = 0.15
    queryRewrite: bool = False
    requireCitations: bool = True


# Same fields as ``KnowledgeSettingsDto``; ``knowledgeBaseId`` is redeclared
# with the same type and default as in the parent.
class KnowledgeSettingsUpdateRequestDto(KnowledgeSettingsDto):
    knowledgeBaseId: str | None = None


# Deletion outcome: a required ``deleted`` flag plus optional ids and an
# optional ``objectDeleted`` flag.
class DeleteData(BaseModel):
    deleted: bool
    knowledgeBaseId: str | None = None
    documentId: str | None = None
    chunkId: str | None = None
    objectDeleted: bool | None = None