"""ORM model for knowledge-base text chunks and the vector column type it uses."""

from core_db import AuditMixin, Base, EntityMixin
from core_shared import JSONValue
from sqlalchemy import JSON, Integer, String, Text, cast
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.sql.expression import ColumnElement
from sqlalchemy.types import UserDefinedType

# Number of dimensions declared for the ``embedding_vector`` column below.
EMBEDDING_DIMENSIONS = 1536


class PgVector(UserDefinedType[str]):
    """Custom column type emitted in DDL as ``public.vector(<dimensions>)``.

    Values are handled as strings on the Python side, and every bound
    parameter is wrapped in a SQL ``CAST`` to this type.

    NOTE(review): ``public.vector`` is presumably the type provided by the
    pgvector extension installed in the ``public`` schema -- confirm against
    the database migrations.
    """

    # Opt in to SQLAlchemy's compiled-statement caching; the only state held
    # by the type is the ``dimensions`` integer set in ``__init__``.
    cache_ok = True

    def __init__(self, dimensions: int) -> None:
        """Remember the vector width used when rendering the column spec."""
        self.dimensions = dimensions

    def get_col_spec(self, **kw: object) -> str:
        """Return the DDL type string for this column."""
        width = self.dimensions
        return f"public.vector({width})"

    def bind_expression(self, bindvalue: ColumnElement[str]) -> ColumnElement[str]:
        """Wrap a bound parameter in ``CAST(<param> AS <this type>)``."""
        as_vector = cast(bindvalue, self)
        return as_vector


class KnowledgeChunk(EntityMixin, AuditMixin, Base):
    """Row of the ``knowledge_chunk`` table: one chunk of document text.

    Columns contributed by ``EntityMixin`` and ``AuditMixin`` are defined
    elsewhere and are not described here.
    """

    __tablename__ = "knowledge_chunk"

    # Indexed 36-character string identifiers.
    # NOTE(review): the width suggests UUID strings -- confirm with callers.
    knowledge_base_id: Mapped[str] = mapped_column(
        String(36),
        index=True,
    )
    document_id: Mapped[str] = mapped_column(
        String(36),
        index=True,
    )

    # Integer index of the chunk (presumably its position within the
    # document -- verify against the code that writes it).
    chunk_index: Mapped[int] = mapped_column(Integer)

    # Text content of the chunk.
    content_text: Mapped[str] = mapped_column(Text)

    # Token count for the chunk; defaults to 0 when not supplied.
    token_count: Mapped[int] = mapped_column(
        Integer,
        default=0,
    )

    # Optional, indexed name (up to 64 characters) of the embedding model.
    embedding_model: Mapped[str | None] = mapped_column(
        String(64),
        nullable=True,
        index=True,
    )

    # Optional embedding stored as a JSON list of floats.
    embedding_json: Mapped[list[float] | None] = mapped_column(
        JSON,
        nullable=True,
    )

    # Optional embedding stored in a ``public.vector`` column; exposed to
    # Python as a string.
    # NOTE(review): presumably the textual vector literal -- confirm the
    # exact format with the code that populates it.
    embedding_vector: Mapped[str | None] = mapped_column(
        PgVector(EMBEDDING_DIMENSIONS),
        nullable=True,
    )

    # Optional JSON object of additional metadata.
    metadata_json: Mapped[dict[str, JSONValue] | None] = mapped_column(
        JSON,
        nullable=True,
    )