| 12345678910111213141516171819202122232425262728293031323334 |
- from core_db import AuditMixin, Base, EntityMixin
- from core_shared import JSONValue
- from sqlalchemy import Integer, String, Text, cast
- from sqlalchemy import JSON
- from sqlalchemy.orm import Mapped, mapped_column
- from sqlalchemy.sql.expression import ColumnElement
- from sqlalchemy.types import UserDefinedType
class PgVector(UserDefinedType[str]):
    """SQLAlchemy column type rendered as ``public.vector(<dimensions>)``.

    The Python-side value is typed as ``str``.
    NOTE(review): presumably targets the pgvector extension's ``vector``
    type installed in the ``public`` schema -- confirm against the database.
    """

    # Opts this type in to SQLAlchemy's compiled-statement cache.
    cache_ok = True

    def __init__(self, dimensions: int) -> None:
        """Store the dimension count used when rendering the column spec."""
        self.dimensions = dimensions

    def get_col_spec(self, **kw: object) -> str:
        """Return the DDL type string, e.g. ``public.vector(32)``."""
        size = self.dimensions
        return f"public.vector({size})"

    def bind_expression(self, bindvalue: ColumnElement[str]) -> ColumnElement[str]:
        """Wrap a bound parameter in a SQL ``CAST`` to this type."""
        # Presumably lets the server coerce the string parameter into a
        # vector value -- verify against the driver in use.
        casted = cast(bindvalue, self)
        return casted
class KnowledgeChunk(EntityMixin, AuditMixin, Base):
    """ORM model for one row of the ``knowledge_chunk`` table.

    Columns contributed by ``EntityMixin`` / ``AuditMixin`` are defined
    elsewhere and are not described here.
    """

    __tablename__ = "knowledge_chunk"

    # --- Ownership / position -------------------------------------------
    # Both identifiers are 36-character strings with their own index
    # (presumably UUIDs in canonical text form -- TODO confirm).
    knowledge_base_id: Mapped[str] = mapped_column(
        String(36),
        index=True,
    )
    document_id: Mapped[str] = mapped_column(
        String(36),
        index=True,
    )
    chunk_index: Mapped[int] = mapped_column(Integer)

    # --- Content ---------------------------------------------------------
    content_text: Mapped[str] = mapped_column(Text)
    # Defaults to 0 when no value is supplied on insert.
    token_count: Mapped[int] = mapped_column(
        Integer,
        default=0,
    )

    # --- Embedding (all optional) -----------------------------------------
    embedding_model: Mapped[str | None] = mapped_column(
        String(64),
        nullable=True,
        index=True,
    )
    # The embedding is held twice: as a JSON list of floats and as a
    # 32-dimension ``PgVector`` column.
    # NOTE(review): how the two are kept in sync is not visible here.
    embedding_json: Mapped[list[float] | None] = mapped_column(
        JSON,
        nullable=True,
    )
    embedding_vector: Mapped[str | None] = mapped_column(
        PgVector(32),
        nullable=True,
    )

    # --- Free-form metadata ----------------------------------------------
    metadata_json: Mapped[dict[str, JSONValue] | None] = mapped_column(
        JSON,
        nullable=True,
    )
|