knowledge_chunk.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334
  1. from core_db import AuditMixin, Base, EntityMixin
  2. from core_shared import JSONValue
  3. from sqlalchemy import Integer, String, Text, cast
  4. from sqlalchemy import JSON
  5. from sqlalchemy.orm import Mapped, mapped_column
  6. from sqlalchemy.sql.expression import ColumnElement
  7. from sqlalchemy.types import UserDefinedType
  8. class PgVector(UserDefinedType[str]):
  9. cache_ok = True
  10. def __init__(self, dimensions: int) -> None:
  11. self.dimensions = dimensions
  12. def get_col_spec(self, **kw: object) -> str:
  13. return f"public.vector({self.dimensions})"
  14. def bind_expression(self, bindvalue: ColumnElement[str]) -> ColumnElement[str]:
  15. return cast(bindvalue, self)
  16. class KnowledgeChunk(EntityMixin, AuditMixin, Base):
  17. __tablename__ = "knowledge_chunk"
  18. knowledge_base_id: Mapped[str] = mapped_column(String(36), index=True)
  19. document_id: Mapped[str] = mapped_column(String(36), index=True)
  20. chunk_index: Mapped[int] = mapped_column(Integer)
  21. content_text: Mapped[str] = mapped_column(Text)
  22. token_count: Mapped[int] = mapped_column(Integer, default=0)
  23. embedding_model: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True)
  24. embedding_json: Mapped[list[float] | None] = mapped_column(JSON, nullable=True)
  25. embedding_vector: Mapped[str | None] = mapped_column(PgVector(32), nullable=True)
  26. metadata_json: Mapped[dict[str, JSONValue] | None] = mapped_column(JSON, nullable=True)