from dataclasses import dataclass

import httpx

from app.application.retrieval import build_hash_embedding
from app.bootstrap.settings import KnowledgeServiceSettings


class EmbeddingProviderError(Exception):
    """Raised when the HTTP embedding provider cannot produce a usable vector."""


@dataclass(frozen=True)
class EmbeddingResult:
    """Immutable embedding vector plus the model and provider that produced it."""

    # The embedding vector itself.
    embedding: list[float]
    # Model name taken from settings (also reported for local-hash results).
    model: str
    # "http" or "local-hash", depending on which code path built the vector.
    provider: str


class EmbeddingService:
    """Produce text embeddings via an HTTP provider or a local hash fallback."""

    def __init__(self, *, settings: KnowledgeServiceSettings) -> None:
        """Store the settings object that every embedding call reads from."""
        self.settings = settings

    def embed_text(self, text: str) -> EmbeddingResult:
        """Embed ``text`` using the configured provider.

        When ``settings.embedding_provider`` is ``"http"`` the remote endpoint
        is tried first. If it fails with ``EmbeddingProviderError`` and
        ``settings.embedding_fallback_to_local`` is truthy, the local hash
        embedding is returned instead; otherwise the error propagates. Any
        other provider value goes straight to the local hash embedding.

        Raises:
            EmbeddingProviderError: the HTTP provider failed and fallback is
                disabled.
        """
        if self.settings.embedding_provider == "http":
            try:
                return self._embed_with_http(text)
            except EmbeddingProviderError:
                if not self.settings.embedding_fallback_to_local:
                    raise
                # Fallback enabled: fall through to the local hash embedding.
        return self._embed_with_local_hash(text)

    def _embed_with_local_hash(self, text: str) -> EmbeddingResult:
        """Build an embedding with ``build_hash_embedding`` (no network call)."""
        return EmbeddingResult(
            embedding=build_hash_embedding(
                text,
                dimensions=self.settings.embedding_dimensions,
            ),
            model=self.settings.embedding_model,
            provider="local-hash",
        )

    def _embed_with_http(self, text: str) -> EmbeddingResult:
        """POST ``text`` to ``<embedding_base_url>/embeddings`` and parse the reply.

        Raises:
            EmbeddingProviderError: the base URL is unset or malformed, the
                request fails or returns an error status, the body is not
                valid JSON, or the payload carries no usable
                ``data[0].embedding``.
        """
        if not self.settings.embedding_base_url:
            raise EmbeddingProviderError("embedding_base_url is required for http provider")

        headers: dict[str, str] = {}
        if self.settings.embedding_api_key:
            headers["Authorization"] = f"Bearer {self.settings.embedding_api_key}"

        try:
            with httpx.Client(timeout=self.settings.embedding_timeout_seconds) as client:
                response = client.post(
                    f"{self.settings.embedding_base_url.rstrip('/')}/embeddings",
                    headers=headers,
                    json={"model": self.settings.embedding_model, "input": text},
                )
                response.raise_for_status()
                payload = response.json()
        # httpx.InvalidURL is not a subclass of httpx.HTTPError, so it is listed
        # explicitly; otherwise a malformed base URL would escape as a raw
        # httpx exception and skip the local fallback in embed_text().
        # ValueError covers a response body that is not valid JSON.
        except (httpx.HTTPError, httpx.InvalidURL, ValueError) as exc:
            raise EmbeddingProviderError(f"http embedding request failed: {exc}") from exc

        embedding = _read_openai_embedding(payload)
        if embedding is None:
            raise EmbeddingProviderError("embedding response missing data[0].embedding")

        return EmbeddingResult(
            embedding=embedding,
            model=self.settings.embedding_model,
            provider="http",
        )


def _read_openai_embedding(payload: object) -> list[float] | None:
    """Extract ``payload["data"][0]["embedding"]`` as a list of floats.

    Returns ``None`` when the payload does not have that shape: a non-dict
    payload, a missing or empty ``data`` list, a non-dict first item, a
    missing, non-list or empty ``embedding``, or any element that is not an
    ``int``/``float`` (``bool`` is rejected even though it subclasses ``int``).
    """
    if not isinstance(payload, dict):
        return None

    data = payload.get("data")
    if not isinstance(data, list) or not data:
        return None

    first_item = data[0]
    if not isinstance(first_item, dict):
        return None

    embedding = first_item.get("embedding")
    # An empty vector is as unusable as a missing one, so it is rejected too.
    if not isinstance(embedding, list) or not embedding:
        return None

    values: list[float] = []
    for item in embedding:
        if isinstance(item, bool) or not isinstance(item, (int, float)):
            return None
        values.append(float(item))
    return values