diff --git a/.devcontainer/compose.yaml b/.devcontainer/compose.yaml
index 555b106..e45097c 100644
--- a/.devcontainer/compose.yaml
+++ b/.devcontainer/compose.yaml
@@ -8,9 +8,12 @@ services:
     ports:
       - "8000:8000"
     environment:
-      - QDRANT_URL=qdrant:6334
+      - QDRANT_HOST=qdrant
+      - QDRANT_GRPC_PORT=6334
       - TEI_BASE_URL=http://text-embeddings-inference
       - TEI_RERANK_BASE_URL=http://text-embeddings-inference-rerank
+      - OLLAMA_BASE_URL=http://ollama:11434
+      - OLLAMA_MODEL_NAME=${OLLAMA_MODEL_NAME}
     command: sleep infinity
 
   qdrant:
@@ -30,6 +33,7 @@ services:
     environment:
      - MODEL_ID=${TEI_MODEL_ID:-BAAI/bge-large-en-v1.5}
      - REVISION=${TEI_MODEL_REVISION}
+     - MAX_CLIENT_BATCH_SIZE=128
 
   text-embeddings-inference-rerank:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-0.6
@@ -40,3 +44,19 @@ services:
     environment:
      - MODEL_ID=${TEI_RERANK_MODEL_ID:-BAAI/bge-reranker-large}
      - REVISION=${TEI_RERANK_MODEL_REVISION}
+     - MAX_CLIENT_BATCH_SIZE=128
+
+  ollama:
+    image: ollama/ollama:latest
+    volumes:
+      - ../ollama:/root/.ollama
+    ports:
+      - 5000:11434
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: ${OLLAMA_GPU_DRIVER-nvidia}
+              count: ${OLLAMA_GPU_COUNT-1}
+              capabilities:
+                - gpu
diff --git a/.gitignore b/.gitignore
index 75e3c6c..ee152a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -163,3 +163,4 @@ cython_debug/
 data/
 tei_data/
 qdrant_storage/
+ollama/
diff --git a/llm-qa/llm_qa/chains/text_splitters/markdown_header_text_splitter.py b/llm-qa/llm_qa/chains/text_splitters/markdown_header_text_splitter.py
index d3c12a6..b8ad946 100644
--- a/llm-qa/llm_qa/chains/text_splitters/markdown_header_text_splitter.py
+++ b/llm-qa/llm_qa/chains/text_splitters/markdown_header_text_splitter.py
@@ -6,7 +6,7 @@ def get_markdown_header_text_splitter_chain(
     markdown_header_text_splitter: MarkdownHeaderTextSplitter,
 ) -> Runnable[str, list[str]]:
     if not markdown_header_text_splitter.strip_headers:
-        raise ValueError("`strip_headers` must be True")  # noqa: TRY003
+        raise ValueError("`strip_headers` must be True")
 
     @chain
     def markdown_header_text_splitter_chain(text: str) -> list[str]:
@@ -15,7 +15,11 @@ def get_markdown_header_text_splitter_chain(
         return [
             "\n...\n".join(
                 f"{header_key} {document.metadata[header_key]}"
-                for _, header_key in markdown_header_text_splitter.headers_to_split_on
+                for _, header_key in sorted(
+                    markdown_header_text_splitter.headers_to_split_on,
+                    key=lambda x: len(x[0]),
+                )
+                if header_key in document.metadata
             )
             + f"\n{document.page_content}"
             for document in documents
diff --git a/llm-qa/llm_qa/client.py b/llm-qa/llm_qa/client.py
new file mode 100644
index 0000000..258226f
--- /dev/null
+++ b/llm-qa/llm_qa/client.py
@@ -0,0 +1,113 @@
+import logging
+import pathlib
+from typing import Annotated, Optional
+
+import httpx
+import typer
+
+from llm_qa.logging import load_logging_config
+from llm_qa.models.chat import ChatMessage, ChatRequest, ChatResponse
+from llm_qa.models.prompts import ChatPrompts
+from llm_qa.models.upsert import TextType, UpsertTextRequest, UpsertTextResponse
+
+logger = logging.getLogger(__name__)
+
+SYSTEM_MESSAGE_PROMPT_TEMPLATE_PATH = "../data/prompts/system_message.txt"
+LAST_HUMAN_MESSAGE_PROMPT_TEMPLATE_PATH = "../data/prompts/last_human_message.txt"
+CHAT_URL = "http://localhost:8000/api/v1/chat"
+UPSERT_URL = "http://localhost:8000/api/v1/upsert-text"
+
+
+def load_prompt(path: str) -> str:
+    return pathlib.Path(path).read_text()
+
+
+app = typer.Typer()
+
+
+@app.command()
+def chat(
+    collection: Annotated[str, typer.Option()],
+    chat_url: Annotated[str, typer.Option()] = CHAT_URL,
+    system_message_prompt_template_path: Annotated[
+        Optional[typer.FileText], typer.Option()  # noqa: UP007
+    ] = None,
+    last_human_message_prompt_template_path: Annotated[
+        Optional[typer.FileText], typer.Option()  # noqa: UP007
+    ] = None,
+) -> None:
+    system_message_prompt_template = (
+        system_message_prompt_template_path.read()
+        if system_message_prompt_template_path
+        else load_prompt(SYSTEM_MESSAGE_PROMPT_TEMPLATE_PATH)
+    )
+    last_human_message_prompt_template = (
+        last_human_message_prompt_template_path.read()
+        if last_human_message_prompt_template_path
+        else load_prompt(LAST_HUMAN_MESSAGE_PROMPT_TEMPLATE_PATH)
+    )
+    prompts = ChatPrompts(
+        system_message=system_message_prompt_template,
+        last_human_message=last_human_message_prompt_template,
+    )
+    client = httpx.Client(timeout=180.0)
+    messages: list[ChatMessage] = []
+    while True:
+        message_content = input("### User: ")
+        if message_content in {"/exit", "/quit", "/bye"}:
+            break
+        message = ChatMessage.new_human(message_content)
+        messages.append(message)
+        response = client.post(
+            chat_url,
+            json=ChatRequest(
+                messages=messages, collection_name=collection, prompts=prompts
+            ).model_dump(),
+        )
+        response.raise_for_status()
+        chat_response = ChatResponse.model_validate_json(response.content)
+        messages.append(chat_response.response_message)
+        print("### AI:", chat_response.response_message.content)
+        print(
+            "----------\n### Sources:\n"
+            + "\n\n".join(
+                f"Source {i + 1}:\n{source.content}"
+                for i, source in enumerate(chat_response.sources)
+            )
+            + "\n----------"
+        )
+
+
+@app.command()
+def upsert(
+    files: list[typer.FileText],
+    collection: Annotated[str, typer.Option()],
+    upsert_url: Annotated[str, typer.Option()] = UPSERT_URL,
+) -> None:
+    client = httpx.Client(timeout=180.0)
+    for file in files:
+        text = file.read()
+        ext = pathlib.Path(file.name).suffix
+        match ext.lower():
+            case ".txt":
+                text_type = TextType.PLAIN_TEXT
+            case ".md":
+                text_type = TextType.MARKDOWN
+            case _:
+                logger.error("Unsupported file extension `%s`", ext)
+                continue
+        logger.info("Upserting file `%s` as type `%s`", file.name, text_type.value)
+        response = client.post(
+            upsert_url,
+            json=UpsertTextRequest(
+                text=text, type=text_type, collection=collection
+            ).model_dump(),
+        )
+        response.raise_for_status()
+        upsert_response = UpsertTextResponse.model_validate_json(response.content)
+        logger.info("Upserted %d document chunks", upsert_response.num_documents)
+
+
+if __name__ == "__main__":
+    load_logging_config()
+    app()
diff --git a/llm-qa/llm_qa/dependencies.py b/llm-qa/llm_qa/dependencies.py
index 81b5bda..4ee9bb5 100644
--- a/llm-qa/llm_qa/dependencies.py
+++ b/llm-qa/llm_qa/dependencies.py
@@ -1,8 +1,12 @@
 from typing import Annotated
 
 from fastapi import Depends
+from langchain.callbacks import StreamingStdOutCallbackHandler
+from langchain.callbacks.manager import CallbackManager
+from langchain.chat_models.base import BaseChatModel
+from langchain.chat_models.ollama import ChatOllama
 
-from llm_qa.embeddings.tei import TeiEmbeddings
+from llm_qa.models.tei import TeiConfig
 from llm_qa.settings import Settings
 
 
@@ -10,15 +14,28 @@ def settings() -> Settings:
     return Settings()
 
 
-def tei_embeddings(settings: Annotated[Settings, Depends(settings)]) -> TeiEmbeddings:
-    return TeiEmbeddings(
+def tei_config(settings: Annotated[Settings, Depends(settings)]) -> TeiConfig:
+    return TeiConfig(
         base_url=settings.tei_base_url,
+        document_prefix=settings.tei_document_prefix,
+        query_prefix=settings.tei_query_prefix,
     )
 
 
-def tei_rerank_embeddings(
+def tei_rerank_config(
     settings: Annotated[Settings, Depends(settings)],
-) -> TeiEmbeddings:
-    return TeiEmbeddings(
+) -> TeiConfig:
+    return TeiConfig(
         base_url=settings.tei_rerank_base_url,
     )
+
+
+def chat_model(
+    settings: Annotated[Settings, Depends(settings)],
+) -> BaseChatModel:
+    return ChatOllama(
+        base_url=settings.ollama_base_url,
+        model=settings.ollama_model_name,
+        verbose=True,
+        callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
+    )
diff --git a/llm-qa/llm_qa/embeddings/base.py b/llm-qa/llm_qa/embeddings/base.py
deleted file mode 100644
index 014d61e..0000000
--- a/llm-qa/llm_qa/embeddings/base.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from abc import ABC
-
-from langchain.embeddings.base import Embeddings
-from pydantic import BaseModel
-
-
-class PydanticEmbeddings(Embeddings, BaseModel, ABC):
-    pass
diff --git a/llm-qa/llm_qa/embeddings/tei.py b/llm-qa/llm_qa/embeddings/tei.py
index 90907ad..5227b10 100644
--- a/llm-qa/llm_qa/embeddings/tei.py
+++ b/llm-qa/llm_qa/embeddings/tei.py
@@ -2,8 +2,8 @@ from typing import override
 from urllib.parse import urljoin
 
 import httpx
+from langchain.embeddings.base import Embeddings
 
-from llm_qa.embeddings.base import PydanticEmbeddings
 from llm_qa.models.tei import (
     EmbedRequest,
     EmbedResponseAdapter,
@@ -11,25 +11,28 @@ from llm_qa.models.tei import (
     RerankRequest,
     RerankResponse,
     RerankResponseAdapter,
+    TeiConfig,
 )
 
 
-class TeiEmbeddings(PydanticEmbeddings):
-    base_url: str
-    embed_endpoint: str = "/embed"
-    rerank_endpoint: str = "/rerank"
-    document_prefix: str = "passage: "
-    query_prefix: str = "query: "
-    _client: httpx.Client = httpx.Client()
-    _async_client: httpx.AsyncClient = httpx.AsyncClient()
+class TeiEmbeddings(Embeddings):
+    def __init__(
+        self,
+        tei_config: TeiConfig,
+        client: httpx.Client | None = None,
+        async_client: httpx.AsyncClient | None = None,
+    ) -> None:
+        self.tei_config = tei_config
+        self._client = client or httpx.Client(timeout=180.0)
+        self._async_client = async_client or httpx.AsyncClient(timeout=180.0)
 
     @property
     def embed_url(self) -> str:
-        return urljoin(self.base_url, self.embed_endpoint)
+        return urljoin(self.tei_config.base_url, self.tei_config.embed_endpoint)
 
     @property
     def rerank_url(self) -> str:
-        return urljoin(self.base_url, self.rerank_endpoint)
+        return urljoin(self.tei_config.base_url, self.tei_config.rerank_endpoint)
 
     @staticmethod
     def _handle_status(response: httpx.Response) -> None:
@@ -46,7 +49,8 @@ class TeiEmbeddings(PydanticEmbeddings):
                 except ValueError:
                     note = e.response.text
                 e.add_note(note)
-                raise
+            case _:
+                raise
 
     def _embed(self, text: str | list[str]) -> list[list[float]]:
         """Embed text."""
@@ -69,22 +73,24 @@ class TeiEmbeddings(PydanticEmbeddings):
     @override
     def embed_documents(self, texts: list[str]) -> list[list[float]]:
         """Embed search docs."""
-        return self._embed([self.document_prefix + text for text in texts])
+        return self._embed([self.tei_config.document_prefix + text for text in texts])
 
     @override
     def embed_query(self, text: str) -> list[float]:
         """Embed query text."""
-        return self._embed(self.document_prefix + text)[0]
+        return self._embed(self.tei_config.query_prefix + text)[0]
 
     @override
     async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
         """Asynchronous Embed search docs."""
-        return await self._aembed([self.document_prefix + text for text in texts])
+        return await self._aembed([
+            self.tei_config.document_prefix + text for text in texts
+        ])
 
     @override
     async def aembed_query(self, text: str) -> list[float]:
         """Asynchronous Embed query text."""
-        return (await self._aembed(self.document_prefix + text))[0]
+        return (await self._aembed(self.tei_config.query_prefix + text))[0]
 
     def rerank(self, query: str, texts: list[str]) -> list[RerankResponse]:
         """Rerank texts."""
diff --git a/llm-qa/llm_qa/logging.py b/llm-qa/llm_qa/logging.py
new file mode 100644
index 0000000..7bea105
--- /dev/null
+++ b/llm-qa/llm_qa/logging.py
@@ -0,0 +1,9 @@
+import logging.config
+import pathlib
+
+import yaml
+
+
+def load_logging_config() -> None:
+    logging_config = yaml.safe_load(pathlib.Path("logging.yaml").read_text())
+    logging.config.dictConfig(logging_config)
diff --git a/llm-qa/llm_qa/models/chat.py b/llm-qa/llm_qa/models/chat.py
new file mode 100644
index 0000000..0c65365
--- /dev/null
+++ b/llm-qa/llm_qa/models/chat.py
@@ -0,0 +1,57 @@
+from enum import StrEnum
+from typing import Self
+
+from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
+from pydantic import BaseModel
+
+from llm_qa.models.prompts import ChatPrompts
+from llm_qa.models.source import Source
+from llm_qa.typing_utils import assert_never
+
+
+class MessageType(StrEnum):
+    HUMAN = "HUMAN"
+    AI = "AI"
+
+
+class ChatMessage(BaseModel):
+    content: str
+    type: MessageType
+
+    @classmethod
+    def new_human(cls, content: str) -> Self:
+        return cls(content=content, type=MessageType.HUMAN)
+
+    @classmethod
+    def new_ai(cls, content: str) -> Self:
+        return cls(content=content, type=MessageType.AI)
+
+    @classmethod
+    def from_langchain_message(cls, message: HumanMessage | AIMessage) -> Self:
+        match message:
+            case HumanMessage(content=content):
+                return cls(content=content, type=MessageType.HUMAN)
+            case AIMessage(content=content):
+                return cls(content=content, type=MessageType.AI)
+            case _:
+                return assert_never(message)
+
+    def to_langchain_message(self) -> BaseMessage:
+        match self.type:
+            case MessageType.HUMAN:
+                return HumanMessage(content=self.content)
+            case MessageType.AI:
+                return AIMessage(content=self.content)
+            case _:
+                return assert_never(self.type)
+
+
+class ChatRequest(BaseModel):
+    messages: list[ChatMessage]
+    collection_name: str
+    prompts: ChatPrompts | None = None
+
+
+class ChatResponse(BaseModel):
+    response_message: ChatMessage
+    sources: list[Source]
diff --git a/llm-qa/llm_qa/models/prompts.py b/llm-qa/llm_qa/models/prompts.py
new file mode 100644
index 0000000..e800698
--- /dev/null
+++ b/llm-qa/llm_qa/models/prompts.py
@@ -0,0 +1,6 @@
+from pydantic import BaseModel
+
+
+class ChatPrompts(BaseModel):
+    system_message: str
+    last_human_message: str
diff --git a/llm-qa/llm_qa/models/source.py b/llm-qa/llm_qa/models/source.py
new file mode 100644
index 0000000..15ba0be
--- /dev/null
+++ b/llm-qa/llm_qa/models/source.py
@@ -0,0 +1,12 @@
+from typing import Self
+
+from langchain_core.documents import Document
+from pydantic import BaseModel
+
+
+class Source(BaseModel):
+    content: str
+
+    @classmethod
+    def from_document(cls, document: Document) -> Self:
+        return cls(content=document.page_content)
diff --git a/llm-qa/llm_qa/models/tei.py b/llm-qa/llm_qa/models/tei.py
index 6789ebd..aee7f4c 100644
--- a/llm-qa/llm_qa/models/tei.py
+++ b/llm-qa/llm_qa/models/tei.py
@@ -3,10 +3,18 @@ from enum import StrEnum
 from pydantic import BaseModel, Field, TypeAdapter
 
 
+class TeiConfig(BaseModel):
+    base_url: str
+    embed_endpoint: str = "/embed"
+    rerank_endpoint: str = "/rerank"
+    document_prefix: str = ""
+    query_prefix: str = ""
+
+
 class EmbedRequest(BaseModel):
     inputs: str | list[str]
     normalize: bool = True
-    truncate: bool = False
+    truncate: bool = True
 
 
 EmbedResponseAdapter = TypeAdapter(list[list[float]])
diff --git a/llm-qa/llm_qa/routers/api_v1.py b/llm-qa/llm_qa/routers/api_v1.py
index ed5f84a..11006bb 100644
--- a/llm-qa/llm_qa/routers/api_v1.py
+++ b/llm-qa/llm_qa/routers/api_v1.py
@@ -1,6 +1,7 @@
 from fastapi import APIRouter
 
-from llm_qa.routers import upsert
+from llm_qa.routers import chat, upsert
 
 router = APIRouter(prefix="/api/v1", tags=["v1"])
 router.include_router(upsert.router)
+router.include_router(chat.router)
diff --git a/llm-qa/llm_qa/routers/chat.py b/llm-qa/llm_qa/routers/chat.py
new file mode 100644
index 0000000..f114df2
--- /dev/null
+++ b/llm-qa/llm_qa/routers/chat.py
@@ -0,0 +1,31 @@
+from typing import Annotated
+
+from fastapi import APIRouter, Depends
+from langchain.chat_models.base import BaseChatModel
+
+from llm_qa.dependencies import chat_model, settings, tei_config
+from llm_qa.models.chat import ChatRequest, ChatResponse
+from llm_qa.models.tei import TeiConfig
+from llm_qa.services.chat import chat as chat_service
+from llm_qa.settings import Settings
+
+router = APIRouter()
+
+
+@router.post("/chat")
+async def chat(
+    chat_request: ChatRequest,
+    settings: Annotated[Settings, Depends(settings)],
+    tei_config: Annotated[TeiConfig, Depends(tei_config)],
+    chat_model: Annotated[BaseChatModel, Depends(chat_model)],
+) -> ChatResponse:
+    prompts = chat_request.prompts or settings.chat_prompts
+    return await chat_service(
+        messages=[message.to_langchain_message() for message in chat_request.messages],
+        collection_name=chat_request.collection_name,
+        prompts=prompts,
+        tei_config=tei_config,
+        qdrant_host=settings.qdrant_host,
+        qdrant_grpc_port=settings.qdrant_grpc_port,
+        chat_model=chat_model,
+    )
diff --git a/llm-qa/llm_qa/routers/upsert.py b/llm-qa/llm_qa/routers/upsert.py
index 2efaf38..90be3f9 100644
--- a/llm-qa/llm_qa/routers/upsert.py
+++ b/llm-qa/llm_qa/routers/upsert.py
@@ -2,8 +2,8 @@ from typing import Annotated
 
 from fastapi import APIRouter, Depends
 
-from llm_qa.dependencies import settings, tei_embeddings
-from llm_qa.embeddings.base import PydanticEmbeddings
+from llm_qa.dependencies import settings, tei_config
+from llm_qa.models.tei import TeiConfig
 from llm_qa.models.upsert import UpsertTextRequest, UpsertTextResponse
 from llm_qa.services.upsert import upsert_text as upsert_text_service
 from llm_qa.settings import Settings
@@ -15,14 +15,15 @@ router = APIRouter()
 async def upsert_text(
     upsert_request: UpsertTextRequest,
     settings: Annotated[Settings, Depends(settings)],
-    embeddings: Annotated[PydanticEmbeddings, Depends(tei_embeddings)],
+    tei_config: Annotated[TeiConfig, Depends(tei_config)],
 ) -> UpsertTextResponse:
     num_documents = await upsert_text_service(
         text=upsert_request.text,
         text_type=upsert_request.type,
         collection=upsert_request.collection,
-        embeddings=embeddings,
-        qdrant_url=settings.qdrant_url,
+        tei_config=tei_config,
+        qdrant_host=settings.qdrant_host,
+        qdrant_grpc_port=settings.qdrant_grpc_port,
     )
 
     return UpsertTextResponse(num_documents=num_documents)
diff --git a/llm-qa/llm_qa/services/chat.py b/llm-qa/llm_qa/services/chat.py
new file mode 100644
index 0000000..cd94862
--- /dev/null
+++ b/llm-qa/llm_qa/services/chat.py
@@ -0,0 +1,117 @@
+from operator import itemgetter
+from typing import TypedDict
+
+from langchain.chat_models.base import BaseChatModel
+from langchain.prompts import (
+    ChatPromptTemplate,
+    HumanMessagePromptTemplate,
+    MessagesPlaceholder,
+    SystemMessagePromptTemplate,
+)
+from langchain.vectorstores import VectorStore
+from langchain.vectorstores.qdrant import Qdrant
+from langchain_core.documents import Document
+from langchain_core.messages import AIMessage, BaseMessage
+from langchain_core.runnables import (
+    Runnable,
+    RunnableLambda,
+    RunnablePassthrough,
+    chain,
+)
+from qdrant_client import AsyncQdrantClient, QdrantClient
+
+from llm_qa.embeddings.tei import TeiEmbeddings
+from llm_qa.models.chat import ChatMessage, ChatResponse
+from llm_qa.models.prompts import ChatPrompts
+from llm_qa.models.source import Source
+from llm_qa.models.tei import TeiConfig
+
+
+class ChatRunnableInput(TypedDict):
+    messages: list[BaseMessage]
+    collection_name: str
+
+
+class ChatRunnableOutput(TypedDict):
+    response: AIMessage
+    documents: list[Document]
+
+
+@chain
+def combine_documents(documents: list[Document]) -> str:
+    return "\n\n".join(document.page_content for document in documents)
+
+
+def get_chat_chain(
+    chat_prompt_template: ChatPromptTemplate,
+    vectorstore: VectorStore,
+    chat_model: BaseChatModel,
+) -> Runnable[ChatRunnableInput, ChatRunnableOutput]:
+    return (
+        RunnablePassthrough[ChatRunnableInput]()
+        # Pull the latest human message out of the running message list.
+        | {
+            "messages": itemgetter("messages"),
+            "collection_name": itemgetter("collection_name"),
+            "last_human_message": itemgetter("messages")
+            | RunnableLambda[list[BaseMessage], str](lambda x: x[-1].content),
+        }
+        # Retrieve documents for the last human message, keeping the history around.
+        | {
+            "conversation_history": RunnableLambda(lambda x: x["messages"][:-1]),
+            "last_human_message": itemgetter("last_human_message"),
+            "documents": itemgetter("last_human_message") | vectorstore.as_retriever(),
+        }
+        # Collapse the retrieved documents into a single context string.
+        | {
+            "conversation_history": itemgetter("conversation_history"),
+            "last_human_message": itemgetter("last_human_message"),
+            "documents": itemgetter("documents"),
+            "context": itemgetter("documents") | combine_documents,
+        }
+        # Render the prompt and call the model, passing the documents through.
+        | {
+            "documents": itemgetter("documents"),
+            "response": chat_prompt_template | chat_model,
+        }
+    )
+
+
+async def chat(
+    messages: list[BaseMessage],
+    collection_name: str,
+    prompts: ChatPrompts,
+    tei_config: TeiConfig,
+    qdrant_host: str,
+    qdrant_grpc_port: int,
+    chat_model: BaseChatModel,
+) -> ChatResponse:
+    chat_prompt_template = ChatPromptTemplate.from_messages([
+        SystemMessagePromptTemplate.from_template(prompts.system_message),
+        MessagesPlaceholder(variable_name="conversation_history"),
+        HumanMessagePromptTemplate.from_template(prompts.last_human_message),
+    ])
+    embeddings = TeiEmbeddings(tei_config=tei_config)
+    qdrant_client = QdrantClient(
+        host=qdrant_host, grpc_port=qdrant_grpc_port, prefer_grpc=True
+    )
+    async_qdrant_client = AsyncQdrantClient(
+        host=qdrant_host, grpc_port=qdrant_grpc_port, prefer_grpc=True
+    )
+    qdrant_vectorstore = Qdrant(
+        client=qdrant_client,
+        async_client=async_qdrant_client,
+        collection_name=collection_name,
+        embeddings=embeddings,
+    )
+    chat_chain = get_chat_chain(chat_prompt_template, qdrant_vectorstore, chat_model)
+    chain_output = await chat_chain.ainvoke(
+        ChatRunnableInput(messages=messages, collection_name=collection_name)
+    )
+    return ChatResponse(
+        response_message=ChatMessage.from_langchain_message(chain_output["response"]),
+        sources=[
+            Source(content=document.page_content)
+            for document in chain_output["documents"]
+        ],
+    )
diff --git a/llm-qa/llm_qa/services/upsert.py b/llm-qa/llm_qa/services/upsert.py
index e6a5cd3..96bd96b 100644
--- a/llm-qa/llm_qa/services/upsert.py
+++ b/llm-qa/llm_qa/services/upsert.py
@@ -1,7 +1,7 @@
 import logging
 
 from langchain.schema.document import Document
-from langchain_core.embeddings import Embeddings
+from langchain.vectorstores.qdrant import Qdrant
 
 from llm_qa.chains.text_splitters.markdown_header_text_splitter import (
     markdown_3_headers_text_splitter_chain,
@@ -9,8 +9,9 @@ from llm_qa.chains.text_splitters.markdown_header_text_splitter import (
 from llm_qa.chains.text_splitters.text_splitter import (
     recursive_character_text_splitter_chain,
 )
+from llm_qa.embeddings.tei import TeiEmbeddings
+from llm_qa.models.tei import TeiConfig
 from llm_qa.models.upsert import TextType
-from llm_qa.vectorstores.qdrant import upsert_documents
 
 logger = logging.getLogger(__name__)
 
@@ -19,8 +20,9 @@ async def upsert_text(
     text: str,
     text_type: TextType,
     collection: str,
-    embeddings: Embeddings,
-    qdrant_url: str,
+    tei_config: TeiConfig,
+    qdrant_host: str,
+    qdrant_grpc_port: int,
 ) -> int:
     match text_type:
         case TextType.PLAIN_TEXT:
@@ -28,16 +30,25 @@ async def upsert_text(
         case TextType.MARKDOWN:
             text_splitter_chain = markdown_3_headers_text_splitter_chain
         case _:
-            raise ValueError(f"Unknown text type: `{text_type}`")  # noqa: TRY003
+            raise ValueError(f"Unknown text type: `{text_type}`")
 
     text_chunks = await text_splitter_chain.ainvoke(text)
     documents = [Document(page_content=chunk) for chunk in text_chunks]
 
-    await upsert_documents(
-        documents=documents,
-        embeddings=embeddings,
-        qdrant_url=qdrant_url,
-        collection=collection,
+    embeddings = TeiEmbeddings(tei_config=tei_config)
+
+    logger.info(
+        "Upserting %d documents to Qdrant collection `%s`", len(documents), collection
     )
+    await Qdrant.afrom_documents(
+        host=qdrant_host,
+        grpc_port=qdrant_grpc_port,
+        prefer_grpc=True,
+        documents=documents,
+        embedding=embeddings,
+        collection_name=collection,
+        force_recreate=False,
+    )
+
     return len(documents)
diff --git a/llm-qa/llm_qa/settings.py b/llm-qa/llm_qa/settings.py
index c4b551b..3d962e5 100644
--- a/llm-qa/llm_qa/settings.py
+++ b/llm-qa/llm_qa/settings.py
@@ -1,9 +1,19 @@
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
+from llm_qa.models.prompts import ChatPrompts
+
 
 class Settings(BaseSettings):
     model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
 
-    qdrant_url: str
+    qdrant_host: str
+    qdrant_grpc_port: int = 6334
     tei_base_url: str
     tei_rerank_base_url: str
+    tei_document_prefix: str = "passage: "
+    tei_query_prefix: str = "query: "
+    ollama_base_url: str
+    ollama_model_name: str
+    chat_prompts: ChatPrompts = ChatPrompts(
+        system_message="System message", last_human_message="Last human message"
+    )
diff --git a/llm-qa/llm_qa/typing_utils.py b/llm-qa/llm_qa/typing_utils.py
new file mode 100644
index 0000000..27c50fd
--- /dev/null
+++ b/llm-qa/llm_qa/typing_utils.py
@@ -0,0 +1,5 @@
+from typing import Never, NoReturn
+
+
+def assert_never(arg: Never) -> NoReturn:
+    raise AssertionError(f"Expected code is unreachable. Instead received `{arg}`.")
diff --git a/llm-qa/llm_qa/vectorstores/__init__.py b/llm-qa/llm_qa/upsert_client.py
similarity index 100%
rename from llm-qa/llm_qa/vectorstores/__init__.py
rename to llm-qa/llm_qa/upsert_client.py
diff --git a/llm-qa/llm_qa/vectorstores/qdrant.py b/llm-qa/llm_qa/vectorstores/qdrant.py
deleted file mode 100644
index e5f400a..0000000
--- a/llm-qa/llm_qa/vectorstores/qdrant.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import logging
-
-from langchain.docstore.document import Document
-from langchain.embeddings.base import Embeddings
-from langchain.vectorstores.qdrant import Qdrant
-
-logger = logging.getLogger(__name__)
-
-
-async def upsert_documents(
-    documents: list[Document], embeddings: Embeddings, qdrant_url: str, collection: str
-) -> None:
-    logger.info(
-        "Upserting %d documents to Qdrant collection `%s`", len(documents), collection
-    )
-    await Qdrant.afrom_documents(
-        documents=documents,
-        embedding=embeddings,
-        url=qdrant_url,
-        prefer_grpc=True,
-        collection_name=collection,
-    )
diff --git a/llm-qa/llm_qa/web.py b/llm-qa/llm_qa/web.py
index 659992f..5671c27 100644
--- a/llm-qa/llm_qa/web.py
+++ b/llm-qa/llm_qa/web.py
@@ -1,5 +1,6 @@
 from fastapi import FastAPI
 
+from llm_qa.logging import load_logging_config
 from llm_qa.routers import api_v1
 
 app = FastAPI(title="LLM QA")
@@ -9,4 +10,5 @@ app.include_router(api_v1.router)
 
 if __name__ == "__main__":
     import uvicorn
+    load_logging_config()
     uvicorn.run("llm_qa.web:app", host="0.0.0.0", port=8000, reload=True)  # noqa: S104
diff --git a/llm-qa/logging.yaml b/llm-qa/logging.yaml
new file mode 100644
index 0000000..dce8513
--- /dev/null
+++ b/llm-qa/logging.yaml
@@ -0,0 +1,27 @@
+version: 1
+disable_existing_loggers: False
+
+formatters:
+  simple:
+    format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+
+handlers:
+  console:
+    class: logging.StreamHandler
+    level: DEBUG
+    formatter: simple
+    stream: ext://sys.stdout
+
+loggers:
+  uvicorn:
+    level: INFO
+    handlers: [console]
+    propagate: no
+  gunicorn:
+    level: INFO
+    handlers: [console]
+    propagate: no
+
+root:
+  level: INFO
+  handlers: [console]
diff --git a/llm-qa/poetry.lock b/llm-qa/poetry.lock
index 917ec29..70e919d 100644
--- a/llm-qa/poetry.lock
+++ b/llm-qa/poetry.lock
@@ -528,6 +528,140 @@ files = [
 docs = ["Sphinx", "furo"]
 test = ["objgraph", "psutil"]
 
+[[package]]
+name = "grpcio"
+version = "1.60.1"
+description = "HTTP/2-based RPC framework"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "grpcio-1.60.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:14e8f2c84c0832773fb3958240c69def72357bc11392571f87b2d7b91e0bb092"},
+    {file = "grpcio-1.60.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:33aed0a431f5befeffd9d346b0fa44b2c01aa4aeae5ea5b2c03d3e25e0071216"},
+    {file = "grpcio-1.60.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:fead980fbc68512dfd4e0c7b1f5754c2a8e5015a04dea454b9cada54a8423525"},
+    {file = "grpcio-1.60.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:082081e6a36b6eb5cf0fd9a897fe777dbb3802176ffd08e3ec6567edd85bc104"},
+    {file = "grpcio-1.60.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55ccb7db5a665079d68b5c7c86359ebd5ebf31a19bc1a91c982fd622f1e31ff2"},
+    {file = "grpcio-1.60.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b54577032d4f235452f77a83169b6527bf4b77d73aeada97d45b2aaf1bf5ce0"},
+    {file = "grpcio-1.60.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7d142bcd604166417929b071cd396aa13c565749a4c840d6c702727a59d835eb"},
+    {file = "grpcio-1.60.1-cp310-cp310-win32.whl", hash = "sha256:2a6087f234cb570008a6041c8ffd1b7d657b397fdd6d26e83d72283dae3527b1"},
+    {file = "grpcio-1.60.1-cp310-cp310-win_amd64.whl", hash = "sha256:f2212796593ad1d0235068c79836861f2201fc7137a99aa2fea7beeb3b101177"},
+    {file = "grpcio-1.60.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:79ae0dc785504cb1e1788758c588c711f4e4a0195d70dff53db203c95a0bd303"},
+    {file = "grpcio-1.60.1-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:4eec8b8c1c2c9b7125508ff7c89d5701bf933c99d3910e446ed531cd16ad5d87"},
+    {file = "grpcio-1.60.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:8c9554ca8e26241dabe7951aa1fa03a1ba0856688ecd7e7bdbdd286ebc272e4c"},
+    {file = "grpcio-1.60.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91422ba785a8e7a18725b1dc40fbd88f08a5bb4c7f1b3e8739cab24b04fa8a03"},
+    {file = "grpcio-1.60.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cba6209c96828711cb7c8fcb45ecef8c8859238baf15119daa1bef0f6c84bfe7"},
+    {file = "grpcio-1.60.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c71be3f86d67d8d1311c6076a4ba3b75ba5703c0b856b4e691c9097f9b1e8bd2"},
+    {file = "grpcio-1.60.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:af5ef6cfaf0d023c00002ba25d0751e5995fa0e4c9eec6cd263c30352662cbce"},
+    {file = "grpcio-1.60.1-cp311-cp311-win32.whl", hash = "sha256:a09506eb48fa5493c58f946c46754ef22f3ec0df64f2b5149373ff31fb67f3dd"},
+    {file = "grpcio-1.60.1-cp311-cp311-win_amd64.whl", hash = "sha256:49c9b6a510e3ed8df5f6f4f3c34d7fbf2d2cae048ee90a45cd7415abab72912c"},
+    {file = "grpcio-1.60.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:b58b855d0071575ea9c7bc0d84a06d2edfbfccec52e9657864386381a7ce1ae9"},
+    {file = "grpcio-1.60.1-cp312-cp312-macosx_10_10_universal2.whl", hash = "sha256:a731ac5cffc34dac62053e0da90f0c0b8560396a19f69d9703e88240c8f05858"},
+    {file = "grpcio-1.60.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:cf77f8cf2a651fbd869fbdcb4a1931464189cd210abc4cfad357f1cacc8642a6"},
+    {file = "grpcio-1.60.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c557e94e91a983e5b1e9c60076a8fd79fea1e7e06848eb2e48d0ccfb30f6e073"},
+    {file = "grpcio-1.60.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:069fe2aeee02dfd2135d562d0663fe70fbb69d5eed6eb3389042a7e963b54de8"},
+    {file = "grpcio-1.60.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb0af13433dbbd1c806e671d81ec75bd324af6ef75171fd7815ca3074fe32bfe"},
+    {file = "grpcio-1.60.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2f44c32aef186bbba254129cea1df08a20be414144ac3bdf0e84b24e3f3b2e05"},
+    {file = "grpcio-1.60.1-cp312-cp312-win32.whl", hash = "sha256:a212e5dea1a4182e40cd3e4067ee46be9d10418092ce3627475e995cca95de21"},
+    {file = "grpcio-1.60.1-cp312-cp312-win_amd64.whl", hash = "sha256:6e490fa5f7f5326222cb9f0b78f207a2b218a14edf39602e083d5f617354306f"},
+    {file = "grpcio-1.60.1-cp37-cp37m-linux_armv7l.whl", hash = "sha256:4216e67ad9a4769117433814956031cb300f85edc855252a645a9a724b3b6594"},
+    {file = "grpcio-1.60.1-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:73e14acd3d4247169955fae8fb103a2b900cfad21d0c35f0dcd0fdd54cd60367"},
+    {file = "grpcio-1.60.1-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:6ecf21d20d02d1733e9c820fb5c114c749d888704a7ec824b545c12e78734d1c"},
+    {file = "grpcio-1.60.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:33bdea30dcfd4f87b045d404388469eb48a48c33a6195a043d116ed1b9a0196c"},
+    {file = "grpcio-1.60.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53b69e79d00f78c81eecfb38f4516080dc7f36a198b6b37b928f1c13b3c063e9"},
+    {file = "grpcio-1.60.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:39aa848794b887120b1d35b1b994e445cc028ff602ef267f87c38122c1add50d"},
+    {file = "grpcio-1.60.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:72153a0d2e425f45b884540a61c6639436ddafa1829a42056aa5764b84108b8e"},
+    {file = "grpcio-1.60.1-cp37-cp37m-win_amd64.whl", hash = "sha256:50d56280b482875d1f9128ce596e59031a226a8b84bec88cb2bf76c289f5d0de"},
+    {file = "grpcio-1.60.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:6d140bdeb26cad8b93c1455fa00573c05592793c32053d6e0016ce05ba267549"},
+    {file = "grpcio-1.60.1-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:bc808924470643b82b14fe121923c30ec211d8c693e747eba8a7414bc4351a23"},
+    {file = "grpcio-1.60.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:70c83bb530572917be20c21f3b6be92cd86b9aecb44b0c18b1d3b2cc3ae47df0"},
+    {file = "grpcio-1.60.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9b106bc52e7f28170e624ba61cc7dc6829566e535a6ec68528f8e1afbed1c41f"},
+    {file = "grpcio-1.60.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e980cd6db1088c144b92fe376747328d5554bc7960ce583ec7b7d81cd47287"},
+    {file = "grpcio-1.60.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0c5807e9152eff15f1d48f6b9ad3749196f79a4a050469d99eecb679be592acc"},
+    {file = "grpcio-1.60.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f1c3dc536b3ee124e8b24feb7533e5c70b9f2ef833e3b2e5513b2897fd46763a"},
+    {file = "grpcio-1.60.1-cp38-cp38-win32.whl", hash = "sha256:d7404cebcdb11bb5bd40bf94131faf7e9a7c10a6c60358580fe83913f360f929"},
+    {file = "grpcio-1.60.1-cp38-cp38-win_amd64.whl", hash = "sha256:c8754c75f55781515a3005063d9a05878b2cfb3cb7e41d5401ad0cf19de14872"},
+    {file = "grpcio-1.60.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:0250a7a70b14000fa311de04b169cc7480be6c1a769b190769d347939d3232a8"},
+    {file = "grpcio-1.60.1-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:660fc6b9c2a9ea3bb2a7e64ba878c98339abaf1811edca904ac85e9e662f1d73"},
+    {file = "grpcio-1.60.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:76eaaba891083fcbe167aa0f03363311a9f12da975b025d30e94b93ac7a765fc"},
+    {file = "grpcio-1.60.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5d97c65ea7e097056f3d1ead77040ebc236feaf7f71489383d20f3b4c28412a"},
+    {file = "grpcio-1.60.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb2a2911b028f01c8c64d126f6b632fcd8a9ac975aa1b3855766c94e4107180"},
+    {file = "grpcio-1.60.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:5a1ebbae7e2214f51b1f23b57bf98eeed2cf1ba84e4d523c48c36d5b2f8829ff"},
+    {file = "grpcio-1.60.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9a66f4d2a005bc78e61d805ed95dedfcb35efa84b7bba0403c6d60d13a3de2d6"},
+    {file = "grpcio-1.60.1-cp39-cp39-win32.whl", hash = "sha256:8d488fbdbf04283f0d20742b64968d44825617aa6717b07c006168ed16488804"},
+    {file = "grpcio-1.60.1-cp39-cp39-win_amd64.whl", hash = "sha256:61b7199cd2a55e62e45bfb629a35b71fc2c0cb88f686a047f25b1112d3810904"},
+    {file = "grpcio-1.60.1.tar.gz", hash = "sha256:dd1d3a8d1d2e50ad9b59e10aa7f07c7d1be2b367f3f2d33c5fade96ed5460962"},
+]
+
+[package.extras]
+protobuf = ["grpcio-tools (>=1.60.1)"]
+
+[[package]]
+name = "grpcio-tools"
+version = "1.60.1"
+description = "Protobuf code generator for gRPC"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "grpcio-tools-1.60.1.tar.gz", hash = "sha256:da08224ab8675c6d464b988bd8ca02cccd2bf0275bceefe8f6219bfd4a4f5e85"}, + {file = "grpcio_tools-1.60.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:184b27333b627a7cc0972fb70d21a8bb7c02ac4a6febc16768d78ea8ff883ddd"}, + {file = "grpcio_tools-1.60.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:18d7737f29ef5bbe3352547d0eccd080807834f00df223867dfc860bf81e9180"}, + {file = "grpcio_tools-1.60.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:cc8ba358d2c658c6ecbc58e779bf0fc5a673fecac015a70db27fc5b4d37b76b6"}, + {file = "grpcio_tools-1.60.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2973f75e8ba5c551033a1d59cc97654f6f386deaf2559082011d245d7ed87bba"}, + {file = "grpcio_tools-1.60.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28ae665113affebdd109247386786e5ab4dccfcfad1b5f68e9cce2e326b57ee6"}, + {file = "grpcio_tools-1.60.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5c7ed086fef5ff59f46d53a052b1934b73e0f7d12365d656d6af3a88057d5a3e"}, + {file = "grpcio_tools-1.60.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8540f6480428a52614db71dd6394f52cbc0d2565b5ea1136a982f26390a42c7a"}, + {file = "grpcio_tools-1.60.1-cp310-cp310-win32.whl", hash = "sha256:5b4a939097005531edec331f22d0b82bff26e71ede009354d2f375b5d41e74f0"}, + {file = "grpcio_tools-1.60.1-cp310-cp310-win_amd64.whl", hash = "sha256:075bb67895970f96aabc1761ca674bf4db193f8fcad387f08e50402023b5f953"}, + {file = "grpcio_tools-1.60.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:284749d20fb22418f17d3d351b9eb838caf4a0393a9cb02c36e5c32fa4bbe9db"}, + {file = "grpcio_tools-1.60.1-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:b1041377cf32ee2338284ee26e6b9c10f9ea7728092376b19803dcb9b91d510d"}, + {file = "grpcio_tools-1.60.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:e529cd3d4109a6f4a3f7bdaca68946eb33734e2d7ffe861785a0586abe99ee67"}, + {file = "grpcio_tools-1.60.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:31294b534f25f02ead204e58dcbe0e5437a95a1a6f276bb9378905595b02ff6d"}, + {file = "grpcio_tools-1.60.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3fb6f4d2df0388c35c2804ba170f511238a681b679ead013bfe5e39d0ea9cf48"}, + {file = "grpcio_tools-1.60.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:40cd8268a675269ce59c4fa50877597ec638bb1099c52237bb726c8ac9791868"}, + {file = "grpcio_tools-1.60.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:985ac476da365267a2367ab20060f9096fbfc2e190fb02dd394f9ec05edf03ca"}, + {file = "grpcio_tools-1.60.1-cp311-cp311-win32.whl", hash = "sha256:bd85f6c368b93ae45edf8568473053cb1cc075ef3489efb18f9832d4ecce062f"}, + {file = "grpcio_tools-1.60.1-cp311-cp311-win_amd64.whl", hash = "sha256:c20e752ff5057758845f4e5c7a298739bfba291f373ed18ea9c7c7acbe69e8ab"}, + {file = "grpcio_tools-1.60.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:aafc94616c5f89c891d859057b194a153c451f9921053454e9d7d4cbf79047eb"}, + {file = "grpcio_tools-1.60.1-cp312-cp312-macosx_10_10_universal2.whl", hash = "sha256:9bba347000f57dae8aea79c0d76ef7d72895597524d30d0170c7d1974a3a03f3"}, + {file = "grpcio_tools-1.60.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:1e96a532d38411f0543fe1903ff522f7142a9901afb0ed94de58d79caf1905be"}, + {file = "grpcio_tools-1.60.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ea6e397d87f458bb2c387a4a6e1b65df74ce5b5194a1f16850c38309012e981"}, + {file = 
"grpcio_tools-1.60.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aeecd5b8faa2aab67e6c8b8a57e888c00ce70d39f331ede0a21312e92def1a6"}, + {file = "grpcio_tools-1.60.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:d2c26ce5f774c98bd2d3d8d1703048394018b55d297ebdb41ed2ba35b9a34f68"}, + {file = "grpcio_tools-1.60.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:214281cdafb7acfdcde848eca2de7c888a6e2b5cd25ab579712b965ea09a9cd4"}, + {file = "grpcio_tools-1.60.1-cp312-cp312-win32.whl", hash = "sha256:8c4b917aa4fcdc77990773063f0f14540aab8d4a8bf6c862b964a45d891a31d2"}, + {file = "grpcio_tools-1.60.1-cp312-cp312-win_amd64.whl", hash = "sha256:0aa34c7c21cff2177a4096b2b0d51dfbc9f8a41f929847a434e89b352c5a215d"}, + {file = "grpcio_tools-1.60.1-cp37-cp37m-linux_armv7l.whl", hash = "sha256:acdba77584981fe799104aa545d9d97910bcf88c69b668b768c1f3e7d7e5afac"}, + {file = "grpcio_tools-1.60.1-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:2a7fa55bc62d4b8ebe6fb26f8cf89df3cf3b504eb6c5f3a2f0174689d35fddb0"}, + {file = "grpcio_tools-1.60.1-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:dffa326cf901fe08a0e218d9fdf593f12276088a8caa07fcbec7d051149cf9ef"}, + {file = "grpcio_tools-1.60.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf945bd22f396c0d0c691e0990db2bfc4e77816b1edc2aea8a69c35ae721aac9"}, + {file = "grpcio_tools-1.60.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6801cfc5a85f0fb6fd12cade45942aaa1c814422328d594d12d364815fe34123"}, + {file = "grpcio_tools-1.60.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f95bdc6c7c50b7fc442e53537bc5b4eb8cab2a671c1da80d40b5a4ab1fd5d416"}, + {file = "grpcio_tools-1.60.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:402efeec36d8b12b792bae8a900085416fc2f57a34b599445ace2e847b6b0d75"}, + {file = "grpcio_tools-1.60.1-cp37-cp37m-win_amd64.whl", hash = "sha256:af88a2062b9c35034a80b25f289034b9c3c00c42bb88efaa465503a06fbd6a87"}, + {file = "grpcio_tools-1.60.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:46b495bae31c5d3f6ac0240eb848f0642b5410f80dff2aacdea20cdea3938c1d"}, + {file = "grpcio_tools-1.60.1-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:b5ae375207af9aa82f516dcd513d2e0c83690b7788d45844daad846ed87550f8"}, + {file = "grpcio_tools-1.60.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:15f13e8f3d77b96adcb1e3615acec5b100bd836c6010c58a51465bcb9c06d128"}, + {file = "grpcio_tools-1.60.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c354505e6a3d170da374f20404ea6a78135502df4f5534e5c532bdf24c4cc2a5"}, + {file = "grpcio_tools-1.60.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8cfab27ba2bd36a3e3b522aed686133531e8b919703d0247a0885dae8815317"}, + {file = "grpcio_tools-1.60.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b6ef213cb0aecb2832ee82a2eac32f29f31f50b17ce020604d82205096a6bd0c"}, + {file = "grpcio_tools-1.60.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0b62cb2d43a7f0eacc6a6962dfff7c2564874012e1a72ae4167e762f449e2912"}, + {file = "grpcio_tools-1.60.1-cp38-cp38-win32.whl", hash = "sha256:3fcabf484720a9fa1690e2825fc940027a05a0c79a1075a730008ef634bd8ad2"}, + {file = "grpcio_tools-1.60.1-cp38-cp38-win_amd64.whl", hash = "sha256:22ce3e3d861321d208d8bfd6161ab976623520b179712c90b2c175151463a6b1"}, + {file = "grpcio_tools-1.60.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:4e66fe204da15e08e599adb3060109a42927c0868fe8933e2d341ea649eceb03"}, + {file = 
"grpcio_tools-1.60.1-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:c1047bd831de5d9da761e9dc246988d5f07d722186938dfd5f34807398101010"}, + {file = "grpcio_tools-1.60.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:eba5fafd70585fbd4cb6ae45e3c5e11d8598e2426c9f289b78f682c0606e81cb"}, + {file = "grpcio_tools-1.60.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bba7230c60238c7a4ffa29f1aff6d78edb41f2c79cbe4443406472b1c80ccb5d"}, + {file = "grpcio_tools-1.60.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2bb8efc2cd64bd8f2779b426dd7e94e60924078ba5150cbbb60a846e62d1ed2"}, + {file = "grpcio_tools-1.60.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:26f91161a91f1601777751230eaaafdf416fed08a15c3ba2ae391088e4a906c6"}, + {file = "grpcio_tools-1.60.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2c19be2bba5583e30f88bb5d71b430176c396f0d6d0db3785e5845bfa3d28cd2"}, + {file = "grpcio_tools-1.60.1-cp39-cp39-win32.whl", hash = "sha256:9aadc9c00baa2064baa4414cff7c269455449f14805a355226674d89c507342c"}, + {file = "grpcio_tools-1.60.1-cp39-cp39-win_amd64.whl", hash = "sha256:652b08c9fef39186ce4f97f05f5440c0ed41f117db0f7d6cb0e0d75dbc6afd3f"}, +] + +[package.dependencies] +grpcio = ">=1.60.1" +protobuf = ">=4.21.6,<5.0dev" +setuptools = "*" + [[package]] name = "h11" version = "0.14.0" @@ -539,6 +673,32 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] +[[package]] +name = "h2" +version = "4.1.0" +description = "HTTP/2 State-Machine based protocol implementation" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] + +[package.dependencies] +hpack = ">=4.0,<5" +hyperframe = ">=6.0,<7" + +[[package]] +name = "hpack" +version = "4.0.0" +description = "Pure-Python HPACK header compression" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] + [[package]] name = "httpcore" version = "1.0.2" @@ -574,6 +734,7 @@ files = [ [package.dependencies] anyio = "*" certifi = "*" +h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""} httpcore = "==1.*" idna = "*" sniffio = "*" @@ -584,6 +745,17 @@ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +[[package]] +name = "hyperframe" +version = "6.0.1" +description = "HTTP/2 framing layer for Python" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] + [[package]] name = "idna" version = "3.6" @@ -1053,6 +1225,25 @@ files = [ [package.dependencies] ptyprocess = ">=0.5" +[[package]] +name = "portalocker" +version = "2.8.2" +description = "Wraps the portalocker recipe for easy usage" +optional = false +python-versions = ">=3.8" +files = [ + {file = "portalocker-2.8.2-py3-none-any.whl", hash = 
"sha256:cfb86acc09b9aa7c3b43594e19be1345b9d16af3feb08bf92f23d4dce513a28e"}, + {file = "portalocker-2.8.2.tar.gz", hash = "sha256:2b035aa7828e46c58e9b31390ee1f169b98e1066ab10b9a6a861fe7e25ee4f33"}, +] + +[package.dependencies] +pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} + +[package.extras] +docs = ["sphinx (>=1.7.1)"] +redis = ["redis"] +tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)", "types-redis"] + [[package]] name = "prompt-toolkit" version = "3.0.43" @@ -1067,6 +1258,26 @@ files = [ [package.dependencies] wcwidth = "*" +[[package]] +name = "protobuf" +version = "4.25.2" +description = "" +optional = false +python-versions = ">=3.8" +files = [ + {file = "protobuf-4.25.2-cp310-abi3-win32.whl", hash = "sha256:b50c949608682b12efb0b2717f53256f03636af5f60ac0c1d900df6213910fd6"}, + {file = "protobuf-4.25.2-cp310-abi3-win_amd64.whl", hash = "sha256:8f62574857ee1de9f770baf04dde4165e30b15ad97ba03ceac65f760ff018ac9"}, + {file = "protobuf-4.25.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:2db9f8fa64fbdcdc93767d3cf81e0f2aef176284071507e3ede160811502fd3d"}, + {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:10894a2885b7175d3984f2be8d9850712c57d5e7587a2410720af8be56cdaf62"}, + {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fc381d1dd0516343f1440019cedf08a7405f791cd49eef4ae1ea06520bc1c020"}, + {file = "protobuf-4.25.2-cp38-cp38-win32.whl", hash = "sha256:33a1aeef4b1927431d1be780e87b641e322b88d654203a9e9d93f218ee359e61"}, + {file = "protobuf-4.25.2-cp38-cp38-win_amd64.whl", hash = "sha256:47f3de503fe7c1245f6f03bea7e8d3ec11c6c4a2ea9ef910e3221c8a15516d62"}, + {file = "protobuf-4.25.2-cp39-cp39-win32.whl", hash = "sha256:5e5c933b4c30a988b52e0b7c02641760a5ba046edc5e43d3b94a74c9fc57c1b3"}, + {file = "protobuf-4.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:d66a769b8d687df9024f2985d5137a337f957a0916cf5464d1513eee96a63ff0"}, + {file = "protobuf-4.25.2-py3-none-any.whl", hash = "sha256:a8b7a98d4ce823303145bf3c1a8bdb0f2f4642a414b196f04ad9853ed0c8f830"}, + {file = "protobuf-4.25.2.tar.gz", hash = "sha256:fe599e175cb347efc8ee524bcd4b902d11f7262c0e569ececcb89995c15f0a5e"}, +] + [[package]] name = "ptyprocess" version = "0.7.0" @@ -1246,6 +1457,29 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, + {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, + {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, + {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, + {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, + {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, + {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, + {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = 
"sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, + {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, + {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, + {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, + {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, + {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, + {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, +] + [[package]] name = "pyyaml" version = "6.0.1" @@ -1306,6 +1540,29 @@ files = [ {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] +[[package]] +name = "qdrant-client" +version = "1.7.3" +description = "Client library for the Qdrant vector search engine" +optional = false +python-versions = ">=3.8" +files = [ + {file = "qdrant_client-1.7.3-py3-none-any.whl", hash = "sha256:b062420ba55eb847652c7d2a26404fb1986bea13aa785763024013f96a7a915c"}, + {file = "qdrant_client-1.7.3.tar.gz", hash = "sha256:7b809be892cdc5137ae80ea3335da40c06499ad0b0072b5abc6bad79da1d29fc"}, +] + +[package.dependencies] +grpcio = ">=1.41.0" +grpcio-tools = ">=1.41.0" +httpx = {version = ">=0.14.0", extras = ["http2"]} +numpy = {version = ">=1.26", markers = "python_version >= \"3.12\""} +portalocker = ">=2.7.0,<3.0.0" +pydantic = ">=1.10.8" +urllib3 = ">=1.26.14,<3" + +[package.extras] +fastembed = ["fastembed (==0.1.1)"] + [[package]] name = "requests" version = "2.31.0" @@ -1353,6 +1610,22 @@ files = [ {file = "ruff-0.2.1.tar.gz", hash = "sha256:3b42b5d8677cd0c72b99fcaf068ffc62abb5a19e71b4a3b9cfa50658a0af02f1"}, ] +[[package]] +name = "setuptools" +version = "69.1.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-69.1.0-py3-none-any.whl", hash = "sha256:c054629b81b946d63a9c6e732bc8b2513a7c3ea645f11d0139a2191d735c60c6"}, + {file = "setuptools-69.1.0.tar.gz", hash = "sha256:850894c4195f09c4ed30dba56213bf7c3f21d86ed6bdaafb5df5972593bfc401"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + [[package]] name = "six" version = "1.16.0" @@ -1527,6 +1800,38 
@@ files = [ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<7.5)", "pytest-mock", "pytest-mypy-testing"] +[[package]] +name = "typer" +version = "0.9.0" +description = "Typer, build great CLIs. Easy to code. Based on Python type hints." +optional = false +python-versions = ">=3.6" +files = [ + {file = "typer-0.9.0-py3-none-any.whl", hash = "sha256:5d96d986a21493606a358cae4461bd8cdf83cbf33a5aa950ae629ca3b51467ee"}, + {file = "typer-0.9.0.tar.gz", hash = "sha256:50922fd79aea2f4751a8e0408ff10d2662bd0c8bbfa84755a699f3bada2978b2"}, +] + +[package.dependencies] +click = ">=7.1.1,<9.0.0" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["colorama (>=0.4.3,<0.5.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"] +dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2.17.0,<3.0.0)"] +doc = ["cairosvg (>=2.5.2,<3.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pillow (>=9.3.0,<10.0.0)"] +test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"] + +[[package]] +name = "types-pyyaml" +version = "6.0.12.12" +description = "Typing stubs for PyYAML" +optional = false +python-versions = "*" +files = [ + {file = "types-PyYAML-6.0.12.12.tar.gz", hash = "sha256:334373d392fde0fdf95af5c3f1661885fa10c52167b14593eb856289e1855062"}, + {file = "types_PyYAML-6.0.12.12-py3-none-any.whl", hash = "sha256:c05bc6c158facb0676674b7f11fe3960db4f389718e19e62bd2b84d6205cfd24"}, +] + [[package]] name = "typing-extensions" version = "4.9.0" @@ -1705,4 +2010,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "7ffd6635973f31fbcbc5e962102b8cf12920f83e383dc0bb73732522db897a6f" +content-hash = "b37d2542dbbbd85c8afc9a16dde50037737f87d250cec6c83e377e1266a19997" diff --git a/llm-qa/pyproject.toml b/llm-qa/pyproject.toml index a3468a1..b1db8c6 100644 --- a/llm-qa/pyproject.toml +++ b/llm-qa/pyproject.toml @@ -13,11 +13,15 @@ fastapi = "^0.109.2" uvicorn = "^0.27.1" httpx = "^0.26.0" pydantic-settings = "^2.1.0" +pydantic = "^2.6.1" +qdrant-client = "^1.7.3" +typer = "^0.9.0" [tool.poetry.group.dev.dependencies] ruff = "0.2.1" mypy = "^1.8.0" ipython = "^8.21.0" +types-pyyaml = "^6.0.12.12" [build-system] requires = ["poetry-core"] @@ -70,4 +74,4 @@ lint.select = [ "LOG", "RUF", ] -lint.ignore = ["E501", "ANN101", "PLR0913", "PLR0917", "ISC001"] +lint.ignore = ["E501", "ANN101", "PLR0913", "PLR0917", "ISC001", "TRY003"]