Add new features and improvements

Martin Popovski 2024-02-15 02:10:58 +00:00
parent abca2365bf
commit c68b66e4f6
Signed by: martinkozle
GPG Key ID: 0A5F2984DB008108
28 changed files with 881 additions and 233 deletions

View File

@@ -0,0 +1,5 @@
TEI_MODEL_ID=BAAI/bge-large-en-v1.5
TEI_MODEL_REVISION=7774ef464da42fb9e231acb5ac59e9be9011cd35
TEI_RERANK_MODEL_ID=BAAI/bge-reranker-large
TEI_RERANK_MODEL_REVISION=b1a3b1492bcfa68f2b50d983a417601313cd85df
OLLAMA_MODEL_NAME=openchat:7b-v3.5-0106-q4_K_M

View File

@@ -13,7 +13,6 @@ services:
- TEI_BASE_URL=http://text-embeddings-inference
- TEI_RERANK_BASE_URL=http://text-embeddings-inference-rerank
- OLLAMA_BASE_URL=http://ollama:11434
- OLLAMA_MODEL_NAME=${OLLAMA_MODEL_NAME}
command: sleep infinity
qdrant:

llm-qa/.env.default Normal file
View File

@@ -0,0 +1,20 @@
TEI_DOCUMENT_PREFIX='passage: '
TEI_QUERY_PREFIX='query: '
RETRIEVE_COUNT=15
RERANK_COUNT=6
OLLAMA_MODEL_NAME=openchat:7b-v3.5-0106-q4_K_M
OLLAMA_AUTO_PULL=true
PROMPTS__CHAT_PROMPTS__SYSTEM_MESSAGE="You are a friendly and helpful chatbot specializing in SageMaker questions. Ensure that your responses are concise and informative. Leverage the given context to provide accurate and relevant information. If uncertain, politely ask for clarification. Always respond in the same language as the user. Let's make the user experience smooth and efficient!"
PROMPTS__CHAT_PROMPTS__LAST_HUMAN_MESSAGE="# Context:
{context}
# Message:
{last_human_message}"
PROMPTS__CONDENSE_QUESTION_PROMPTS__SYSTEM_MESSAGE="Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language. Output just the question, nothing else."
PROMPTS__CONDENSE_QUESTION_PROMPTS__HUMAN_MESSAGE="# Chat History:
{conversation_history}
# Follow Up Message:
{last_human_message}"

View File

@@ -1 +1,117 @@
# LLM QA
A proof-of-concept question-answering system for different types of text data.
Currently implemented:
- Plain text
- Markdown
## Key Features
### Dockerized development environment
- Easy, quick and reproducible setup
### Automatic pull and serve of declared models
- Ollama models are automatically pulled and served by the FastAPI server (a minimal sketch of the pull check follows below)
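A minimal sketch of the pull check, using the official `ollama` Python client (the version added in this commit in `llm_qa/dependencies.py` does the same asynchronously; the helper name here is an assumption):
```python
import ollama


def ensure_model_available(base_url: str, model_name: str) -> None:
    # Hypothetical helper: pull the model only if Ollama doesn't have it yet
    client = ollama.Client(host=base_url)
    try:
        # `show` raises ResponseError when the model is missing locally
        client.show(model_name)
    except ollama.ResponseError:
        client.pull(model_name)
```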
### Detailed logging
- Key potential bottlenecks are timed and logged (the timing wrapper is sketched after the examples below)
#### Upsert
```console
2024-02-15 01:10:54,341 - llm_qa.services.upsert - INFO - Split `MARKDOWN` type text into 8 document chunks in 0.01 seconds
2024-02-15 01:10:54,759 - httpx - INFO - HTTP Request: POST http://text-embeddings-inference/embed "HTTP/1.1 200 OK"
2024-02-15 01:11:03,121 - httpx - INFO - HTTP Request: POST http://text-embeddings-inference/embed "HTTP/1.1 200 OK"
2024-02-15 01:11:03,140 - llm_qa.services.upsert - INFO - Upserted 8 document chunks to Qdrant collection `showcase` in 8.80 seconds
2024-02-15 01:11:03,142 - uvicorn.access - INFO - 127.0.0.1:55868 - "POST /api/v1/upsert-text HTTP/1.1" 200 OK
```
#### Chat
```console
2024-02-15 01:02:03,408 - llm_qa.dependencies - INFO - Ollama auto-pull enabled, checking if model is available
2024-02-15 01:02:03,441 - httpx - INFO - HTTP Request: POST http://ollama:11434/api/show "HTTP/1.1 200 OK"
2024-02-15 01:02:03,441 - llm_qa.dependencies - INFO - Ollama model `openchat:7b-v3.5-0106-q4_K_M` already exists
2024-02-15 01:02:03,645 - httpx - INFO - HTTP Request: POST http://text-embeddings-inference/embed "HTTP/1.1 200 OK"
2024-02-15 01:02:03,653 - llm_qa.chains.time_logger - INFO - Chain `VectorStoreRetriever` finished in 0.08 seconds
2024-02-15 01:02:23,192 - httpx - INFO - HTTP Request: POST http://text-embeddings-inference-rerank/rerank "HTTP/1.1 200 OK"
2024-02-15 01:02:23,194 - llm_qa.chains.time_logger - INFO - Chain `RerankAndTake` finished in 19.54 seconds
2024-02-15 01:02:29,817 - llm_qa.chains.time_logger - INFO - Chain `ChatOllama` finished in 6.62 seconds
2024-02-15 01:02:29,817 - llm_qa.services.chat - INFO - Chat chain finished in 26.27 seconds
2024-02-15 01:02:29,823 - uvicorn.access - INFO - 127.0.0.1:50100 - "POST /api/v1/chat HTTP/1.1" 200 OK
```
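The timing is done with a small wrapper that can decorate any LangChain runnable; this is essentially the `time_logger` chain added in this commit:
```python
import logging
import time

from langchain_core.runnables import Runnable, chain
from langchain_core.runnables.utils import Input, Output

logger = logging.getLogger(__name__)


def time_logger(
    runnable: Runnable[Input, Output], name: str | None = None
) -> Runnable[Input, Output]:
    @chain
    async def time_logger_chain(input: Input) -> Output:
        # Time a single async invocation of the wrapped runnable and log it
        start_time = time.time()
        output = await runnable.ainvoke(input)
        logger.info(
            "Chain `%s` finished in %.2f seconds",
            name or runnable.get_name(),
            time.time() - start_time,
        )
        return output

    return time_logger_chain
```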
### Hierarchical document chunking
- Hierarchical text, such as markdown, is split into document chunks by headers
- All previous parent headers are also included in the chunk, separated by `...`
- This enriches the context of the chunk and solves the problem of global context being lost when splitting the text (a sketch of the approach follows the example below)
Example:
````md
# AWS::SageMaker::ModelQualityJobDefinition MonitoringGroundTruthS3Input<a name="aws-properties-sagemaker-modelqualityjobdefinition-monitoringgroundtruths3input"></a>
...
## Syntax<a name="aws-properties-sagemaker-modelqualityjobdefinition-monitoringgroundtruths3input-syntax"></a>
...
### YAML<a name="aws-properties-sagemaker-modelqualityjobdefinition-monitoringgroundtruths3input-syntax.yaml"></a>

```
[S3Uri](#cfn-sagemaker-modelqualityjobdefinition-monitoringgroundtruths3input-s3uri): String
```
````
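A minimal sketch of how the header path is rebuilt for each chunk, assuming LangChain's `MarkdownHeaderTextSplitter` configured with the header markers themselves as metadata keys (the full version is in the markdown splitter chain added by this commit):
```python
from langchain.text_splitter import MarkdownHeaderTextSplitter

headers_to_split_on = [("#", "#"), ("##", "##"), ("###", "###")]
splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)


def split_with_parent_headers(text: str) -> list[str]:
    documents = splitter.split_text(text)
    # Prepend every parent header found in the chunk's metadata, ordered from
    # `#` to `###`, joined by `...` to mark the elided text between headers
    return [
        "\n...\n".join(
            f"{header_key} {document.metadata[header_key]}"
            for _, header_key in sorted(headers_to_split_on, key=lambda h: len(h[0]))
            if header_key in document.metadata
        )
        + f"\n{document.page_content}"
        for document in documents
    ]
```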
### Retrieval query rewriting
- After the first message, subsequent messages are rewritten to include the context of previous messages
- This allows for a more natural conversation flow and retrieval of more relevant chunks (a sketch of the rewriting step follows the example below)
Example:
```md
### User: What are all AWS regions where SageMaker is available?
### AI: SageMaker is available in most AWS regions, except for the following: Asia Pacific (Jakarta), Africa (Cape Town), Middle East (UAE), Asia Pacific (Hyderabad), Asia Pacific (Osaka), Asia Pacific (Melbourne), Europe (Milan), AWS GovCloud (US-East), Europe (Spain), and Europe (Zurich) Region.
### User: What about the Bedrock service?
### Retrieval Query: What is the availability of AWS SageMaker in relation to the Bedrock service?
```
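A minimal sketch of the rewriting step, assuming any LangChain chat model and the condense-question prompts from `.env.default` (the chain added in this commit additionally uses few-shot examples and short-circuits single-message conversations):
```python
from langchain.chat_models.base import BaseChatModel
from langchain.prompts import ChatPromptTemplate

condense_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "Given the following conversation and a follow up question, rephrase "
        "the follow up question to be a standalone question, in its original "
        "language. Output just the question, nothing else.",
    ),
    (
        "human",
        "# Chat History:\n{conversation_history}\n"
        "# Follow Up Message:\n{last_human_message}",
    ),
])


async def condense_question(
    chat_model: BaseChatModel, conversation_history: str, last_human_message: str
) -> str:
    # Fold the chat history into a standalone retrieval query
    response = await (condense_prompt | chat_model).ainvoke({
        "conversation_history": conversation_history,
        "last_human_message": last_human_message,
    })
    return str(response.content)
```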
### Reranking
- Retrieval of a larger number of document chunks is first performed using a vector store
- Then, the chunks are reranked using a reranker model
- This process more precisely selects the most relevant chunks for the user query (a sketch of the flow follows below)
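A minimal sketch of the retrieve-then-rerank flow; the `rerank` callable stands in for the TEI `/rerank` endpoint wrapped by `TeiEmbeddings.arerank` in this commit and is assumed to return `(index, score)` pairs:
```python
from collections.abc import Awaitable, Callable

from langchain_core.documents import Document
from langchain_core.vectorstores import VectorStoreRetriever

Reranker = Callable[[str, list[str]], Awaitable[list[tuple[int, float]]]]


async def retrieve_and_rerank(
    retriever: VectorStoreRetriever,  # configured with k=RETRIEVE_COUNT
    rerank: Reranker,
    query: str,
    rerank_count: int,
) -> list[Document]:
    # Recall-oriented first pass: embedding similarity search
    documents = await retriever.ainvoke(query)
    # Precision-oriented second pass: rerank and keep only the best chunks
    scores = await rerank(query, [document.page_content for document in documents])
    ranked = sorted(scores, key=lambda pair: pair[1], reverse=True)
    return [documents[index] for index, _ in ranked[:rerank_count]]
```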
## Development
### Setup
First copy the `.devcontainer/.env.example` file to `.devcontainer/.env` and adjust the settings and models to your needs.
Then simply open the project devcontainer in a compatible IDE.
This will set up all required tools and project dependencies for Python development.
It will also run Docker containers for all required services.
### Configuration
Create a `llm-qa/.env` file to selectively override the default environment variables in `llm-qa/.env.default`.
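This layering works because pydantic-settings reads every file in the `env_file` tuple, with later files taking priority, and `env_nested_delimiter="__"` maps variables such as `PROMPTS__CHAT_PROMPTS__SYSTEM_MESSAGE` onto nested models. A trimmed sketch of the `Settings` class from this commit:
```python
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        # `.env` is listed after `.env.default`, so its values take priority
        env_file=(".env.default", ".env"),
        env_file_encoding="utf-8",
        extra="ignore",
        env_nested_delimiter="__",
    )

    retrieve_count: int
    rerank_count: int | None
    ollama_model_name: str
```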
### Running
To run the FastAPI server, run the `llm_qa.web` submodule:
```bash
poetry run python -m llm_qa.web
```
To run the minimal CLI client, run the `llm_qa.client` submodule:
```bash
poetry run python -m llm_qa.client
```
## Deployment
Not yet implemented.

View File

@@ -0,0 +1,102 @@
from operator import itemgetter
from typing import TypedDict
from langchain.chat_models.base import BaseChatModel
from langchain.prompts import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
MessagesPlaceholder,
SystemMessagePromptTemplate,
)
from langchain_core.documents import Document
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.runnables import (
Runnable,
RunnableLambda,
RunnablePassthrough,
chain,
)
from langchain_core.vectorstores import VectorStoreRetriever
from llm_qa.chains.condense_question import get_condense_question_chain
from llm_qa.chains.rerank import RerankRunnableInput, get_rerank_chain
from llm_qa.chains.time_logger import time_logger
from llm_qa.embeddings.tei import TeiEmbeddings
from llm_qa.models.prompts import Prompts
class ChatRunnableOutput(TypedDict):
response: AIMessage
documents: list[Document]
retrieval_query: str
@chain
def combine_documents(documents: list[Document]) -> str:
return "\n\n".join(document.page_content for document in documents)
def get_chat_chain(
prompts: Prompts,
vectorstore_retriever: VectorStoreRetriever,
tei_rerank_embeddings: TeiEmbeddings,
rerank_count: int | None,
chat_model: BaseChatModel,
) -> Runnable[list[BaseMessage], ChatRunnableOutput]:
chat_prompt_template = ChatPromptTemplate.from_messages([
SystemMessagePromptTemplate.from_template(prompts.chat_prompts.system_message),
MessagesPlaceholder(variable_name="conversation_history"),
HumanMessagePromptTemplate.from_template(
prompts.chat_prompts.last_human_message
),
])
condense_question_chain = get_condense_question_chain(
prompts=prompts.condense_question_prompts, chat_model=chat_model
)
rerank_chain = get_rerank_chain(tei_rerank_embeddings=tei_rerank_embeddings)
# This branch isn't made with a RunnableBranch because we know the value of
# rerank_count at chain construction time, so we can use a simple if statement
rerank_and_take_chain: Runnable[RerankRunnableInput, list[Document]]
if rerank_count is None:
rerank_and_take_chain = RunnablePassthrough[RerankRunnableInput]() | itemgetter(
"documents"
)
else:
rerank_and_take_chain = rerank_chain | RunnableLambda[
list[Document], list[Document]
](lambda x: x[:rerank_count])
return (
RunnablePassthrough[list[BaseMessage]]()
| {
"messages": RunnablePassthrough(),
"retrieval_query": condense_question_chain,
}
| {
"conversation_history": itemgetter("messages")
| RunnableLambda[list[BaseMessage], list[BaseMessage]](lambda x: x[:-1]),
"last_human_message": itemgetter("messages")
| RunnableLambda[list[BaseMessage], str](lambda x: x[-1].content),
"retrieval_query": itemgetter("retrieval_query"),
"documents": {
"query": itemgetter("retrieval_query"),
"documents": itemgetter("retrieval_query")
| time_logger(vectorstore_retriever),
}
| time_logger(rerank_and_take_chain, name="RerankAndTake"),
}
| {
"retrieval_query": itemgetter("retrieval_query"),
"documents": itemgetter("documents"),
"response": {
"conversation_history": itemgetter("conversation_history"),
"last_human_message": itemgetter("last_human_message"),
"context": itemgetter("documents") | combine_documents,
}
| chat_prompt_template
| time_logger(chat_model),
}
)

View File

@@ -0,0 +1,72 @@
from typing import TypedDict
from langchain.chat_models.base import BaseChatModel
from langchain.prompts import (
AIMessagePromptTemplate,
ChatPromptTemplate,
FewShotChatMessagePromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain_core.documents import Document
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_core.runnables import (
Runnable,
RunnableBranch,
RunnableLambda,
)
from llm_qa.models.prompts import CondenseQuestionPrompts
from llm_qa.prompt_utils import format_messages
class ChatRunnableOutput(TypedDict):
response: AIMessage
documents: list[Document]
retrieval_query: str
def get_condense_question_chain(
prompts: CondenseQuestionPrompts, chat_model: BaseChatModel
) -> Runnable[list[BaseMessage], str]:
human_message_prompt_template = HumanMessagePromptTemplate.from_template(
prompts.human_message
)
examples = [
{
"conversation_history": format_messages([
HumanMessage(content="What will the weather be like today?")
]),
"last_human_message": "What about tomorrow?",
"response": "What will the weather be like tomorrow?",
},
]
example_prompt = ChatPromptTemplate.from_messages([
human_message_prompt_template,
AIMessagePromptTemplate.from_template("{response}"),
])
few_shot_prompt = FewShotChatMessagePromptTemplate(
examples=examples, example_prompt=example_prompt
)
chat_prompt_template = ChatPromptTemplate.from_messages([
SystemMessagePromptTemplate.from_template(prompts.system_message),
few_shot_prompt,
human_message_prompt_template,
])
# Returns the last human message if there is only one message
return RunnableBranch[list[BaseMessage], str](
(lambda x: len(x) == 0, lambda _: ""),
(lambda x: len(x) == 1, lambda x: x[0].content),
{
"conversation_history": RunnableLambda[list[BaseMessage], str](
lambda x: format_messages(x[:-1])
),
"last_human_message": RunnableLambda[list[BaseMessage], BaseMessage](
lambda x: x[-1]
),
}
| chat_prompt_template
| chat_model
| RunnableLambda[AIMessage, str](lambda x: x.content),
)

View File

@@ -0,0 +1,36 @@
from typing import TypedDict
from langchain_core.documents import Document
from langchain_core.runnables import Runnable, chain
from llm_qa.embeddings.tei import TeiEmbeddings
async def rerank_documents(
tei_rerank_embeddings: TeiEmbeddings, query: str, documents: list[Document]
) -> list[Document]:
if len(documents) == 0:
return []
rerank_responses = await tei_rerank_embeddings.arerank(
query, [document.page_content for document in documents]
)
sorted_rerank = sorted(rerank_responses, key=lambda x: x.score, reverse=True)
return [documents[rerank.index] for rerank in sorted_rerank]
class RerankRunnableInput(TypedDict):
query: str
documents: list[Document]
def get_rerank_chain(
tei_rerank_embeddings: TeiEmbeddings,
) -> Runnable[RerankRunnableInput, list[Document]]:
@chain
async def rerank_documents_chain(input: RerankRunnableInput) -> list[Document]:
return await rerank_documents(
tei_rerank_embeddings, input["query"], input["documents"]
)
return rerank_documents_chain

View File

@@ -2,6 +2,25 @@ from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain_core.runnables import Runnable, chain
def split_markdown_text_by_headers(
markdown_header_text_splitter: MarkdownHeaderTextSplitter, text: str
) -> list[str]:
documents = markdown_header_text_splitter.split_text(text)
# Add all parent headers to the page content
return [
"\n...\n".join(
f"{header_key} {document.metadata[header_key]}"
for _, header_key in sorted(
markdown_header_text_splitter.headers_to_split_on,
key=lambda x: len(x[0]),
)
if header_key in document.metadata
)
+ f"\n{document.page_content}"
for document in documents
]
def get_markdown_header_text_splitter_chain(
markdown_header_text_splitter: MarkdownHeaderTextSplitter,
) -> Runnable[str, list[str]]:
@@ -10,20 +29,9 @@ def get_markdown_header_text_splitter_chain(
@chain
def markdown_header_text_splitter_chain(text: str) -> list[str]:
documents = markdown_header_text_splitter.split_text(text)
# Add all parent headers to the page content
return [
"\n...\n".join(
f"{header_key} {document.metadata[header_key]}"
for _, header_key in sorted(
markdown_header_text_splitter.headers_to_split_on,
key=lambda x: len(x[0]),
)
if header_key in document.metadata
)
+ f"\n{document.page_content}"
for document in documents
]
return split_markdown_text_by_headers(
markdown_header_text_splitter=markdown_header_text_splitter, text=text
)
return markdown_header_text_splitter_chain

View File

@@ -0,0 +1,25 @@
import logging
import time
from langchain_core.runnables import Runnable, chain
from langchain_core.runnables.utils import Input, Output
logger = logging.getLogger(__name__)
def time_logger(
runnable: Runnable[Input, Output], name: str | None = None
) -> Runnable[Input, Output]:
@chain
async def time_logger_chain(input: Input) -> Output:
start_time = time.time()
output = await runnable.ainvoke(input)
elapsed_time = time.time() - start_time
logger.info(
"Chain `%s` finished in %.2f seconds",
name or runnable.get_name(),
elapsed_time,
)
return output
return time_logger_chain

View File

@@ -4,16 +4,19 @@ from typing import Annotated, Optional
import httpx
import typer
from colorama import Fore, Style
from llm_qa.logging import load_logging_config
from llm_qa.models.chat import ChatMessage, ChatRequest, ChatResponse
from llm_qa.models.prompts import ChatPrompts
from llm_qa.models.prompts import (
OptionalChatPrompts,
OptionalCondenseQuestionPrompts,
OptionalPrompts,
)
from llm_qa.models.upsert import TextType, UpsertTextRequest, UpsertTextResponse
logger = logging.getLogger(__name__)
SYSTEM_MESSAGE_PROMPT_TEMPLATE_PATH = "../data/prompts/system_message.txt"
LAST_HUMAN_MESSAGE_PROMPT_TEMPLATE_PATH = "../data/prompts/last_human_message.txt"
CHAT_URL = "http://localhost:8000/api/v1/chat"
UPSERT_URL = "http://localhost:8000/api/v1/upsert-text"
@@ -29,33 +32,58 @@ app = typer.Typer()
def chat(
collection: Annotated[str, typer.Option()],
chat_url: Annotated[str, typer.Option()] = CHAT_URL,
system_message_prompt_template_path: Annotated[
chat_system_message_prompt_path: Annotated[
Optional[typer.FileText], typer.Option() # noqa: UP007
] = None,
last_human_message_prompt_template_path: Annotated[
chat_last_human_message_prompt_path: Annotated[
Optional[typer.FileText], typer.Option() # noqa: UP007
] = None,
condense_question_system_message_prompt_path: Annotated[
Optional[typer.FileText], typer.Option() # noqa: UP007
] = None,
condense_question_human_message_prompt_path: Annotated[
Optional[typer.FileText], typer.Option() # noqa: UP007
] = None,
) -> None:
system_message_prompt_template = (
system_message_prompt_template_path.read()
if system_message_prompt_template_path
else load_prompt(SYSTEM_MESSAGE_PROMPT_TEMPLATE_PATH)
chat_system_message_prompt = (
chat_system_message_prompt_path.read()
if chat_system_message_prompt_path
else None
)
last_human_message_prompt_template = (
last_human_message_prompt_template_path.read()
if last_human_message_prompt_template_path
else load_prompt(LAST_HUMAN_MESSAGE_PROMPT_TEMPLATE_PATH)
chat_last_human_message_prompt = (
chat_last_human_message_prompt_path.read()
if chat_last_human_message_prompt_path
else None
)
prompts = ChatPrompts(
system_message=system_message_prompt_template,
last_human_message=last_human_message_prompt_template,
condense_question_system_message_prompt = (
condense_question_system_message_prompt_path.read()
if condense_question_system_message_prompt_path
else None
)
condense_question_human_message_prompt = (
condense_question_human_message_prompt_path.read()
if condense_question_human_message_prompt_path
else None
)
prompts = OptionalPrompts(
chat_prompts=OptionalChatPrompts(
system_message=chat_system_message_prompt,
last_human_message=chat_last_human_message_prompt,
),
condense_question_prompts=OptionalCondenseQuestionPrompts(
system_message=condense_question_system_message_prompt,
human_message=condense_question_human_message_prompt,
),
)
client = httpx.Client(timeout=180.0)
messages: list[ChatMessage] = []
while True:
message_content = input("### User: ")
message_content = input(f"{Fore.GREEN}### User: {Style.RESET_ALL}")
if message_content in {"/exit", "/quit", "/bye"}:
break
if message_content in {"/clear", "/cls", "/reset"}:
messages.clear()
continue
message = ChatMessage.new_human(message_content)
messages.append(message)
response = client.post(
@@ -67,15 +95,23 @@ def chat(
response.raise_for_status()
chat_response = ChatResponse.model_validate_json(response.content)
messages.append(chat_response.response_message)
print("### AI:", chat_response.response_message.content)
print(
"----------\n### Sources:\n"
+ "\n\n".join(
f"Source {i + 1}:\n{source.content}"
for i, source in enumerate(chat_response.sources)
)
+ "\n----------"
f"{Fore.RED}### AI: "
f"{Style.RESET_ALL}{chat_response.response_message.content}"
)
print(Fore.RED + "-" * 16)
print(Style.RESET_ALL)
print(
f"{Fore.CYAN}### Retrieval Query: "
f"{Style.RESET_ALL}{chat_response.retrieval_query}"
)
sources_str = "\n\n".join(
f"{Style.DIM}Source {i + 1}:\n{Style.RESET_ALL}{source.content}"
for i, source in enumerate(chat_response.sources)
)
print(f"{Fore.YELLOW}### Sources:{Style.RESET_ALL}\n{sources_str}")
print(Fore.YELLOW + "-" * 16)
print(Style.RESET_ALL)
@app.command()
@@ -84,6 +120,7 @@ def upsert(
collection: Annotated[str, typer.Option()],
upsert_url: Annotated[str, typer.Option()] = UPSERT_URL,
) -> None:
load_logging_config()
client = httpx.Client(timeout=180.0)
for file in files:
logger = logging.getLogger(__name__)
@@ -106,9 +143,8 @@ def upsert(
)
response.raise_for_status()
upsert_response = UpsertTextResponse.model_validate_json(response.content)
logger.info("Upserted %d document chunks", upsert_response.num_documents)
logger.info("Upserted %d document chunks", upsert_response.num_document_chunks)
if __name__ == "__main__":
load_logging_config()
app()

View File

@@ -1,14 +1,16 @@
import logging
from typing import Annotated
import ollama
from fastapi import Depends
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager
from langchain.chat_models.base import BaseChatModel
from langchain.chat_models.ollama import ChatOllama
from llm_qa.models.tei import TeiConfig
from llm_qa.settings import Settings
logger = logging.getLogger(__name__)
def settings() -> Settings:
return Settings()
@@ -30,12 +32,22 @@ def tei_rerank_config(
)
def chat_model(
async def chat_model(
settings: Annotated[Settings, Depends(settings)],
) -> BaseChatModel:
if settings.ollama_auto_pull:
logger.info("Ollama auto-pull enabled, checking if model is available")
ollama_client = ollama.AsyncClient(host=settings.ollama_base_url)
try:
await ollama_client.show(settings.ollama_model_name)
logger.info("Ollama model `%s` already exists", settings.ollama_model_name)
except ollama.ResponseError:
logger.info(
"Ollama model `%s` not found, pulling...", settings.ollama_model_name
)
await ollama_client.pull(settings.ollama_model_name)
return ChatOllama(
base_url=settings.ollama_base_url,
model=settings.ollama_model_name,
verbose=True,
callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
)

View File

@@ -1,3 +1,4 @@
import json
from typing import override
from urllib.parse import urljoin
@@ -51,6 +52,13 @@ class TeiEmbeddings(Embeddings):
e.add_note(note)
case _:
raise
TeiEmbeddings._handle_error_response(response)
@staticmethod
def _handle_error_response(response: httpx.Response) -> None:
response_json = json.loads(response.content)
if "error" in response_json:
raise ValueError(response_json["error"])
def _embed(self, text: str | list[str]) -> list[list[float]]:
"""Embed text."""

View File

@@ -1,9 +1,14 @@
import logging.config
import pathlib
from typing import Any, cast
import yaml
def load_logging_config() -> None:
logging_config = yaml.safe_load(pathlib.Path("logging.yaml").read_text())
logging.config.dictConfig(logging_config)
def load_logging_config(dry_run: bool = False) -> dict[str, Any]:
logging_config = cast(
dict[str, Any], yaml.safe_load(pathlib.Path("logging.yaml").read_text())
)
if not dry_run:
logging.config.dictConfig(logging_config)
return logging_config

View File

@@ -2,9 +2,9 @@ from enum import StrEnum
from typing import Self
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from pydantic import BaseModel
from pydantic import BaseModel, Field
from llm_qa.models.prompts import ChatPrompts
from llm_qa.models.prompts import OptionalPrompts
from llm_qa.models.source import Source
from llm_qa.typing_utils import assert_never
@@ -15,8 +15,8 @@ class MessageType(StrEnum):
class ChatMessage(BaseModel):
content: str
type: MessageType
content: str = Field(..., description="Content of the message")
type: MessageType = Field(..., description="Type of the message, HUMAN or AI")
@classmethod
def new_human(cls, content: str) -> Self:
@@ -47,11 +47,36 @@ class ChatMessage(BaseModel):
class ChatRequest(BaseModel):
messages: list[ChatMessage]
collection_name: str
prompts: ChatPrompts | None = None
messages: list[ChatMessage] = Field(
...,
min_length=1,
description="Chat message history",
examples=[[ChatMessage(content="What are you?", type=MessageType.HUMAN)]],
)
collection_name: str = Field(
...,
description="Name of the collection to retrieve documents from",
examples=["collection"],
)
prompts: OptionalPrompts | None = Field(
None,
description="Prompts to use for various LLM chains"
", if not provided, uses prompts from settings",
examples=[OptionalPrompts()],
)
class ChatResponse(BaseModel):
response_message: ChatMessage
sources: list[Source]
response_message: ChatMessage = Field(
..., description="Response message from the chat chain", examples=["I am an AI"]
)
sources: list[Source] = Field(
...,
description="Sources used to generate the response",
examples=[[Source(content="Source chunk")]],
)
retrieval_query: str = Field(
...,
description="Retrieval query used to retrieve the sources",
examples=["Condensed query"],
)

View File

@@ -1,6 +1,86 @@
from typing import Self
from pydantic import BaseModel
class ChatPrompts(BaseModel):
class OptionalChatPrompts(BaseModel):
system_message: str | None = None
last_human_message: str | None = None
def __or__(self, __value: "OptionalChatPrompts | None") -> Self:
if __value is None:
return self
return self.__class__(
system_message=__value.system_message or self.system_message,
last_human_message=__value.last_human_message or self.last_human_message,
)
class ChatPrompts(OptionalChatPrompts):
system_message: str
last_human_message: str
@classmethod
def from_optional(cls, optional: OptionalChatPrompts) -> Self:
return cls(
system_message=optional.system_message,
last_human_message=optional.last_human_message,
)
class OptionalCondenseQuestionPrompts(BaseModel):
system_message: str | None = None
human_message: str | None = None
def __or__(self, __value: "OptionalCondenseQuestionPrompts | None") -> Self:
if __value is None:
return self
return self.__class__(
system_message=__value.system_message or self.system_message,
human_message=__value.human_message or self.human_message,
)
class CondenseQuestionPrompts(OptionalCondenseQuestionPrompts):
system_message: str
human_message: str
@classmethod
def from_optional(cls, optional: OptionalCondenseQuestionPrompts) -> Self:
return cls(
system_message=optional.system_message,
human_message=optional.human_message,
)
class OptionalPrompts(BaseModel):
chat_prompts: OptionalChatPrompts = OptionalChatPrompts()
condense_question_prompts: OptionalCondenseQuestionPrompts = (
OptionalCondenseQuestionPrompts()
)
def __or__(self, __value: "OptionalPrompts | None") -> Self:
if __value is None:
return self
return self.__class__(
chat_prompts=self.chat_prompts | __value.chat_prompts,
condense_question_prompts=self.condense_question_prompts
| __value.condense_question_prompts,
)
class Prompts(OptionalPrompts):
chat_prompts: ChatPrompts
condense_question_prompts: CondenseQuestionPrompts
@classmethod
def from_optional(cls, optional: OptionalPrompts) -> Self:
return cls(
chat_prompts=ChatPrompts.from_optional(optional.chat_prompts),
condense_question_prompts=CondenseQuestionPrompts.from_optional(
optional.condense_question_prompts
),
)

View File

@@ -25,7 +25,7 @@ class RerankRequest(BaseModel):
texts: list[str]
raw_scores: bool = False
return_text: bool = False
truncate: bool = False
truncate: bool = True
class RerankResponse(BaseModel):

View File

@@ -1,6 +1,6 @@
from enum import StrEnum
from pydantic import BaseModel
from pydantic import BaseModel, Field
class TextType(StrEnum):
@@ -9,10 +9,22 @@ class TextType(StrEnum):
class UpsertTextRequest(BaseModel):
text: str
type: TextType
collection: str
text: str = Field(
..., description="Text to upsert", examples=["Context document text"]
)
type: TextType = Field(
...,
description="Type of the text, will use different splitting logic",
examples=[TextType.PLAIN_TEXT],
)
collection: str = Field(
...,
description="Name of the collection to upsert into",
examples=["collection"],
)
class UpsertTextResponse(BaseModel):
num_documents: int
num_document_chunks: int = Field(
..., description="Number of document chunks upserted"
)

View File

@@ -0,0 +1,7 @@
from langchain_core.messages import AIMessage, HumanMessage
def format_messages(messages: list[HumanMessage | AIMessage]) -> str:
return "\n".join(
f"{message.type.capitalize()}: {message.content}" for message in messages
)

View File

@@ -1,14 +1,18 @@
import logging
from typing import Annotated
from fastapi import APIRouter, Depends
from langchain.chat_models.base import BaseChatModel
from llm_qa.dependencies import chat_model, settings, tei_config
from llm_qa.dependencies import chat_model, settings, tei_config, tei_rerank_config
from llm_qa.models.chat import ChatRequest, ChatResponse
from llm_qa.models.prompts import Prompts
from llm_qa.models.tei import TeiConfig
from llm_qa.services.chat import chat as chat_service
from llm_qa.settings import Settings
logger = logging.getLogger(__name__)
router = APIRouter()
@@ -17,15 +21,25 @@ async def upsert_text(
chat_request: ChatRequest,
settings: Annotated[Settings, Depends(settings)],
tei_config: Annotated[TeiConfig, Depends(tei_config)],
tei_rerank_config: Annotated[TeiConfig, Depends(tei_rerank_config)],
chat_model: Annotated[BaseChatModel, Depends(chat_model)],
) -> ChatResponse:
prompts = chat_request.prompts or settings.chat_prompts
"""Chat with a language model over a collection of documents.
Returns an LLM response and the sources of the documents used to generate it.
"""
logger.debug("Chat request: %s", chat_request)
prompts = Prompts.from_optional(settings.prompts | chat_request.prompts)
logger.debug("Prompts: %s", prompts)
return await chat_service(
messages=[message.to_langchain_message() for message in chat_request.messages],
collection_name=chat_request.collection_name,
prompts=prompts,
tei_config=tei_config,
tei_rerank_config=tei_rerank_config,
qdrant_host=settings.qdrant_host,
qdrant_grpc_port=settings.qdrant_grpc_port,
retrieve_count=settings.retrieve_count,
rerank_count=settings.rerank_count,
chat_model=chat_model,
)

View File

@@ -1,6 +1,7 @@
import logging
from typing import Annotated
from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, HTTPException
from llm_qa.dependencies import settings, tei_config
from llm_qa.models.tei import TeiConfig
@@ -8,6 +9,8 @@ from llm_qa.models.upsert import UpsertTextRequest, UpsertTextResponse
from llm_qa.services.upsert import upsert_text as upsert_text_service
from llm_qa.settings import Settings
logger = logging.getLogger(__name__)
router = APIRouter()
@@ -17,6 +20,8 @@ async def upsert_text(
settings: Annotated[Settings, Depends(settings)],
tei_config: Annotated[TeiConfig, Depends(tei_config)],
) -> UpsertTextResponse:
"""Upsert text into a collection."""
logger.debug("Upsert text request: %s", upsert_request)
num_documents = await upsert_text_service(
text=upsert_request.text,
text_type=upsert_request.type,
@@ -25,9 +30,10 @@ async def upsert_text(
qdrant_host=settings.qdrant_host,
qdrant_grpc_port=settings.qdrant_grpc_port,
)
return UpsertTextResponse(num_documents=num_documents)
return UpsertTextResponse(num_document_chunks=num_documents)
@router.post("/upsert-file")
@router.post("/upsert-file", responses={501: {"description": "Not implemented"}})
async def upsert_file() -> None:
raise NotImplementedError
"""Not implemented."""
raise HTTPException(status_code=501, detail="Not implemented")

View File

@@ -1,93 +1,35 @@
from operator import itemgetter
from typing import TypedDict
import logging
import time
from langchain.chat_models.base import BaseChatModel
from langchain.prompts import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
MessagesPlaceholder,
SystemMessagePromptTemplate,
)
from langchain.vectorstores import VectorStore
from langchain.vectorstores.qdrant import Qdrant
from langchain_core.documents import Document
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.runnables import (
Runnable,
RunnableLambda,
RunnablePassthrough,
chain,
)
from langchain_core.messages import BaseMessage
from qdrant_client import AsyncQdrantClient, QdrantClient
from llm_qa.chains.chat import get_chat_chain
from llm_qa.embeddings.tei import TeiEmbeddings
from llm_qa.models.chat import ChatMessage, ChatResponse
from llm_qa.models.prompts import ChatPrompts
from llm_qa.models.prompts import Prompts
from llm_qa.models.source import Source
from llm_qa.models.tei import TeiConfig
class ChatRunnableInput(TypedDict):
messages: list[BaseMessage]
collection_name: str
class ChatRunnableOutput(TypedDict):
response: AIMessage
documents: list[Document]
@chain
def combine_documents(documents: list[Document]) -> str:
return "\n\n".join(document.page_content for document in documents)
def get_chat_chain(
chat_prompt_template: ChatPromptTemplate,
vectorstore: VectorStore,
chat_model: BaseChatModel,
) -> Runnable[ChatRunnableInput, ChatRunnableOutput]:
return (
RunnablePassthrough[ChatRunnableInput]()
| {
"messages": itemgetter("messages"),
"collection_name": itemgetter("collection_name"),
"last_human_message": itemgetter("messages")
| RunnableLambda[list[BaseMessage], str](lambda x: x[-1].content),
}
| {
"conversation_history": RunnableLambda(lambda x: x["messages"][:-1]),
"last_human_message": itemgetter("last_human_message"),
"documents": itemgetter("last_human_message") | vectorstore.as_retriever(),
}
| {
"conversation_history": itemgetter("conversation_history"),
"last_human_message": itemgetter("last_human_message"),
"documents": itemgetter("documents"),
"context": itemgetter("documents") | combine_documents,
}
| {
"documents": itemgetter("documents"),
"response": chat_prompt_template | chat_model,
}
)
logger = logging.getLogger(__name__)
async def chat(
messages: list[BaseMessage],
collection_name: str,
prompts: ChatPrompts,
prompts: Prompts,
tei_config: TeiConfig,
tei_rerank_config: TeiConfig,
qdrant_host: str,
qdrant_grpc_port: int,
retrieve_count: int,
rerank_count: int | None,
chat_model: BaseChatModel,
) -> ChatResponse:
chat_prompt_template = ChatPromptTemplate.from_messages([
SystemMessagePromptTemplate.from_template(prompts.system_message),
MessagesPlaceholder(variable_name="conversation_history"),
HumanMessagePromptTemplate.from_template(prompts.last_human_message),
])
embeddings = TeiEmbeddings(tei_config=tei_config)
tei_embeddings = TeiEmbeddings(tei_config=tei_config)
tei_rerank_embeddings = TeiEmbeddings(tei_config=tei_rerank_config)
qdrant_client = QdrantClient(
location=qdrant_host, grpc_port=qdrant_grpc_port, prefer_grpc=True
)
@@ -98,16 +40,26 @@ async def chat(
client=qdrant_client,
async_client=async_qdrant_client,
collection_name=collection_name,
embeddings=embeddings,
embeddings=tei_embeddings,
)
chain = get_chat_chain(chat_prompt_template, qdrant_vectorstore, chat_model)
chain_output = await chain.ainvoke(
ChatRunnableInput(messages=messages, collection_name=collection_name)
chat_chain = get_chat_chain(
prompts=prompts,
vectorstore_retriever=qdrant_vectorstore.as_retriever(
search_kwargs={"k": retrieve_count}
),
tei_rerank_embeddings=tei_rerank_embeddings,
rerank_count=rerank_count,
chat_model=chat_model,
)
start_time = time.time()
chain_output = await chat_chain.ainvoke(messages)
elapsed_time = time.time() - start_time
logger.info("Chat chain finished in %.2f seconds", elapsed_time)
return ChatResponse(
response_message=ChatMessage.from_langchain_message(chain_output["response"]),
sources=[
Source(content=document.page_content)
for document in chain_output["documents"]
],
retrieval_query=chain_output["retrieval_query"],
)

View File

@@ -1,4 +1,5 @@
import logging
import time
from langchain.schema.document import Document
from langchain.vectorstores.qdrant import Qdrant
@@ -24,6 +25,7 @@ async def upsert_text(
qdrant_host: str,
qdrant_grpc_port: int,
) -> int:
start_time = time.time()
match text_type:
case TextType.PLAIN_TEXT:
text_splitter_chain = recursive_character_text_splitter_chain
@@ -34,13 +36,18 @@
text_chunks = await text_splitter_chain.ainvoke(text)
logger.info(
"Split `%s` type text into %d document chunks in %.2f seconds",
text_type.value,
len(text_chunks),
time.time() - start_time,
)
start_time = time.time()
documents = [Document(page_content=chunk) for chunk in text_chunks]
embeddings = TeiEmbeddings(tei_config=tei_config)
logger.info(
"Upserting %d documents to Qdrant collection `%s`", len(documents), collection
)
await Qdrant.afrom_documents(
location=qdrant_host,
grpc_port=qdrant_grpc_port,
@@ -51,4 +58,12 @@
force_recreate=False,
)
elapsed_time = time.time() - start_time
logger.info(
"Upserted %d document chunks to Qdrant collection `%s` in %.2f seconds",
len(documents),
collection,
elapsed_time,
)
return len(documents)

View File

@@ -1,10 +1,18 @@
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
from llm_qa.models.prompts import ChatPrompts
from llm_qa.models.prompts import (
OptionalPrompts,
)
class Settings(BaseSettings):
model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
model_config = SettingsConfigDict(
env_file=(".env.default", ".env"),
env_file_encoding="utf-8",
extra="ignore",
env_nested_delimiter="__",
)
qdrant_host: str
qdrant_grpc_port: int = 6334
@@ -12,8 +20,19 @@ class Settings(BaseSettings):
tei_rerank_base_url: str
tei_document_prefix: str = "passage: "
tei_query_prefix: str = "query: "
retrieve_count: int = Field(
ge=1,
description="Number of documents to retrieve using embedding similarity search",
)
rerank_count: int | None = Field(
ge=1,
description="Number of documents to take from rerank, None to skip reranking",
)
ollama_base_url: str
ollama_model_name: str
chat_prompts: ChatPrompts = ChatPrompts(
system_message="System message", last_human_message="Last human message"
)
ollama_auto_pull: bool = False
prompts: OptionalPrompts = OptionalPrompts()
if __name__ == "__main__":
print(Settings())

View File

@@ -10,5 +10,11 @@ app.include_router(api_v1.router)
if __name__ == "__main__":
import uvicorn
load_logging_config()
uvicorn.run("llm_qa.web:app", host="0.0.0.0", port=8000, reload=True) # noqa: S104
logging_config = load_logging_config(dry_run=True)
uvicorn.run(
"llm_qa.web:app",
host="0.0.0.0", # noqa: S104
port=8000,
log_config=logging_config,
reload=True,
)

View File

@@ -2,26 +2,56 @@ version: 1
disable_existing_loggers: False
formatters:
simple:
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
default:
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
access:
"()": uvicorn.logging.AccessFormatter
format: '%(asctime)s - %(name)s - %(levelname)s - %(client_addr)s - "%(request_line)s" %(status_code)s'
handlers:
console:
class: logging.StreamHandler
level: DEBUG
formatter: simple
stream: ext://sys.stdout
default:
formatter: default
class: logging.StreamHandler
stream: ext://sys.stderr
access:
formatter: access
class: logging.StreamHandler
stream: ext://sys.stdout
loggers:
uvicorn:
level: INFO
handlers: [console]
propagate: no
gunicorn:
level: INFO
handlers: [console]
propagate: no
uvicorn:
level: INFO
handlers:
- default
propagate: no
uvicorn.error:
level: INFO
handlers:
- default
propagate: no
uvicorn.access:
level: INFO
handlers:
- access
propagate: no
gunicorn:
level: INFO
handlers:
- default
propagate: no
gunicorn.error:
level: INFO
handlers:
- default
propagate: no
gunicorn.access:
level: INFO
handlers:
- access
propagate: no
root:
level: INFO
handlers: [console]
level: INFO
handlers:
- default
propagate: no

llm-qa/ollama_pull.sh Executable file
View File

@@ -0,0 +1,6 @@
source .env
echo Ollama base URL: $OLLAMA_BASE_URL
echo Ollama model name: $OLLAMA_MODEL_NAME
curl $OLLAMA_BASE_URL/api/pull -d '{
"name": "'$OLLAMA_MODEL_NAME'"
}' -H "Content-Type: application/json" -X POST

llm-qa/poetry.lock generated
View File

@@ -701,13 +701,13 @@ files = [
[[package]]
name = "httpcore"
version = "1.0.2"
version = "1.0.3"
description = "A minimal low-level HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpcore-1.0.2-py3-none-any.whl", hash = "sha256:096cc05bca73b8e459a1fc3dcf585148f63e534eae4339559c9b8a8d6399acc7"},
{file = "httpcore-1.0.2.tar.gz", hash = "sha256:9fc092e4799b26174648e54b74ed5f683132a464e95643b226e00c2ed2fa6535"},
{file = "httpcore-1.0.3-py3-none-any.whl", hash = "sha256:9a6a501c3099307d9fd76ac244e08503427679b1e81ceb1d922485e2f2462ad2"},
{file = "httpcore-1.0.3.tar.gz", hash = "sha256:5c0f9546ad17dac4d0772b0808856eb616eb8b48ce94f49ed819fd6982a8a544"},
]
[package.dependencies]
@@ -718,17 +718,17 @@ h11 = ">=0.13,<0.15"
asyncio = ["anyio (>=4.0,<5.0)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
trio = ["trio (>=0.22.0,<0.23.0)"]
trio = ["trio (>=0.22.0,<0.24.0)"]
[[package]]
name = "httpx"
version = "0.26.0"
version = "0.25.2"
description = "The next generation HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpx-0.26.0-py3-none-any.whl", hash = "sha256:8915f5a3627c4d47b73e8202457cb28f1266982d1159bd5779d86a80c0eab1cd"},
{file = "httpx-0.26.0.tar.gz", hash = "sha256:451b55c30d5185ea6b23c2c793abf9bb237d2a7dfb901ced6ff69ad37ec1dfaf"},
{file = "httpx-0.25.2-py3-none-any.whl", hash = "sha256:a05d3d052d9b2dfce0e3896636467f8a5342fb2b902c819428e1ac65413ca118"},
{file = "httpx-0.25.2.tar.gz", hash = "sha256:8b8fcaa0c8ea7b05edd69a094e63a2094c4efcb48129fb757361bc423c0ad9e8"},
]
[package.dependencies]
@@ -913,13 +913,13 @@ extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.
[[package]]
name = "langchain-core"
version = "0.1.22"
version = "0.1.23"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langchain_core-0.1.22-py3-none-any.whl", hash = "sha256:d1263c2707ce18bb13654c88f891e53f39edec9b11ff7d0d0f23fd920927b2d6"},
{file = "langchain_core-0.1.22.tar.gz", hash = "sha256:deac12b3e42a08bbbaa2acf83d5f8dd2d5513256d8daf0e853e9d68ff4c99d79"},
{file = "langchain_core-0.1.23-py3-none-any.whl", hash = "sha256:d42fac013c39a8b0bcd7e337a4cb6c17c16046c60d768f89df582ad73ec3c5cb"},
{file = "langchain_core-0.1.23.tar.gz", hash = "sha256:34359cc8b6f8c3d45098c54a6a9b35c9f538ef58329cd943a2249d6d7b4e5806"},
]
[package.dependencies]
@@ -1185,6 +1185,20 @@ files = [
{file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
]
[[package]]
name = "ollama"
version = "0.1.6"
description = "The official Python client for Ollama."
optional = false
python-versions = ">=3.8,<4.0"
files = [
{file = "ollama-0.1.6-py3-none-any.whl", hash = "sha256:e37f0455025ed5846879551ca2030ec93a71a823395d3517c14d71479ccbdd11"},
{file = "ollama-0.1.6.tar.gz", hash = "sha256:6636ff75ae54ac076522dcdc40678b141208325d1cc5d85785559f197b1107de"},
]
[package.dependencies]
httpx = ">=0.25.2,<0.26.0"
[[package]]
name = "packaging"
version = "23.2"
@@ -1650,60 +1664,60 @@ files = [
[[package]]
name = "sqlalchemy"
version = "2.0.26"
version = "2.0.27"
description = "Database Abstraction Library"
optional = false
python-versions = ">=3.7"
files = [
{file = "SQLAlchemy-2.0.26-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:56524d767713054f8758217b3a811f6a736e0ae34e7afc33b594926589aa9609"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c2d8a2c68b279617f13088bdc0fc0e9b5126f8017f8882ff08ee41909fab0713"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84d377645913d47f0dc802b415bcfe7fb085d86646a12278d77c12eb75b5e1b4"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fc0628d2026926404dabc903dc5628f7d936a792aa3a1fc54a20182df8e2172"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:872f2907ade52601a1e729e85d16913c24dc1f6e7c57d11739f18dcfafde29db"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba46fa770578b3cf3b5b77dadb7e94fda7692dd4d1989268ef3dcb65f31c40a3"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-win32.whl", hash = "sha256:651d10fdba7984bf100222d6e4acc496fec46493262b6170be1981ef860c6184"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-win_amd64.whl", hash = "sha256:8f95ede696ab0d7328862d69f29b643d35b668c4f3619cb2f0281adc16e64c1b"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fab1bb909bd24accf2024a69edd4f885ded182c079c4dbcd515b4842f86b07cb"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b7ee16afd083bb6bb5ab3962ac7f0eafd1d196c6399388af35fef3d1c6d6d9bb"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:379af901ceb524cbee5e15c1713bf9fd71dc28053286b7917525d01b938b9628"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94a78f56ea13f4d6e9efcd2a2d08cc13531918e0516563f6303c4ad98c81e21d"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a481cc2eec83776ff7b6bb12c8e85d0378af0e2ec4584ac3309365a2a380c64b"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8cbeb0e49b605cd75f825fb9239a554803ef2bef1a7b2a8b428926ed518b6b63"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-win32.whl", hash = "sha256:e70cce65239089390c193a7b0d171ce89d2e3dedf797f8010031b2aa2b1e9c80"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-win_amd64.whl", hash = "sha256:750d1ef39d50520527c45c309c3cb10bbfa6131f93081b4e93858abb5ece2501"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b39503c3a56e1b2340a7d09e185ddb60b253ad0210877a9958ac64208eb23674"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1a870e6121a052f826f7ae1e4f0b54ca4c0ccd613278218ca036fa5e0f3be7df"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5901eed6d0e23ca4b04d66a561799d4f0fe55fcbfc7ca203bb8c3277f442085b"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d25fe55aab9b20ae4a9523bb269074202be9d92a145fcc0b752fff409754b5f6"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5310958d08b4bafc311052be42a3b7d61a93a2bf126ddde07b85f712e7e4ac7b"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fd133afb7e6c59fad365ffa97fb06b1001f88e29e1de351bef3d2b1224e2f132"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-win32.whl", hash = "sha256:dc32ecf643c4904dd413e6a95a3f2c8a89ccd6f15083e586dcf8f42eb4e317ae"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-win_amd64.whl", hash = "sha256:6e25f029e8ad6d893538b5abe8537e7f09e21d8e96caee46a7e2199f3ddd77b0"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:99a9a8204b8937aa72421e31c493bfc12fd063a8310a0522e5a9b98e6323977c"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:691d68a4fca30c9a676623d094b600797699530e175b6524a9f57e3273f5fa8d"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79a74a4ca4310c812f97bf0f13ce00ed73c890954b5a20b32484a9ab60e567e9"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f2efbbeb18c0e1c53b670a46a009fbde7b58e05b397a808c7e598532b17c6f4b"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3fc557f5402206c18ec3d288422f8e5fa764306d49f4efbc6090a7407bf54938"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-win32.whl", hash = "sha256:a9846ffee3283cff4ec476e7ee289314290fcb2384aab5045c6f481c5c4d011f"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-win_amd64.whl", hash = "sha256:ed4667d3d5d6e203a271d684d5b213ebcd618f7a8bc605752a8865eb9e67a79a"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:79e629df3f69f849a1482a2d063596b23e32036b83547397e68725e6e0d0a9ab"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4b4d848b095173e0a9e377127b814490499e55f5168f617ae2c07653c326b9d1"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f06afe8e96d7f221cc0b59334dc400151be22f432785e895e37030579d253c3"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f75ac12d302205e60f77f46bd162d40dc37438f1f8db160d2491a78b19a0bd61"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ec3717c1efee8ad4b97f6211978351de3abe1e4b5f73e32f775c7becec021c5c"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06ed4d6bb2365222fb9b0a05478a2d23ad8c1dd874047a9ae1ca1d45f18a255e"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-win32.whl", hash = "sha256:caa79a6caeb4a3cc4ddb9aba9205c383f5d3bcb60d814e87e74570514754e073"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-win_amd64.whl", hash = "sha256:996b41c38e34a980e9f810d6e2709a3196e29ee34e46e3c16f96c63da10a9da1"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4f57af0866f6629eae2d24d022ba1a4c1bac9b16d45027bbfcda4c9d5b0d8f26"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e1a532bc33163fb19c4759a36504a23e63032bc8d47cee1c66b0b70a04a0957b"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02a4f954ccb17bd8cff56662efc806c5301508233dc38d0253a5fdb2f33ca3ba"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a678f728fb075e74aaa7fdc27f8af8f03f82d02e7419362cc8c2a605c16a4114"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8b39462c9588d4780f041e1b84d2ba038ac01c441c961bbee622dd8f53dec69f"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98f4d0d2bda2921af5b0c2ca99207cdab00f2922da46a6336c62c8d6814303a7"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-win32.whl", hash = "sha256:6d68e6b507a3dd20c0add86ac0a0ca061d43c9a0162a122baa5fe952f14240f1"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-win_amd64.whl", hash = "sha256:fb97a9b93b953084692a52a7877957b7a88dfcedc0c5652124f5aebf5999f7fe"},
{file = "SQLAlchemy-2.0.26-py3-none-any.whl", hash = "sha256:1128b2cdf49107659f6d1f452695f43a20694cc9305a86e97b70793a1c74eeb4"},
{file = "SQLAlchemy-2.0.26.tar.gz", hash = "sha256:e1bcd8fcb30305e27355d553608c2c229d3e589fb7ff406da7d7e5d50fa14d0d"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d04e579e911562f1055d26dab1868d3e0bb905db3bccf664ee8ad109f035618a"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa67d821c1fd268a5a87922ef4940442513b4e6c377553506b9db3b83beebbd8"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c7a596d0be71b7baa037f4ac10d5e057d276f65a9a611c46970f012752ebf2d"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:954d9735ee9c3fa74874c830d089a815b7b48df6f6b6e357a74130e478dbd951"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5cd20f58c29bbf2680039ff9f569fa6d21453fbd2fa84dbdb4092f006424c2e6"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:03f448ffb731b48323bda68bcc93152f751436ad6037f18a42b7e16af9e91c07"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-win32.whl", hash = "sha256:d997c5938a08b5e172c30583ba6b8aad657ed9901fc24caf3a7152eeccb2f1b4"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-win_amd64.whl", hash = "sha256:eb15ef40b833f5b2f19eeae65d65e191f039e71790dd565c2af2a3783f72262f"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6c5bad7c60a392850d2f0fee8f355953abaec878c483dd7c3836e0089f046bf6"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3012ab65ea42de1be81fff5fb28d6db893ef978950afc8130ba707179b4284a"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbcd77c4d94b23e0753c5ed8deba8c69f331d4fd83f68bfc9db58bc8983f49cd"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d177b7e82f6dd5e1aebd24d9c3297c70ce09cd1d5d37b43e53f39514379c029c"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:680b9a36029b30cf063698755d277885d4a0eab70a2c7c6e71aab601323cba45"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1306102f6d9e625cebaca3d4c9c8f10588735ef877f0360b5cdb4fdfd3fd7131"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-win32.whl", hash = "sha256:5b78aa9f4f68212248aaf8943d84c0ff0f74efc65a661c2fc68b82d498311fd5"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-win_amd64.whl", hash = "sha256:15e19a84b84528f52a68143439d0c7a3a69befcd4f50b8ef9b7b69d2628ae7c4"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0de1263aac858f288a80b2071990f02082c51d88335a1db0d589237a3435fe71"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce850db091bf7d2a1f2fdb615220b968aeff3849007b1204bf6e3e50a57b3d32"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dfc936870507da96aebb43e664ae3a71a7b96278382bcfe84d277b88e379b18"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4fbe6a766301f2e8a4519f4500fe74ef0a8509a59e07a4085458f26228cd7cc"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4535c49d961fe9a77392e3a630a626af5baa967172d42732b7a43496c8b28876"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0fb3bffc0ced37e5aa4ac2416f56d6d858f46d4da70c09bb731a246e70bff4d5"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-win32.whl", hash = "sha256:7f470327d06400a0aa7926b375b8e8c3c31d335e0884f509fe272b3c700a7254"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-win_amd64.whl", hash = "sha256:f9374e270e2553653d710ece397df67db9d19c60d2647bcd35bfc616f1622dcd"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e97cf143d74a7a5a0f143aa34039b4fecf11343eed66538610debc438685db4a"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7b5a3e2120982b8b6bd1d5d99e3025339f7fb8b8267551c679afb39e9c7c7f1"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e36aa62b765cf9f43a003233a8c2d7ffdeb55bc62eaa0a0380475b228663a38f"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5ada0438f5b74c3952d916c199367c29ee4d6858edff18eab783b3978d0db16d"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b1d9d1bfd96eef3c3faedb73f486c89e44e64e40e5bfec304ee163de01cf996f"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-win32.whl", hash = "sha256:ca891af9f3289d24a490a5fde664ea04fe2f4984cd97e26de7442a4251bd4b7c"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-win_amd64.whl", hash = "sha256:fd8aafda7cdff03b905d4426b714601c0978725a19efc39f5f207b86d188ba01"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ec1f5a328464daf7a1e4e385e4f5652dd9b1d12405075ccba1df842f7774b4fc"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ad862295ad3f644e3c2c0d8b10a988e1600d3123ecb48702d2c0f26771f1c396"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48217be1de7d29a5600b5c513f3f7664b21d32e596d69582be0a94e36b8309cb"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e56afce6431450442f3ab5973156289bd5ec33dd618941283847c9fd5ff06bf"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:611068511b5531304137bcd7fe8117c985d1b828eb86043bd944cebb7fae3910"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b86abba762ecfeea359112b2bb4490802b340850bbee1948f785141a5e020de8"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-win32.whl", hash = "sha256:30d81cc1192dc693d49d5671cd40cdec596b885b0ce3b72f323888ab1c3863d5"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-win_amd64.whl", hash = "sha256:120af1e49d614d2525ac247f6123841589b029c318b9afbfc9e2b70e22e1827d"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d07ee7793f2aeb9b80ec8ceb96bc8cc08a2aec8a1b152da1955d64e4825fcbac"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cb0845e934647232b6ff5150df37ceffd0b67b754b9fdbb095233deebcddbd4a"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fc19ae2e07a067663dd24fca55f8ed06a288384f0e6e3910420bf4b1270cc51"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b90053be91973a6fb6020a6e44382c97739736a5a9d74e08cc29b196639eb979"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2f5c9dfb0b9ab5e3a8a00249534bdd838d943ec4cfb9abe176a6c33408430230"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:33e8bde8fff203de50399b9039c4e14e42d4d227759155c21f8da4a47fc8053c"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-win32.whl", hash = "sha256:d873c21b356bfaf1589b89090a4011e6532582b3a8ea568a00e0c3aab09399dd"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-win_amd64.whl", hash = "sha256:ff2f1b7c963961d41403b650842dc2039175b906ab2093635d8319bef0b7d620"},
{file = "SQLAlchemy-2.0.27-py3-none-any.whl", hash = "sha256:1ab4e0448018d01b142c916cc7119ca573803a4745cfe341b8f95657812700ac"},
{file = "SQLAlchemy-2.0.27.tar.gz", hash = "sha256:86a6ed69a71fe6b88bf9331594fa390a2adda4a49b5c06f98e47bf0d392534f8"},
]
[package.dependencies]
@@ -1821,6 +1835,17 @@ dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2
doc = ["cairosvg (>=2.5.2,<3.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pillow (>=9.3.0,<10.0.0)"]
test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
[[package]]
name = "types-colorama"
version = "0.4.15.20240205"
description = "Typing stubs for colorama"
optional = false
python-versions = ">=3.8"
files = [
{file = "types-colorama-0.4.15.20240205.tar.gz", hash = "sha256:7ae4f58d407d387f4f98b24d81e1b7657ec754ea1dc4619ae5bd27f0c367637e"},
{file = "types_colorama-0.4.15.20240205-py3-none-any.whl", hash = "sha256:3ab26dcd76d2f13b1b795ed5c87a1a1a29331ea64cf614bb6ae958a3cebc3a53"},
]
[[package]]
name = "types-pyyaml"
version = "6.0.12.12"
@@ -2010,4 +2035,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.12"
content-hash = "b37d2542dbbbd85c8afc9a16dde50037737f87d250cec6c83e377e1266a19997"
content-hash = "e61409e5b61d5b67d01aa550723a3bff8efb5a53aba5ca034fbb038c5408f89d"

View File

@@ -11,17 +11,20 @@ langchain-community = "^0.0.19"
langchain = "^0.1.6"
fastapi = "^0.109.2"
uvicorn = "^0.27.1"
httpx = "^0.26.0"
httpx = "^0.25.0"
pydantic-settings = "^2.1.0"
pydantic = "^2.6.1"
qdrant-client = "^1.7.3"
typer = "^0.9.0"
colorama = "^0.4.6"
ollama = "^0.1.6"
[tool.poetry.group.dev.dependencies]
ruff = "0.2.1"
mypy = "^1.8.0"
ipython = "^8.21.0"
types-pyyaml = "^6.0.12.12"
types-colorama = "^0.4.15.20240205"
[build-system]
requires = ["poetry-core"]
@@ -34,6 +37,10 @@ plugins = ["pydantic.mypy"]
modules = ["llm_qa"]
strict = true
[[tool.mypy.overrides]]
module = "ollama"
ignore_missing_imports = true
[tool.ruff]
target-version = "py312"
preview = true