diff --git a/.devcontainer/.env.example b/.devcontainer/.env.example new file mode 100644 index 0000000..87d20d1 --- /dev/null +++ b/.devcontainer/.env.example @@ -0,0 +1,5 @@ +TEI_MODEL_ID=BAAI/bge-large-en-v1.5 +TEI_MODEL_REVISION=7774ef464da42fb9e231acb5ac59e9be9011cd35 +TEI_RERANK_MODEL_ID=BAAI/bge-reranker-large +TEI_RERANK_MODEL_REVISION=b1a3b1492bcfa68f2b50d983a417601313cd85df +OLLAMA_MODEL_NAME=openchat:7b-v3.5-0106-q4_K_M diff --git a/.devcontainer/compose.yaml b/.devcontainer/compose.yaml index e45097c..636b896 100644 --- a/.devcontainer/compose.yaml +++ b/.devcontainer/compose.yaml @@ -13,7 +13,6 @@ services: - TEI_BASE_URL=http://text-embeddings-inference - TEI_RERANK_BASE_URL=http://text-embeddings-inference-rerank - OLLAMA_BASE_URL=http://ollama:11434 - - OLLAMA_MODEL_NAME=${OLLAMA_MODEL_NAME} command: sleep infinity qdrant: diff --git a/llm-qa/.env.default b/llm-qa/.env.default new file mode 100644 index 0000000..b2d3c4a --- /dev/null +++ b/llm-qa/.env.default @@ -0,0 +1,20 @@ +TEI_DOCUMENT_PREFIX='passage: ' +TEI_QUERY_PREFIX='query: ' +RETRIEVE_COUNT=15 +RERANK_COUNT=6 +OLLAMA_MODEL_NAME=openchat:7b-v3.5-0106-q4_K_M +OLLAMA_AUTO_PULL=true + +PROMPTS__CHAT_PROMPTS__SYSTEM_MESSAGE="You are a friendly and helpful chatbot specializing in SageMaker questions. Ensure that your responses are concise and informative. Leverage the given context to provide accurate and relevant information. If uncertain, politely ask for clarification. Always respond in the same language as the user. Let's make the user experience smooth and efficient!" +PROMPTS__CHAT_PROMPTS__LAST_HUMAN_MESSAGE="# Context: +{context} + + +# Message: +{last_human_message}" +PROMPTS__CONDENSE_QUESTION_PROMPTS__SYSTEM_MESSAGE="Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language. Output just the question, nothing else." +PROMPTS__CONDENSE_QUESTION_PROMPTS__HUMAN_MESSAGE="# Chat History: +{conversation_history} + +# Follow Up Message: +{last_human_message}" diff --git a/llm-qa/README.md b/llm-qa/README.md index b1e84ea..06235e0 100644 --- a/llm-qa/README.md +++ b/llm-qa/README.md @@ -1 +1,117 @@ # LLM QA + +A proof of concept question-answering system for different types of text data. 
+ +Currently implemented: + +- Plain text +- Markdown + +## Key Features + +### Dockerized development environment + +- Easy, quick and reproducible setup + +### Automatic pull and serve of declared models + +- Ollama models are automatically pulled and served by the FastAPI server + +### Detailed logging + +- Key potential bottlenecks are timed and logged + +#### Upsert + +```console +2024-02-15 01:10:54,341 - llm_qa.services.upsert - INFO - Split `MARKDOWN` type text into 8 document chunks in 0.01 seconds +2024-02-15 01:10:54,759 - httpx - INFO - HTTP Request: POST http://text-embeddings-inference/embed "HTTP/1.1 200 OK" +2024-02-15 01:11:03,121 - httpx - INFO - HTTP Request: POST http://text-embeddings-inference/embed "HTTP/1.1 200 OK" +2024-02-15 01:11:03,140 - llm_qa.services.upsert - INFO - Upserted 8 document chunks to Qdrant collection `showcase` in 8.80 seconds +2024-02-15 01:11:03,142 - uvicorn.access - INFO - 127.0.0.1:55868 - "POST /api/v1/upsert-text HTTP/1.1" 200 OK +``` + +#### Chat + +```console +2024-02-15 01:02:03,408 - llm_qa.dependencies - INFO - Ollama auto-pull enabled, checking if model is available +2024-02-15 01:02:03,441 - httpx - INFO - HTTP Request: POST http://ollama:11434/api/show "HTTP/1.1 200 OK" +2024-02-15 01:02:03,441 - llm_qa.dependencies - INFO - Ollama model `openchat:7b-v3.5-0106-q4_K_M` already exists +2024-02-15 01:02:03,645 - httpx - INFO - HTTP Request: POST http://text-embeddings-inference/embed "HTTP/1.1 200 OK" +2024-02-15 01:02:03,653 - llm_qa.chains.time_logger - INFO - Chain `VectorStoreRetriever` finished in 0.08 seconds +2024-02-15 01:02:23,192 - httpx - INFO - HTTP Request: POST http://text-embeddings-inference-rerank/rerank "HTTP/1.1 200 OK" +2024-02-15 01:02:23,194 - llm_qa.chains.time_logger - INFO - Chain `RerankAndTake` finished in 19.54 seconds +2024-02-15 01:02:29,817 - llm_qa.chains.time_logger - INFO - Chain `ChatOllama` finished in 6.62 seconds +2024-02-15 01:02:29,817 - llm_qa.services.chat - INFO - Chat chain finished in 26.27 seconds +2024-02-15 01:02:29,823 - uvicorn.access - INFO - 127.0.0.1:50100 - "POST /api/v1/chat HTTP/1.1" 200 OK +``` + +### Hierarchical document chunking + +- Hierarchical text, such as Markdown, is split into document chunks by headers +- All previous parent headers are also included in the chunk, separated by `...` +- This enriches the context of the chunk and solves the problem of global context being lost when splitting the text + +Example: + +```md +# AWS::SageMaker::ModelQualityJobDefinition MonitoringGroundTruthS3Input +... +## Syntax +... +### YAML +``` [S3Uri](#cfn-sagemaker-modelqualityjobdefinition-monitoringgroundtruths3input-s3uri): String ``` +``` + +### Retrieval query rewriting + +- After the first message, subsequent messages are rewritten to include the context of previous messages +- This allows for a more natural conversation flow and retrieval of more relevant chunks + +Example: + +```md +### User: What are all AWS regions where SageMaker is available? +### AI: SageMaker is available in most AWS regions, except for the following: Asia Pacific (Jakarta), Africa (Cape Town), Middle East (UAE), Asia Pacific (Hyderabad), Asia Pacific (Osaka), Asia Pacific (Melbourne), Europe (Milan), AWS GovCloud (US-East), Europe (Spain), and Europe (Zurich) Region. + +### User: What about the Bedrock service? +### Retrieval Query: What is the availability of AWS SageMaker in relation to the Bedrock service? +
+``` + +### Reranking + +- Retrieval of a larger number of document chunks is first performed using a vector store +- Then, the chunks are reranked using a reranker model +- This process more precisely selects the most relevant chunks for the user query + +## Development + +### Setup + +First copy the `.devcontainer/.env.example` file to `.devcontainer/.env` and adjust the settings and models to your needs. + +Then simply open the project devcontainer in a compatible IDE. +This will setup all required tools and project dependencies for Python development. +It will also run Docker containers for all required services. + +### Configuration + +Create a `llm-qa/.env` file to override selective default environment variables located in `llm-qa/.env.default`. + +### Running + +To run the FastAPI server, run the `llm_qa.web` submodule: + +```bash +poetry run python -m llm_qa.web +``` + +To run the minimal CLI client, run the `llm_qa.client` submodule: + +```bash +poetry run python -m llm_qa.client +``` + +## Deployment + +Not yet implemented. diff --git a/llm-qa/llm_qa/chains/chat.py b/llm-qa/llm_qa/chains/chat.py new file mode 100644 index 0000000..921a9b2 --- /dev/null +++ b/llm-qa/llm_qa/chains/chat.py @@ -0,0 +1,102 @@ +from operator import itemgetter +from typing import TypedDict + +from langchain.chat_models.base import BaseChatModel +from langchain.prompts import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, + MessagesPlaceholder, + SystemMessagePromptTemplate, +) +from langchain_core.documents import Document +from langchain_core.messages import AIMessage, BaseMessage +from langchain_core.runnables import ( + Runnable, + RunnableLambda, + RunnablePassthrough, + chain, +) +from langchain_core.vectorstores import VectorStoreRetriever + +from llm_qa.chains.condense_question import get_condense_question_chain +from llm_qa.chains.rerank import RerankRunnableInput, get_rerank_chain +from llm_qa.chains.time_logger import time_logger +from llm_qa.embeddings.tei import TeiEmbeddings +from llm_qa.models.prompts import Prompts + + +class ChatRunnableOutput(TypedDict): + response: AIMessage + documents: list[Document] + retrieval_query: str + + +@chain +def combine_documents(documents: list[Document]) -> str: + return "\n\n".join(document.page_content for document in documents) + + +def get_chat_chain( + prompts: Prompts, + vectorstore_retriever: VectorStoreRetriever, + tei_rerank_embeddings: TeiEmbeddings, + rerank_count: int | None, + chat_model: BaseChatModel, +) -> Runnable[list[BaseMessage], ChatRunnableOutput]: + chat_prompt_template = ChatPromptTemplate.from_messages([ + SystemMessagePromptTemplate.from_template(prompts.chat_prompts.system_message), + MessagesPlaceholder(variable_name="conversation_history"), + HumanMessagePromptTemplate.from_template( + prompts.chat_prompts.last_human_message + ), + ]) + + condense_question_chain = get_condense_question_chain( + prompts=prompts.condense_question_prompts, chat_model=chat_model + ) + + rerank_chain = get_rerank_chain(tei_rerank_embeddings=tei_rerank_embeddings) + + # This branch isn't made with a RunnableBranch because we know the value of + # rerank_count at chain construction time, so we can use a simple if statement + rerank_and_take_chain: Runnable[RerankRunnableInput, list[Document]] + if rerank_count is None: + rerank_and_take_chain = RunnablePassthrough[RerankRunnableInput]() | itemgetter( + "documents" + ) + else: + rerank_and_take_chain = rerank_chain | RunnableLambda[ + list[Document], list[Document] + ](lambda x: 
x[:rerank_count]) + + return ( + RunnablePassthrough[list[BaseMessage]]() + | { + "messages": RunnablePassthrough(), + "retrieval_query": condense_question_chain, + } + | { + "conversation_history": itemgetter("messages") + | RunnableLambda[list[BaseMessage], list[BaseMessage]](lambda x: x[:-1]), + "last_human_message": itemgetter("messages") + | RunnableLambda[list[BaseMessage], str](lambda x: x[-1].content), + "retrieval_query": itemgetter("retrieval_query"), + "documents": { + "query": itemgetter("retrieval_query"), + "documents": itemgetter("retrieval_query") + | time_logger(vectorstore_retriever), + } + | time_logger(rerank_and_take_chain, name="RerankAndTake"), + } + | { + "retrieval_query": itemgetter("retrieval_query"), + "documents": itemgetter("documents"), + "response": { + "conversation_history": itemgetter("conversation_history"), + "last_human_message": itemgetter("last_human_message"), + "context": itemgetter("documents") | combine_documents, + } + | chat_prompt_template + | time_logger(chat_model), + } + ) diff --git a/llm-qa/llm_qa/chains/condense_question.py b/llm-qa/llm_qa/chains/condense_question.py new file mode 100644 index 0000000..c22fc90 --- /dev/null +++ b/llm-qa/llm_qa/chains/condense_question.py @@ -0,0 +1,72 @@ +from typing import TypedDict + +from langchain.chat_models.base import BaseChatModel +from langchain.prompts import ( + AIMessagePromptTemplate, + ChatPromptTemplate, + FewShotChatMessagePromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +) +from langchain_core.documents import Document +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage +from langchain_core.runnables import ( + Runnable, + RunnableBranch, + RunnableLambda, +) + +from llm_qa.models.prompts import CondenseQuestionPrompts +from llm_qa.prompt_utils import format_messages + + +class ChatRunnableOutput(TypedDict): + response: AIMessage + documents: list[Document] + retrieval_query: str + + +def get_condense_question_chain( + prompts: CondenseQuestionPrompts, chat_model: BaseChatModel +) -> Runnable[list[BaseMessage], str]: + human_message_prompt_template = HumanMessagePromptTemplate.from_template( + prompts.human_message + ) + examples = [ + { + "conversation_history": format_messages([ + HumanMessage(content="What will the weather be like today?") + ]), + "last_human_message": "What about tomorrow?", + "response": "What will the weather be like tomorrow?", + }, + ] + example_prompt = ChatPromptTemplate.from_messages([ + human_message_prompt_template, + AIMessagePromptTemplate.from_template("{response}"), + ]) + few_shot_prompt = FewShotChatMessagePromptTemplate( + examples=examples, example_prompt=example_prompt + ) + chat_prompt_template = ChatPromptTemplate.from_messages([ + SystemMessagePromptTemplate.from_template(prompts.system_message), + few_shot_prompt, + human_message_prompt_template, + ]) + + # Returns the last human message if there is only one message + return RunnableBranch[list[BaseMessage], str]( + (lambda x: len(x) == 0, lambda _: ""), + (lambda x: len(x) == 1, lambda x: x[0].content), + { + "conversation_history": RunnableLambda[list[BaseMessage], str]( + lambda x: format_messages(x[:-1]) + ), + "last_human_message": RunnableLambda[list[BaseMessage], BaseMessage]( + lambda x: x[-1] + ), + } + | chat_prompt_template + | chat_model + | RunnableLambda[AIMessage, str](lambda x: x.content), + ) diff --git a/llm-qa/llm_qa/chains/rerank.py b/llm-qa/llm_qa/chains/rerank.py new file mode 100644 index 0000000..f99ca72 --- /dev/null +++ 
b/llm-qa/llm_qa/chains/rerank.py @@ -0,0 +1,36 @@ +from typing import TypedDict + +from langchain_core.documents import Document +from langchain_core.runnables import Runnable, chain + +from llm_qa.embeddings.tei import TeiEmbeddings + + +async def rerank_documents( + tei_rerank_embeddings: TeiEmbeddings, query: str, documents: list[Document] +) -> list[Document]: + if len(documents) == 0: + return [] + rerank_responses = await tei_rerank_embeddings.arerank( + query, [document.page_content for document in documents] + ) + sorted_rerank = sorted(rerank_responses, key=lambda x: x.score, reverse=True) + + return [documents[rerank.index] for rerank in sorted_rerank] + + +class RerankRunnableInput(TypedDict): + query: str + documents: list[Document] + + +def get_rerank_chain( + tei_rerank_embeddings: TeiEmbeddings, +) -> Runnable[RerankRunnableInput, list[Document]]: + @chain + async def rerank_documents_chain(input: RerankRunnableInput) -> list[Document]: + return await rerank_documents( + tei_rerank_embeddings, input["query"], input["documents"] + ) + + return rerank_documents_chain diff --git a/llm-qa/llm_qa/chains/text_splitters/markdown_header_text_splitter.py b/llm-qa/llm_qa/chains/text_splitters/markdown_header_text_splitter.py index b8ad946..77aab09 100644 --- a/llm-qa/llm_qa/chains/text_splitters/markdown_header_text_splitter.py +++ b/llm-qa/llm_qa/chains/text_splitters/markdown_header_text_splitter.py @@ -2,6 +2,25 @@ from langchain.text_splitter import MarkdownHeaderTextSplitter from langchain_core.runnables import Runnable, chain +def split_markdown_text_by_headers( + markdown_header_text_splitter: MarkdownHeaderTextSplitter, text: str +) -> list[str]: + documents = markdown_header_text_splitter.split_text(text) + # Add all parent headers to the page content + return [ + "\n...\n".join( + f"{header_key} {document.metadata[header_key]}" + for _, header_key in sorted( + markdown_header_text_splitter.headers_to_split_on, + key=lambda x: len(x[0]), + ) + if header_key in document.metadata + ) + + f"\n{document.page_content}" + for document in documents + ] + + def get_markdown_header_text_splitter_chain( markdown_header_text_splitter: MarkdownHeaderTextSplitter, ) -> Runnable[str, list[str]]: @@ -10,20 +29,9 @@ def get_markdown_header_text_splitter_chain( @chain def markdown_header_text_splitter_chain(text: str) -> list[str]: - documents = markdown_header_text_splitter.split_text(text) - # Add all parent headers to the page content - return [ - "\n...\n".join( - f"{header_key} {document.metadata[header_key]}" - for _, header_key in sorted( - markdown_header_text_splitter.headers_to_split_on, - key=lambda x: len(x[0]), - ) - if header_key in document.metadata - ) - + f"\n{document.page_content}" - for document in documents - ] + return split_markdown_text_by_headers( + markdown_header_text_splitter=markdown_header_text_splitter, text=text + ) return markdown_header_text_splitter_chain diff --git a/llm-qa/llm_qa/chains/time_logger.py b/llm-qa/llm_qa/chains/time_logger.py new file mode 100644 index 0000000..18891e8 --- /dev/null +++ b/llm-qa/llm_qa/chains/time_logger.py @@ -0,0 +1,25 @@ +import logging +import time + +from langchain_core.runnables import Runnable, chain +from langchain_core.runnables.utils import Input, Output + +logger = logging.getLogger(__name__) + + +def time_logger( + runnable: Runnable[Input, Output], name: str | None = None +) -> Runnable[Input, Output]: + @chain + async def time_logger_chain(input: Input) -> Output: + start_time = time.time() + output = await 
runnable.ainvoke(input) + elapsed_time = time.time() - start_time + logger.info( + "Chain `%s` finished in %.2f seconds", + name or runnable.get_name(), + elapsed_time, + ) + return output + + return time_logger_chain diff --git a/llm-qa/llm_qa/client.py b/llm-qa/llm_qa/client.py index 258226f..08fa44a 100644 --- a/llm-qa/llm_qa/client.py +++ b/llm-qa/llm_qa/client.py @@ -4,16 +4,19 @@ from typing import Annotated, Optional import httpx import typer +from colorama import Fore, Style from llm_qa.logging import load_logging_config from llm_qa.models.chat import ChatMessage, ChatRequest, ChatResponse -from llm_qa.models.prompts import ChatPrompts +from llm_qa.models.prompts import ( + OptionalChatPrompts, + OptionalCondenseQuestionPrompts, + OptionalPrompts, +) from llm_qa.models.upsert import TextType, UpsertTextRequest, UpsertTextResponse logger = logging.getLogger(__name__) -SYSTEM_MESSAGE_PROMPT_TEMPLATE_PATH = "../data/prompts/system_message.txt" -LAST_HUMAN_MESSAGE_PROMPT_TEMPLATE_PATH = "../data/prompts/last_human_message.txt" CHAT_URL = "http://localhost:8000/api/v1/chat" UPSERT_URL = "http://localhost:8000/api/v1/upsert-text" @@ -29,33 +32,58 @@ app = typer.Typer() def chat( collection: Annotated[str, typer.Option()], chat_url: Annotated[str, typer.Option()] = CHAT_URL, - system_message_prompt_template_path: Annotated[ + chat_system_message_prompt_path: Annotated[ Optional[typer.FileText], typer.Option() # noqa: UP007 ] = None, - last_human_message_prompt_template_path: Annotated[ + chat_last_human_message_prompt_path: Annotated[ + Optional[typer.FileText], typer.Option() # noqa: UP007 + ] = None, + condense_question_system_message_prompt_path: Annotated[ + Optional[typer.FileText], typer.Option() # noqa: UP007 + ] = None, + condense_question_human_message_prompt_path: Annotated[ Optional[typer.FileText], typer.Option() # noqa: UP007 ] = None, ) -> None: - system_message_prompt_template = ( - system_message_prompt_template_path.read() - if system_message_prompt_template_path - else load_prompt(SYSTEM_MESSAGE_PROMPT_TEMPLATE_PATH) + chat_system_message_prompt = ( + chat_system_message_prompt_path.read() + if chat_system_message_prompt_path + else None ) - last_human_message_prompt_template = ( - last_human_message_prompt_template_path.read() - if last_human_message_prompt_template_path - else load_prompt(LAST_HUMAN_MESSAGE_PROMPT_TEMPLATE_PATH) + chat_last_human_message_prompt = ( + chat_last_human_message_prompt_path.read() + if chat_last_human_message_prompt_path + else None ) - prompts = ChatPrompts( - system_message=system_message_prompt_template, - last_human_message=last_human_message_prompt_template, + condense_question_system_message_prompt = ( + condense_question_system_message_prompt_path.read() + if condense_question_system_message_prompt_path + else None + ) + condense_question_human_message_prompt = ( + condense_question_human_message_prompt_path.read() + if condense_question_human_message_prompt_path + else None + ) + prompts = OptionalPrompts( + chat_prompts=OptionalChatPrompts( + system_message=chat_system_message_prompt, + last_human_message=chat_last_human_message_prompt, + ), + condense_question_prompts=OptionalCondenseQuestionPrompts( + system_message=condense_question_system_message_prompt, + human_message=condense_question_human_message_prompt, + ), ) client = httpx.Client(timeout=180.0) messages: list[ChatMessage] = [] while True: - message_content = input("### User: ") + message_content = input(f"{Fore.GREEN}### User: {Style.RESET_ALL}") if message_content in 
{"/exit", "/quit", "/bye"}: break + if message_content in {"/clear", "/cls", "/reset"}: + messages.clear() + continue message = ChatMessage.new_human(message_content) messages.append(message) response = client.post( @@ -67,15 +95,23 @@ def chat( response.raise_for_status() chat_response = ChatResponse.model_validate_json(response.content) messages.append(chat_response.response_message) - print("### AI:", chat_response.response_message.content) print( - "----------\n### Sources:\n" - + "\n\n".join( - f"Source {i + 1}:\n{source.content}" - for i, source in enumerate(chat_response.sources) - ) - + "\n----------" + f"{Fore.RED}### AI: " + f"{Style.RESET_ALL}{chat_response.response_message.content}" ) + print(Fore.RED + "-" * 16) + print(Style.RESET_ALL) + print( + f"{Fore.CYAN}### Retrieval Query: " + f"{Style.RESET_ALL}{chat_response.retrieval_query}" + ) + sources_str = "\n\n".join( + f"{Style.DIM}Source {i + 1}:\n{Style.RESET_ALL}{source.content}" + for i, source in enumerate(chat_response.sources) + ) + print(f"{Fore.YELLOW}### Sources:{Style.RESET_ALL}\n{sources_str}") + print(Fore.YELLOW + "-" * 16) + print(Style.RESET_ALL) @app.command() @@ -84,6 +120,7 @@ def upsert( collection: Annotated[str, typer.Option()], upsert_url: Annotated[str, typer.Option()] = UPSERT_URL, ) -> None: + load_logging_config() client = httpx.Client(timeout=180.0) for file in files: logger = logging.getLogger(__name__) @@ -106,9 +143,8 @@ def upsert( ) response.raise_for_status() upsert_response = UpsertTextResponse.model_validate_json(response.content) - logger.info("Upserted %d document chunks", upsert_response.num_documents) + logger.info("Upserted %d document chunks", upsert_response.num_document_chunks) if __name__ == "__main__": - load_logging_config() app() diff --git a/llm-qa/llm_qa/dependencies.py b/llm-qa/llm_qa/dependencies.py index 4ee9bb5..7260b16 100644 --- a/llm-qa/llm_qa/dependencies.py +++ b/llm-qa/llm_qa/dependencies.py @@ -1,14 +1,16 @@ +import logging from typing import Annotated +import ollama from fastapi import Depends -from langchain.callbacks import StreamingStdOutCallbackHandler -from langchain.callbacks.manager import CallbackManager from langchain.chat_models.base import BaseChatModel from langchain.chat_models.ollama import ChatOllama from llm_qa.models.tei import TeiConfig from llm_qa.settings import Settings +logger = logging.getLogger(__name__) + def settings() -> Settings: return Settings() @@ -30,12 +32,22 @@ def tei_rerank_config( ) -def chat_model( +async def chat_model( settings: Annotated[Settings, Depends(settings)], ) -> BaseChatModel: + if settings.ollama_auto_pull: + logger.info("Ollama auto-pull enabled, checking if model is available") + ollama_client = ollama.AsyncClient(host=settings.ollama_base_url) + try: + await ollama_client.show(settings.ollama_model_name) + logger.info("Ollama model `%s` already exists", settings.ollama_model_name) + except ollama.ResponseError: + logger.info( + "Ollama model `%s` not found, pulling...", settings.ollama_model_name + ) + await ollama_client.pull(settings.ollama_model_name) return ChatOllama( base_url=settings.ollama_base_url, model=settings.ollama_model_name, verbose=True, - callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), ) diff --git a/llm-qa/llm_qa/embeddings/tei.py b/llm-qa/llm_qa/embeddings/tei.py index 5227b10..b44ec0f 100644 --- a/llm-qa/llm_qa/embeddings/tei.py +++ b/llm-qa/llm_qa/embeddings/tei.py @@ -1,3 +1,4 @@ +import json from typing import override from urllib.parse import urljoin @@ -51,6 
+52,13 @@ class TeiEmbeddings(Embeddings): e.add_note(note) case _: raise + TeiEmbeddings._handle_error_response(response) + + @staticmethod + def _handle_error_response(response: httpx.Response) -> None: + response_json = json.loads(response.content) + if "error" in response_json: + raise ValueError(response_json["error"]) def _embed(self, text: str | list[str]) -> list[list[float]]: """Embed text.""" diff --git a/llm-qa/llm_qa/logging.py b/llm-qa/llm_qa/logging.py index 7bea105..17d4bba 100644 --- a/llm-qa/llm_qa/logging.py +++ b/llm-qa/llm_qa/logging.py @@ -1,9 +1,14 @@ import logging.config import pathlib +from typing import Any, cast import yaml -def load_logging_config() -> None: - logging_config = yaml.safe_load(pathlib.Path("logging.yaml").read_text()) - logging.config.dictConfig(logging_config) +def load_logging_config(dry_run: bool = False) -> dict[str, Any]: + logging_config = cast( + dict[str, Any], yaml.safe_load(pathlib.Path("logging.yaml").read_text()) + ) + if not dry_run: + logging.config.dictConfig(logging_config) + return logging_config diff --git a/llm-qa/llm_qa/models/chat.py b/llm-qa/llm_qa/models/chat.py index 0c65365..5ae13ed 100644 --- a/llm-qa/llm_qa/models/chat.py +++ b/llm-qa/llm_qa/models/chat.py @@ -2,9 +2,9 @@ from enum import StrEnum from typing import Self from langchain_core.messages import AIMessage, BaseMessage, HumanMessage -from pydantic import BaseModel +from pydantic import BaseModel, Field -from llm_qa.models.prompts import ChatPrompts +from llm_qa.models.prompts import OptionalPrompts from llm_qa.models.source import Source from llm_qa.typing_utils import assert_never @@ -15,8 +15,8 @@ class MessageType(StrEnum): class ChatMessage(BaseModel): - content: str - type: MessageType + content: str = Field(..., description="Content of the message") + type: MessageType = Field(..., description="Type of the message, HUMAN or AI") @classmethod def new_human(cls, content: str) -> Self: @@ -47,11 +47,36 @@ class ChatMessage(BaseModel): class ChatRequest(BaseModel): - messages: list[ChatMessage] - collection_name: str - prompts: ChatPrompts | None = None + messages: list[ChatMessage] = Field( + ..., + min_length=1, + description="Chat message history", + examples=[[ChatMessage(content="What are you?", type=MessageType.HUMAN)]], + ) + collection_name: str = Field( + ..., + description="Name of the collection to retrieve documents from", + examples=["collection"], + ) + prompts: OptionalPrompts | None = Field( + None, + description="Prompts to use for various LLM chains" + ", if not provided, uses prompts from settings", + examples=[OptionalPrompts()], + ) class ChatResponse(BaseModel): - response_message: ChatMessage - sources: list[Source] + response_message: ChatMessage = Field( + ..., description="Response message from the chat chain", examples=["I am an AI"] + ) + sources: list[Source] = Field( + ..., + description="Sources used to generate the response", + examples=[[Source(content="Source chunk")]], + ) + retrieval_query: str = Field( + ..., + description="Retrieval query used to retrieve the sources", + examples=["Condensed query"], + ) diff --git a/llm-qa/llm_qa/models/prompts.py b/llm-qa/llm_qa/models/prompts.py index e800698..fb35fff 100644 --- a/llm-qa/llm_qa/models/prompts.py +++ b/llm-qa/llm_qa/models/prompts.py @@ -1,6 +1,86 @@ +from typing import Self + from pydantic import BaseModel -class ChatPrompts(BaseModel): +class OptionalChatPrompts(BaseModel): + system_message: str | None = None + last_human_message: str | None = None + + def __or__(self, 
__value: "OptionalChatPrompts | None") -> Self: + if __value is None: + return self + + return self.__class__( + system_message=__value.system_message or self.system_message, + last_human_message=__value.last_human_message or self.last_human_message, + ) + + +class ChatPrompts(OptionalChatPrompts): system_message: str last_human_message: str + + @classmethod + def from_optional(cls, optional: OptionalChatPrompts) -> Self: + return cls( + system_message=optional.system_message, + last_human_message=optional.last_human_message, + ) + + +class OptionalCondenseQuestionPrompts(BaseModel): + system_message: str | None = None + human_message: str | None = None + + def __or__(self, __value: "OptionalCondenseQuestionPrompts | None") -> Self: + if __value is None: + return self + + return self.__class__( + system_message=__value.system_message or self.system_message, + human_message=__value.human_message or self.human_message, + ) + + +class CondenseQuestionPrompts(OptionalCondenseQuestionPrompts): + system_message: str + human_message: str + + @classmethod + def from_optional(cls, optional: OptionalCondenseQuestionPrompts) -> Self: + return cls( + system_message=optional.system_message, + human_message=optional.human_message, + ) + + +class OptionalPrompts(BaseModel): + chat_prompts: OptionalChatPrompts = OptionalChatPrompts() + condense_question_prompts: OptionalCondenseQuestionPrompts = ( + OptionalCondenseQuestionPrompts() + ) + + def __or__(self, __value: "OptionalPrompts | None") -> Self: + if __value is None: + return self + + return self.__class__( + chat_prompts=self.chat_prompts | __value.chat_prompts, + condense_question_prompts=self.condense_question_prompts + | __value.condense_question_prompts, + ) + + +class Prompts(OptionalPrompts): + chat_prompts: ChatPrompts + condense_question_prompts: CondenseQuestionPrompts + + @classmethod + def from_optional(cls, optional: OptionalPrompts) -> Self: + return cls( + chat_prompts=ChatPrompts.from_optional(optional.chat_prompts), + condense_question_prompts=CondenseQuestionPrompts.from_optional( + optional.condense_question_prompts + ), + ) diff --git a/llm-qa/llm_qa/models/tei.py b/llm-qa/llm_qa/models/tei.py index aee7f4c..a4917f0 100644 --- a/llm-qa/llm_qa/models/tei.py +++ b/llm-qa/llm_qa/models/tei.py @@ -25,7 +25,7 @@ class RerankRequest(BaseModel): texts: list[str] raw_scores: bool = False return_text: bool = False - truncate: bool = False + truncate: bool = True class RerankResponse(BaseModel): diff --git a/llm-qa/llm_qa/models/upsert.py b/llm-qa/llm_qa/models/upsert.py index 914f181..f9d411e 100644 --- a/llm-qa/llm_qa/models/upsert.py +++ b/llm-qa/llm_qa/models/upsert.py @@ -1,6 +1,6 @@ from enum import StrEnum -from pydantic import BaseModel +from pydantic import BaseModel, Field class TextType(StrEnum): @@ -9,10 +9,22 @@ class TextType(StrEnum): class UpsertTextRequest(BaseModel): - text: str - type: TextType - collection: str + text: str = Field( + ..., description="Text to upsert", examples=["Context document text"] + ) + type: TextType = Field( + ..., + description="Type of the text, will use different splitting logic", + examples=[TextType.PLAIN_TEXT], + ) + collection: str = Field( + ..., + description="Name of the collection to upsert into", + examples=["collection"], + ) class UpsertTextResponse(BaseModel): - num_documents: int + num_document_chunks: int = Field( + ..., description="Number of document chunks upserted" + ) diff --git a/llm-qa/llm_qa/prompt_utils.py b/llm-qa/llm_qa/prompt_utils.py new file mode 100644 index 
0000000..efda991 --- /dev/null +++ b/llm-qa/llm_qa/prompt_utils.py @@ -0,0 +1,7 @@ +from langchain_core.messages import AIMessage, HumanMessage + + +def format_messages(messages: list[HumanMessage | AIMessage]) -> str: + return "\n".join( + f"{message.type.capitalize()}: {message.content}" for message in messages + ) diff --git a/llm-qa/llm_qa/routers/chat.py b/llm-qa/llm_qa/routers/chat.py index f114df2..05b7dd0 100644 --- a/llm-qa/llm_qa/routers/chat.py +++ b/llm-qa/llm_qa/routers/chat.py @@ -1,14 +1,18 @@ +import logging from typing import Annotated from fastapi import APIRouter, Depends from langchain.chat_models.base import BaseChatModel -from llm_qa.dependencies import chat_model, settings, tei_config +from llm_qa.dependencies import chat_model, settings, tei_config, tei_rerank_config from llm_qa.models.chat import ChatRequest, ChatResponse +from llm_qa.models.prompts import Prompts from llm_qa.models.tei import TeiConfig from llm_qa.services.chat import chat as chat_service from llm_qa.settings import Settings +logger = logging.getLogger(__name__) + router = APIRouter() @@ -17,15 +21,25 @@ async def upsert_text( chat_request: ChatRequest, settings: Annotated[Settings, Depends(settings)], tei_config: Annotated[TeiConfig, Depends(tei_config)], + tei_rerank_config: Annotated[TeiConfig, Depends(tei_rerank_config)], chat_model: Annotated[BaseChatModel, Depends(chat_model)], ) -> ChatResponse: - prompts = chat_request.prompts or settings.chat_prompts + """Chat with a language model over a collection of documents. + + Returns a LLM response, and sources of the documents used to generate the response. + """ + logger.debug("Chat request: %s", chat_request) + prompts = Prompts.from_optional(settings.prompts | chat_request.prompts) + logger.debug("Prompts: %s", prompts) return await chat_service( messages=[message.to_langchain_message() for message in chat_request.messages], collection_name=chat_request.collection_name, prompts=prompts, tei_config=tei_config, + tei_rerank_config=tei_rerank_config, qdrant_host=settings.qdrant_host, qdrant_grpc_port=settings.qdrant_grpc_port, + retrieve_count=settings.retrieve_count, + rerank_count=settings.rerank_count, chat_model=chat_model, ) diff --git a/llm-qa/llm_qa/routers/upsert.py b/llm-qa/llm_qa/routers/upsert.py index 90be3f9..48d9821 100644 --- a/llm-qa/llm_qa/routers/upsert.py +++ b/llm-qa/llm_qa/routers/upsert.py @@ -1,6 +1,7 @@ +import logging from typing import Annotated -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, HTTPException from llm_qa.dependencies import settings, tei_config from llm_qa.models.tei import TeiConfig @@ -8,6 +9,8 @@ from llm_qa.models.upsert import UpsertTextRequest, UpsertTextResponse from llm_qa.services.upsert import upsert_text as upsert_text_service from llm_qa.settings import Settings +logger = logging.getLogger(__name__) + router = APIRouter() @@ -17,6 +20,8 @@ async def upsert_text( settings: Annotated[Settings, Depends(settings)], tei_config: Annotated[TeiConfig, Depends(tei_config)], ) -> UpsertTextResponse: + """Upsert text into a collection.""" + logger.debug("Upsert text request: %s", upsert_request) num_documents = await upsert_text_service( text=upsert_request.text, text_type=upsert_request.type, @@ -25,9 +30,10 @@ async def upsert_text( qdrant_host=settings.qdrant_host, qdrant_grpc_port=settings.qdrant_grpc_port, ) - return UpsertTextResponse(num_documents=num_documents) + return UpsertTextResponse(num_document_chunks=num_documents) -@router.post("/upsert-file") 
+@router.post("/upsert-file", responses={501: {"description": "Not implemented"}}) async def upsert_file() -> None: - raise NotImplementedError + """Not implemented.""" + raise HTTPException(status_code=501, detail="Not implemented") diff --git a/llm-qa/llm_qa/services/chat.py b/llm-qa/llm_qa/services/chat.py index cd94862..f2efb09 100644 --- a/llm-qa/llm_qa/services/chat.py +++ b/llm-qa/llm_qa/services/chat.py @@ -1,93 +1,35 @@ -from operator import itemgetter -from typing import TypedDict +import logging +import time from langchain.chat_models.base import BaseChatModel -from langchain.prompts import ( - ChatPromptTemplate, - HumanMessagePromptTemplate, - MessagesPlaceholder, - SystemMessagePromptTemplate, -) -from langchain.vectorstores import VectorStore from langchain.vectorstores.qdrant import Qdrant -from langchain_core.documents import Document -from langchain_core.messages import AIMessage, BaseMessage -from langchain_core.runnables import ( - Runnable, - RunnableLambda, - RunnablePassthrough, - chain, -) +from langchain_core.messages import BaseMessage from qdrant_client import AsyncQdrantClient, QdrantClient +from llm_qa.chains.chat import get_chat_chain from llm_qa.embeddings.tei import TeiEmbeddings from llm_qa.models.chat import ChatMessage, ChatResponse -from llm_qa.models.prompts import ChatPrompts +from llm_qa.models.prompts import Prompts from llm_qa.models.source import Source from llm_qa.models.tei import TeiConfig - -class ChatRunnableInput(TypedDict): - messages: list[BaseMessage] - collection_name: str - - -class ChatRunnableOutput(TypedDict): - response: AIMessage - documents: list[Document] - - -@chain -def combine_documents(documents: list[Document]) -> str: - return "\n\n".join(document.page_content for document in documents) - - -def get_chat_chain( - chat_prompt_template: ChatPromptTemplate, - vectorstore: VectorStore, - chat_model: BaseChatModel, -) -> Runnable[ChatRunnableInput, ChatRunnableOutput]: - return ( - RunnablePassthrough[ChatRunnableInput]() - | { - "messages": itemgetter("messages"), - "collection_name": itemgetter("collection_name"), - "last_human_message": itemgetter("messages") - | RunnableLambda[list[BaseMessage], str](lambda x: x[-1].content), - } - | { - "conversation_history": RunnableLambda(lambda x: x["messages"][:-1]), - "last_human_message": itemgetter("last_human_message"), - "documents": itemgetter("last_human_message") | vectorstore.as_retriever(), - } - | { - "conversation_history": itemgetter("conversation_history"), - "last_human_message": itemgetter("last_human_message"), - "documents": itemgetter("documents"), - "context": itemgetter("documents") | combine_documents, - } - | { - "documents": itemgetter("documents"), - "response": chat_prompt_template | chat_model, - } - ) +logger = logging.getLogger(__name__) async def chat( messages: list[BaseMessage], collection_name: str, - prompts: ChatPrompts, + prompts: Prompts, tei_config: TeiConfig, + tei_rerank_config: TeiConfig, qdrant_host: str, qdrant_grpc_port: int, + retrieve_count: int, + rerank_count: int | None, chat_model: BaseChatModel, ) -> ChatResponse: - chat_prompt_template = ChatPromptTemplate.from_messages([ - SystemMessagePromptTemplate.from_template(prompts.system_message), - MessagesPlaceholder(variable_name="conversation_history"), - HumanMessagePromptTemplate.from_template(prompts.last_human_message), - ]) - embeddings = TeiEmbeddings(tei_config=tei_config) + tei_embeddings = TeiEmbeddings(tei_config=tei_config) + tei_rerank_embeddings = 
TeiEmbeddings(tei_config=tei_rerank_config) qdrant_client = QdrantClient( location=qdrant_host, grpc_port=qdrant_grpc_port, prefer_grpc=True ) @@ -98,16 +40,26 @@ async def chat( client=qdrant_client, async_client=async_qdrant_client, collection_name=collection_name, - embeddings=embeddings, + embeddings=tei_embeddings, ) - chain = get_chat_chain(chat_prompt_template, qdrant_vectorstore, chat_model) - chain_output = await chain.ainvoke( - ChatRunnableInput(messages=messages, collection_name=collection_name) + chat_chain = get_chat_chain( + prompts=prompts, + vectorstore_retriever=qdrant_vectorstore.as_retriever( + search_kwargs={"k": retrieve_count} + ), + tei_rerank_embeddings=tei_rerank_embeddings, + rerank_count=rerank_count, + chat_model=chat_model, ) + start_time = time.time() + chain_output = await chat_chain.ainvoke(messages) + elapsed_time = time.time() - start_time + logger.info("Chat chain finished in %.2f seconds", elapsed_time) return ChatResponse( response_message=ChatMessage.from_langchain_message(chain_output["response"]), sources=[ Source(content=document.page_content) for document in chain_output["documents"] ], + retrieval_query=chain_output["retrieval_query"], ) diff --git a/llm-qa/llm_qa/services/upsert.py b/llm-qa/llm_qa/services/upsert.py index 96bd96b..bdb37f7 100644 --- a/llm-qa/llm_qa/services/upsert.py +++ b/llm-qa/llm_qa/services/upsert.py @@ -1,4 +1,5 @@ import logging +import time from langchain.schema.document import Document from langchain.vectorstores.qdrant import Qdrant @@ -24,6 +25,7 @@ async def upsert_text( qdrant_host: str, qdrant_grpc_port: int, ) -> int: + start_time = time.time() match text_type: case TextType.PLAIN_TEXT: text_splitter_chain = recursive_character_text_splitter_chain @@ -34,13 +36,18 @@ async def upsert_text( text_chunks = await text_splitter_chain.ainvoke(text) + logger.info( + "Split `%s` type text into %d document chunks in %.2f seconds", + text_type.value, + len(text_chunks), + time.time() - start_time, + ) + start_time = time.time() + documents = [Document(page_content=chunk) for chunk in text_chunks] embeddings = TeiEmbeddings(tei_config=tei_config) - logger.info( - "Upserting %d documents to Qdrant collection `%s`", len(documents), collection - ) await Qdrant.afrom_documents( location=qdrant_host, grpc_port=qdrant_grpc_port, @@ -51,4 +58,12 @@ async def upsert_text( force_recreate=False, ) + elapsed_time = time.time() - start_time + logger.info( + "Upserted %d document chunks to Qdrant collection `%s` in %.2f seconds", + len(documents), + collection, + elapsed_time, + ) + return len(documents) diff --git a/llm-qa/llm_qa/settings.py b/llm-qa/llm_qa/settings.py index 3d962e5..5a70236 100644 --- a/llm-qa/llm_qa/settings.py +++ b/llm-qa/llm_qa/settings.py @@ -1,10 +1,18 @@ +from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict -from llm_qa.models.prompts import ChatPrompts +from llm_qa.models.prompts import ( + OptionalPrompts, +) class Settings(BaseSettings): - model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8") + model_config = SettingsConfigDict( + env_file=(".env.default", ".env"), + env_file_encoding="utf-8", + extra="ignore", + env_nested_delimiter="__", + ) qdrant_host: str qdrant_grpc_port: int = 6334 @@ -12,8 +20,19 @@ class Settings(BaseSettings): tei_rerank_base_url: str tei_document_prefix: str = "passage: " tei_query_prefix: str = "query: " + retrieve_count: int = Field( + ge=1, + description="Number of documents to retrieve using embedding similarity 
search", + ) + rerank_count: int | None = Field( + ge=1, + description="Number of documents to take from rerank, None to skip reranking", + ) ollama_base_url: str ollama_model_name: str - chat_prompts: ChatPrompts = ChatPrompts( - system_message="System message", last_human_message="Last human message" - ) + ollama_auto_pull: bool = False + prompts: OptionalPrompts = OptionalPrompts() + + +if __name__ == "__main__": + print(Settings()) diff --git a/llm-qa/llm_qa/web.py b/llm-qa/llm_qa/web.py index 5671c27..3493da3 100644 --- a/llm-qa/llm_qa/web.py +++ b/llm-qa/llm_qa/web.py @@ -10,5 +10,11 @@ app.include_router(api_v1.router) if __name__ == "__main__": import uvicorn - load_logging_config() - uvicorn.run("llm_qa.web:app", host="0.0.0.0", port=8000, reload=True) # noqa: S104 + logging_config = load_logging_config(dry_run=True) + uvicorn.run( + "llm_qa.web:app", + host="0.0.0.0", # noqa: S104 + port=8000, + log_config=logging_config, + reload=True, + ) diff --git a/llm-qa/logging.yaml b/llm-qa/logging.yaml index dce8513..c196cd4 100644 --- a/llm-qa/logging.yaml +++ b/llm-qa/logging.yaml @@ -2,26 +2,56 @@ version: 1 disable_existing_loggers: False formatters: - simple: - format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + default: + format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + access: + "()": uvicorn.logging.AccessFormatter + format: '%(asctime)s - %(name)s - %(levelname)s - %(client_addr)s - "%(request_line)s" %(status_code)s' handlers: - console: - class: logging.StreamHandler - level: DEBUG - formatter: simple - stream: ext://sys.stdout + default: + formatter: default + class: logging.StreamHandler + stream: ext://sys.stderr + access: + formatter: access + class: logging.StreamHandler + stream: ext://sys.stdout loggers: - uvicorn: - level: INFO - handlers: [console] - propagate: no - gunicorn: - level: INFO - handlers: [console] - propagate: no + uvicorn: + level: INFO + handlers: + - default + propagate: no + uvicorn.error: + level: INFO + handlers: + - default + propagate: no + uvicorn.access: + level: INFO + handlers: + - access + propagate: no + guvicorn: + level: INFO + handlers: + - default + propagate: no + guvicorn.error: + level: INFO + handlers: + - default + propagate: no + guvicorn.access: + level: INFO + handlers: + - access + propagate: no root: - level: INFO - handlers: [console] + level: INFO + handlers: + - default + propagate: no diff --git a/llm-qa/ollama_pull.sh b/llm-qa/ollama_pull.sh new file mode 100755 index 0000000..181c7b9 --- /dev/null +++ b/llm-qa/ollama_pull.sh @@ -0,0 +1,6 @@ +source .env +echo Ollama base URL: $OLLAMA_BASE_URL +echo Ollama model name: $OLLAMA_MODEL_NAME +curl $OLLAMA_BASE_URL/api/pull -d '{ + "name": "'$OLLAMA_MODEL_NAME'" +}' -H "Content-Type: application/json" -X POST diff --git a/llm-qa/poetry.lock b/llm-qa/poetry.lock index 70e919d..6b46414 100644 --- a/llm-qa/poetry.lock +++ b/llm-qa/poetry.lock @@ -701,13 +701,13 @@ files = [ [[package]] name = "httpcore" -version = "1.0.2" +version = "1.0.3" description = "A minimal low-level HTTP client." 
optional = false python-versions = ">=3.8" files = [ - {file = "httpcore-1.0.2-py3-none-any.whl", hash = "sha256:096cc05bca73b8e459a1fc3dcf585148f63e534eae4339559c9b8a8d6399acc7"}, - {file = "httpcore-1.0.2.tar.gz", hash = "sha256:9fc092e4799b26174648e54b74ed5f683132a464e95643b226e00c2ed2fa6535"}, + {file = "httpcore-1.0.3-py3-none-any.whl", hash = "sha256:9a6a501c3099307d9fd76ac244e08503427679b1e81ceb1d922485e2f2462ad2"}, + {file = "httpcore-1.0.3.tar.gz", hash = "sha256:5c0f9546ad17dac4d0772b0808856eb616eb8b48ce94f49ed819fd6982a8a544"}, ] [package.dependencies] @@ -718,17 +718,17 @@ h11 = ">=0.13,<0.15" asyncio = ["anyio (>=4.0,<5.0)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] -trio = ["trio (>=0.22.0,<0.23.0)"] +trio = ["trio (>=0.22.0,<0.24.0)"] [[package]] name = "httpx" -version = "0.26.0" +version = "0.25.2" description = "The next generation HTTP client." optional = false python-versions = ">=3.8" files = [ - {file = "httpx-0.26.0-py3-none-any.whl", hash = "sha256:8915f5a3627c4d47b73e8202457cb28f1266982d1159bd5779d86a80c0eab1cd"}, - {file = "httpx-0.26.0.tar.gz", hash = "sha256:451b55c30d5185ea6b23c2c793abf9bb237d2a7dfb901ced6ff69ad37ec1dfaf"}, + {file = "httpx-0.25.2-py3-none-any.whl", hash = "sha256:a05d3d052d9b2dfce0e3896636467f8a5342fb2b902c819428e1ac65413ca118"}, + {file = "httpx-0.25.2.tar.gz", hash = "sha256:8b8fcaa0c8ea7b05edd69a094e63a2094c4efcb48129fb757361bc423c0ad9e8"}, ] [package.dependencies] @@ -913,13 +913,13 @@ extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15. [[package]] name = "langchain-core" -version = "0.1.22" +version = "0.1.23" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langchain_core-0.1.22-py3-none-any.whl", hash = "sha256:d1263c2707ce18bb13654c88f891e53f39edec9b11ff7d0d0f23fd920927b2d6"}, - {file = "langchain_core-0.1.22.tar.gz", hash = "sha256:deac12b3e42a08bbbaa2acf83d5f8dd2d5513256d8daf0e853e9d68ff4c99d79"}, + {file = "langchain_core-0.1.23-py3-none-any.whl", hash = "sha256:d42fac013c39a8b0bcd7e337a4cb6c17c16046c60d768f89df582ad73ec3c5cb"}, + {file = "langchain_core-0.1.23.tar.gz", hash = "sha256:34359cc8b6f8c3d45098c54a6a9b35c9f538ef58329cd943a2249d6d7b4e5806"}, ] [package.dependencies] @@ -1185,6 +1185,20 @@ files = [ {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] +[[package]] +name = "ollama" +version = "0.1.6" +description = "The official Python client for Ollama." 
+optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "ollama-0.1.6-py3-none-any.whl", hash = "sha256:e37f0455025ed5846879551ca2030ec93a71a823395d3517c14d71479ccbdd11"}, + {file = "ollama-0.1.6.tar.gz", hash = "sha256:6636ff75ae54ac076522dcdc40678b141208325d1cc5d85785559f197b1107de"}, +] + +[package.dependencies] +httpx = ">=0.25.2,<0.26.0" + [[package]] name = "packaging" version = "23.2" @@ -1650,60 +1664,60 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.26" +version = "2.0.27" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.26-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:56524d767713054f8758217b3a811f6a736e0ae34e7afc33b594926589aa9609"}, - {file = "SQLAlchemy-2.0.26-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c2d8a2c68b279617f13088bdc0fc0e9b5126f8017f8882ff08ee41909fab0713"}, - {file = "SQLAlchemy-2.0.26-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84d377645913d47f0dc802b415bcfe7fb085d86646a12278d77c12eb75b5e1b4"}, - {file = "SQLAlchemy-2.0.26-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fc0628d2026926404dabc903dc5628f7d936a792aa3a1fc54a20182df8e2172"}, - {file = "SQLAlchemy-2.0.26-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:872f2907ade52601a1e729e85d16913c24dc1f6e7c57d11739f18dcfafde29db"}, - {file = "SQLAlchemy-2.0.26-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba46fa770578b3cf3b5b77dadb7e94fda7692dd4d1989268ef3dcb65f31c40a3"}, - {file = "SQLAlchemy-2.0.26-cp310-cp310-win32.whl", hash = "sha256:651d10fdba7984bf100222d6e4acc496fec46493262b6170be1981ef860c6184"}, - {file = "SQLAlchemy-2.0.26-cp310-cp310-win_amd64.whl", hash = "sha256:8f95ede696ab0d7328862d69f29b643d35b668c4f3619cb2f0281adc16e64c1b"}, - {file = "SQLAlchemy-2.0.26-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fab1bb909bd24accf2024a69edd4f885ded182c079c4dbcd515b4842f86b07cb"}, - {file = "SQLAlchemy-2.0.26-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b7ee16afd083bb6bb5ab3962ac7f0eafd1d196c6399388af35fef3d1c6d6d9bb"}, - {file = "SQLAlchemy-2.0.26-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:379af901ceb524cbee5e15c1713bf9fd71dc28053286b7917525d01b938b9628"}, - {file = "SQLAlchemy-2.0.26-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94a78f56ea13f4d6e9efcd2a2d08cc13531918e0516563f6303c4ad98c81e21d"}, - {file = "SQLAlchemy-2.0.26-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a481cc2eec83776ff7b6bb12c8e85d0378af0e2ec4584ac3309365a2a380c64b"}, - {file = "SQLAlchemy-2.0.26-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8cbeb0e49b605cd75f825fb9239a554803ef2bef1a7b2a8b428926ed518b6b63"}, - {file = "SQLAlchemy-2.0.26-cp311-cp311-win32.whl", hash = "sha256:e70cce65239089390c193a7b0d171ce89d2e3dedf797f8010031b2aa2b1e9c80"}, - {file = "SQLAlchemy-2.0.26-cp311-cp311-win_amd64.whl", hash = "sha256:750d1ef39d50520527c45c309c3cb10bbfa6131f93081b4e93858abb5ece2501"}, - {file = "SQLAlchemy-2.0.26-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b39503c3a56e1b2340a7d09e185ddb60b253ad0210877a9958ac64208eb23674"}, - {file = "SQLAlchemy-2.0.26-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1a870e6121a052f826f7ae1e4f0b54ca4c0ccd613278218ca036fa5e0f3be7df"}, - {file = "SQLAlchemy-2.0.26-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5901eed6d0e23ca4b04d66a561799d4f0fe55fcbfc7ca203bb8c3277f442085b"}, - {file = 
"SQLAlchemy-2.0.26-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d25fe55aab9b20ae4a9523bb269074202be9d92a145fcc0b752fff409754b5f6"}, - {file = "SQLAlchemy-2.0.26-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5310958d08b4bafc311052be42a3b7d61a93a2bf126ddde07b85f712e7e4ac7b"}, - {file = "SQLAlchemy-2.0.26-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fd133afb7e6c59fad365ffa97fb06b1001f88e29e1de351bef3d2b1224e2f132"}, - {file = "SQLAlchemy-2.0.26-cp312-cp312-win32.whl", hash = "sha256:dc32ecf643c4904dd413e6a95a3f2c8a89ccd6f15083e586dcf8f42eb4e317ae"}, - {file = "SQLAlchemy-2.0.26-cp312-cp312-win_amd64.whl", hash = "sha256:6e25f029e8ad6d893538b5abe8537e7f09e21d8e96caee46a7e2199f3ddd77b0"}, - {file = "SQLAlchemy-2.0.26-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:99a9a8204b8937aa72421e31c493bfc12fd063a8310a0522e5a9b98e6323977c"}, - {file = "SQLAlchemy-2.0.26-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:691d68a4fca30c9a676623d094b600797699530e175b6524a9f57e3273f5fa8d"}, - {file = "SQLAlchemy-2.0.26-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79a74a4ca4310c812f97bf0f13ce00ed73c890954b5a20b32484a9ab60e567e9"}, - {file = "SQLAlchemy-2.0.26-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f2efbbeb18c0e1c53b670a46a009fbde7b58e05b397a808c7e598532b17c6f4b"}, - {file = "SQLAlchemy-2.0.26-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3fc557f5402206c18ec3d288422f8e5fa764306d49f4efbc6090a7407bf54938"}, - {file = "SQLAlchemy-2.0.26-cp37-cp37m-win32.whl", hash = "sha256:a9846ffee3283cff4ec476e7ee289314290fcb2384aab5045c6f481c5c4d011f"}, - {file = "SQLAlchemy-2.0.26-cp37-cp37m-win_amd64.whl", hash = "sha256:ed4667d3d5d6e203a271d684d5b213ebcd618f7a8bc605752a8865eb9e67a79a"}, - {file = "SQLAlchemy-2.0.26-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:79e629df3f69f849a1482a2d063596b23e32036b83547397e68725e6e0d0a9ab"}, - {file = "SQLAlchemy-2.0.26-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4b4d848b095173e0a9e377127b814490499e55f5168f617ae2c07653c326b9d1"}, - {file = "SQLAlchemy-2.0.26-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f06afe8e96d7f221cc0b59334dc400151be22f432785e895e37030579d253c3"}, - {file = "SQLAlchemy-2.0.26-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f75ac12d302205e60f77f46bd162d40dc37438f1f8db160d2491a78b19a0bd61"}, - {file = "SQLAlchemy-2.0.26-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ec3717c1efee8ad4b97f6211978351de3abe1e4b5f73e32f775c7becec021c5c"}, - {file = "SQLAlchemy-2.0.26-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06ed4d6bb2365222fb9b0a05478a2d23ad8c1dd874047a9ae1ca1d45f18a255e"}, - {file = "SQLAlchemy-2.0.26-cp38-cp38-win32.whl", hash = "sha256:caa79a6caeb4a3cc4ddb9aba9205c383f5d3bcb60d814e87e74570514754e073"}, - {file = "SQLAlchemy-2.0.26-cp38-cp38-win_amd64.whl", hash = "sha256:996b41c38e34a980e9f810d6e2709a3196e29ee34e46e3c16f96c63da10a9da1"}, - {file = "SQLAlchemy-2.0.26-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4f57af0866f6629eae2d24d022ba1a4c1bac9b16d45027bbfcda4c9d5b0d8f26"}, - {file = "SQLAlchemy-2.0.26-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e1a532bc33163fb19c4759a36504a23e63032bc8d47cee1c66b0b70a04a0957b"}, - {file = "SQLAlchemy-2.0.26-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02a4f954ccb17bd8cff56662efc806c5301508233dc38d0253a5fdb2f33ca3ba"}, - {file = 
"SQLAlchemy-2.0.26-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a678f728fb075e74aaa7fdc27f8af8f03f82d02e7419362cc8c2a605c16a4114"}, - {file = "SQLAlchemy-2.0.26-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8b39462c9588d4780f041e1b84d2ba038ac01c441c961bbee622dd8f53dec69f"}, - {file = "SQLAlchemy-2.0.26-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98f4d0d2bda2921af5b0c2ca99207cdab00f2922da46a6336c62c8d6814303a7"}, - {file = "SQLAlchemy-2.0.26-cp39-cp39-win32.whl", hash = "sha256:6d68e6b507a3dd20c0add86ac0a0ca061d43c9a0162a122baa5fe952f14240f1"}, - {file = "SQLAlchemy-2.0.26-cp39-cp39-win_amd64.whl", hash = "sha256:fb97a9b93b953084692a52a7877957b7a88dfcedc0c5652124f5aebf5999f7fe"}, - {file = "SQLAlchemy-2.0.26-py3-none-any.whl", hash = "sha256:1128b2cdf49107659f6d1f452695f43a20694cc9305a86e97b70793a1c74eeb4"}, - {file = "SQLAlchemy-2.0.26.tar.gz", hash = "sha256:e1bcd8fcb30305e27355d553608c2c229d3e589fb7ff406da7d7e5d50fa14d0d"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d04e579e911562f1055d26dab1868d3e0bb905db3bccf664ee8ad109f035618a"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa67d821c1fd268a5a87922ef4940442513b4e6c377553506b9db3b83beebbd8"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c7a596d0be71b7baa037f4ac10d5e057d276f65a9a611c46970f012752ebf2d"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:954d9735ee9c3fa74874c830d089a815b7b48df6f6b6e357a74130e478dbd951"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5cd20f58c29bbf2680039ff9f569fa6d21453fbd2fa84dbdb4092f006424c2e6"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:03f448ffb731b48323bda68bcc93152f751436ad6037f18a42b7e16af9e91c07"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-win32.whl", hash = "sha256:d997c5938a08b5e172c30583ba6b8aad657ed9901fc24caf3a7152eeccb2f1b4"}, + {file = "SQLAlchemy-2.0.27-cp310-cp310-win_amd64.whl", hash = "sha256:eb15ef40b833f5b2f19eeae65d65e191f039e71790dd565c2af2a3783f72262f"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6c5bad7c60a392850d2f0fee8f355953abaec878c483dd7c3836e0089f046bf6"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3012ab65ea42de1be81fff5fb28d6db893ef978950afc8130ba707179b4284a"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbcd77c4d94b23e0753c5ed8deba8c69f331d4fd83f68bfc9db58bc8983f49cd"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d177b7e82f6dd5e1aebd24d9c3297c70ce09cd1d5d37b43e53f39514379c029c"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:680b9a36029b30cf063698755d277885d4a0eab70a2c7c6e71aab601323cba45"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1306102f6d9e625cebaca3d4c9c8f10588735ef877f0360b5cdb4fdfd3fd7131"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-win32.whl", hash = "sha256:5b78aa9f4f68212248aaf8943d84c0ff0f74efc65a661c2fc68b82d498311fd5"}, + {file = "SQLAlchemy-2.0.27-cp311-cp311-win_amd64.whl", hash = "sha256:15e19a84b84528f52a68143439d0c7a3a69befcd4f50b8ef9b7b69d2628ae7c4"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:0de1263aac858f288a80b2071990f02082c51d88335a1db0d589237a3435fe71"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce850db091bf7d2a1f2fdb615220b968aeff3849007b1204bf6e3e50a57b3d32"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dfc936870507da96aebb43e664ae3a71a7b96278382bcfe84d277b88e379b18"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4fbe6a766301f2e8a4519f4500fe74ef0a8509a59e07a4085458f26228cd7cc"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4535c49d961fe9a77392e3a630a626af5baa967172d42732b7a43496c8b28876"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0fb3bffc0ced37e5aa4ac2416f56d6d858f46d4da70c09bb731a246e70bff4d5"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-win32.whl", hash = "sha256:7f470327d06400a0aa7926b375b8e8c3c31d335e0884f509fe272b3c700a7254"}, + {file = "SQLAlchemy-2.0.27-cp312-cp312-win_amd64.whl", hash = "sha256:f9374e270e2553653d710ece397df67db9d19c60d2647bcd35bfc616f1622dcd"}, + {file = "SQLAlchemy-2.0.27-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e97cf143d74a7a5a0f143aa34039b4fecf11343eed66538610debc438685db4a"}, + {file = "SQLAlchemy-2.0.27-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7b5a3e2120982b8b6bd1d5d99e3025339f7fb8b8267551c679afb39e9c7c7f1"}, + {file = "SQLAlchemy-2.0.27-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e36aa62b765cf9f43a003233a8c2d7ffdeb55bc62eaa0a0380475b228663a38f"}, + {file = "SQLAlchemy-2.0.27-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5ada0438f5b74c3952d916c199367c29ee4d6858edff18eab783b3978d0db16d"}, + {file = "SQLAlchemy-2.0.27-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b1d9d1bfd96eef3c3faedb73f486c89e44e64e40e5bfec304ee163de01cf996f"}, + {file = "SQLAlchemy-2.0.27-cp37-cp37m-win32.whl", hash = "sha256:ca891af9f3289d24a490a5fde664ea04fe2f4984cd97e26de7442a4251bd4b7c"}, + {file = "SQLAlchemy-2.0.27-cp37-cp37m-win_amd64.whl", hash = "sha256:fd8aafda7cdff03b905d4426b714601c0978725a19efc39f5f207b86d188ba01"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ec1f5a328464daf7a1e4e385e4f5652dd9b1d12405075ccba1df842f7774b4fc"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ad862295ad3f644e3c2c0d8b10a988e1600d3123ecb48702d2c0f26771f1c396"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48217be1de7d29a5600b5c513f3f7664b21d32e596d69582be0a94e36b8309cb"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e56afce6431450442f3ab5973156289bd5ec33dd618941283847c9fd5ff06bf"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:611068511b5531304137bcd7fe8117c985d1b828eb86043bd944cebb7fae3910"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b86abba762ecfeea359112b2bb4490802b340850bbee1948f785141a5e020de8"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-win32.whl", hash = "sha256:30d81cc1192dc693d49d5671cd40cdec596b885b0ce3b72f323888ab1c3863d5"}, + {file = "SQLAlchemy-2.0.27-cp38-cp38-win_amd64.whl", hash = "sha256:120af1e49d614d2525ac247f6123841589b029c318b9afbfc9e2b70e22e1827d"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:d07ee7793f2aeb9b80ec8ceb96bc8cc08a2aec8a1b152da1955d64e4825fcbac"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cb0845e934647232b6ff5150df37ceffd0b67b754b9fdbb095233deebcddbd4a"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fc19ae2e07a067663dd24fca55f8ed06a288384f0e6e3910420bf4b1270cc51"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b90053be91973a6fb6020a6e44382c97739736a5a9d74e08cc29b196639eb979"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2f5c9dfb0b9ab5e3a8a00249534bdd838d943ec4cfb9abe176a6c33408430230"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:33e8bde8fff203de50399b9039c4e14e42d4d227759155c21f8da4a47fc8053c"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-win32.whl", hash = "sha256:d873c21b356bfaf1589b89090a4011e6532582b3a8ea568a00e0c3aab09399dd"}, + {file = "SQLAlchemy-2.0.27-cp39-cp39-win_amd64.whl", hash = "sha256:ff2f1b7c963961d41403b650842dc2039175b906ab2093635d8319bef0b7d620"}, + {file = "SQLAlchemy-2.0.27-py3-none-any.whl", hash = "sha256:1ab4e0448018d01b142c916cc7119ca573803a4745cfe341b8f95657812700ac"}, + {file = "SQLAlchemy-2.0.27.tar.gz", hash = "sha256:86a6ed69a71fe6b88bf9331594fa390a2adda4a49b5c06f98e47bf0d392534f8"}, ] [package.dependencies] @@ -1821,6 +1835,17 @@ dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2 doc = ["cairosvg (>=2.5.2,<3.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pillow (>=9.3.0,<10.0.0)"] test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"] +[[package]] +name = "types-colorama" +version = "0.4.15.20240205" +description = "Typing stubs for colorama" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-colorama-0.4.15.20240205.tar.gz", hash = "sha256:7ae4f58d407d387f4f98b24d81e1b7657ec754ea1dc4619ae5bd27f0c367637e"}, + {file = "types_colorama-0.4.15.20240205-py3-none-any.whl", hash = "sha256:3ab26dcd76d2f13b1b795ed5c87a1a1a29331ea64cf614bb6ae958a3cebc3a53"}, +] + [[package]] name = "types-pyyaml" version = "6.0.12.12" @@ -2010,4 +2035,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "b37d2542dbbbd85c8afc9a16dde50037737f87d250cec6c83e377e1266a19997" +content-hash = "e61409e5b61d5b67d01aa550723a3bff8efb5a53aba5ca034fbb038c5408f89d" diff --git a/llm-qa/pyproject.toml b/llm-qa/pyproject.toml index b1db8c6..2d7737e 100644 --- a/llm-qa/pyproject.toml +++ b/llm-qa/pyproject.toml @@ -11,17 +11,20 @@ langchain-community = "^0.0.19" langchain = "^0.1.6" fastapi = "^0.109.2" uvicorn = "^0.27.1" -httpx = "^0.26.0" +httpx = "^0.25.0" pydantic-settings = "^2.1.0" pydantic = "^2.6.1" qdrant-client = "^1.7.3" typer = "^0.9.0" +colorama = "^0.4.6" +ollama = "^0.1.6" [tool.poetry.group.dev.dependencies] ruff = "0.2.1" mypy = "^1.8.0" ipython = "^8.21.0" types-pyyaml = "^6.0.12.12" +types-colorama = "^0.4.15.20240205" [build-system] requires = ["poetry-core"] @@ -34,6 +37,10 @@ plugins = ["pydantic.mypy"] modules = ["llm_qa"] strict = true +[[tool.mypy.overrides]] +module = "ollama" +ignore_missing_imports = true + [tool.ruff] target-version = 
"py312" preview = true