Add new features and improvements

Martin Popovski 2024-02-15 02:10:58 +00:00
parent abca2365bf
commit c68b66e4f6
Signed by: martinkozle
GPG Key ID: 0A5F2984DB008108
28 changed files with 881 additions and 233 deletions

View File

@@ -0,0 +1,5 @@
TEI_MODEL_ID=BAAI/bge-large-en-v1.5
TEI_MODEL_REVISION=7774ef464da42fb9e231acb5ac59e9be9011cd35
TEI_RERANK_MODEL_ID=BAAI/bge-reranker-large
TEI_RERANK_MODEL_REVISION=b1a3b1492bcfa68f2b50d983a417601313cd85df
OLLAMA_MODEL_NAME=openchat:7b-v3.5-0106-q4_K_M

View File

@@ -13,7 +13,6 @@ services:
- TEI_BASE_URL=http://text-embeddings-inference
- TEI_RERANK_BASE_URL=http://text-embeddings-inference-rerank
- OLLAMA_BASE_URL=http://ollama:11434
- OLLAMA_MODEL_NAME=${OLLAMA_MODEL_NAME}
command: sleep infinity
qdrant:

llm-qa/.env.default Normal file
View File

@@ -0,0 +1,20 @@
TEI_DOCUMENT_PREFIX='passage: '
TEI_QUERY_PREFIX='query: '
RETRIEVE_COUNT=15
RERANK_COUNT=6
OLLAMA_MODEL_NAME=openchat:7b-v3.5-0106-q4_K_M
OLLAMA_AUTO_PULL=true
PROMPTS__CHAT_PROMPTS__SYSTEM_MESSAGE="You are a friendly and helpful chatbot specializing in SageMaker questions. Ensure that your responses are concise and informative. Leverage the given context to provide accurate and relevant information. If uncertain, politely ask for clarification. Always respond in the same language as the user. Let's make the user experience smooth and efficient!"
PROMPTS__CHAT_PROMPTS__LAST_HUMAN_MESSAGE="# Context:
{context}
# Message:
{last_human_message}"
PROMPTS__CONDENSE_QUESTION_PROMPTS__SYSTEM_MESSAGE="Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language. Output just the question, nothing else."
PROMPTS__CONDENSE_QUESTION_PROMPTS__HUMAN_MESSAGE="# Chat History:
{conversation_history}
# Follow Up Message:
{last_human_message}"

View File

@@ -1 +1,117 @@
# LLM QA
A proof-of-concept question-answering system for different types of text data.
Currently implemented:
- Plain text
- Markdown
## Key Features
### Dockerized development environment
- Easy, quick and reproducible setup
### Automatic pull and serve of declared models
- Ollama models are automatically pulled and served by the FastAPI server (a minimal sketch of the pull check follows below)
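A minimal sketch of the pull check, using the official `ollama` Python client (the version added in this commit in `llm_qa/dependencies.py` does the same asynchronously; the helper name here is an assumption):
```python
import ollama


def ensure_model_available(base_url: str, model_name: str) -> None:
    # Hypothetical helper: pull the model only if Ollama doesn't have it yet
    client = ollama.Client(host=base_url)
    try:
        # `show` raises ResponseError when the model is missing locally
        client.show(model_name)
    except ollama.ResponseError:
        client.pull(model_name)
```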
### Detailed logging
- Key potential bottlenecks are timed and logged (the timing wrapper is sketched after the examples below)
#### Upsert
```console
2024-02-15 01:10:54,341 - llm_qa.services.upsert - INFO - Split `MARKDOWN` type text into 8 document chunks in 0.01 seconds
2024-02-15 01:10:54,759 - httpx - INFO - HTTP Request: POST http://text-embeddings-inference/embed "HTTP/1.1 200 OK"
2024-02-15 01:11:03,121 - httpx - INFO - HTTP Request: POST http://text-embeddings-inference/embed "HTTP/1.1 200 OK"
2024-02-15 01:11:03,140 - llm_qa.services.upsert - INFO - Upserted 8 document chunks to Qdrant collection `showcase` in 8.80 seconds
2024-02-15 01:11:03,142 - uvicorn.access - INFO - 127.0.0.1:55868 - "POST /api/v1/upsert-text HTTP/1.1" 200 OK
```
#### Chat
```console
2024-02-15 01:02:03,408 - llm_qa.dependencies - INFO - Ollama auto-pull enabled, checking if model is available
2024-02-15 01:02:03,441 - httpx - INFO - HTTP Request: POST http://ollama:11434/api/show "HTTP/1.1 200 OK"
2024-02-15 01:02:03,441 - llm_qa.dependencies - INFO - Ollama model `openchat:7b-v3.5-0106-q4_K_M` already exists
2024-02-15 01:02:03,645 - httpx - INFO - HTTP Request: POST http://text-embeddings-inference/embed "HTTP/1.1 200 OK"
2024-02-15 01:02:03,653 - llm_qa.chains.time_logger - INFO - Chain `VectorStoreRetriever` finished in 0.08 seconds
2024-02-15 01:02:23,192 - httpx - INFO - HTTP Request: POST http://text-embeddings-inference-rerank/rerank "HTTP/1.1 200 OK"
2024-02-15 01:02:23,194 - llm_qa.chains.time_logger - INFO - Chain `RerankAndTake` finished in 19.54 seconds
2024-02-15 01:02:29,817 - llm_qa.chains.time_logger - INFO - Chain `ChatOllama` finished in 6.62 seconds
2024-02-15 01:02:29,817 - llm_qa.services.chat - INFO - Chat chain finished in 26.27 seconds
2024-02-15 01:02:29,823 - uvicorn.access - INFO - 127.0.0.1:50100 - "POST /api/v1/chat HTTP/1.1" 200 OK
```
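The timing is done with a small wrapper that can decorate any LangChain runnable; this is essentially the `time_logger` chain added in this commit:
```python
import logging
import time

from langchain_core.runnables import Runnable, chain
from langchain_core.runnables.utils import Input, Output

logger = logging.getLogger(__name__)


def time_logger(
    runnable: Runnable[Input, Output], name: str | None = None
) -> Runnable[Input, Output]:
    @chain
    async def time_logger_chain(input: Input) -> Output:
        # Time a single async invocation of the wrapped runnable and log it
        start_time = time.time()
        output = await runnable.ainvoke(input)
        logger.info(
            "Chain `%s` finished in %.2f seconds",
            name or runnable.get_name(),
            time.time() - start_time,
        )
        return output

    return time_logger_chain
```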
### Hierarchical document chunking
- Hierarchical text, such as markdown, is split into document chunks by headers
- All previous parent headers are also included in the chunk, separated by `...`
- This enriches the context of the chunk and solves the problem of global context being lost when splitting the text (a sketch of the approach follows the example below)
Example:
````md
# AWS::SageMaker::ModelQualityJobDefinition MonitoringGroundTruthS3Input<a name="aws-properties-sagemaker-modelqualityjobdefinition-monitoringgroundtruths3input"></a>
...
## Syntax<a name="aws-properties-sagemaker-modelqualityjobdefinition-monitoringgroundtruths3input-syntax"></a>
...
### YAML<a name="aws-properties-sagemaker-modelqualityjobdefinition-monitoringgroundtruths3input-syntax.yaml"></a>

```
[S3Uri](#cfn-sagemaker-modelqualityjobdefinition-monitoringgroundtruths3input-s3uri): String
```
````
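A minimal sketch of how the header path is rebuilt for each chunk, assuming LangChain's `MarkdownHeaderTextSplitter` configured with the header markers themselves as metadata keys (the full version is in the markdown splitter chain added by this commit):
```python
from langchain.text_splitter import MarkdownHeaderTextSplitter

headers_to_split_on = [("#", "#"), ("##", "##"), ("###", "###")]
splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)


def split_with_parent_headers(text: str) -> list[str]:
    documents = splitter.split_text(text)
    # Prepend every parent header found in the chunk's metadata, ordered from
    # `#` to `###`, joined by `...` to mark the elided text between headers
    return [
        "\n...\n".join(
            f"{header_key} {document.metadata[header_key]}"
            for _, header_key in sorted(headers_to_split_on, key=lambda h: len(h[0]))
            if header_key in document.metadata
        )
        + f"\n{document.page_content}"
        for document in documents
    ]
```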
### Retrieval query rewriting
- After the first message, subsequent messages are rewritten to include the context of previous messages
- This allows for a more natural conversation flow and retrieval of more relevant chunks (a sketch of the rewriting step follows the example below)
Example:
```md
### User: What are all AWS regions where SageMaker is available?
### AI: SageMaker is available in most AWS regions, except for the following: Asia Pacific (Jakarta), Africa (Cape Town), Middle East (UAE), Asia Pacific (Hyderabad), Asia Pacific (Osaka), Asia Pacific (Melbourne), Europe (Milan), AWS GovCloud (US-East), Europe (Spain), and Europe (Zurich) Region.
### User: What about the Bedrock service?
### Retrieval Query: What is the availability of AWS SageMaker in relation to the Bedrock service?
```
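A minimal sketch of the rewriting step, assuming any LangChain chat model and the condense-question prompts from `.env.default` (the chain added in this commit additionally uses few-shot examples and short-circuits single-message conversations):
```python
from langchain.chat_models.base import BaseChatModel
from langchain.prompts import ChatPromptTemplate

condense_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "Given the following conversation and a follow up question, rephrase "
        "the follow up question to be a standalone question, in its original "
        "language. Output just the question, nothing else.",
    ),
    (
        "human",
        "# Chat History:\n{conversation_history}\n"
        "# Follow Up Message:\n{last_human_message}",
    ),
])


async def condense_question(
    chat_model: BaseChatModel, conversation_history: str, last_human_message: str
) -> str:
    # Fold the chat history into a standalone retrieval query
    response = await (condense_prompt | chat_model).ainvoke({
        "conversation_history": conversation_history,
        "last_human_message": last_human_message,
    })
    return str(response.content)
```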
### Reranking
- Retrieval of a larger number of document chunks is first performed using a vector store
- Then, the chunks are reranked using a reranker model
- This process more precisely selects the most relevant chunks for the user query (a sketch of the flow follows below)
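A minimal sketch of the retrieve-then-rerank flow; the `rerank` callable stands in for the TEI `/rerank` endpoint wrapped by `TeiEmbeddings.arerank` in this commit and is assumed to return `(index, score)` pairs:
```python
from collections.abc import Awaitable, Callable

from langchain_core.documents import Document
from langchain_core.vectorstores import VectorStoreRetriever

Reranker = Callable[[str, list[str]], Awaitable[list[tuple[int, float]]]]


async def retrieve_and_rerank(
    retriever: VectorStoreRetriever,  # configured with k=RETRIEVE_COUNT
    rerank: Reranker,
    query: str,
    rerank_count: int,
) -> list[Document]:
    # Recall-oriented first pass: embedding similarity search
    documents = await retriever.ainvoke(query)
    # Precision-oriented second pass: rerank and keep only the best chunks
    scores = await rerank(query, [document.page_content for document in documents])
    ranked = sorted(scores, key=lambda pair: pair[1], reverse=True)
    return [documents[index] for index, _ in ranked[:rerank_count]]
```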
## Development
### Setup
First copy the `.devcontainer/.env.example` file to `.devcontainer/.env` and adjust the settings and models to your needs.
Then simply open the project devcontainer in a compatible IDE.
This will set up all required tools and project dependencies for Python development.
It will also run Docker containers for all required services.
### Configuration
Create a `llm-qa/.env` file to selectively override the default environment variables in `llm-qa/.env.default`.
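This layering works because pydantic-settings reads every file in the `env_file` tuple, with later files taking priority, and `env_nested_delimiter="__"` maps variables such as `PROMPTS__CHAT_PROMPTS__SYSTEM_MESSAGE` onto nested models. A trimmed sketch of the `Settings` class from this commit:
```python
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        # `.env` is listed after `.env.default`, so its values take priority
        env_file=(".env.default", ".env"),
        env_file_encoding="utf-8",
        extra="ignore",
        env_nested_delimiter="__",
    )

    retrieve_count: int
    rerank_count: int | None
    ollama_model_name: str
```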
### Running
To run the FastAPI server, run the `llm_qa.web` submodule:
```bash
poetry run python -m llm_qa.web
```
To run the minimal CLI client, run the `llm_qa.client` submodule:
```bash
poetry run python -m llm_qa.client
```
## Deployment
Not yet implemented.

View File

@@ -0,0 +1,102 @@
from operator import itemgetter
from typing import TypedDict
from langchain.chat_models.base import BaseChatModel
from langchain.prompts import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
MessagesPlaceholder,
SystemMessagePromptTemplate,
)
from langchain_core.documents import Document
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.runnables import (
Runnable,
RunnableLambda,
RunnablePassthrough,
chain,
)
from langchain_core.vectorstores import VectorStoreRetriever
from llm_qa.chains.condense_question import get_condense_question_chain
from llm_qa.chains.rerank import RerankRunnableInput, get_rerank_chain
from llm_qa.chains.time_logger import time_logger
from llm_qa.embeddings.tei import TeiEmbeddings
from llm_qa.models.prompts import Prompts
class ChatRunnableOutput(TypedDict):
response: AIMessage
documents: list[Document]
retrieval_query: str
@chain
def combine_documents(documents: list[Document]) -> str:
return "\n\n".join(document.page_content for document in documents)
def get_chat_chain(
prompts: Prompts,
vectorstore_retriever: VectorStoreRetriever,
tei_rerank_embeddings: TeiEmbeddings,
rerank_count: int | None,
chat_model: BaseChatModel,
) -> Runnable[list[BaseMessage], ChatRunnableOutput]:
chat_prompt_template = ChatPromptTemplate.from_messages([
SystemMessagePromptTemplate.from_template(prompts.chat_prompts.system_message),
MessagesPlaceholder(variable_name="conversation_history"),
HumanMessagePromptTemplate.from_template(
prompts.chat_prompts.last_human_message
),
])
condense_question_chain = get_condense_question_chain(
prompts=prompts.condense_question_prompts, chat_model=chat_model
)
rerank_chain = get_rerank_chain(tei_rerank_embeddings=tei_rerank_embeddings)
# This branch isn't made with a RunnableBranch because we know the value of
# rerank_count at chain construction time, so we can use a simple if statement
rerank_and_take_chain: Runnable[RerankRunnableInput, list[Document]]
if rerank_count is None:
rerank_and_take_chain = RunnablePassthrough[RerankRunnableInput]() | itemgetter(
"documents"
)
else:
rerank_and_take_chain = rerank_chain | RunnableLambda[
list[Document], list[Document]
](lambda x: x[:rerank_count])
return (
RunnablePassthrough[list[BaseMessage]]()
| {
"messages": RunnablePassthrough(),
"retrieval_query": condense_question_chain,
}
| {
"conversation_history": itemgetter("messages")
| RunnableLambda[list[BaseMessage], list[BaseMessage]](lambda x: x[:-1]),
"last_human_message": itemgetter("messages")
| RunnableLambda[list[BaseMessage], str](lambda x: x[-1].content),
"retrieval_query": itemgetter("retrieval_query"),
"documents": {
"query": itemgetter("retrieval_query"),
"documents": itemgetter("retrieval_query")
| time_logger(vectorstore_retriever),
}
| time_logger(rerank_and_take_chain, name="RerankAndTake"),
}
| {
"retrieval_query": itemgetter("retrieval_query"),
"documents": itemgetter("documents"),
"response": {
"conversation_history": itemgetter("conversation_history"),
"last_human_message": itemgetter("last_human_message"),
"context": itemgetter("documents") | combine_documents,
}
| chat_prompt_template
| time_logger(chat_model),
}
)

View File

@@ -0,0 +1,72 @@
from typing import TypedDict
from langchain.chat_models.base import BaseChatModel
from langchain.prompts import (
AIMessagePromptTemplate,
ChatPromptTemplate,
FewShotChatMessagePromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain_core.documents import Document
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_core.runnables import (
Runnable,
RunnableBranch,
RunnableLambda,
)
from llm_qa.models.prompts import CondenseQuestionPrompts
from llm_qa.prompt_utils import format_messages
class ChatRunnableOutput(TypedDict):
response: AIMessage
documents: list[Document]
retrieval_query: str
def get_condense_question_chain(
prompts: CondenseQuestionPrompts, chat_model: BaseChatModel
) -> Runnable[list[BaseMessage], str]:
human_message_prompt_template = HumanMessagePromptTemplate.from_template(
prompts.human_message
)
examples = [
{
"conversation_history": format_messages([
HumanMessage(content="What will the weather be like today?")
]),
"last_human_message": "What about tomorrow?",
"response": "What will the weather be like tomorrow?",
},
]
example_prompt = ChatPromptTemplate.from_messages([
human_message_prompt_template,
AIMessagePromptTemplate.from_template("{response}"),
])
few_shot_prompt = FewShotChatMessagePromptTemplate(
examples=examples, example_prompt=example_prompt
)
chat_prompt_template = ChatPromptTemplate.from_messages([
SystemMessagePromptTemplate.from_template(prompts.system_message),
few_shot_prompt,
human_message_prompt_template,
])
# Returns the last human message if there is only one message
return RunnableBranch[list[BaseMessage], str](
(lambda x: len(x) == 0, lambda _: ""),
(lambda x: len(x) == 1, lambda x: x[0].content),
{
"conversation_history": RunnableLambda[list[BaseMessage], str](
lambda x: format_messages(x[:-1])
),
"last_human_message": RunnableLambda[list[BaseMessage], BaseMessage](
lambda x: x[-1]
),
}
| chat_prompt_template
| chat_model
| RunnableLambda[AIMessage, str](lambda x: x.content),
)

View File

@@ -0,0 +1,36 @@
from typing import TypedDict
from langchain_core.documents import Document
from langchain_core.runnables import Runnable, chain
from llm_qa.embeddings.tei import TeiEmbeddings
async def rerank_documents(
tei_rerank_embeddings: TeiEmbeddings, query: str, documents: list[Document]
) -> list[Document]:
if len(documents) == 0:
return []
rerank_responses = await tei_rerank_embeddings.arerank(
query, [document.page_content for document in documents]
)
sorted_rerank = sorted(rerank_responses, key=lambda x: x.score, reverse=True)
return [documents[rerank.index] for rerank in sorted_rerank]
class RerankRunnableInput(TypedDict):
query: str
documents: list[Document]
def get_rerank_chain(
tei_rerank_embeddings: TeiEmbeddings,
) -> Runnable[RerankRunnableInput, list[Document]]:
@chain
async def rerank_documents_chain(input: RerankRunnableInput) -> list[Document]:
return await rerank_documents(
tei_rerank_embeddings, input["query"], input["documents"]
)
return rerank_documents_chain

View File

@@ -2,6 +2,25 @@ from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain_core.runnables import Runnable, chain
def split_markdown_text_by_headers(
markdown_header_text_splitter: MarkdownHeaderTextSplitter, text: str
) -> list[str]:
documents = markdown_header_text_splitter.split_text(text)
# Add all parent headers to the page content
return [
"\n...\n".join(
f"{header_key} {document.metadata[header_key]}"
for _, header_key in sorted(
markdown_header_text_splitter.headers_to_split_on,
key=lambda x: len(x[0]),
)
if header_key in document.metadata
)
+ f"\n{document.page_content}"
for document in documents
]
def get_markdown_header_text_splitter_chain(
markdown_header_text_splitter: MarkdownHeaderTextSplitter,
) -> Runnable[str, list[str]]:
@@ -10,20 +29,9 @@ def get_markdown_header_text_splitter_chain(
@chain
def markdown_header_text_splitter_chain(text: str) -> list[str]:
documents = markdown_header_text_splitter.split_text(text)
# Add all parent headers to the page content
return [
"\n...\n".join(
f"{header_key} {document.metadata[header_key]}"
for _, header_key in sorted(
markdown_header_text_splitter.headers_to_split_on,
key=lambda x: len(x[0]),
)
if header_key in document.metadata
)
+ f"\n{document.page_content}"
for document in documents
]
return split_markdown_text_by_headers(
markdown_header_text_splitter=markdown_header_text_splitter, text=text
)
return markdown_header_text_splitter_chain

View File

@@ -0,0 +1,25 @@
import logging
import time
from langchain_core.runnables import Runnable, chain
from langchain_core.runnables.utils import Input, Output
logger = logging.getLogger(__name__)
def time_logger(
runnable: Runnable[Input, Output], name: str | None = None
) -> Runnable[Input, Output]:
@chain
async def time_logger_chain(input: Input) -> Output:
start_time = time.time()
output = await runnable.ainvoke(input)
elapsed_time = time.time() - start_time
logger.info(
"Chain `%s` finished in %.2f seconds",
name or runnable.get_name(),
elapsed_time,
)
return output
return time_logger_chain

View File

@@ -4,16 +4,19 @@ from typing import Annotated, Optional
import httpx
import typer
from colorama import Fore, Style
from llm_qa.logging import load_logging_config
from llm_qa.models.chat import ChatMessage, ChatRequest, ChatResponse
from llm_qa.models.prompts import ChatPrompts
from llm_qa.models.prompts import (
OptionalChatPrompts,
OptionalCondenseQuestionPrompts,
OptionalPrompts,
)
from llm_qa.models.upsert import TextType, UpsertTextRequest, UpsertTextResponse
logger = logging.getLogger(__name__)
SYSTEM_MESSAGE_PROMPT_TEMPLATE_PATH = "../data/prompts/system_message.txt"
LAST_HUMAN_MESSAGE_PROMPT_TEMPLATE_PATH = "../data/prompts/last_human_message.txt"
CHAT_URL = "http://localhost:8000/api/v1/chat"
UPSERT_URL = "http://localhost:8000/api/v1/upsert-text"
@@ -29,33 +32,58 @@ app = typer.Typer()
def chat(
collection: Annotated[str, typer.Option()],
chat_url: Annotated[str, typer.Option()] = CHAT_URL,
system_message_prompt_template_path: Annotated[
chat_system_message_prompt_path: Annotated[
Optional[typer.FileText], typer.Option() # noqa: UP007
] = None,
last_human_message_prompt_template_path: Annotated[
chat_last_human_message_prompt_path: Annotated[
Optional[typer.FileText], typer.Option() # noqa: UP007
] = None,
condense_question_system_message_prompt_path: Annotated[
Optional[typer.FileText], typer.Option() # noqa: UP007
] = None,
condense_question_human_message_prompt_path: Annotated[
Optional[typer.FileText], typer.Option() # noqa: UP007
] = None,
) -> None:
system_message_prompt_template = (
system_message_prompt_template_path.read()
if system_message_prompt_template_path
else load_prompt(SYSTEM_MESSAGE_PROMPT_TEMPLATE_PATH)
chat_system_message_prompt = (
chat_system_message_prompt_path.read()
if chat_system_message_prompt_path
else None
)
last_human_message_prompt_template = (
last_human_message_prompt_template_path.read()
if last_human_message_prompt_template_path
else load_prompt(LAST_HUMAN_MESSAGE_PROMPT_TEMPLATE_PATH)
chat_last_human_message_prompt = (
chat_last_human_message_prompt_path.read()
if chat_last_human_message_prompt_path
else None
)
prompts = ChatPrompts(
system_message=system_message_prompt_template,
last_human_message=last_human_message_prompt_template,
condense_question_system_message_prompt = (
condense_question_system_message_prompt_path.read()
if condense_question_system_message_prompt_path
else None
)
condense_question_human_message_prompt = (
condense_question_human_message_prompt_path.read()
if condense_question_human_message_prompt_path
else None
)
prompts = OptionalPrompts(
chat_prompts=OptionalChatPrompts(
system_message=chat_system_message_prompt,
last_human_message=chat_last_human_message_prompt,
),
condense_question_prompts=OptionalCondenseQuestionPrompts(
system_message=condense_question_system_message_prompt,
human_message=condense_question_human_message_prompt,
),
)
client = httpx.Client(timeout=180.0)
messages: list[ChatMessage] = []
while True:
message_content = input("### User: ")
message_content = input(f"{Fore.GREEN}### User: {Style.RESET_ALL}")
if message_content in {"/exit", "/quit", "/bye"}:
break
if message_content in {"/clear", "/cls", "/reset"}:
messages.clear()
continue
message = ChatMessage.new_human(message_content)
messages.append(message)
response = client.post(
@@ -67,15 +95,23 @@ def chat(
response.raise_for_status()
chat_response = ChatResponse.model_validate_json(response.content)
messages.append(chat_response.response_message)
print("### AI:", chat_response.response_message.content)
print(
"----------\n### Sources:\n"
+ "\n\n".join(
f"Source {i + 1}:\n{source.content}"
for i, source in enumerate(chat_response.sources)
)
+ "\n----------"
f"{Fore.RED}### AI: "
f"{Style.RESET_ALL}{chat_response.response_message.content}"
)
print(Fore.RED + "-" * 16)
print(Style.RESET_ALL)
print(
f"{Fore.CYAN}### Retrieval Query: "
f"{Style.RESET_ALL}{chat_response.retrieval_query}"
)
sources_str = "\n\n".join(
f"{Style.DIM}Source {i + 1}:\n{Style.RESET_ALL}{source.content}"
for i, source in enumerate(chat_response.sources)
)
print(f"{Fore.YELLOW}### Sources:{Style.RESET_ALL}\n{sources_str}")
print(Fore.YELLOW + "-" * 16)
print(Style.RESET_ALL)
@app.command()
@@ -84,6 +120,7 @@ def upsert(
collection: Annotated[str, typer.Option()],
upsert_url: Annotated[str, typer.Option()] = UPSERT_URL,
) -> None:
load_logging_config()
client = httpx.Client(timeout=180.0)
for file in files:
logger = logging.getLogger(__name__)
@@ -106,9 +143,8 @@ def upsert(
)
response.raise_for_status()
upsert_response = UpsertTextResponse.model_validate_json(response.content)
logger.info("Upserted %d document chunks", upsert_response.num_documents)
logger.info("Upserted %d document chunks", upsert_response.num_document_chunks)
if __name__ == "__main__":
load_logging_config()
app()

View File

@@ -1,14 +1,16 @@
import logging
from typing import Annotated
import ollama
from fastapi import Depends
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager
from langchain.chat_models.base import BaseChatModel
from langchain.chat_models.ollama import ChatOllama
from llm_qa.models.tei import TeiConfig
from llm_qa.settings import Settings
logger = logging.getLogger(__name__)
def settings() -> Settings:
return Settings()
@@ -30,12 +32,22 @@ def tei_rerank_config(
)
def chat_model(
async def chat_model(
settings: Annotated[Settings, Depends(settings)],
) -> BaseChatModel:
if settings.ollama_auto_pull:
logger.info("Ollama auto-pull enabled, checking if model is available")
ollama_client = ollama.AsyncClient(host=settings.ollama_base_url)
try:
await ollama_client.show(settings.ollama_model_name)
logger.info("Ollama model `%s` already exists", settings.ollama_model_name)
except ollama.ResponseError:
logger.info(
"Ollama model `%s` not found, pulling...", settings.ollama_model_name
)
await ollama_client.pull(settings.ollama_model_name)
return ChatOllama(
base_url=settings.ollama_base_url,
model=settings.ollama_model_name,
verbose=True,
callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
)

View File

@@ -1,3 +1,4 @@
import json
from typing import override
from urllib.parse import urljoin
@@ -51,6 +52,13 @@ class TeiEmbeddings(Embeddings):
e.add_note(note)
case _:
raise
TeiEmbeddings._handle_error_response(response)
@staticmethod
def _handle_error_response(response: httpx.Response) -> None:
response_json = json.loads(response.content)
if "error" in response_json:
raise ValueError(response_json["error"])
def _embed(self, text: str | list[str]) -> list[list[float]]:
"""Embed text."""

View File

@@ -1,9 +1,14 @@
import logging.config
import pathlib
from typing import Any, cast
import yaml
def load_logging_config() -> None:
logging_config = yaml.safe_load(pathlib.Path("logging.yaml").read_text())
logging.config.dictConfig(logging_config)
def load_logging_config(dry_run: bool = False) -> dict[str, Any]:
logging_config = cast(
dict[str, Any], yaml.safe_load(pathlib.Path("logging.yaml").read_text())
)
if not dry_run:
logging.config.dictConfig(logging_config)
return logging_config

View File

@@ -2,9 +2,9 @@ from enum import StrEnum
from typing import Self
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from pydantic import BaseModel
from pydantic import BaseModel, Field
from llm_qa.models.prompts import ChatPrompts
from llm_qa.models.prompts import OptionalPrompts
from llm_qa.models.source import Source
from llm_qa.typing_utils import assert_never
@@ -15,8 +15,8 @@ class MessageType(StrEnum):
class ChatMessage(BaseModel):
content: str
type: MessageType
content: str = Field(..., description="Content of the message")
type: MessageType = Field(..., description="Type of the message, HUMAN or AI")
@classmethod
def new_human(cls, content: str) -> Self:
@@ -47,11 +47,36 @@ class ChatMessage(BaseModel):
class ChatRequest(BaseModel):
messages: list[ChatMessage]
collection_name: str
prompts: ChatPrompts | None = None
messages: list[ChatMessage] = Field(
...,
min_length=1,
description="Chat message history",
examples=[[ChatMessage(content="What are you?", type=MessageType.HUMAN)]],
)
collection_name: str = Field(
...,
description="Name of the collection to retrieve documents from",
examples=["collection"],
)
prompts: OptionalPrompts | None = Field(
None,
description="Prompts to use for various LLM chains"
", if not provided, uses prompts from settings",
examples=[OptionalPrompts()],
)
class ChatResponse(BaseModel):
response_message: ChatMessage
sources: list[Source]
response_message: ChatMessage = Field(
..., description="Response message from the chat chain", examples=["I am an AI"]
)
sources: list[Source] = Field(
...,
description="Sources used to generate the response",
examples=[[Source(content="Source chunk")]],
)
retrieval_query: str = Field(
...,
description="Retrieval query used to retrieve the sources",
examples=["Condensed query"],
)

View File

@@ -1,6 +1,86 @@
from typing import Self
from pydantic import BaseModel
class ChatPrompts(BaseModel):
class OptionalChatPrompts(BaseModel):
system_message: str | None = None
last_human_message: str | None = None
def __or__(self, __value: "OptionalChatPrompts | None") -> Self:
if __value is None:
return self
return self.__class__(
system_message=__value.system_message or self.system_message,
last_human_message=__value.last_human_message or self.last_human_message,
)
class ChatPrompts(OptionalChatPrompts):
system_message: str
last_human_message: str
@classmethod
def from_optional(cls, optional: OptionalChatPrompts) -> Self:
return cls(
system_message=optional.system_message,
last_human_message=optional.last_human_message,
)
class OptionalCondenseQuestionPrompts(BaseModel):
system_message: str | None = None
human_message: str | None = None
def __or__(self, __value: "OptionalCondenseQuestionPrompts | None") -> Self:
if __value is None:
return self
return self.__class__(
system_message=__value.system_message or self.system_message,
human_message=__value.human_message or self.human_message,
)
class CondenseQuestionPrompts(OptionalCondenseQuestionPrompts):
system_message: str
human_message: str
@classmethod
def from_optional(cls, optional: OptionalCondenseQuestionPrompts) -> Self:
return cls(
system_message=optional.system_message,
human_message=optional.human_message,
)
class OptionalPrompts(BaseModel):
chat_prompts: OptionalChatPrompts = OptionalChatPrompts()
condense_question_prompts: OptionalCondenseQuestionPrompts = (
OptionalCondenseQuestionPrompts()
)
def __or__(self, __value: "OptionalPrompts | None") -> Self:
if __value is None:
return self
return self.__class__(
chat_prompts=self.chat_prompts | __value.chat_prompts,
condense_question_prompts=self.condense_question_prompts
| __value.condense_question_prompts,
)
class Prompts(OptionalPrompts):
chat_prompts: ChatPrompts
condense_question_prompts: CondenseQuestionPrompts
@classmethod
def from_optional(cls, optional: OptionalPrompts) -> Self:
return cls(
chat_prompts=ChatPrompts.from_optional(optional.chat_prompts),
condense_question_prompts=CondenseQuestionPrompts.from_optional(
optional.condense_question_prompts
),
)

View File

@@ -25,7 +25,7 @@ class RerankRequest(BaseModel):
texts: list[str]
raw_scores: bool = False
return_text: bool = False
truncate: bool = False
truncate: bool = True
class RerankResponse(BaseModel):

View File

@@ -1,6 +1,6 @@
from enum import StrEnum
from pydantic import BaseModel
from pydantic import BaseModel, Field
class TextType(StrEnum):
@@ -9,10 +9,22 @@ class TextType(StrEnum):
class UpsertTextRequest(BaseModel):
text: str
type: TextType
collection: str
text: str = Field(
..., description="Text to upsert", examples=["Context document text"]
)
type: TextType = Field(
...,
description="Type of the text, will use different splitting logic",
examples=[TextType.PLAIN_TEXT],
)
collection: str = Field(
...,
description="Name of the collection to upsert into",
examples=["collection"],
)
class UpsertTextResponse(BaseModel):
num_documents: int
num_document_chunks: int = Field(
..., description="Number of document chunks upserted"
)

View File

@@ -0,0 +1,7 @@
from langchain_core.messages import AIMessage, HumanMessage
def format_messages(messages: list[HumanMessage | AIMessage]) -> str:
return "\n".join(
f"{message.type.capitalize()}: {message.content}" for message in messages
)

View File

@@ -1,14 +1,18 @@
import logging
from typing import Annotated
from fastapi import APIRouter, Depends
from langchain.chat_models.base import BaseChatModel
from llm_qa.dependencies import chat_model, settings, tei_config
from llm_qa.dependencies import chat_model, settings, tei_config, tei_rerank_config
from llm_qa.models.chat import ChatRequest, ChatResponse
from llm_qa.models.prompts import Prompts
from llm_qa.models.tei import TeiConfig
from llm_qa.services.chat import chat as chat_service
from llm_qa.settings import Settings
logger = logging.getLogger(__name__)
router = APIRouter()
@@ -17,15 +21,25 @@ async def upsert_text(
chat_request: ChatRequest,
settings: Annotated[Settings, Depends(settings)],
tei_config: Annotated[TeiConfig, Depends(tei_config)],
tei_rerank_config: Annotated[TeiConfig, Depends(tei_rerank_config)],
chat_model: Annotated[BaseChatModel, Depends(chat_model)],
) -> ChatResponse:
prompts = chat_request.prompts or settings.chat_prompts
"""Chat with a language model over a collection of documents.
Returns an LLM response and the sources of the documents used to generate it.
"""
logger.debug("Chat request: %s", chat_request)
prompts = Prompts.from_optional(settings.prompts | chat_request.prompts)
logger.debug("Prompts: %s", prompts)
return await chat_service(
messages=[message.to_langchain_message() for message in chat_request.messages],
collection_name=chat_request.collection_name,
prompts=prompts,
tei_config=tei_config,
tei_rerank_config=tei_rerank_config,
qdrant_host=settings.qdrant_host,
qdrant_grpc_port=settings.qdrant_grpc_port,
retrieve_count=settings.retrieve_count,
rerank_count=settings.rerank_count,
chat_model=chat_model,
)

View File

@@ -1,6 +1,7 @@
import logging
from typing import Annotated
from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, HTTPException
from llm_qa.dependencies import settings, tei_config
from llm_qa.models.tei import TeiConfig
@@ -8,6 +9,8 @@ from llm_qa.models.upsert import UpsertTextRequest, UpsertTextResponse
from llm_qa.services.upsert import upsert_text as upsert_text_service
from llm_qa.settings import Settings
logger = logging.getLogger(__name__)
router = APIRouter()
@@ -17,6 +20,8 @@ async def upsert_text(
settings: Annotated[Settings, Depends(settings)],
tei_config: Annotated[TeiConfig, Depends(tei_config)],
) -> UpsertTextResponse:
"""Upsert text into a collection."""
logger.debug("Upsert text request: %s", upsert_request)
num_documents = await upsert_text_service(
text=upsert_request.text,
text_type=upsert_request.type,
@@ -25,9 +30,10 @@ async def upsert_text(
qdrant_host=settings.qdrant_host,
qdrant_grpc_port=settings.qdrant_grpc_port,
)
return UpsertTextResponse(num_documents=num_documents)
return UpsertTextResponse(num_document_chunks=num_documents)
@router.post("/upsert-file")
@router.post("/upsert-file", responses={501: {"description": "Not implemented"}})
async def upsert_file() -> None:
raise NotImplementedError
"""Not implemented."""
raise HTTPException(status_code=501, detail="Not implemented")

View File

@@ -1,93 +1,35 @@
from operator import itemgetter
from typing import TypedDict
import logging
import time
from langchain.chat_models.base import BaseChatModel
from langchain.prompts import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
MessagesPlaceholder,
SystemMessagePromptTemplate,
)
from langchain.vectorstores import VectorStore
from langchain.vectorstores.qdrant import Qdrant
from langchain_core.documents import Document
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.runnables import (
Runnable,
RunnableLambda,
RunnablePassthrough,
chain,
)
from langchain_core.messages import BaseMessage
from qdrant_client import AsyncQdrantClient, QdrantClient
from llm_qa.chains.chat import get_chat_chain
from llm_qa.embeddings.tei import TeiEmbeddings
from llm_qa.models.chat import ChatMessage, ChatResponse
from llm_qa.models.prompts import ChatPrompts
from llm_qa.models.prompts import Prompts
from llm_qa.models.source import Source
from llm_qa.models.tei import TeiConfig
class ChatRunnableInput(TypedDict):
messages: list[BaseMessage]
collection_name: str
class ChatRunnableOutput(TypedDict):
response: AIMessage
documents: list[Document]
@chain
def combine_documents(documents: list[Document]) -> str:
return "\n\n".join(document.page_content for document in documents)
def get_chat_chain(
chat_prompt_template: ChatPromptTemplate,
vectorstore: VectorStore,
chat_model: BaseChatModel,
) -> Runnable[ChatRunnableInput, ChatRunnableOutput]:
return (
RunnablePassthrough[ChatRunnableInput]()
| {
"messages": itemgetter("messages"),
"collection_name": itemgetter("collection_name"),
"last_human_message": itemgetter("messages")
| RunnableLambda[list[BaseMessage], str](lambda x: x[-1].content),
}
| {
"conversation_history": RunnableLambda(lambda x: x["messages"][:-1]),
"last_human_message": itemgetter("last_human_message"),
"documents": itemgetter("last_human_message") | vectorstore.as_retriever(),
}
| {
"conversation_history": itemgetter("conversation_history"),
"last_human_message": itemgetter("last_human_message"),
"documents": itemgetter("documents"),
"context": itemgetter("documents") | combine_documents,
}
| {
"documents": itemgetter("documents"),
"response": chat_prompt_template | chat_model,
}
)
logger = logging.getLogger(__name__)
async def chat(
messages: list[BaseMessage],
collection_name: str,
prompts: ChatPrompts,
prompts: Prompts,
tei_config: TeiConfig,
tei_rerank_config: TeiConfig,
qdrant_host: str,
qdrant_grpc_port: int,
retrieve_count: int,
rerank_count: int | None,
chat_model: BaseChatModel,
) -> ChatResponse:
chat_prompt_template = ChatPromptTemplate.from_messages([
SystemMessagePromptTemplate.from_template(prompts.system_message),
MessagesPlaceholder(variable_name="conversation_history"),
HumanMessagePromptTemplate.from_template(prompts.last_human_message),
])
embeddings = TeiEmbeddings(tei_config=tei_config)
tei_embeddings = TeiEmbeddings(tei_config=tei_config)
tei_rerank_embeddings = TeiEmbeddings(tei_config=tei_rerank_config)
qdrant_client = QdrantClient(
location=qdrant_host, grpc_port=qdrant_grpc_port, prefer_grpc=True
)
@@ -98,16 +40,26 @@ async def chat(
client=qdrant_client,
async_client=async_qdrant_client,
collection_name=collection_name,
embeddings=embeddings,
embeddings=tei_embeddings,
)
chain = get_chat_chain(chat_prompt_template, qdrant_vectorstore, chat_model)
chain_output = await chain.ainvoke(
ChatRunnableInput(messages=messages, collection_name=collection_name)
chat_chain = get_chat_chain(
prompts=prompts,
vectorstore_retriever=qdrant_vectorstore.as_retriever(
search_kwargs={"k": retrieve_count}
),
tei_rerank_embeddings=tei_rerank_embeddings,
rerank_count=rerank_count,
chat_model=chat_model,
)
start_time = time.time()
chain_output = await chat_chain.ainvoke(messages)
elapsed_time = time.time() - start_time
logger.info("Chat chain finished in %.2f seconds", elapsed_time)
return ChatResponse(
response_message=ChatMessage.from_langchain_message(chain_output["response"]),
sources=[
Source(content=document.page_content)
for document in chain_output["documents"]
],
retrieval_query=chain_output["retrieval_query"],
)

View File

@@ -1,4 +1,5 @@
import logging
import time
from langchain.schema.document import Document
from langchain.vectorstores.qdrant import Qdrant
@@ -24,6 +25,7 @@ async def upsert_text(
qdrant_host: str,
qdrant_grpc_port: int,
) -> int:
start_time = time.time()
match text_type:
case TextType.PLAIN_TEXT:
text_splitter_chain = recursive_character_text_splitter_chain
@@ -34,13 +36,18 @@
text_chunks = await text_splitter_chain.ainvoke(text)
logger.info(
"Split `%s` type text into %d document chunks in %.2f seconds",
text_type.value,
len(text_chunks),
time.time() - start_time,
)
start_time = time.time()
documents = [Document(page_content=chunk) for chunk in text_chunks]
embeddings = TeiEmbeddings(tei_config=tei_config)
logger.info(
"Upserting %d documents to Qdrant collection `%s`", len(documents), collection
)
await Qdrant.afrom_documents(
location=qdrant_host,
grpc_port=qdrant_grpc_port,
@@ -51,4 +58,12 @@
force_recreate=False,
)
elapsed_time = time.time() - start_time
logger.info(
"Upserted %d document chunks to Qdrant collection `%s` in %.2f seconds",
len(documents),
collection,
elapsed_time,
)
return len(documents)

View File

@@ -1,10 +1,18 @@
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
from llm_qa.models.prompts import ChatPrompts
from llm_qa.models.prompts import (
OptionalPrompts,
)
class Settings(BaseSettings):
model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
model_config = SettingsConfigDict(
env_file=(".env.default", ".env"),
env_file_encoding="utf-8",
extra="ignore",
env_nested_delimiter="__",
)
qdrant_host: str
qdrant_grpc_port: int = 6334
@@ -12,8 +20,19 @@ class Settings(BaseSettings):
tei_rerank_base_url: str
tei_document_prefix: str = "passage: "
tei_query_prefix: str = "query: "
retrieve_count: int = Field(
ge=1,
description="Number of documents to retrieve using embedding similarity search",
)
rerank_count: int | None = Field(
ge=1,
description="Number of documents to take from rerank, None to skip reranking",
)
ollama_base_url: str
ollama_model_name: str
chat_prompts: ChatPrompts = ChatPrompts(
system_message="System message", last_human_message="Last human message"
)
ollama_auto_pull: bool = False
prompts: OptionalPrompts = OptionalPrompts()
if __name__ == "__main__":
print(Settings())

View File

@@ -10,5 +10,11 @@ app.include_router(api_v1.router)
if __name__ == "__main__":
import uvicorn
load_logging_config()
uvicorn.run("llm_qa.web:app", host="0.0.0.0", port=8000, reload=True) # noqa: S104
logging_config = load_logging_config(dry_run=True)
uvicorn.run(
"llm_qa.web:app",
host="0.0.0.0", # noqa: S104
port=8000,
log_config=logging_config,
reload=True,
)

View File

@@ -2,26 +2,56 @@ version: 1
disable_existing_loggers: False
formatters:
simple:
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
default:
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
access:
"()": uvicorn.logging.AccessFormatter
format: '%(asctime)s - %(name)s - %(levelname)s - %(client_addr)s - "%(request_line)s" %(status_code)s'
handlers:
console:
class: logging.StreamHandler
level: DEBUG
formatter: simple
stream: ext://sys.stdout
default:
formatter: default
class: logging.StreamHandler
stream: ext://sys.stderr
access:
formatter: access
class: logging.StreamHandler
stream: ext://sys.stdout
loggers:
uvicorn:
level: INFO
handlers: [console]
propagate: no
gunicorn:
level: INFO
handlers: [console]
propagate: no
uvicorn:
level: INFO
handlers:
- default
propagate: no
uvicorn.error:
level: INFO
handlers:
- default
propagate: no
uvicorn.access:
level: INFO
handlers:
- access
propagate: no
gunicorn:
level: INFO
handlers:
- default
propagate: no
gunicorn.error:
level: INFO
handlers:
- default
propagate: no
gunicorn.access:
level: INFO
handlers:
- access
propagate: no
root:
level: INFO
handlers: [console]
level: INFO
handlers:
- default
propagate: no

llm-qa/ollama_pull.sh Executable file
View File

@@ -0,0 +1,6 @@
source .env
echo Ollama base URL: $OLLAMA_BASE_URL
echo Ollama model name: $OLLAMA_MODEL_NAME
curl $OLLAMA_BASE_URL/api/pull -d '{
"name": "'$OLLAMA_MODEL_NAME'"
}' -H "Content-Type: application/json" -X POST

llm-qa/poetry.lock generated
View File

@@ -701,13 +701,13 @@ files = [
[[package]]
name = "httpcore"
version = "1.0.2"
version = "1.0.3"
description = "A minimal low-level HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpcore-1.0.2-py3-none-any.whl", hash = "sha256:096cc05bca73b8e459a1fc3dcf585148f63e534eae4339559c9b8a8d6399acc7"},
{file = "httpcore-1.0.2.tar.gz", hash = "sha256:9fc092e4799b26174648e54b74ed5f683132a464e95643b226e00c2ed2fa6535"},
{file = "httpcore-1.0.3-py3-none-any.whl", hash = "sha256:9a6a501c3099307d9fd76ac244e08503427679b1e81ceb1d922485e2f2462ad2"},
{file = "httpcore-1.0.3.tar.gz", hash = "sha256:5c0f9546ad17dac4d0772b0808856eb616eb8b48ce94f49ed819fd6982a8a544"},
]
[package.dependencies]
@@ -718,17 +718,17 @@ h11 = ">=0.13,<0.15"
asyncio = ["anyio (>=4.0,<5.0)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
trio = ["trio (>=0.22.0,<0.23.0)"]
trio = ["trio (>=0.22.0,<0.24.0)"]
[[package]]
name = "httpx"
version = "0.26.0"
version = "0.25.2"
description = "The next generation HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpx-0.26.0-py3-none-any.whl", hash = "sha256:8915f5a3627c4d47b73e8202457cb28f1266982d1159bd5779d86a80c0eab1cd"},
{file = "httpx-0.26.0.tar.gz", hash = "sha256:451b55c30d5185ea6b23c2c793abf9bb237d2a7dfb901ced6ff69ad37ec1dfaf"},
{file = "httpx-0.25.2-py3-none-any.whl", hash = "sha256:a05d3d052d9b2dfce0e3896636467f8a5342fb2b902c819428e1ac65413ca118"},
{file = "httpx-0.25.2.tar.gz", hash = "sha256:8b8fcaa0c8ea7b05edd69a094e63a2094c4efcb48129fb757361bc423c0ad9e8"},
]
[package.dependencies]
@@ -913,13 +913,13 @@ extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.
[[package]]
name = "langchain-core"
version = "0.1.22"
version = "0.1.23"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langchain_core-0.1.22-py3-none-any.whl", hash = "sha256:d1263c2707ce18bb13654c88f891e53f39edec9b11ff7d0d0f23fd920927b2d6"},
{file = "langchain_core-0.1.22.tar.gz", hash = "sha256:deac12b3e42a08bbbaa2acf83d5f8dd2d5513256d8daf0e853e9d68ff4c99d79"},
{file = "langchain_core-0.1.23-py3-none-any.whl", hash = "sha256:d42fac013c39a8b0bcd7e337a4cb6c17c16046c60d768f89df582ad73ec3c5cb"},
{file = "langchain_core-0.1.23.tar.gz", hash = "sha256:34359cc8b6f8c3d45098c54a6a9b35c9f538ef58329cd943a2249d6d7b4e5806"},
]
[package.dependencies]
@@ -1185,6 +1185,20 @@ files = [
{file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
]
[[package]]
name = "ollama"
version = "0.1.6"
description = "The official Python client for Ollama."
optional = false
python-versions = ">=3.8,<4.0"
files = [
{file = "ollama-0.1.6-py3-none-any.whl", hash = "sha256:e37f0455025ed5846879551ca2030ec93a71a823395d3517c14d71479ccbdd11"},
{file = "ollama-0.1.6.tar.gz", hash = "sha256:6636ff75ae54ac076522dcdc40678b141208325d1cc5d85785559f197b1107de"},
]
[package.dependencies]
httpx = ">=0.25.2,<0.26.0"
[[package]]
name = "packaging"
version = "23.2"
@@ -1650,60 +1664,60 @@ files = [
[[package]]
name = "sqlalchemy"
version = "2.0.26"
version = "2.0.27"
description = "Database Abstraction Library"
optional = false
python-versions = ">=3.7"
files = [
{file = "SQLAlchemy-2.0.26-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:56524d767713054f8758217b3a811f6a736e0ae34e7afc33b594926589aa9609"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c2d8a2c68b279617f13088bdc0fc0e9b5126f8017f8882ff08ee41909fab0713"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84d377645913d47f0dc802b415bcfe7fb085d86646a12278d77c12eb75b5e1b4"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fc0628d2026926404dabc903dc5628f7d936a792aa3a1fc54a20182df8e2172"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:872f2907ade52601a1e729e85d16913c24dc1f6e7c57d11739f18dcfafde29db"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba46fa770578b3cf3b5b77dadb7e94fda7692dd4d1989268ef3dcb65f31c40a3"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-win32.whl", hash = "sha256:651d10fdba7984bf100222d6e4acc496fec46493262b6170be1981ef860c6184"},
{file = "SQLAlchemy-2.0.26-cp310-cp310-win_amd64.whl", hash = "sha256:8f95ede696ab0d7328862d69f29b643d35b668c4f3619cb2f0281adc16e64c1b"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fab1bb909bd24accf2024a69edd4f885ded182c079c4dbcd515b4842f86b07cb"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b7ee16afd083bb6bb5ab3962ac7f0eafd1d196c6399388af35fef3d1c6d6d9bb"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:379af901ceb524cbee5e15c1713bf9fd71dc28053286b7917525d01b938b9628"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94a78f56ea13f4d6e9efcd2a2d08cc13531918e0516563f6303c4ad98c81e21d"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a481cc2eec83776ff7b6bb12c8e85d0378af0e2ec4584ac3309365a2a380c64b"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8cbeb0e49b605cd75f825fb9239a554803ef2bef1a7b2a8b428926ed518b6b63"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-win32.whl", hash = "sha256:e70cce65239089390c193a7b0d171ce89d2e3dedf797f8010031b2aa2b1e9c80"},
{file = "SQLAlchemy-2.0.26-cp311-cp311-win_amd64.whl", hash = "sha256:750d1ef39d50520527c45c309c3cb10bbfa6131f93081b4e93858abb5ece2501"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b39503c3a56e1b2340a7d09e185ddb60b253ad0210877a9958ac64208eb23674"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1a870e6121a052f826f7ae1e4f0b54ca4c0ccd613278218ca036fa5e0f3be7df"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5901eed6d0e23ca4b04d66a561799d4f0fe55fcbfc7ca203bb8c3277f442085b"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d25fe55aab9b20ae4a9523bb269074202be9d92a145fcc0b752fff409754b5f6"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5310958d08b4bafc311052be42a3b7d61a93a2bf126ddde07b85f712e7e4ac7b"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fd133afb7e6c59fad365ffa97fb06b1001f88e29e1de351bef3d2b1224e2f132"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-win32.whl", hash = "sha256:dc32ecf643c4904dd413e6a95a3f2c8a89ccd6f15083e586dcf8f42eb4e317ae"},
{file = "SQLAlchemy-2.0.26-cp312-cp312-win_amd64.whl", hash = "sha256:6e25f029e8ad6d893538b5abe8537e7f09e21d8e96caee46a7e2199f3ddd77b0"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:99a9a8204b8937aa72421e31c493bfc12fd063a8310a0522e5a9b98e6323977c"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:691d68a4fca30c9a676623d094b600797699530e175b6524a9f57e3273f5fa8d"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79a74a4ca4310c812f97bf0f13ce00ed73c890954b5a20b32484a9ab60e567e9"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f2efbbeb18c0e1c53b670a46a009fbde7b58e05b397a808c7e598532b17c6f4b"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3fc557f5402206c18ec3d288422f8e5fa764306d49f4efbc6090a7407bf54938"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-win32.whl", hash = "sha256:a9846ffee3283cff4ec476e7ee289314290fcb2384aab5045c6f481c5c4d011f"},
{file = "SQLAlchemy-2.0.26-cp37-cp37m-win_amd64.whl", hash = "sha256:ed4667d3d5d6e203a271d684d5b213ebcd618f7a8bc605752a8865eb9e67a79a"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:79e629df3f69f849a1482a2d063596b23e32036b83547397e68725e6e0d0a9ab"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4b4d848b095173e0a9e377127b814490499e55f5168f617ae2c07653c326b9d1"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f06afe8e96d7f221cc0b59334dc400151be22f432785e895e37030579d253c3"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f75ac12d302205e60f77f46bd162d40dc37438f1f8db160d2491a78b19a0bd61"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ec3717c1efee8ad4b97f6211978351de3abe1e4b5f73e32f775c7becec021c5c"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06ed4d6bb2365222fb9b0a05478a2d23ad8c1dd874047a9ae1ca1d45f18a255e"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-win32.whl", hash = "sha256:caa79a6caeb4a3cc4ddb9aba9205c383f5d3bcb60d814e87e74570514754e073"},
{file = "SQLAlchemy-2.0.26-cp38-cp38-win_amd64.whl", hash = "sha256:996b41c38e34a980e9f810d6e2709a3196e29ee34e46e3c16f96c63da10a9da1"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4f57af0866f6629eae2d24d022ba1a4c1bac9b16d45027bbfcda4c9d5b0d8f26"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e1a532bc33163fb19c4759a36504a23e63032bc8d47cee1c66b0b70a04a0957b"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02a4f954ccb17bd8cff56662efc806c5301508233dc38d0253a5fdb2f33ca3ba"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a678f728fb075e74aaa7fdc27f8af8f03f82d02e7419362cc8c2a605c16a4114"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8b39462c9588d4780f041e1b84d2ba038ac01c441c961bbee622dd8f53dec69f"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98f4d0d2bda2921af5b0c2ca99207cdab00f2922da46a6336c62c8d6814303a7"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-win32.whl", hash = "sha256:6d68e6b507a3dd20c0add86ac0a0ca061d43c9a0162a122baa5fe952f14240f1"},
{file = "SQLAlchemy-2.0.26-cp39-cp39-win_amd64.whl", hash = "sha256:fb97a9b93b953084692a52a7877957b7a88dfcedc0c5652124f5aebf5999f7fe"},
{file = "SQLAlchemy-2.0.26-py3-none-any.whl", hash = "sha256:1128b2cdf49107659f6d1f452695f43a20694cc9305a86e97b70793a1c74eeb4"},
{file = "SQLAlchemy-2.0.26.tar.gz", hash = "sha256:e1bcd8fcb30305e27355d553608c2c229d3e589fb7ff406da7d7e5d50fa14d0d"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d04e579e911562f1055d26dab1868d3e0bb905db3bccf664ee8ad109f035618a"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fa67d821c1fd268a5a87922ef4940442513b4e6c377553506b9db3b83beebbd8"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c7a596d0be71b7baa037f4ac10d5e057d276f65a9a611c46970f012752ebf2d"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:954d9735ee9c3fa74874c830d089a815b7b48df6f6b6e357a74130e478dbd951"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5cd20f58c29bbf2680039ff9f569fa6d21453fbd2fa84dbdb4092f006424c2e6"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:03f448ffb731b48323bda68bcc93152f751436ad6037f18a42b7e16af9e91c07"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-win32.whl", hash = "sha256:d997c5938a08b5e172c30583ba6b8aad657ed9901fc24caf3a7152eeccb2f1b4"},
{file = "SQLAlchemy-2.0.27-cp310-cp310-win_amd64.whl", hash = "sha256:eb15ef40b833f5b2f19eeae65d65e191f039e71790dd565c2af2a3783f72262f"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6c5bad7c60a392850d2f0fee8f355953abaec878c483dd7c3836e0089f046bf6"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3012ab65ea42de1be81fff5fb28d6db893ef978950afc8130ba707179b4284a"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbcd77c4d94b23e0753c5ed8deba8c69f331d4fd83f68bfc9db58bc8983f49cd"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d177b7e82f6dd5e1aebd24d9c3297c70ce09cd1d5d37b43e53f39514379c029c"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:680b9a36029b30cf063698755d277885d4a0eab70a2c7c6e71aab601323cba45"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1306102f6d9e625cebaca3d4c9c8f10588735ef877f0360b5cdb4fdfd3fd7131"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-win32.whl", hash = "sha256:5b78aa9f4f68212248aaf8943d84c0ff0f74efc65a661c2fc68b82d498311fd5"},
{file = "SQLAlchemy-2.0.27-cp311-cp311-win_amd64.whl", hash = "sha256:15e19a84b84528f52a68143439d0c7a3a69befcd4f50b8ef9b7b69d2628ae7c4"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0de1263aac858f288a80b2071990f02082c51d88335a1db0d589237a3435fe71"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce850db091bf7d2a1f2fdb615220b968aeff3849007b1204bf6e3e50a57b3d32"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dfc936870507da96aebb43e664ae3a71a7b96278382bcfe84d277b88e379b18"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4fbe6a766301f2e8a4519f4500fe74ef0a8509a59e07a4085458f26228cd7cc"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4535c49d961fe9a77392e3a630a626af5baa967172d42732b7a43496c8b28876"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0fb3bffc0ced37e5aa4ac2416f56d6d858f46d4da70c09bb731a246e70bff4d5"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-win32.whl", hash = "sha256:7f470327d06400a0aa7926b375b8e8c3c31d335e0884f509fe272b3c700a7254"},
{file = "SQLAlchemy-2.0.27-cp312-cp312-win_amd64.whl", hash = "sha256:f9374e270e2553653d710ece397df67db9d19c60d2647bcd35bfc616f1622dcd"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e97cf143d74a7a5a0f143aa34039b4fecf11343eed66538610debc438685db4a"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7b5a3e2120982b8b6bd1d5d99e3025339f7fb8b8267551c679afb39e9c7c7f1"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e36aa62b765cf9f43a003233a8c2d7ffdeb55bc62eaa0a0380475b228663a38f"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5ada0438f5b74c3952d916c199367c29ee4d6858edff18eab783b3978d0db16d"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b1d9d1bfd96eef3c3faedb73f486c89e44e64e40e5bfec304ee163de01cf996f"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-win32.whl", hash = "sha256:ca891af9f3289d24a490a5fde664ea04fe2f4984cd97e26de7442a4251bd4b7c"},
{file = "SQLAlchemy-2.0.27-cp37-cp37m-win_amd64.whl", hash = "sha256:fd8aafda7cdff03b905d4426b714601c0978725a19efc39f5f207b86d188ba01"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ec1f5a328464daf7a1e4e385e4f5652dd9b1d12405075ccba1df842f7774b4fc"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ad862295ad3f644e3c2c0d8b10a988e1600d3123ecb48702d2c0f26771f1c396"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48217be1de7d29a5600b5c513f3f7664b21d32e596d69582be0a94e36b8309cb"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e56afce6431450442f3ab5973156289bd5ec33dd618941283847c9fd5ff06bf"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:611068511b5531304137bcd7fe8117c985d1b828eb86043bd944cebb7fae3910"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b86abba762ecfeea359112b2bb4490802b340850bbee1948f785141a5e020de8"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-win32.whl", hash = "sha256:30d81cc1192dc693d49d5671cd40cdec596b885b0ce3b72f323888ab1c3863d5"},
{file = "SQLAlchemy-2.0.27-cp38-cp38-win_amd64.whl", hash = "sha256:120af1e49d614d2525ac247f6123841589b029c318b9afbfc9e2b70e22e1827d"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d07ee7793f2aeb9b80ec8ceb96bc8cc08a2aec8a1b152da1955d64e4825fcbac"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cb0845e934647232b6ff5150df37ceffd0b67b754b9fdbb095233deebcddbd4a"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fc19ae2e07a067663dd24fca55f8ed06a288384f0e6e3910420bf4b1270cc51"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b90053be91973a6fb6020a6e44382c97739736a5a9d74e08cc29b196639eb979"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2f5c9dfb0b9ab5e3a8a00249534bdd838d943ec4cfb9abe176a6c33408430230"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:33e8bde8fff203de50399b9039c4e14e42d4d227759155c21f8da4a47fc8053c"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-win32.whl", hash = "sha256:d873c21b356bfaf1589b89090a4011e6532582b3a8ea568a00e0c3aab09399dd"},
{file = "SQLAlchemy-2.0.27-cp39-cp39-win_amd64.whl", hash = "sha256:ff2f1b7c963961d41403b650842dc2039175b906ab2093635d8319bef0b7d620"},
{file = "SQLAlchemy-2.0.27-py3-none-any.whl", hash = "sha256:1ab4e0448018d01b142c916cc7119ca573803a4745cfe341b8f95657812700ac"},
{file = "SQLAlchemy-2.0.27.tar.gz", hash = "sha256:86a6ed69a71fe6b88bf9331594fa390a2adda4a49b5c06f98e47bf0d392534f8"},
]
[package.dependencies]
@@ -1821,6 +1835,17 @@ dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2
doc = ["cairosvg (>=2.5.2,<3.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pillow (>=9.3.0,<10.0.0)"]
test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
[[package]]
name = "types-colorama"
version = "0.4.15.20240205"
description = "Typing stubs for colorama"
optional = false
python-versions = ">=3.8"
files = [
{file = "types-colorama-0.4.15.20240205.tar.gz", hash = "sha256:7ae4f58d407d387f4f98b24d81e1b7657ec754ea1dc4619ae5bd27f0c367637e"},
{file = "types_colorama-0.4.15.20240205-py3-none-any.whl", hash = "sha256:3ab26dcd76d2f13b1b795ed5c87a1a1a29331ea64cf614bb6ae958a3cebc3a53"},
]
[[package]]
name = "types-pyyaml"
version = "6.0.12.12"
@@ -2010,4 +2035,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.12"
content-hash = "b37d2542dbbbd85c8afc9a16dde50037737f87d250cec6c83e377e1266a19997"
content-hash = "e61409e5b61d5b67d01aa550723a3bff8efb5a53aba5ca034fbb038c5408f89d"

View File

@@ -11,17 +11,20 @@ langchain-community = "^0.0.19"
langchain = "^0.1.6"
fastapi = "^0.109.2"
uvicorn = "^0.27.1"
httpx = "^0.26.0"
httpx = "^0.25.0"
pydantic-settings = "^2.1.0"
pydantic = "^2.6.1"
qdrant-client = "^1.7.3"
typer = "^0.9.0"
colorama = "^0.4.6"
ollama = "^0.1.6"
[tool.poetry.group.dev.dependencies]
ruff = "0.2.1"
mypy = "^1.8.0"
ipython = "^8.21.0"
types-pyyaml = "^6.0.12.12"
types-colorama = "^0.4.15.20240205"
[build-system]
requires = ["poetry-core"]
@@ -34,6 +37,10 @@ plugins = ["pydantic.mypy"]
modules = ["llm_qa"]
strict = true
[[tool.mypy.overrides]]
module = "ollama"
ignore_missing_imports = true
[tool.ruff]
target-version = "py312"
preview = true