llm-qa/.devcontainer/compose.yaml

version: '3.8'
services:
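  # Dev workspace: mounts the repo at /workspace and idles so the IDE can
  # attach; the peer services below are wired in via environment variables.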
  devcontainer:
    build: .
    volumes:
      - ..:/workspace:cached
    ports:
      - "8000:8000"
    environment:
      - QDRANT_HOST=qdrant
      - QDRANT_GRPC_PORT=6334
      - TEI_BASE_URL=http://text-embeddings-inference
      - TEI_RERANK_BASE_URL=http://text-embeddings-inference-rerank
      - OLLAMA_BASE_URL=http://ollama:11434
    command: sleep infinity
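
  # Qdrant vector store: 6333 is the HTTP API, 6334 the gRPC API (matching
  # QDRANT_GRPC_PORT above); data persists in ../qdrant_storage on the host.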
  qdrant:
    image: qdrant/qdrant:v1.7.4
    volumes:
      - ../qdrant_storage:/qdrant/storage:z
    ports:
      - "6333:6333"
      - "6334:6334"
  text-embeddings-inference:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-0.6
    volumes:
      - "../tei_data:/data"
    ports:
      - "8001:80"
    environment:
      - MODEL_ID=${TEI_MODEL_ID:-BAAI/bge-large-en-v1.5}
      - REVISION=${TEI_MODEL_REVISION}
      - MAX_CLIENT_BATCH_SIZE=128
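
  # Second TEI instance serving the reranker model, sharing the ../tei_data
  # model cache; exposed on host port 8002.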
  text-embeddings-inference-rerank:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-0.6
    volumes:
      - "../tei_data:/data"
    ports:
      - "8002:80"
    environment:
      - MODEL_ID=${TEI_RERANK_MODEL_ID:-BAAI/bge-reranker-large}
      - REVISION=${TEI_RERANK_MODEL_REVISION}
      - MAX_CLIENT_BATCH_SIZE=128
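
  # Ollama LLM server: host port 5000 maps to Ollama's default 11434. The GPU
  # reservation defaults to one NVIDIA device; override with OLLAMA_GPU_DRIVER
  # and OLLAMA_GPU_COUNT.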
  ollama:
    image: ollama/ollama:latest
    volumes:
      - ../ollama:/root/.ollama
    ports:
      - "5000:11434"
    deploy:
      resources:
        reservations:
          devices:
            - driver: ${OLLAMA_GPU_DRIVER-nvidia}
              count: ${OLLAMA_GPU_COUNT-1}
              capabilities:
                - gpu