version: '3.8'

services:
  # Development container; kept alive via `sleep infinity` so an IDE can attach.
  devcontainer:
    build: .
    volumes:
      - ..:/workspace:cached
    ports:
      - "8000:8000"
    environment:
      - QDRANT_HOST=qdrant
      - QDRANT_GRPC_PORT=6334
      - TEI_BASE_URL=http://text-embeddings-inference
      - TEI_RERANK_BASE_URL=http://text-embeddings-inference-rerank
      - OLLAMA_BASE_URL=http://ollama:11434
      - OLLAMA_MODEL_NAME=${OLLAMA_MODEL_NAME}
    command: sleep infinity

  # Qdrant vector database (REST on 6333, gRPC on 6334).
  qdrant:
    image: qdrant/qdrant:v1.7.4
    volumes:
      - ../qdrant_storage:/qdrant/storage:z
    ports:
      - "6333:6333"
      - "6334:6334"

  # Hugging Face Text Embeddings Inference serving the embedding model.
  text-embeddings-inference:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-0.6
    volumes:
      - "../tei_data:/data"
    ports:
      - "8001:80"
    environment:
      - MODEL_ID=${TEI_MODEL_ID:-BAAI/bge-large-en-v1.5}
      - REVISION=${TEI_MODEL_REVISION}
      - MAX_CLIENT_BATCH_SIZE=128

  # Second TEI instance serving the reranker model.
  text-embeddings-inference-rerank:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-0.6
    volumes:
      - "../tei_data:/data"
    ports:
      - "8002:80"
    environment:
      - MODEL_ID=${TEI_RERANK_MODEL_ID:-BAAI/bge-reranker-large}
      - REVISION=${TEI_RERANK_MODEL_REVISION}
      - MAX_CLIENT_BATCH_SIZE=128

  # Ollama LLM server with an optional GPU reservation
  # (defaults: nvidia driver, 1 GPU; override via OLLAMA_GPU_DRIVER / OLLAMA_GPU_COUNT).
  ollama:
    image: ollama/ollama:latest
    volumes:
      - ../ollama:/root/.ollama
    ports:
      - "5000:11434"
    deploy:
      resources:
        reservations:
          devices:
            - driver: ${OLLAMA_GPU_DRIVER-nvidia}
              count: ${OLLAMA_GPU_COUNT-1}
              capabilities:
                - gpu
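
# --- Usage sketch (an assumption, not stated in this file: it lives in a
# subdirectory such as .devcontainer/ next to the project root, which is why
# the bind mounts use ../). ---
#
#   docker compose up -d                     # start all services
#   docker compose exec devcontainer bash    # open a shell in the dev container
#
# Inside the compose network, services resolve by service name: Qdrant gRPC at
# qdrant:6334 (QDRANT_HOST / QDRANT_GRPC_PORT), TEI embeddings at
# http://text-embeddings-inference (host port 8001), the reranker at
# http://text-embeddings-inference-rerank (host port 8002), and Ollama at
# http://ollama:11434 (host port 5000).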