# launch_tgi_server.sh
  1. #!/bin/bash
  2. PORT=8000
  3. MODEL=$1
  4. TOKENS=$2
  5. docker run -e HF_TOKEN=$HF_TOKEN --gpus all --shm-size 1g -p $PORT:80 \
  6. -v $PWD/data:/data \
  7. ghcr.io/huggingface/text-generation-inference:2.2.0 \
  8. --model-id $MODEL \
  9. --sharded false \
  10. --max-input-length 1024 \
  11. --max-total-tokens 2048 \
  12. --max-best-of 5 \
  13. --max-concurrent-requests 5000 \
  14. --max-batch-total-tokens $TOKENS