#!/bin/bash
################################################################################
# Shell script that starts a copy of vLLM with a base model plus all the
# available LoRA adapters in this repository.
#
# To run this script:
# 1. Install an appropriate build of vLLM for your machine
# 2. Install the Hugging Face CLI (`hf`)
# 3. Download the intrinsics library by running:
#        hf download ibm-granite/intrinsics-lib --local-dir ./intrinsics-lib
# 4. Edit the constants BASE_MODEL_NAME and BASE_MODEL_ORG as needed
# 5. Run this script from the root of your local copy of intrinsics-lib.
################################################################################

BASE_MODEL_NAME=granite-3.3-8b-instruct
BASE_MODEL_ORG=ibm-granite

export VLLM_API_KEY=rag_intrinsics_1234

# Find all LoRA adapters for the target base model. Every subdirectory that
# contains lora/${BASE_MODEL_NAME} is registered as a LoRA module whose name
# is the subdirectory name.
LORAS=""
for item in ./*; do
    # Remove the "./" prefix so the directory name doubles as the adapter name.
    name=$(basename -- "${item}")
    if [ -d "./${name}/lora/${BASE_MODEL_NAME}" ]; then
        LORAS+="${name}=./${name}/lora/${BASE_MODEL_NAME} "
    fi
done

CMD="vllm serve ${BASE_MODEL_ORG}/${BASE_MODEL_NAME} \
    --port 55555 \
    --gpu-memory-utilization 0.45 \
    --max-model-len 8192 \
    --enable-lora \
    --max-lora-rank 64 \
    --lora-modules ${LORAS}"

echo $CMD
$CMD
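
################################################################################
# For reference only (not executed; the serve command above runs in the
# foreground). A minimal sketch of how a client could check the server and call
# one of the adapters through vLLM's OpenAI-compatible API, using the API key
# exported above. <adapter_name> is a placeholder; substitute one of the
# directory names picked up by the loop above.
#
#   # List the base model and all registered LoRA adapters
#   curl http://localhost:55555/v1/models \
#       -H "Authorization: Bearer rag_intrinsics_1234"
#
#   # Send a chat request to a specific LoRA adapter by name
#   curl http://localhost:55555/v1/chat/completions \
#       -H "Content-Type: application/json" \
#       -H "Authorization: Bearer rag_intrinsics_1234" \
#       -d '{"model": "<adapter_name>",
#            "messages": [{"role": "user", "content": "Hello"}]}'
################################################################################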