#!/usr/bin/env bash
# Quantize LLaMA-7B to 4-bit GPTQ (group size 128, true-sequential mode),
# calibrating on the C4 dataset, and save the result as safetensors.
#
# Base model weights: https://huggingface.co/huggyllama/llama-7b
# (was a bare URL line before — executing it as a command would fail)
set -euo pipefail

# Pin to GPU 0; adjust CUDA_VISIBLE_DEVICES for a different device.
CUDA_VISIBLE_DEVICES=0 python llama.py ~/llama-7b c4 \
  --wbits 4 \
  --true-sequential \
  --groupsize 128 \
  --save_safetensors llama7b-gptq-4bit-128g.safetensors