#!/usr/bin/env bash
# Build llama.cpp with the Vulkan backend and benchmark two quantized
# GGUF models with llama-bench.
#
# Requirements: git, cmake, a C/C++ toolchain, the Vulkan SDK, wget,
# and a Vulkan-capable GPU. Downloads ~7 GB of model weights.
#
# Env vars:
#   JOBS - parallel build jobs (default: 15)
set -euo pipefail

# Shallow clone keeps the download small.
git clone --depth 1 https://github.com/ggerganov/llama.cpp.git
cd llama.cpp

# Configure with the Vulkan backend enabled, then build in Release mode.
cmake -DGGML_VULKAN=ON -B build
cmake --build build --config Release -j"${JOBS:-15}"

# Download the quantized models (Q4_K_M: ~4.92 GB for the 8B model,
# ~2 GB for the 3.2-3B model).
wget https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf
wget https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf

# Benchmark prompt processing / token generation for each model.
./build/bin/llama-bench -m Meta-Llama-3-8B-Instruct-Q4_K_M.gguf
./build/bin/llama-bench -m Llama-3.2-3B-Instruct-Q4_K_M.gguf