# --- Uninstall any existing CUDA toolkit ---
# (fixed: "cuda" heading was fused onto the purge command; two rm commands
# were mashed onto one line)
sudo apt purge nvidia-cuda-toolkit cuda-*
sudo apt autoremove
# Remove leftover CUDA files.
sudo rm -rf /usr/lib/cuda
sudo rm -rf /usr/include/cuda*
sudo rm -f /usr/bin/nvcc
# --- Download and install CUDA 12.4.1 (toolkit only) ---
wget https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run
# Make the runfile executable.
chmod +x cuda_12.4.1_550.54.15_linux.run
# Install toolkit only, silently, overriding any existing installation.
# (fixed: ".rum" -> ".run" filename typo; en-dash "–-" -> "--")
sudo ./cuda_12.4.1_550.54.15_linux.run --toolkit --silent --override
# Append the following two lines to ~/.bashrc (e.g. with vim ~/.bashrc).
# (fixed: smart quotes " -> ASCII quotes, which would otherwise be taken
# literally and break PATH)
export PATH="/usr/local/cuda-12.4/bin:$PATH"
export LD_LIBRARY_PATH="/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH"
source ~/.bashrc
# Verify the installation. (fixed: "-version" -> "--version"; the two
# commands were mashed onto one line)
nvcc --version
nvidia-smi
# If the NVIDIA driver was removed, re-run the installer WITH the --driver
# flag (or install the driver via the desktop "Additional Drivers" tool).
# (fixed: ".rum" -> ".run"; en-dash "–-" -> "--")
sudo ./cuda_12.4.1_550.54.15_linux.run --driver --toolkit --silent --override
# --- Build tools and conda environment for ktransformers ---
sudo apt-get update
sudo apt-get install build-essential cmake ninja-build
conda create --name ktransformers python=3.11
conda activate ktransformers
# You may need to run 'conda init' and reopen the shell first.
# Anaconda's libstdcxx-ng package (from conda-forge) ships a newer
# libstdc++ than many system toolchains provide.
conda install -c conda-forge libstdcxx-ng
# Inspect which GLIBCXX symbol versions the conda env's libstdc++ exports.
# NOTE(review): path says env "ktransformers-0.3" but the env created above
# is "ktransformers" — confirm which env name you actually use.
strings ~/anaconda3/envs/ktransformers-0.3/lib/libstdc++.so.6 | grep GLIBCXX
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
pip3 install packaging ninja cpufeature numpy
# Check versions to pick the matching flash-attn wheel.
# (fixed: "python –version" en-dash -> "--version"; commands were mashed
# onto shared lines)
python --version
nvcc -V
pip show torch
pip3 install ./Downloads/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
# --- Fetch the ktransformers sources (with submodules) ---
sudo apt install git
git clone https://github.com/kvcache-ai/ktransformers.git
cd ktransformers/
git submodule init
git submodule update
# --- Install the web UI (requires Node.js from NodeSource) ---
sudo apt-get update -y && sudo apt-get install -y apt-transport-https ca-certificates curl gnupg
# (fixed: curl and chmod were mashed onto one line)
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo gpg --dearmor -o /usr/share/keyrings/nodesource.gpg
sudo chmod 644 /usr/share/keyrings/nodesource.gpg
# (fixed: Chinese brackets 【】 -> ASCII [] and the echo string was broken
# across three lines, which would have written a malformed sources entry)
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/nodesource.gpg] https://deb.nodesource.com/node_23.x nodistro main" | sudo tee /etc/apt/sources.list.d/nodesource.list
sudo apt-get update -y
sudo apt-get install nodejs -y
cd ktransformers/website
npm install @vue/cli
npm run build
cd ../../
# Only now run the actual ktransformers installation.
bash install.sh
# --- Download model weights ---
# Reference mirror: https://huggingface.co/mradermacher/DeepSeek-R1-GGUF
pip install modelscope
modelscope download --model unsloth/DeepSeek-R1-GGUF --include 'DeepSeek-R1-Q4_K_M**' --local_dir /media/lyq/Ai009/AI/DeepseekR1/
# Also fetch the model config/tokenizer files (everything except weights).
# (fixed: "nodelscope" -> "modelscope"; en-dash "–model"/"–exclude" -> "--";
# smart quotes -> ASCII quotes; the two commands were mashed onto one line)
modelscope download --model deepseek-ai/DeepSeek-R1 --exclude '*.safetensors' --local_dir /media/lyq/Ai009/AI/DeepseekR1/
# Pick an optimize-rules YAML matching your GPU count and model from:
#   https://github.com/kvcache-ai/ktransformers/tree/main/ktransformers/optimize/optimize_rules
# e.g. DeepSeek-V3-Chat.yaml. On a 2080 Ti the cores do not support the
# marlin kernels, so replace KLinearMarlin with KLinearTorch in the YAML.
# --- Build flashinfer from source, then launch local chat ---
git clone https://github.com/flashinfer-ai/flashinfer.git --recursive
cd flashinfer
pip install -e . -v
FLASHINFER_ENABLE_AOT=1 pip install -e . -v
# Run the chat. (fixed: en-dashes "–max_new_tokens"/"–cpu_infer" -> "--";
# the command was split mid-argument across lines and fused with prose)
python -m ktransformers.local_chat \
  --model_path /media/lyq/Ai009/AI/DeepseekR1 \
  --gguf_path /media/lyq/Ai009/AI/DeepseekR1/DeepSeek-R1-Q4_K_M \
  --optimize_config_path /media/lyq/Ai009/AI/DeepseekR1/DeepSeek-V3-Chat.yaml \
  --max_new_tokens 4096 \
  --cpu_infer 16