修改引用逻辑,修改长期记忆bug
This commit is contained in:
@@ -250,7 +250,7 @@ start_embedding() {
 echo -e "${BLUE}🚀 启动 llama.cpp Embedding 容器...${NC}"

 # 检查模型文件
-if [ ! -f "/home/huang/Study/AIModel/GGUF/embeddinggemma-300M-Q8_0.gguf" ]; then
+if [ ! -f "/home/huang/Study/AIModel/GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf" ]; then
 echo -e "${RED}✗ 错误:Embedding 模型文件不存在${NC}"
 exit 1
 fi
@@ -263,13 +263,16 @@ start_embedding() {
 --device=/dev/dri \
 -v /home/huang/Study/AIModel/GGUF:/models \
 -p 8082:8080 \
+-e LLAMA_ARG_CTX_SIZE=16384 \
+-e LLAMA_ARG_N_PARALLEL=1 \
+-e LLAMA_ARG_BATCH=512 \
+-e LLAMA_ARG_N_GPU_LAYERS=99 \
+-e LLAMA_ARG_API_KEY=huang1998 \
 ghcr.io/ggml-org/llama.cpp:server-rocm \
--m /models/embeddinggemma-300M-Q8_0.gguf \
+-m /models/Qwen3-Embedding-0.6B-Q8_0.gguf \
 --host 0.0.0.0 \
 --port 8080 \
 -ngl 99 \
--embeddings \
--c 512
+--embeddings

 echo -e "${GREEN}✓ llama.cpp Embedding 容器已启动 (端口 8082)${NC}"
 sleep 5
Reference in New Issue
Block a user