修改引用逻辑，修复长期记忆 bug

This commit is contained in:
2026-04-20 15:55:58 +08:00
parent 4e981e9dcf
commit 3143e0e4e6
39 changed files with 444 additions and 246 deletions

View File

@@ -250,7 +250,7 @@ start_embedding() {
echo -e "${BLUE}🚀 启动 llama.cpp Embedding 容器...${NC}"
# 检查模型文件
if [ ! -f "/home/huang/Study/AIModel/GGUF/embeddinggemma-300M-Q8_0.gguf" ]; then
if [ ! -f "/home/huang/Study/AIModel/GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf" ]; then
echo -e "${RED}✗ 错误Embedding 模型文件不存在${NC}"
exit 1
fi
@@ -263,13 +263,16 @@ start_embedding() {
--device=/dev/dri \
-v /home/huang/Study/AIModel/GGUF:/models \
-p 8082:8080 \
-e LLAMA_ARG_CTX_SIZE=16384 \
-e LLAMA_ARG_N_PARALLEL=1 \
-e LLAMA_ARG_BATCH=512 \
-e LLAMA_ARG_N_GPU_LAYERS=99 \
-e LLAMA_ARG_API_KEY=huang1998 \
ghcr.io/ggml-org/llama.cpp:server-rocm \
-m /models/embeddinggemma-300M-Q8_0.gguf \
-m /models/Qwen3-Embedding-0.6B-Q8_0.gguf \
--host 0.0.0.0 \
--port 8080 \
-ngl 99 \
--embeddings \
-c 512
--embeddings
echo -e "${GREEN}✓ llama.cpp Embedding 容器已启动 (端口 8082)${NC}"
sleep 5