```python
[...]
# This ensures these packages are only loaded when the script runs remotely on Beam
if env.is_remote():
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from cot_decoder import cot_decode
    from question_classifier import get_k_value

# Model name and the volume path where the weights are cached
MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
CACHE_PATH = "./cached_models2"

# Load the model and tokenizer, reusing the volume cache when present
def load_models():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_PATH)
    tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, device_map="auto", torch_dtype=torch.float16, cache_dir=CACHE_PATH
    )
    return model, tokenizer

# Define the endpoint
# You can specify the CPU/memory/GPU and the container image
@endpoint(
    secrets=["HF_TOKEN"],
    on_start=load_models,  # load the model at startup so it is cached for later requests
    name="meta-llama-3-8b-instruct",
    cpu=2,
    memory="32Gi",
    gpu="A100-40",
    image=Image(
        python_version="python3.9",
        python_packages=["torch", "transformers", "accelerate"],
    ),
    volumes=[Volume(name="cached_models2", mount_path=CACHE_PATH)],
)
[...]
```
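
Once deployed (with Beam's CLI, e.g. `beam deploy app.py:<handler>`, after which Beam prints the live URL), the endpoint can be called over plain HTTP. The sketch below is a minimal client, not from the article: the URL, the auth token, and the `{"message": ...}` request schema are placeholder assumptions, since the actual handler body is elided above and the real URL and token are issued by Beam at deploy time.

```python
# Minimal client sketch for calling the deployed endpoint.
# BEAM_URL, BEAM_TOKEN, and the JSON payload are hypothetical placeholders;
# substitute the values Beam prints for your own deployment.
import requests

BEAM_URL = "https://app.beam.cloud/endpoint/meta-llama-3-8b-instruct/v1"  # placeholder
BEAM_TOKEN = "YOUR_BEAM_TOKEN"  # placeholder; issued by Beam

resp = requests.post(
    BEAM_URL,
    headers={
        "Authorization": f"Bearer {BEAM_TOKEN}",
        "Content-Type": "application/json",
    },
    json={"message": "What is 7 * 8 + 3?"},  # assumed request schema
    timeout=120,  # first call may be slow while the model loads
)
resp.raise_for_status()
print(resp.json())
```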