Training procedure

The following bitsandbytes quantization config was used during training:

Framework versions

额外说明

这是基于LLaMA使用QLoRA技术微调的一个适配器模型

# imports
from peft import PeftModel
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
import torch

# create tokenizer
base_model = "huggyllama/llama-7b"
tokenizer = LlamaTokenizer.from_pretrained(base_model)

# base model
model = LlamaForCausalLM.from_pretrained(
        base_model,
        torch_dtype=torch.float16,
        device_map="auto",
    )

# LORA PEFT adapters
adapter_model = "AtomGradient/adjust_llama-7b"

model = PeftModel.from_pretrained(
        model,
        adapter_model,
        #torch_dtype=torch.float16,
    )
model.eval()

# prompt
prompt = "美国的总统是谁"
inputs = tokenizer(prompt, return_tensors="pt")

# Generate
generate_ids = model.generate(**inputs, max_new_tokens=30)
print(tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0])