Model Spec Midtraining - General Spec
Collection
10 items • Updated
How to use chloeli/qwen-3-32b-general-spec-aft-cot with PEFT:
# Minimal PEFT example: load the base model here; the adapter is attached on the next line.
from peft import PeftModel
from transformers import AutoModelForCausalLM
# No dtype or device placement arguments are passed in this short form.
base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-32B")
model = PeftModel.from_pretrained(base_model, "chloeli/qwen-3-32b-general-spec-aft-cot")

A LoRA adapter for Qwen/Qwen3-32B, trained using alignment fine-tuning (AFT) only, with chain-of-thought.
# Inference example: apply the LoRA adapter to the base model and answer one chat turn.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL_ID = "Qwen/Qwen3-32B"
ADAPTER_ID = "chloeli/qwen-3-32b-general-spec-aft-cot"

# The tokenizer is loaded from the adapter repository, not the base model repository.
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_ID)

# Load the base weights first, then wrap them with the adapter.
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID, torch_dtype="auto", device_map="auto")
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)

# Render a single user message as a prompt string with the generation prompt appended.
messages = [{"role": "user", "content": "What matters most when making a difficult decision?"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Tokenize to PyTorch tensors, move them to the model's device, and generate up to 512 new tokens.
model_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
generated = model.generate(**model_inputs, max_new_tokens=512)

# Decode the first returned sequence with special tokens stripped.
print(tokenizer.decode(generated[0], skip_special_tokens=True))
# Merge example: fold the LoRA adapter into the base weights and save the result with its tokenizer.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL_ID = "Qwen/Qwen3-32B"
ADAPTER_ID = "chloeli/qwen-3-32b-general-spec-aft-cot"
MERGED_DIR = "qwen-3-32b-general-spec-aft-cot-merged"

# The base model is loaded with device_map="cpu" for the merge.
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID, torch_dtype="auto", device_map="cpu")
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)

# merge_and_unload() returns the model object that is written to MERGED_DIR.
merged_model = model.merge_and_unload()
merged_model.save_pretrained(MERGED_DIR)

# Save the adapter repository's tokenizer into the same directory as the merged weights.
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_ID)
tokenizer.save_pretrained(MERGED_DIR)
# vLLM example: run the base model with LoRA enabled and pass the adapter per request.
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

BASE_MODEL_ID = "Qwen/Qwen3-32B"
ADAPTER_ID = "chloeli/qwen-3-32b-general-spec-aft-cot"

# The engine is created with LoRA enabled and max_lora_rank=128.
llm = LLM(model=BASE_MODEL_ID, enable_lora=True, max_lora_rank=128)

# Positional arguments are kept exactly as in the original call: "adapter", 1, adapter repository ID.
lora_request = LoRARequest("adapter", 1, ADAPTER_ID)
sampling_params = SamplingParams(max_tokens=512)

# Generate for a single prompt with the adapter request attached.
output = llm.generate("What matters most?", sampling_params, lora_request=lora_request)
Base model
Qwen/Qwen3-32B