Instructions to use happyme531/InternVL3_5-2B-RKLLM with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- RKLLM
How to use happyme531/InternVL3_5-2B-RKLLM with RKLLM:
# No code snippets are available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| import torch | |
| import json | |
| import os | |
| from transformers import AutoConfig, Qwen3ForCausalLM, AutoTokenizer | |
| from rkllm.api import RKLLM | |
| import argparse | |
| import shutil | |
| from pathlib import Path | |
| from typing import Dict | |
| import torch | |
| from safetensors.torch import load_file | |
| from transformers import AutoConfig, AutoModelForCausalLM | |
# File names that copy_tokenizer_files() looks for in the source checkpoint
# directory; any that are absent there are skipped.
TOKENIZER_FILES = [
    "tokenizer.json",
    "tokenizer_config.json",
    "special_tokens_map.json",
    "added_tokens.json",
    "vocab.json",
    "merges.txt",
    "chat_template.jinja",
]
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the export script.

    Options:
        --source: InternVL (HF-format) checkpoint directory
            (default: the current directory).
        --output: directory that receives the extracted Qwen3 checkpoint
            (default: ``llm/``).
        --safe-serialization / --no-safe-serialization: whether the
            exported model is saved as safetensors (default: enabled).

    Returns:
        The parsed namespace with ``source`` and ``output`` as ``Path``
        objects and ``safe_serialization`` as a ``bool``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--source",
        type=Path,
        default=".",
        help="Path to the InternVL (HF-format) checkpoint directory, e.g. /path/to/InternVL3_5-2B-HF",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default="llm/",
        help="Directory where the extracted Qwen3 checkpoint will be written",
    )
    # The previous ``store_true`` + ``default=True`` combination could never
    # yield False. BooleanOptionalAction (Python 3.9+) keeps the existing
    # flag and default while adding a ``--no-safe-serialization`` opt-out.
    parser.add_argument(
        "--safe-serialization",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Save the exported model using safetensors instead of PyTorch binaries.",
    )
    return parser.parse_args()
def extract_text_state_dict(full_state: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    """Select the language-model entries of a checkpoint and rename their keys.

    Keys starting with ``language_model.model.`` are re-rooted under
    ``model.`` and keys starting with ``language_model.lm_head.`` under
    ``lm_head.``; every other entry is dropped. Values are passed through
    untouched and the input mapping is not modified.

    Args:
        full_state: Mapping from parameter name to tensor for the whole
            checkpoint.

    Returns:
        A new dict holding only the renamed language-model entries, in the
        order they appear in ``full_state``.

    Raises:
        ValueError: If no key carries either language-model prefix.
    """
    # (prefix in the checkpoint, prefix in the exported model), tried in order.
    renames = (
        ("language_model.model.", "model."),
        ("language_model.lm_head.", "lm_head."),
    )
    selected: Dict[str, torch.Tensor] = {}
    for name, weight in full_state.items():
        for old_root, new_root in renames:
            if name.startswith(old_root):
                selected[new_root + name[len(old_root):]] = weight
                break
    if len(selected) == 0:
        raise ValueError("Did not find any language_model weights in checkpoint; is this an InternVL model?")
    return selected
def copy_tokenizer_files(source_dir: Path, output_dir: Path) -> None:
    """Copy the tokenizer assets present in ``source_dir`` into ``output_dir``.

    Each name listed in ``TOKENIZER_FILES`` is looked up in ``source_dir``;
    names that do not exist there are silently skipped, the rest are copied
    under the same file name.

    Args:
        source_dir: Directory searched for the tokenizer files.
        output_dir: Directory the found files are copied into.
    """
    for asset_name in TOKENIZER_FILES:
        candidate = source_dir / asset_name
        if not candidate.exists():
            continue
        shutil.copyfile(candidate, output_dir / asset_name)
def _export_text_model(source_dir: Path, output_dir: Path, safe_serialization: bool) -> None:
    """Extract the language model from the checkpoint in ``source_dir`` and save it to ``output_dir``."""
    config = AutoConfig.from_pretrained(source_dir, trust_remote_code=True)
    text_config = config.text_config

    weights_path = source_dir / "model.safetensors"
    if not weights_path.exists():
        raise FileNotFoundError(f"Could not find {weights_path}; expected a safetensors checkpoint")
    all_weights = load_file(weights_path)
    text_state = extract_text_state_dict(all_weights)

    # Instantiate the text model in the dtype of the stored weights (taken
    # from the first extracted tensor).
    target_dtype = next(iter(text_state.values())).dtype
    text_model = AutoModelForCausalLM.from_config(text_config)
    text_model = text_model.to(dtype=target_dtype, device=torch.device("cpu"))

    # strict=False only so that both mismatch lists can be reported together;
    # any missing or unexpected key is still treated as fatal.
    missing, unexpected = text_model.load_state_dict(text_state, strict=False)
    if missing or unexpected:
        raise RuntimeError(
            "State dict mismatch when loading text weights: "
            f"missing={missing}, unexpected={unexpected}"
        )

    text_config.save_pretrained(output_dir)
    text_model.generation_config.save_pretrained(output_dir)
    text_model.save_pretrained(output_dir, safe_serialization=safe_serialization)
    copy_tokenizer_files(source_dir, output_dir)
    print(f"Exported Qwen3 model saved to {output_dir}")


def _exit_on_rkllm_failure(ret: int, message: str) -> None:
    """Print ``message`` and exit with status ``ret`` when ``ret`` is non-zero."""
    if ret != 0:
        print(message)
        # Raise SystemExit directly: the ``exit()`` builtin is injected by
        # the ``site`` module and is not guaranteed to exist (e.g. under
        # ``python -S`` or in frozen builds).
        raise SystemExit(ret)


def _convert_to_rkllm(model_dir: Path) -> None:
    """Load the exported model from ``model_dir``, build it with RKLLM and export the .rkllm file."""
    llm = RKLLM()
    ret = llm.load_huggingface(model=model_dir, model_lora=None, device='cpu')
    _exit_on_rkllm_failure(ret, 'Load model failed!')

    qparams = None
    ret = llm.build(do_quantization=True, optimization_level=1, quantized_dtype='w8a8',
                    quantized_algorithm='normal', target_platform='rk3588', num_npu_core=3,
                    extra_qparams=qparams)
    _exit_on_rkllm_failure(ret, 'Build model failed!')

    # Export rkllm model (path is relative to the current working directory).
    ret = llm.export_rkllm("./language_model_w8a8.rkllm")
    _exit_on_rkllm_failure(ret, 'Export model failed!')


def main() -> None:
    """Export the language model of an InternVL checkpoint and convert it to RKLLM.

    Reads ``--source``, ``--output`` and ``--safe-serialization`` from the
    command line, writes the standalone text model (config, generation
    config, weights, tokenizer files) into the output directory, then
    loads that directory with RKLLM, builds it with w8a8 quantization for
    the ``rk3588`` target and exports ``./language_model_w8a8.rkllm``.

    Raises:
        FileNotFoundError: If ``model.safetensors`` is missing in the source
            directory.
        RuntimeError: If the extracted weights do not match the text model.
        SystemExit: With the RKLLM return code if loading, building or
            exporting the RKLLM model reports a non-zero status.
    """
    args = parse_args()
    source_dir = args.source.expanduser().resolve()
    output_dir = args.output.expanduser().resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    _export_text_model(source_dir, output_dir, args.safe_serialization)
    _convert_to_rkllm(output_dir)


if __name__ == "__main__":
    main()