Instructions to use nvidia/NVIDIA-Nemotron-Parse-v1.2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use nvidia/NVIDIA-Nemotron-Parse-v1.2 with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

# NOTE(review): trust_remote_code=True presumably lets Transformers load and run
# the custom model code shipped in the Hub repo — review that code before running.
pipe = pipeline("image-text-to-text", model="nvidia/NVIDIA-Nemotron-Parse-v1.2", trust_remote_code=True)
# A single user turn made of two content parts: an image given by URL and a
# text question about it.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
# Run the pipeline on the chat-style message list.
pipe(text=messages)

# Load model directly
from transformers import AutoModel
# NOTE(review): dtype="auto" presumably defers the weight dtype to the
# checkpoint/config — confirm against the installed Transformers version.
model = AutoModel.from_pretrained("nvidia/NVIDIA-Nemotron-Parse-v1.2", trust_remote_code=True, dtype="auto")

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use nvidia/NVIDIA-Nemotron-Parse-v1.2 with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "nvidia/NVIDIA-Nemotron-Parse-v1.2"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "nvidia/NVIDIA-Nemotron-Parse-v1.2",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker

docker model run hf.co/nvidia/NVIDIA-Nemotron-Parse-v1.2

SGLang

How to use nvidia/NVIDIA-Nemotron-Parse-v1.2 with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "nvidia/NVIDIA-Nemotron-Parse-v1.2" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "nvidia/NVIDIA-Nemotron-Parse-v1.2",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "nvidia/NVIDIA-Nemotron-Parse-v1.2" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "nvidia/NVIDIA-Nemotron-Parse-v1.2",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Docker Model Runner
How to use nvidia/NVIDIA-Nemotron-Parse-v1.2 with Docker Model Runner:
```
docker model run hf.co/nvidia/NVIDIA-Nemotron-Parse-v1.2
```

NVIDIA-Nemotron-Parse-v1.2 / vllm_example.py

katerynaCh

Upload folder using huggingface_hub

9591148 verified 3 months ago

raw

history blame

6.49 kB

	import argparse
	import base64
	import os
	from concurrent.futures import ThreadPoolExecutor, as_completed
	from dataclasses import dataclass
	from pathlib import Path
	from typing import Any, Dict, Iterable, List, Optional, Tuple

	from openai import OpenAI


	def _guess_mime(path: str) -> str:
	ext = Path(path).suffix.lower().lstrip(".")
	if ext in ("jpg", "jpeg"):
	return "image/jpeg"
	if ext in ("webp",):
	return "image/webp"
	# default
	return "image/png"


	def _b64_image_data_url(path: str) -> str:
	    """Read the file at ``path`` and return its bytes as a base64 ``data:`` URL.

	    The MIME type placed in the URL is whatever ``_guess_mime(path)`` returns.
	    """
	    payload = Path(path).read_bytes()
	    encoded = base64.b64encode(payload).decode("utf-8")
	    return f"data:{_guess_mime(path)};base64,{encoded}"


	def _iter_images(paths: List[str], image_dir: Optional[str]) -> List[str]:
	out: List[str] = []
	for p in paths:
	out.append(p)
	if image_dir:
	for ext in (".png", ".jpg", ".jpeg", ".webp"):
	out.extend([str(x) for x in sorted(Path(image_dir).glob(ext))])
	# De-dupe, keep order
	seen = set()
	deduped: List[str] = []
	for p in out:
	if p in seen:
	continue
	seen.add(p)
	deduped.append(p)
	return deduped


	@dataclass(frozen=True)
	class _ReqSpec:
	image_path: str
	request_idx: int


	def _make_client(base_url: str) -> OpenAI:
	    """Build an ``OpenAI`` client that talks to the server at ``base_url``.

	    The API key is taken from the ``OPENAI_API_KEY`` environment variable,
	    falling back to the placeholder ``"EMPTY"`` when it is unset.
	    """
	    # openai>=1.x requires an API key; vLLM ignores it by default.
	    return OpenAI(
	        base_url=base_url,
	        api_key=os.environ.get("OPENAI_API_KEY", "EMPTY"),
	    )


	def _run_one(
	    req: _ReqSpec,
	    *,
	    base_url: str,
	    model: str,
	    prompt_text: str,
	    max_tokens: int,
	    temperature: float,
	    extra_body: Dict[str, Any],
	) -> Tuple[_ReqSpec, str]:
	    """Send one image plus prompt as a chat completion and return the reply.

	    Args:
	        req: The request to run; its ``image_path`` is read and inlined as a
	            base64 data URL.
	        base_url: Base URL of the OpenAI-compatible server.
	        model: Model name passed to the server.
	        prompt_text: Text part of the user message.
	        max_tokens: Forwarded to the completion call.
	        temperature: Forwarded to the completion call.
	        extra_body: Extra request fields forwarded via ``extra_body``.

	    Returns:
	        ``(req, text)`` where ``text`` is the content of the first choice,
	        or ``""`` when that content is empty or missing.
	    """
	    client = _make_client(base_url)
	    data_url = _b64_image_data_url(req.image_path)
	    user_message = {
	        "role": "user",
	        "content": [
	            {"type": "text", "text": prompt_text},
	            {"type": "image_url", "image_url": {"url": data_url}},
	        ],
	    }
	    completion = client.chat.completions.create(
	        model=model,
	        messages=[user_message],
	        max_tokens=max_tokens,
	        temperature=temperature,
	        extra_body=extra_body,
	    )
	    reply = completion.choices[0].message.content
	    # Normalise a missing/empty reply to an empty string.
	    return req, reply or ""


	def _maybe_annotate(image_path: str, generated_text: str, out_image_path: str) -> None:
	    """Draw the boxes found in ``generated_text`` onto the image and save it.

	    The image at ``image_path`` is opened as RGB, every bounding box
	    extracted from ``generated_text`` is mapped to the image's size and
	    outlined in red, and the result is written to ``out_image_path``.
	    """
	    # Optional visualization (similar to example_with_table_processor.py).
	    # Local import so batching can run without pillow.
	    from PIL import Image, ImageDraw

	    from postprocessing import extract_classes_bboxes, postprocess_text, transform_bbox_to_original

	    canvas = Image.open(image_path).convert("RGB")

	    classes, raw_boxes, texts = extract_classes_bboxes(generated_text)
	    boxes = [
	        transform_bbox_to_original(raw_box, canvas.width, canvas.height)
	        for raw_box in raw_boxes
	    ]

	    table_format = "HTML"  # latex | HTML | markdown
	    text_format = "markdown"  # markdown | plain
	    blank_text_in_figures = False

	    # Post-process each text segment; the results are not used further here.
	    for text, cls in zip(texts, classes):
	        postprocess_text(
	            text,
	            cls=cls,
	            table_format=table_format,
	            text_format=text_format,
	            blank_text_in_figures=blank_text_in_figures,
	        )

	    pen = ImageDraw.Draw(canvas)
	    for box in boxes:
	        left, top = box[0], box[1]
	        # Clamp so the far corner never precedes the near corner.
	        right = max(left, box[2])
	        bottom = max(top, box[3])
	        pen.rectangle((left, top, right, bottom), outline="red", width=2)

	    canvas.save(out_image_path)


	def main() -> None:
	    """Command-line entry point: run every image through the server and save outputs.

	    Parses the CLI options, gathers the images, sends one request per image
	    through a thread pool, and for each reply writes
	    ``<out-dir>/<idx>_<image name>.txt``. When annotation is enabled an
	    ``.annotated.jpg`` with the drawn boxes is written alongside it. A
	    tab-separated ``summary.txt`` (image path, output path) is written last.

	    Raises:
	        SystemExit: If no images were supplied.
	    """
	    parser = argparse.ArgumentParser(description="vLLM OpenAI-compatible example (batch + .txt outputs).")
	    parser.add_argument("--base-url", default="http://localhost:8000/v1")
	    parser.add_argument("--model", default="nvidia/NVIDIA-Nemotron-Parse-v1.2")
	    parser.add_argument("--image", action="append", default=[], help="Image path (repeatable).")
	    parser.add_argument("--image-dir", default=None, help="Directory of images to run (png/jpg/jpeg/webp).")
	    parser.add_argument("--out-dir", default="vllm_outputs", help="Where to write .txt outputs.")
	    parser.add_argument("--concurrency", type=int, default=4, help="How many concurrent requests to send.")
	    parser.add_argument("--max-tokens", type=int, default=8994)
	    parser.add_argument("--temperature", type=float, default=0.0)
	    parser.add_argument(
	        "--annotate",
	        action=argparse.BooleanOptionalAction,
	        default=True,
	        help="Write annotated images with boxes to --out-dir (default: enabled). Use --no-annotate to disable.",
	    )
	    args = parser.parse_args()

	    image_paths = _iter_images(args.image, args.image_dir)
	    if not image_paths:
	        raise SystemExit("No images provided. Use --image PATH (repeatable) or --image-dir DIR.")

	    out_dir = Path(args.out_dir)
	    out_dir.mkdir(parents=True, exist_ok=True)

	    prompt_text = "</s><s><predict_bbox><predict_classes><output_markdown><predict_no_text_in_pic>"
	    # Alternative prompt (ends with <predict_text_in_pic> instead):
	    # prompt_text = "</s><s><predict_bbox><predict_classes><output_markdown><predict_text_in_pic>"

	    # Arguments shared by every request.
	    shared_kwargs = dict(
	        base_url=args.base_url,
	        model=args.model,
	        prompt_text=prompt_text,
	        max_tokens=args.max_tokens,
	        temperature=args.temperature,
	        extra_body={
	            "repetition_penalty": 1.1,
	            "top_k": 1,
	            "skip_special_tokens": False,
	        },
	    )

	    reqs: List[_ReqSpec] = [
	        _ReqSpec(image_path=img, request_idx=idx)
	        for idx, img in enumerate(image_paths)
	    ]

	    # Concurrency is the simplest way to make sure vLLM batches requests internally.
	    summary_lines: List[str] = []
	    with ThreadPoolExecutor(max_workers=max(1, args.concurrency)) as pool:
	        pending = [pool.submit(_run_one, r, **shared_kwargs) for r in reqs]
	        # Handle replies as they finish, not in submission order.
	        for done in as_completed(pending):
	            req, text = done.result()
	            stem = f"{req.request_idx:04d}_{Path(req.image_path).name}"
	            out_txt = out_dir / f"{stem}.txt"
	            out_txt.write_text(text, encoding="utf-8")
	            summary_lines.append(f"{req.image_path}\t{out_txt}")

	            if args.annotate:
	                out_img = out_dir / f"{stem}.annotated.jpg"
	                _maybe_annotate(req.image_path, text, str(out_img))

	    # Sort so the summary does not depend on completion order.
	    summary = "\n".join(sorted(summary_lines)) + "\n"
	    (out_dir / "summary.txt").write_text(summary, encoding="utf-8")


	# Run the CLI only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()