whisper-large-v3

@@ -390,6 +390,7 @@ model = AutoModelForSpeechSeq2Seq.from_pretrained(
 # Enable static cache and compile the forward pass
 model.generation_config.cache_implementation = "static"
 model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
 processor = AutoProcessor.from_pretrained(model_id)
@@ -409,7 +410,7 @@ sample = dataset[0]["audio"]
 # 2 warmup steps
 for _ in tqdm(range(2), desc="Warm-up step"):
     with sdpa_kernel(SDPBackend.MATH):
-        result = pipe(sample.copy())
 # fast run
 with sdpa_kernel(SDPBackend.MATH):

 # Enable static cache and compile the forward pass
 model.generation_config.cache_implementation = "static"
+model.generation_config.max_new_tokens = 256
 model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
 processor = AutoProcessor.from_pretrained(model_id)
 # 2 warmup steps
 for _ in tqdm(range(2), desc="Warm-up step"):
     with sdpa_kernel(SDPBackend.MATH):
+        result = pipe(sample.copy(), generate_kwargs={"min_new_tokens": 256, "max_new_tokens": 256})
 # fast run
 with sdpa_kernel(SDPBackend.MATH):