dacorvo HF Staff commited on
Commit
133a75e
·
verified ·
1 Parent(s): 36544c0

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +41 -0
  2. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/d2b71947244dfeb32006.json +87 -0
  3. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/7ce147881fb65af8c501.json +95 -0
  4. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4815ed12326933e8588b.json +95 -0
  5. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/qwen3/Qwen/Qwen3-Embedding-0.6B/d2b71947244dfeb32006.json +87 -0
  6. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/qwen3/Qwen/Qwen3-Embedding-4B/4815ed12326933e8588b.json +95 -0
  7. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/qwen3/Qwen/Qwen3-Embedding-8B/7ce147881fb65af8c501.json +95 -0
  8. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/edf2cd849e7234866f5f.json +87 -0
  9. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/4725d0f98e9d733a5354.json +62 -0
  10. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/79d1effc3dea92153467.json +62 -0
  11. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/dd5885d59f14f083843e.json +62 -0
  12. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/llama/llamafactory/tiny-random-Llama-3/79d1effc3dea92153467.json +62 -0
  13. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/edf2cd849e7234866f5f.json +87 -0
  14. neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/compile_flags.json +1 -0
  15. neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/model.done +0 -0
  16. neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/model.hlo_module.pb +3 -0
  17. neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/model.neff +0 -0
  18. neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/wrapped_neff.hlo +0 -0
  19. neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/compile_flags.json +1 -0
  20. neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.done +0 -0
  21. neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.hlo_module.pb +3 -0
  22. neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.neff +3 -0
  23. neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/wrapped_neff.hlo +3 -0
  24. neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/compile_flags.json +1 -0
  25. neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.done +0 -0
  26. neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.hlo_module.pb +3 -0
  27. neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff +0 -0
  28. neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/compile_flags.json +1 -0
  29. neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.done +0 -0
  30. neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.hlo_module.pb +3 -0
  31. neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff +0 -0
  32. neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/compile_flags.json +1 -0
  33. neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.done +0 -0
  34. neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.hlo_module.pb +3 -0
  35. neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff +0 -0
  36. neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/compile_flags.json +1 -0
  37. neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.done +0 -0
  38. neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.hlo_module.pb +3 -0
  39. neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff +0 -0
  40. neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/compile_flags.json +1 -0
  41. neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.done +0 -0
  42. neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.hlo_module.pb +3 -0
  43. neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff +0 -0
  44. neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/compile_flags.json +1 -0
  45. neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.done +0 -0
  46. neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.hlo_module.pb +3 -0
  47. neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff +0 -0
  48. neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/compile_flags.json +1 -0
  49. neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.done +0 -0
  50. neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.hlo_module.pb +3 -0
.gitattributes CHANGED
@@ -6527,3 +6527,44 @@ neuronxcc-2.21.33363.0+82129205/MODULE_8221872293709102527+fad94d7c/model.neff f
6527
  neuronxcc-2.21.33363.0+82129205/MODULE_e521a14f8c961dcc16f7+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
6528
  neuronxcc-2.21.33363.0+82129205/MODULE_5d75eac36946f6ceb5eb+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
6529
  neuronxcc-2.21.33363.0+82129205/MODULE_066fa9e2a211ec056b7c+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6527
  neuronxcc-2.21.33363.0+82129205/MODULE_e521a14f8c961dcc16f7+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
6528
  neuronxcc-2.21.33363.0+82129205/MODULE_5d75eac36946f6ceb5eb+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
6529
  neuronxcc-2.21.33363.0+82129205/MODULE_066fa9e2a211ec056b7c+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
6530
+ neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.neff filter=lfs diff=lfs merge=lfs -text
6531
+ neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6532
+ neuronxcc-2.21.33363.0+82129205/MODULE_2848c32ef7df1e905c25+4c66bb54/model.neff filter=lfs diff=lfs merge=lfs -text
6533
+ neuronxcc-2.21.33363.0+82129205/MODULE_2848c32ef7df1e905c25+4c66bb54/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6534
+ neuronxcc-2.21.33363.0+82129205/MODULE_2d7bd8426bc64772d217+3ac48cd0/model.neff filter=lfs diff=lfs merge=lfs -text
6535
+ neuronxcc-2.21.33363.0+82129205/MODULE_2d7bd8426bc64772d217+3ac48cd0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6536
+ neuronxcc-2.21.33363.0+82129205/MODULE_38bbf770dc53dec9b3ad+ab05f199/model.neff filter=lfs diff=lfs merge=lfs -text
6537
+ neuronxcc-2.21.33363.0+82129205/MODULE_38bbf770dc53dec9b3ad+ab05f199/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6538
+ neuronxcc-2.21.33363.0+82129205/MODULE_3ba57e5cd85f7007611f+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
6539
+ neuronxcc-2.21.33363.0+82129205/MODULE_3bc44e72dfcbe8801f60+8731b5fe/model.neff filter=lfs diff=lfs merge=lfs -text
6540
+ neuronxcc-2.21.33363.0+82129205/MODULE_3bc44e72dfcbe8801f60+8731b5fe/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6541
+ neuronxcc-2.21.33363.0+82129205/MODULE_52d7e7948bb9b090f5a0+c6cd0101/model.neff filter=lfs diff=lfs merge=lfs -text
6542
+ neuronxcc-2.21.33363.0+82129205/MODULE_52d7e7948bb9b090f5a0+c6cd0101/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6543
+ neuronxcc-2.21.33363.0+82129205/MODULE_5b6831fd1af971b2989b+1f9760b2/model.neff filter=lfs diff=lfs merge=lfs -text
6544
+ neuronxcc-2.21.33363.0+82129205/MODULE_5b6831fd1af971b2989b+1f9760b2/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6545
+ neuronxcc-2.21.33363.0+82129205/MODULE_75515a835d4f41d9da4b+423750a9/model.neff filter=lfs diff=lfs merge=lfs -text
6546
+ neuronxcc-2.21.33363.0+82129205/MODULE_75515a835d4f41d9da4b+423750a9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6547
+ neuronxcc-2.21.33363.0+82129205/MODULE_82ceef97adb975a6d90e+aae46d5f/model.neff filter=lfs diff=lfs merge=lfs -text
6548
+ neuronxcc-2.21.33363.0+82129205/MODULE_82ceef97adb975a6d90e+aae46d5f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6549
+ neuronxcc-2.21.33363.0+82129205/MODULE_84fd59187fd6be117f3a+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
6550
+ neuronxcc-2.21.33363.0+82129205/MODULE_8c1d2cb1156a1d0a82e3+7b1e013e/model.neff filter=lfs diff=lfs merge=lfs -text
6551
+ neuronxcc-2.21.33363.0+82129205/MODULE_8c1d2cb1156a1d0a82e3+7b1e013e/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6552
+ neuronxcc-2.21.33363.0+82129205/MODULE_90b73fb771aa346bb48e+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
6553
+ neuronxcc-2.21.33363.0+82129205/MODULE_90b73fb771aa346bb48e+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6554
+ neuronxcc-2.21.33363.0+82129205/MODULE_9d7acc11312c2c31d32e+ac382b22/model.neff filter=lfs diff=lfs merge=lfs -text
6555
+ neuronxcc-2.21.33363.0+82129205/MODULE_9d7acc11312c2c31d32e+ac382b22/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6556
+ neuronxcc-2.21.33363.0+82129205/MODULE_9e5c74e26f840e51f8bf+390d6e68/model.neff filter=lfs diff=lfs merge=lfs -text
6557
+ neuronxcc-2.21.33363.0+82129205/MODULE_9e5c74e26f840e51f8bf+390d6e68/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6558
+ neuronxcc-2.21.33363.0+82129205/MODULE_b06034c7057d53a7c643+02353d68/model.neff filter=lfs diff=lfs merge=lfs -text
6559
+ neuronxcc-2.21.33363.0+82129205/MODULE_b06034c7057d53a7c643+02353d68/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6560
+ neuronxcc-2.21.33363.0+82129205/MODULE_bda1a1078d90ee07a4bc+dfab41ff/model.neff filter=lfs diff=lfs merge=lfs -text
6561
+ neuronxcc-2.21.33363.0+82129205/MODULE_bda1a1078d90ee07a4bc+dfab41ff/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6562
+ neuronxcc-2.21.33363.0+82129205/MODULE_dd8016818616bef1560e+504b22d7/model.neff filter=lfs diff=lfs merge=lfs -text
6563
+ neuronxcc-2.21.33363.0+82129205/MODULE_dd8016818616bef1560e+504b22d7/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6564
+ neuronxcc-2.21.33363.0+82129205/MODULE_e7fddd20b107d5347811+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
6565
+ neuronxcc-2.21.33363.0+82129205/MODULE_f56ba7a38230d2656ddd+fb50064a/model.neff filter=lfs diff=lfs merge=lfs -text
6566
+ neuronxcc-2.21.33363.0+82129205/MODULE_f56ba7a38230d2656ddd+fb50064a/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6567
+ neuronxcc-2.21.33363.0+82129205/MODULE_fe766d9ae8b638251045+301c8a7e/model.neff filter=lfs diff=lfs merge=lfs -text
6568
+ neuronxcc-2.21.33363.0+82129205/MODULE_fe766d9ae8b638251045+301c8a7e/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
6569
+ neuronxcc-2.21.33363.0+82129205/MODULE_fe9e0a803d755853d3c7+60b6a716/model.neff filter=lfs diff=lfs merge=lfs -text
6570
+ neuronxcc-2.21.33363.0+82129205/MODULE_fe9e0a803d755853d3c7+60b6a716/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/d2b71947244dfeb32006.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 32768,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 1,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
54
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 1,
60
+ "max_batch_size": 1,
61
+ "max_context_length": 1024,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 1024,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.5",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "sequence_length": 1024,
70
+ "speculation_length": 0,
71
+ "start_rank_id": 0,
72
+ "target": "trn1",
73
+ "torch_dtype": "bfloat16",
74
+ "tp_degree": 1
75
+ },
76
+ "num_attention_heads": 16,
77
+ "num_hidden_layers": 28,
78
+ "num_key_value_heads": 8,
79
+ "rms_norm_eps": 1e-06,
80
+ "rope_scaling": null,
81
+ "rope_theta": 1000000,
82
+ "sliding_window": null,
83
+ "tie_word_embeddings": true,
84
+ "use_cache": true,
85
+ "use_sliding_window": false,
86
+ "vocab_size": 151669
87
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/7ce147881fb65af8c501.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-8B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 4096,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 12288,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention"
53
+ ],
54
+ "max_position_embeddings": 40960,
55
+ "max_window_layers": 36,
56
+ "model_type": "qwen3",
57
+ "neuron": {
58
+ "_serialized_key": "NxDNeuronConfig",
59
+ "batch_size": 1,
60
+ "capacity_factor": null,
61
+ "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
62
+ "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
63
+ "continuous_batching": false,
64
+ "ep_degree": 1,
65
+ "fused_qkv": true,
66
+ "glu_mlp": true,
67
+ "local_ranks_size": 1,
68
+ "max_batch_size": 1,
69
+ "max_context_length": 1024,
70
+ "max_topk": 256,
71
+ "n_active_tokens": 1024,
72
+ "neuronxcc_version": "2.21.33363.0+82129205",
73
+ "on_device_sampling": false,
74
+ "optimum_neuron_version": "0.4.5",
75
+ "output_logits": false,
76
+ "pp_degree": 1,
77
+ "sequence_length": 1024,
78
+ "speculation_length": 0,
79
+ "start_rank_id": 0,
80
+ "target": "trn1",
81
+ "torch_dtype": "bfloat16",
82
+ "tp_degree": 1
83
+ },
84
+ "num_attention_heads": 32,
85
+ "num_hidden_layers": 36,
86
+ "num_key_value_heads": 8,
87
+ "rms_norm_eps": 1e-06,
88
+ "rope_scaling": null,
89
+ "rope_theta": 1000000,
90
+ "sliding_window": null,
91
+ "tie_word_embeddings": false,
92
+ "use_cache": true,
93
+ "use_sliding_window": false,
94
+ "vocab_size": 151665
95
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4815ed12326933e8588b.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-4B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2560,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 9728,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention"
53
+ ],
54
+ "max_position_embeddings": 40960,
55
+ "max_window_layers": 36,
56
+ "model_type": "qwen3",
57
+ "neuron": {
58
+ "_serialized_key": "NxDNeuronConfig",
59
+ "batch_size": 4,
60
+ "capacity_factor": null,
61
+ "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
62
+ "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
63
+ "continuous_batching": false,
64
+ "ep_degree": 1,
65
+ "fused_qkv": true,
66
+ "glu_mlp": true,
67
+ "local_ranks_size": 2,
68
+ "max_batch_size": 4,
69
+ "max_context_length": 1024,
70
+ "max_topk": 256,
71
+ "n_active_tokens": 1024,
72
+ "neuronxcc_version": "2.21.33363.0+82129205",
73
+ "on_device_sampling": false,
74
+ "optimum_neuron_version": "0.4.5",
75
+ "output_logits": false,
76
+ "pp_degree": 1,
77
+ "sequence_length": 1024,
78
+ "speculation_length": 0,
79
+ "start_rank_id": 0,
80
+ "target": "trn1",
81
+ "torch_dtype": "bfloat16",
82
+ "tp_degree": 2
83
+ },
84
+ "num_attention_heads": 32,
85
+ "num_hidden_layers": 36,
86
+ "num_key_value_heads": 8,
87
+ "rms_norm_eps": 1e-06,
88
+ "rope_scaling": null,
89
+ "rope_theta": 1000000,
90
+ "sliding_window": null,
91
+ "tie_word_embeddings": true,
92
+ "use_cache": true,
93
+ "use_sliding_window": false,
94
+ "vocab_size": 151665
95
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/qwen3/Qwen/Qwen3-Embedding-0.6B/d2b71947244dfeb32006.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 32768,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 1,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
54
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 1,
60
+ "max_batch_size": 1,
61
+ "max_context_length": 1024,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 1024,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.5",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "sequence_length": 1024,
70
+ "speculation_length": 0,
71
+ "start_rank_id": 0,
72
+ "target": "trn1",
73
+ "torch_dtype": "bfloat16",
74
+ "tp_degree": 1
75
+ },
76
+ "num_attention_heads": 16,
77
+ "num_hidden_layers": 28,
78
+ "num_key_value_heads": 8,
79
+ "rms_norm_eps": 1e-06,
80
+ "rope_scaling": null,
81
+ "rope_theta": 1000000,
82
+ "sliding_window": null,
83
+ "tie_word_embeddings": true,
84
+ "use_cache": true,
85
+ "use_sliding_window": false,
86
+ "vocab_size": 151669
87
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/qwen3/Qwen/Qwen3-Embedding-4B/4815ed12326933e8588b.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-4B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2560,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 9728,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention"
53
+ ],
54
+ "max_position_embeddings": 40960,
55
+ "max_window_layers": 36,
56
+ "model_type": "qwen3",
57
+ "neuron": {
58
+ "_serialized_key": "NxDNeuronConfig",
59
+ "batch_size": 4,
60
+ "capacity_factor": null,
61
+ "checkpoint_id": "Qwen/Qwen3-Embedding-4B",
62
+ "checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
63
+ "continuous_batching": false,
64
+ "ep_degree": 1,
65
+ "fused_qkv": true,
66
+ "glu_mlp": true,
67
+ "local_ranks_size": 2,
68
+ "max_batch_size": 4,
69
+ "max_context_length": 1024,
70
+ "max_topk": 256,
71
+ "n_active_tokens": 1024,
72
+ "neuronxcc_version": "2.21.33363.0+82129205",
73
+ "on_device_sampling": false,
74
+ "optimum_neuron_version": "0.4.5",
75
+ "output_logits": false,
76
+ "pp_degree": 1,
77
+ "sequence_length": 1024,
78
+ "speculation_length": 0,
79
+ "start_rank_id": 0,
80
+ "target": "trn1",
81
+ "torch_dtype": "bfloat16",
82
+ "tp_degree": 2
83
+ },
84
+ "num_attention_heads": 32,
85
+ "num_hidden_layers": 36,
86
+ "num_key_value_heads": 8,
87
+ "rms_norm_eps": 1e-06,
88
+ "rope_scaling": null,
89
+ "rope_theta": 1000000,
90
+ "sliding_window": null,
91
+ "tie_word_embeddings": true,
92
+ "use_cache": true,
93
+ "use_sliding_window": false,
94
+ "vocab_size": 151665
95
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/qwen3/Qwen/Qwen3-Embedding-8B/7ce147881fb65af8c501.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-8B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 4096,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 12288,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention"
53
+ ],
54
+ "max_position_embeddings": 40960,
55
+ "max_window_layers": 36,
56
+ "model_type": "qwen3",
57
+ "neuron": {
58
+ "_serialized_key": "NxDNeuronConfig",
59
+ "batch_size": 1,
60
+ "capacity_factor": null,
61
+ "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
62
+ "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
63
+ "continuous_batching": false,
64
+ "ep_degree": 1,
65
+ "fused_qkv": true,
66
+ "glu_mlp": true,
67
+ "local_ranks_size": 1,
68
+ "max_batch_size": 1,
69
+ "max_context_length": 1024,
70
+ "max_topk": 256,
71
+ "n_active_tokens": 1024,
72
+ "neuronxcc_version": "2.21.33363.0+82129205",
73
+ "on_device_sampling": false,
74
+ "optimum_neuron_version": "0.4.5",
75
+ "output_logits": false,
76
+ "pp_degree": 1,
77
+ "sequence_length": 1024,
78
+ "speculation_length": 0,
79
+ "start_rank_id": 0,
80
+ "target": "trn1",
81
+ "torch_dtype": "bfloat16",
82
+ "tp_degree": 1
83
+ },
84
+ "num_attention_heads": 32,
85
+ "num_hidden_layers": 36,
86
+ "num_key_value_heads": 8,
87
+ "rms_norm_eps": 1e-06,
88
+ "rope_scaling": null,
89
+ "rope_theta": 1000000,
90
+ "sliding_window": null,
91
+ "tie_word_embeddings": false,
92
+ "use_cache": true,
93
+ "use_sliding_window": false,
94
+ "vocab_size": 151665
95
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/edf2cd849e7234866f5f.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 32768,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 4,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
54
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 2,
60
+ "max_batch_size": 4,
61
+ "max_context_length": 8192,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 8192,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.6.dev1",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "sequence_length": 8192,
70
+ "speculation_length": 0,
71
+ "start_rank_id": 0,
72
+ "target": "trn1",
73
+ "torch_dtype": "bfloat16",
74
+ "tp_degree": 2
75
+ },
76
+ "num_attention_heads": 16,
77
+ "num_hidden_layers": 28,
78
+ "num_key_value_heads": 8,
79
+ "rms_norm_eps": 1e-06,
80
+ "rope_scaling": null,
81
+ "rope_theta": 1000000,
82
+ "sliding_window": null,
83
+ "tie_word_embeddings": true,
84
+ "use_cache": true,
85
+ "use_sliding_window": false,
86
+ "vocab_size": 151669
87
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/4725d0f98e9d733a5354.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "float16",
11
+ "head_dim": 4,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 1,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
24
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
25
+ "continuous_batching": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 1,
31
+ "max_context_length": 512,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 512,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.6.dev1",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "sequence_length": 512,
40
+ "speculation_length": 0,
41
+ "start_rank_id": 0,
42
+ "target": "trn1",
43
+ "torch_dtype": "float16",
44
+ "tp_degree": 2
45
+ },
46
+ "num_attention_heads": 4,
47
+ "num_hidden_layers": 2,
48
+ "num_key_value_heads": 4,
49
+ "pretraining_tp": 1,
50
+ "rms_norm_eps": 1e-05,
51
+ "rope_scaling": {
52
+ "factor": 8.0,
53
+ "high_freq_factor": 4.0,
54
+ "low_freq_factor": 1.0,
55
+ "original_max_position_embeddings": 8192,
56
+ "rope_type": "llama3"
57
+ },
58
+ "rope_theta": 500000.0,
59
+ "tie_word_embeddings": false,
60
+ "use_cache": true,
61
+ "vocab_size": 128256
62
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/79d1effc3dea92153467.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "float16",
11
+ "head_dim": 4,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 1,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
24
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
25
+ "continuous_batching": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 1,
31
+ "max_context_length": 1024,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 1024,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.6.dev1",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "sequence_length": 1024,
40
+ "speculation_length": 0,
41
+ "start_rank_id": 0,
42
+ "target": "trn1",
43
+ "torch_dtype": "float16",
44
+ "tp_degree": 2
45
+ },
46
+ "num_attention_heads": 4,
47
+ "num_hidden_layers": 2,
48
+ "num_key_value_heads": 4,
49
+ "pretraining_tp": 1,
50
+ "rms_norm_eps": 1e-05,
51
+ "rope_scaling": {
52
+ "factor": 8.0,
53
+ "high_freq_factor": 4.0,
54
+ "low_freq_factor": 1.0,
55
+ "original_max_position_embeddings": 8192,
56
+ "rope_type": "llama3"
57
+ },
58
+ "rope_theta": 500000.0,
59
+ "tie_word_embeddings": false,
60
+ "use_cache": true,
61
+ "vocab_size": 128256
62
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/dd5885d59f14f083843e.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "float16",
11
+ "head_dim": 4,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 2,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
24
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
25
+ "continuous_batching": true,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 2,
31
+ "max_context_length": 128,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 128,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.6.dev1",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "sequence_length": 128,
40
+ "speculation_length": 0,
41
+ "start_rank_id": 0,
42
+ "target": "trn2",
43
+ "torch_dtype": "float16",
44
+ "tp_degree": 2
45
+ },
46
+ "num_attention_heads": 4,
47
+ "num_hidden_layers": 2,
48
+ "num_key_value_heads": 4,
49
+ "pretraining_tp": 1,
50
+ "rms_norm_eps": 1e-05,
51
+ "rope_scaling": {
52
+ "factor": 8.0,
53
+ "high_freq_factor": 4.0,
54
+ "low_freq_factor": 1.0,
55
+ "original_max_position_embeddings": 8192,
56
+ "rope_type": "llama3"
57
+ },
58
+ "rope_theta": 500000.0,
59
+ "tie_word_embeddings": false,
60
+ "use_cache": true,
61
+ "vocab_size": 128256
62
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/llama/llamafactory/tiny-random-Llama-3/79d1effc3dea92153467.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "float16",
11
+ "head_dim": 4,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 1,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
24
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
25
+ "continuous_batching": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 1,
31
+ "max_context_length": 1024,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 1024,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.6.dev1",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "sequence_length": 1024,
40
+ "speculation_length": 0,
41
+ "start_rank_id": 0,
42
+ "target": "trn1",
43
+ "torch_dtype": "float16",
44
+ "tp_degree": 2
45
+ },
46
+ "num_attention_heads": 4,
47
+ "num_hidden_layers": 2,
48
+ "num_key_value_heads": 4,
49
+ "pretraining_tp": 1,
50
+ "rms_norm_eps": 1e-05,
51
+ "rope_scaling": {
52
+ "factor": 8.0,
53
+ "high_freq_factor": 4.0,
54
+ "low_freq_factor": 1.0,
55
+ "original_max_position_embeddings": 8192,
56
+ "rope_type": "llama3"
57
+ },
58
+ "rope_theta": 500000.0,
59
+ "tie_word_embeddings": false,
60
+ "use_cache": true,
61
+ "vocab_size": 128256
62
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/edf2cd849e7234866f5f.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 32768,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 4,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
54
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 2,
60
+ "max_batch_size": 4,
61
+ "max_context_length": 8192,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 8192,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.6.dev1",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "sequence_length": 8192,
70
+ "speculation_length": 0,
71
+ "start_rank_id": 0,
72
+ "target": "trn1",
73
+ "torch_dtype": "bfloat16",
74
+ "tp_degree": 2
75
+ },
76
+ "num_attention_heads": 16,
77
+ "num_hidden_layers": 28,
78
+ "num_key_value_heads": 8,
79
+ "rms_norm_eps": 1e-06,
80
+ "rope_scaling": null,
81
+ "rope_theta": 1000000,
82
+ "sliding_window": null,
83
+ "tie_word_embeddings": true,
84
+ "use_cache": true,
85
+ "use_sliding_window": false,
86
+ "vocab_size": 151669
87
+ }
neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_2e06d251-55b0-4587-80ed-65525f987744/compiler_workdir/ChunkedPrefillAttentionModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f759930af897434853a4b2faf85253d6dee4aa57720d02fd5206dc173982e14b
3
+ size 16230
neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/model.neff ADDED
Binary file (72.7 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/wrapped_neff.hlo ADDED
Binary file (76 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_6ca317f1-4eb2-420e-ae59-e329a688937a/compiler_workdir/NeuronLlamaMLP/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:993f99d2d09d34bd152af4c7f96a1e6e1d8788e1cd4aefb845c601d2f4d5fcfb
3
+ size 1931
neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cce4a92dd89a520b72d3146ca2905b351f9fc41ea2c6420132d0e212c370fa2
3
+ size 134144
neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:005209541674e03d171a4d2bb9d3dbe7429f18eae8814dbc87f869181bc9fdd4
3
+ size 136222
neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1"]
neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d578702d0ba7a5003e02fbdec8b30f3cdaa8d03fdd29311f557bbda052ff1839
3
+ size 1121
neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff ADDED
Binary file (31.7 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1"]
neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0113611968c8ed0cbeaea5ff3bba2ce4eecd5fc989e2592c6b8ac5fdf09c91c2
3
+ size 1562
neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff ADDED
Binary file (31.7 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1"]
neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a35335d3e96ee86956cdf93862bf42ae6f83017e9b331b77531ad6942a62366d
3
+ size 1269
neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff ADDED
Binary file (31.7 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1"]
neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:153fdddade7e1b284f8d78df84523bd332eccc7ddfc7a0698ad8653d560c55ca
3
+ size 1124
neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff ADDED
Binary file (31.7 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1"]
neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7434cf1ede5dc6759a8ad5cdc472fb8e1668b1a8bd8dd7e7e08b5c09401b8065
3
+ size 1562
neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff ADDED
Binary file (31.7 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1"]
neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7000c9195e6c3c9abfcbac09928468ef4c204488094a046ec7abfd1b7ea8657
3
+ size 1563
neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff ADDED
Binary file (31.7 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1"]
neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fcfd4f1816e23bee263009b76a032d3de2454e4937f73423fade31c7a0f0278
3
+ size 1562