dacorvo HF Staff commited on
Commit
48af7da
·
verified ·
1 Parent(s): bf892d2

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +47 -0
  2. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/29f87381a199b99e2f80.json +82 -0
  3. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/63c09b6d5a07ca7c7660.json +88 -0
  4. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/7fc4b2b6168bb163363d.json +88 -0
  5. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/5ef0d470102a1386baac.json +63 -0
  6. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/17e5c162137a63b70992.json +64 -0
  7. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d75242397c3ceee988e7.json +64 -0
  8. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/gemma3_text/unsloth/gemma-3-270m-it/29f87381a199b99e2f80.json +82 -0
  9. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/d75242397c3ceee988e7.json +64 -0
  10. neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-0.6B/7fc4b2b6168bb163363d.json +88 -0
  11. neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/compile_flags.json +1 -0
  12. neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.done +0 -0
  13. neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.hlo_module.pb +3 -0
  14. neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.neff +3 -0
  15. neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/wrapped_neff.hlo +3 -0
  16. neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/compile_flags.json +1 -0
  17. neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/model.done +0 -0
  18. neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/model.hlo_module.pb +3 -0
  19. neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/model.neff +0 -0
  20. neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/compile_flags.json +1 -0
  21. neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/model.done +0 -0
  22. neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/model.hlo_module.pb +3 -0
  23. neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/model.neff +0 -0
  24. neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/compile_flags.json +1 -0
  25. neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/model.done +0 -0
  26. neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/model.hlo_module.pb +3 -0
  27. neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/model.neff +0 -0
  28. neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/compile_flags.json +1 -0
  29. neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/model.done +0 -0
  30. neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/model.hlo_module.pb +3 -0
  31. neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/model.neff +0 -0
  32. neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/compile_flags.json +1 -0
  33. neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/model.done +0 -0
  34. neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/model.hlo_module.pb +3 -0
  35. neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/model.neff +0 -0
  36. neuronxcc-2.21.33363.0+82129205/MODULE_11282920422705003560+fad94d7c/model.neff +0 -0
  37. neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/compile_flags.json +1 -0
  38. neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/model.done +0 -0
  39. neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/model.hlo_module.pb +3 -0
  40. neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/model.neff +0 -0
  41. neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/compile_flags.json +1 -0
  42. neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/model.done +0 -0
  43. neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/model.hlo_module.pb +3 -0
  44. neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/model.neff +0 -0
  45. neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/compile_flags.json +1 -0
  46. neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/model.done +0 -0
  47. neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/model.hlo_module.pb +3 -0
  48. neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/model.neff +0 -0
  49. neuronxcc-2.21.33363.0+82129205/MODULE_11546540121525495183+fad94d7c/compile_flags.json +1 -0
  50. neuronxcc-2.21.33363.0+82129205/MODULE_11546540121525495183+fad94d7c/model.done +0 -0
.gitattributes CHANGED
@@ -7410,3 +7410,50 @@ neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.neff
7410
  neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7411
  neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text
7412
  neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7410
  neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7411
  neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text
7412
  neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7413
+ neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
7414
+ neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7415
+ neuronxcc-2.21.33363.0+82129205/MODULE_17017202802755053176+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
7416
+ neuronxcc-2.21.33363.0+82129205/MODULE_2035c8ae39822a0f3cf7+1e18d200/model.neff filter=lfs diff=lfs merge=lfs -text
7417
+ neuronxcc-2.21.33363.0+82129205/MODULE_2035c8ae39822a0f3cf7+1e18d200/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7418
+ neuronxcc-2.21.33363.0+82129205/MODULE_213e0ff283ee069ea9da+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
7419
+ neuronxcc-2.21.33363.0+82129205/MODULE_213e0ff283ee069ea9da+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7420
+ neuronxcc-2.21.33363.0+82129205/MODULE_23643e05a738684e87ac+593aa068/model.neff filter=lfs diff=lfs merge=lfs -text
7421
+ neuronxcc-2.21.33363.0+82129205/MODULE_24c83ec8fce0272d4be2+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
7422
+ neuronxcc-2.21.33363.0+82129205/MODULE_2615490115801662312+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
7423
+ neuronxcc-2.21.33363.0+82129205/MODULE_36af05b968af9541168f+e9a07323/model.neff filter=lfs diff=lfs merge=lfs -text
7424
+ neuronxcc-2.21.33363.0+82129205/MODULE_36af05b968af9541168f+e9a07323/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7425
+ neuronxcc-2.21.33363.0+82129205/MODULE_4150760178be1695737a+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
7426
+ neuronxcc-2.21.33363.0+82129205/MODULE_4150760178be1695737a+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7427
+ neuronxcc-2.21.33363.0+82129205/MODULE_5153839984625614734+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
7428
+ neuronxcc-2.21.33363.0+82129205/MODULE_69d96c640a251d144e75+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
7429
+ neuronxcc-2.21.33363.0+82129205/MODULE_69d96c640a251d144e75+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7430
+ neuronxcc-2.21.33363.0+82129205/MODULE_91bf3670a4b7a485f89a+ce72cbff/model.neff filter=lfs diff=lfs merge=lfs -text
7431
+ neuronxcc-2.21.33363.0+82129205/MODULE_91bf3670a4b7a485f89a+ce72cbff/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7432
+ neuronxcc-2.21.33363.0+82129205/MODULE_9595602990717811116+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
7433
+ neuronxcc-2.21.33363.0+82129205/MODULE_9ebb65ad78df0dfd5676+4c44a4bd/model.neff filter=lfs diff=lfs merge=lfs -text
7434
+ neuronxcc-2.21.33363.0+82129205/MODULE_9ebb65ad78df0dfd5676+4c44a4bd/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7435
+ neuronxcc-2.21.33363.0+82129205/MODULE_a19be04ba5877e327147+9c7c756b/model.neff filter=lfs diff=lfs merge=lfs -text
7436
+ neuronxcc-2.21.33363.0+82129205/MODULE_a19be04ba5877e327147+9c7c756b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7437
+ neuronxcc-2.21.33363.0+82129205/MODULE_abe950392912637c46d6+9f698978/model.neff filter=lfs diff=lfs merge=lfs -text
7438
+ neuronxcc-2.21.33363.0+82129205/MODULE_abe950392912637c46d6+9f698978/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7439
+ neuronxcc-2.21.33363.0+82129205/MODULE_bac0f4775017b9749086+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
7440
+ neuronxcc-2.21.33363.0+82129205/MODULE_bac0f4775017b9749086+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7441
+ neuronxcc-2.21.33363.0+82129205/MODULE_d1261d9597604aad47dc+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
7442
+ neuronxcc-2.21.33363.0+82129205/MODULE_d81214178e1a69d3c030+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
7443
+ neuronxcc-2.21.33363.0+82129205/MODULE_e4fb2e709dd9a7cab38f+b35fcb18/model.neff filter=lfs diff=lfs merge=lfs -text
7444
+ neuronxcc-2.21.33363.0+82129205/MODULE_e4fb2e709dd9a7cab38f+b35fcb18/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7445
+ neuronxcc-2.21.33363.0+82129205/MODULE_e69cd7b9119d7b747c0a+c7fec16f/model.neff filter=lfs diff=lfs merge=lfs -text
7446
+ neuronxcc-2.21.33363.0+82129205/MODULE_e69cd7b9119d7b747c0a+c7fec16f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7447
+ neuronxcc-2.21.33363.0+82129205/MODULE_e8eeb8ab49bf0ebcab2e+742231d6/model.neff filter=lfs diff=lfs merge=lfs -text
7448
+ neuronxcc-2.21.33363.0+82129205/MODULE_e8eeb8ab49bf0ebcab2e+742231d6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7449
+ neuronxcc-2.21.33363.0+82129205/MODULE_ed0790cb7d5818aa4b49+3eebdf46/model.neff filter=lfs diff=lfs merge=lfs -text
7450
+ neuronxcc-2.21.33363.0+82129205/MODULE_ed0790cb7d5818aa4b49+3eebdf46/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7451
+ neuronxcc-2.21.33363.0+82129205/MODULE_f2f85832e67cb8247710+4b431174/model.neff filter=lfs diff=lfs merge=lfs -text
7452
+ neuronxcc-2.21.33363.0+82129205/MODULE_f2f85832e67cb8247710+4b431174/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7453
+ neuronxcc-2.21.33363.0+82129205/MODULE_f4cf0f8f791046fe5274+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
7454
+ neuronxcc-2.21.33363.0+82129205/MODULE_f5465131654976d18953+d8417f71/model.neff filter=lfs diff=lfs merge=lfs -text
7455
+ neuronxcc-2.21.33363.0+82129205/MODULE_f5465131654976d18953+d8417f71/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
7456
+ neuronxcc-2.23.6484.0+3b612583/MODULE_12642634462137174255+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
7457
+ neuronxcc-2.23.6484.0+3b612583/MODULE_16660895666415846207+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
7458
+ neuronxcc-2.23.6484.0+3b612583/MODULE_6916903775068390632+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
7459
+ neuronxcc-2.23.6484.0+3b612583/MODULE_6933000544295673404+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/29f87381a199b99e2f80.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/gemma-3-270m-it",
4
+ "_sliding_window_pattern": 6,
5
+ "_task": "text-generation",
6
+ "architectures": [
7
+ "Gemma3ForCausalLM"
8
+ ],
9
+ "attention_bias": false,
10
+ "attention_dropout": 0.0,
11
+ "attn_logit_softcapping": null,
12
+ "dtype": "bfloat16",
13
+ "final_logit_softcapping": null,
14
+ "head_dim": 256,
15
+ "hidden_activation": "gelu_pytorch_tanh",
16
+ "hidden_size": 640,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 2048,
19
+ "layer_types": [
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "sliding_attention",
24
+ "sliding_attention",
25
+ "full_attention",
26
+ "sliding_attention",
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "sliding_attention",
31
+ "full_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "sliding_attention",
37
+ "full_attention"
38
+ ],
39
+ "max_position_embeddings": 32768,
40
+ "model_type": "gemma3_text",
41
+ "neuron": {
42
+ "_serialized_key": "NxDNeuronConfig",
43
+ "batch_size": 1,
44
+ "capacity_factor": null,
45
+ "checkpoint_id": "unsloth/gemma-3-270m-it",
46
+ "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9",
47
+ "continuous_batching": false,
48
+ "ep_degree": 1,
49
+ "fused_qkv": true,
50
+ "glu_mlp": true,
51
+ "local_ranks_size": 4,
52
+ "max_batch_size": 1,
53
+ "max_context_length": 8192,
54
+ "max_topk": 256,
55
+ "n_active_tokens": 8192,
56
+ "neuronxcc_version": "2.21.33363.0+82129205",
57
+ "on_device_sampling": true,
58
+ "optimum_neuron_version": "0.4.6.dev3",
59
+ "output_logits": false,
60
+ "pp_degree": 1,
61
+ "prefill_chunk_size": 0,
62
+ "sequence_length": 8192,
63
+ "speculation_length": 0,
64
+ "start_rank_id": 0,
65
+ "target": "trn2",
66
+ "torch_dtype": "bfloat16",
67
+ "tp_degree": 4
68
+ },
69
+ "num_attention_heads": 4,
70
+ "num_hidden_layers": 18,
71
+ "num_key_value_heads": 1,
72
+ "query_pre_attn_scalar": 256,
73
+ "rms_norm_eps": 1e-06,
74
+ "rope_local_base_freq": 10000.0,
75
+ "rope_scaling": null,
76
+ "rope_theta": 1000000.0,
77
+ "sliding_window": 512,
78
+ "unsloth_fixed": true,
79
+ "use_bidirectional_attention": false,
80
+ "use_cache": true,
81
+ "vocab_size": 262144
82
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/63c09b6d5a07ca7c7660.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 32768,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 6,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
54
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 4,
60
+ "max_batch_size": 6,
61
+ "max_context_length": 8192,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 8192,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.6.dev3",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "prefill_chunk_size": 1024,
70
+ "sequence_length": 8192,
71
+ "speculation_length": 0,
72
+ "start_rank_id": 0,
73
+ "target": "trn2",
74
+ "torch_dtype": "bfloat16",
75
+ "tp_degree": 4
76
+ },
77
+ "num_attention_heads": 16,
78
+ "num_hidden_layers": 28,
79
+ "num_key_value_heads": 8,
80
+ "rms_norm_eps": 1e-06,
81
+ "rope_scaling": null,
82
+ "rope_theta": 1000000,
83
+ "sliding_window": null,
84
+ "tie_word_embeddings": true,
85
+ "use_cache": true,
86
+ "use_sliding_window": false,
87
+ "vocab_size": 151669
88
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/7fc4b2b6168bb163363d.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 32768,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 4,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
54
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 4,
60
+ "max_batch_size": 4,
61
+ "max_context_length": 8192,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 8192,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.6.dev3",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "prefill_chunk_size": 1024,
70
+ "sequence_length": 8192,
71
+ "speculation_length": 0,
72
+ "start_rank_id": 0,
73
+ "target": "trn2",
74
+ "torch_dtype": "bfloat16",
75
+ "tp_degree": 4
76
+ },
77
+ "num_attention_heads": 16,
78
+ "num_hidden_layers": 28,
79
+ "num_key_value_heads": 8,
80
+ "rms_norm_eps": 1e-06,
81
+ "rope_scaling": null,
82
+ "rope_theta": 1000000,
83
+ "sliding_window": null,
84
+ "tie_word_embeddings": true,
85
+ "use_cache": true,
86
+ "use_sliding_window": false,
87
+ "vocab_size": 151669
88
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/5ef0d470102a1386baac.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "float16",
11
+ "head_dim": 4,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 1,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
24
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
25
+ "continuous_batching": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 2,
30
+ "max_batch_size": 1,
31
+ "max_context_length": 1024,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 1024,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.6.dev3",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "prefill_chunk_size": 0,
40
+ "sequence_length": 1024,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": "trn2",
44
+ "torch_dtype": "float16",
45
+ "tp_degree": 2
46
+ },
47
+ "num_attention_heads": 4,
48
+ "num_hidden_layers": 2,
49
+ "num_key_value_heads": 4,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 8.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": false,
61
+ "use_cache": true,
62
+ "vocab_size": 128256
63
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/17e5c162137a63b70992.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 1,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
24
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
25
+ "continuous_batching": false,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 4,
30
+ "max_batch_size": 1,
31
+ "max_context_length": 8192,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 8192,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.6.dev3",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "prefill_chunk_size": 1024,
40
+ "sequence_length": 8192,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": "trn2",
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 4
46
+ },
47
+ "num_attention_heads": 32,
48
+ "num_hidden_layers": 16,
49
+ "num_key_value_heads": 8,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 32.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": true,
61
+ "unsloth_fixed": true,
62
+ "use_cache": true,
63
+ "vocab_size": 128256
64
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d75242397c3ceee988e7.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 4,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
24
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
25
+ "continuous_batching": true,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 4,
30
+ "max_batch_size": 4,
31
+ "max_context_length": 1024,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 1024,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.6.dev3",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "prefill_chunk_size": 0,
40
+ "sequence_length": 1024,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": "trn2",
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 4
46
+ },
47
+ "num_attention_heads": 32,
48
+ "num_hidden_layers": 16,
49
+ "num_key_value_heads": 8,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 32.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": true,
61
+ "unsloth_fixed": true,
62
+ "use_cache": true,
63
+ "vocab_size": 128256
64
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/gemma3_text/unsloth/gemma-3-270m-it/29f87381a199b99e2f80.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/gemma-3-270m-it",
4
+ "_sliding_window_pattern": 6,
5
+ "_task": "text-generation",
6
+ "architectures": [
7
+ "Gemma3ForCausalLM"
8
+ ],
9
+ "attention_bias": false,
10
+ "attention_dropout": 0.0,
11
+ "attn_logit_softcapping": null,
12
+ "dtype": "bfloat16",
13
+ "final_logit_softcapping": null,
14
+ "head_dim": 256,
15
+ "hidden_activation": "gelu_pytorch_tanh",
16
+ "hidden_size": 640,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 2048,
19
+ "layer_types": [
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "sliding_attention",
24
+ "sliding_attention",
25
+ "full_attention",
26
+ "sliding_attention",
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "sliding_attention",
31
+ "full_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "sliding_attention",
37
+ "full_attention"
38
+ ],
39
+ "max_position_embeddings": 32768,
40
+ "model_type": "gemma3_text",
41
+ "neuron": {
42
+ "_serialized_key": "NxDNeuronConfig",
43
+ "batch_size": 1,
44
+ "capacity_factor": null,
45
+ "checkpoint_id": "unsloth/gemma-3-270m-it",
46
+ "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9",
47
+ "continuous_batching": false,
48
+ "ep_degree": 1,
49
+ "fused_qkv": true,
50
+ "glu_mlp": true,
51
+ "local_ranks_size": 4,
52
+ "max_batch_size": 1,
53
+ "max_context_length": 8192,
54
+ "max_topk": 256,
55
+ "n_active_tokens": 8192,
56
+ "neuronxcc_version": "2.21.33363.0+82129205",
57
+ "on_device_sampling": true,
58
+ "optimum_neuron_version": "0.4.6.dev3",
59
+ "output_logits": false,
60
+ "pp_degree": 1,
61
+ "prefill_chunk_size": 0,
62
+ "sequence_length": 8192,
63
+ "speculation_length": 0,
64
+ "start_rank_id": 0,
65
+ "target": "trn2",
66
+ "torch_dtype": "bfloat16",
67
+ "tp_degree": 4
68
+ },
69
+ "num_attention_heads": 4,
70
+ "num_hidden_layers": 18,
71
+ "num_key_value_heads": 1,
72
+ "query_pre_attn_scalar": 256,
73
+ "rms_norm_eps": 1e-06,
74
+ "rope_local_base_freq": 10000.0,
75
+ "rope_scaling": null,
76
+ "rope_theta": 1000000.0,
77
+ "sliding_window": 512,
78
+ "unsloth_fixed": true,
79
+ "use_bidirectional_attention": false,
80
+ "use_cache": true,
81
+ "vocab_size": 262144
82
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/d75242397c3ceee988e7.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 64,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "max_position_embeddings": 131072,
17
+ "mlp_bias": false,
18
+ "model_type": "llama",
19
+ "neuron": {
20
+ "_serialized_key": "NxDNeuronConfig",
21
+ "batch_size": 4,
22
+ "capacity_factor": null,
23
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
24
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
25
+ "continuous_batching": true,
26
+ "ep_degree": 1,
27
+ "fused_qkv": true,
28
+ "glu_mlp": true,
29
+ "local_ranks_size": 4,
30
+ "max_batch_size": 4,
31
+ "max_context_length": 1024,
32
+ "max_topk": 256,
33
+ "n_active_tokens": 1024,
34
+ "neuronxcc_version": "2.21.33363.0+82129205",
35
+ "on_device_sampling": true,
36
+ "optimum_neuron_version": "0.4.6.dev3",
37
+ "output_logits": false,
38
+ "pp_degree": 1,
39
+ "prefill_chunk_size": 0,
40
+ "sequence_length": 1024,
41
+ "speculation_length": 0,
42
+ "start_rank_id": 0,
43
+ "target": "trn2",
44
+ "torch_dtype": "bfloat16",
45
+ "tp_degree": 4
46
+ },
47
+ "num_attention_heads": 32,
48
+ "num_hidden_layers": 16,
49
+ "num_key_value_heads": 8,
50
+ "pretraining_tp": 1,
51
+ "rms_norm_eps": 1e-05,
52
+ "rope_scaling": {
53
+ "factor": 32.0,
54
+ "high_freq_factor": 4.0,
55
+ "low_freq_factor": 1.0,
56
+ "original_max_position_embeddings": 8192,
57
+ "rope_type": "llama3"
58
+ },
59
+ "rope_theta": 500000.0,
60
+ "tie_word_embeddings": true,
61
+ "unsloth_fixed": true,
62
+ "use_cache": true,
63
+ "vocab_size": 128256
64
+ }
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-0.6B/7fc4b2b6168bb163363d.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-Embedding-0.6B",
4
+ "_task": "feature-extraction",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "dtype": "bfloat16",
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_types": [
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention"
45
+ ],
46
+ "max_position_embeddings": 32768,
47
+ "max_window_layers": 28,
48
+ "model_type": "qwen3",
49
+ "neuron": {
50
+ "_serialized_key": "NxDNeuronConfig",
51
+ "batch_size": 4,
52
+ "capacity_factor": null,
53
+ "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
54
+ "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
55
+ "continuous_batching": false,
56
+ "ep_degree": 1,
57
+ "fused_qkv": true,
58
+ "glu_mlp": true,
59
+ "local_ranks_size": 4,
60
+ "max_batch_size": 4,
61
+ "max_context_length": 8192,
62
+ "max_topk": 256,
63
+ "n_active_tokens": 8192,
64
+ "neuronxcc_version": "2.21.33363.0+82129205",
65
+ "on_device_sampling": false,
66
+ "optimum_neuron_version": "0.4.6.dev3",
67
+ "output_logits": false,
68
+ "pp_degree": 1,
69
+ "prefill_chunk_size": 1024,
70
+ "sequence_length": 8192,
71
+ "speculation_length": 0,
72
+ "start_rank_id": 0,
73
+ "target": "trn2",
74
+ "torch_dtype": "bfloat16",
75
+ "tp_degree": 4
76
+ },
77
+ "num_attention_heads": 16,
78
+ "num_hidden_layers": 28,
79
+ "num_key_value_heads": 8,
80
+ "rms_norm_eps": 1e-06,
81
+ "rope_scaling": null,
82
+ "rope_theta": 1000000,
83
+ "sliding_window": null,
84
+ "tie_word_embeddings": true,
85
+ "use_cache": true,
86
+ "use_sliding_window": false,
87
+ "vocab_size": 151669
88
+ }
neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4c8a569a13948c3e4a8450167a8b11178df1f69856508ed0cc53cd4dbc00448
3
+ size 1529038
neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:994e356a4a4ff8f531034f17dbd0620157beebc6ce098e908be71580912920d2
3
+ size 2049024
neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07950155e53305f876bc9703f48d8d33fccacdb958f0978584100917dc14119c
3
+ size 2124464
neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_1f9e7466-84ba-4c51-ba1b-e5caabc259a5/compiler_workdir/SoftmaxNoMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e542c76cf27c3f807feac47dd3f58fd4c308e8d9e932f0732774bee19d640d8
3
+ size 3881
neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/model.neff ADDED
Binary file (62.5 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_9ba2049b-0b53-464d-87de-42da72765f03/compiler_workdir/SoftmaxWithMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70f96487dad4bb02b98bf2c955fe59650a5fdbcf1d763fdf56ec412b62b5774c
3
+ size 5596
neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/model.neff ADDED
Binary file (62.5 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2"]
neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a1884f4dc3391ad185376ba07edda74cefa8ecf1b26e8b66b3b4b0c12093399
3
+ size 1564
neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/model.neff ADDED
Binary file (42 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2"]
neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaea44379abc01256991f2d98d1d0d072ed11beb07e1c7f0f03ffd1c16b3701a
3
+ size 1565
neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/model.neff ADDED
Binary file (42 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2"]
neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2703ce042114f3c02aac56609ca879c405ecd38ac7dda2f88476aaa28700df1d
3
+ size 1564
neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/model.neff ADDED
Binary file (42 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_11282920422705003560+fad94d7c/model.neff CHANGED
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11282920422705003560+fad94d7c/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11282920422705003560+fad94d7c/model.neff differ
 
neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2"]
neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62d12804001a43af80c6c45209bc83aa1b32227a172de286bbd0ddbb21d79111
3
+ size 1564
neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/model.neff ADDED
Binary file (42 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2"]
neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56e31dc60bc7f63bea4ce0161094e32e8d1c9c4d151b2b3552af7a39db0d7d31
3
+ size 1564
neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/model.neff ADDED
Binary file (42 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2"]
neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/model.done ADDED
File without changes
neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76af0d68508bac289b494b23a2b27ce2fe26fe94e87f53f0be842ab1121aaec9
3
+ size 1564
neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/model.neff ADDED
Binary file (42 kB). View file
 
neuronxcc-2.21.33363.0+82129205/MODULE_11546540121525495183+fad94d7c/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn2"]
neuronxcc-2.21.33363.0+82129205/MODULE_11546540121525495183+fad94d7c/model.done ADDED
File without changes