Synchronizing local compiler cache.
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +47 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/29f87381a199b99e2f80.json +82 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/63c09b6d5a07ca7c7660.json +88 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/7fc4b2b6168bb163363d.json +88 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/5ef0d470102a1386baac.json +63 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/17e5c162137a63b70992.json +64 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d75242397c3ceee988e7.json +64 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/gemma3_text/unsloth/gemma-3-270m-it/29f87381a199b99e2f80.json +82 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/d75242397c3ceee988e7.json +64 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-0.6B/7fc4b2b6168bb163363d.json +88 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.neff +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/wrapped_neff.hlo +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11282920422705003560+fad94d7c/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11546540121525495183+fad94d7c/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11546540121525495183+fad94d7c/model.done +0 -0
.gitattributes
CHANGED
|
@@ -7410,3 +7410,50 @@ neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.neff
|
|
| 7410 |
neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7411 |
neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7412 |
neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7410 |
neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7411 |
neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7412 |
neuronxcc-2.21.33363.0+82129205/MODULE_a73e950210b13ba9bed1+c4f887dc/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7413 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7414 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7415 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_17017202802755053176+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7416 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_2035c8ae39822a0f3cf7+1e18d200/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7417 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_2035c8ae39822a0f3cf7+1e18d200/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7418 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_213e0ff283ee069ea9da+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7419 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_213e0ff283ee069ea9da+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7420 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_23643e05a738684e87ac+593aa068/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7421 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_24c83ec8fce0272d4be2+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7422 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_2615490115801662312+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7423 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_36af05b968af9541168f+e9a07323/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7424 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_36af05b968af9541168f+e9a07323/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7425 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_4150760178be1695737a+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7426 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_4150760178be1695737a+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7427 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_5153839984625614734+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7428 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_69d96c640a251d144e75+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7429 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_69d96c640a251d144e75+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7430 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_91bf3670a4b7a485f89a+ce72cbff/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7431 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_91bf3670a4b7a485f89a+ce72cbff/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7432 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_9595602990717811116+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7433 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_9ebb65ad78df0dfd5676+4c44a4bd/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7434 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_9ebb65ad78df0dfd5676+4c44a4bd/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7435 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_a19be04ba5877e327147+9c7c756b/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7436 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_a19be04ba5877e327147+9c7c756b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7437 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_abe950392912637c46d6+9f698978/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7438 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_abe950392912637c46d6+9f698978/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7439 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_bac0f4775017b9749086+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7440 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_bac0f4775017b9749086+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7441 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_d1261d9597604aad47dc+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7442 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_d81214178e1a69d3c030+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7443 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_e4fb2e709dd9a7cab38f+b35fcb18/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7444 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_e4fb2e709dd9a7cab38f+b35fcb18/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7445 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_e69cd7b9119d7b747c0a+c7fec16f/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7446 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_e69cd7b9119d7b747c0a+c7fec16f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7447 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_e8eeb8ab49bf0ebcab2e+742231d6/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7448 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_e8eeb8ab49bf0ebcab2e+742231d6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7449 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_ed0790cb7d5818aa4b49+3eebdf46/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7450 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_ed0790cb7d5818aa4b49+3eebdf46/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7451 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_f2f85832e67cb8247710+4b431174/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7452 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_f2f85832e67cb8247710+4b431174/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7453 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_f4cf0f8f791046fe5274+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7454 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_f5465131654976d18953+d8417f71/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7455 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_f5465131654976d18953+d8417f71/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 7456 |
+
neuronxcc-2.23.6484.0+3b612583/MODULE_12642634462137174255+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7457 |
+
neuronxcc-2.23.6484.0+3b612583/MODULE_16660895666415846207+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7458 |
+
neuronxcc-2.23.6484.0+3b612583/MODULE_6916903775068390632+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 7459 |
+
neuronxcc-2.23.6484.0+3b612583/MODULE_6933000544295673404+fad94d7c/model.neff filter=lfs diff=lfs merge=lfs -text
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/29f87381a199b99e2f80.json
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/gemma-3-270m-it",
|
| 4 |
+
"_sliding_window_pattern": 6,
|
| 5 |
+
"_task": "text-generation",
|
| 6 |
+
"architectures": [
|
| 7 |
+
"Gemma3ForCausalLM"
|
| 8 |
+
],
|
| 9 |
+
"attention_bias": false,
|
| 10 |
+
"attention_dropout": 0.0,
|
| 11 |
+
"attn_logit_softcapping": null,
|
| 12 |
+
"dtype": "bfloat16",
|
| 13 |
+
"final_logit_softcapping": null,
|
| 14 |
+
"head_dim": 256,
|
| 15 |
+
"hidden_activation": "gelu_pytorch_tanh",
|
| 16 |
+
"hidden_size": 640,
|
| 17 |
+
"initializer_range": 0.02,
|
| 18 |
+
"intermediate_size": 2048,
|
| 19 |
+
"layer_types": [
|
| 20 |
+
"sliding_attention",
|
| 21 |
+
"sliding_attention",
|
| 22 |
+
"sliding_attention",
|
| 23 |
+
"sliding_attention",
|
| 24 |
+
"sliding_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"sliding_attention",
|
| 27 |
+
"sliding_attention",
|
| 28 |
+
"sliding_attention",
|
| 29 |
+
"sliding_attention",
|
| 30 |
+
"sliding_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"sliding_attention",
|
| 33 |
+
"sliding_attention",
|
| 34 |
+
"sliding_attention",
|
| 35 |
+
"sliding_attention",
|
| 36 |
+
"sliding_attention",
|
| 37 |
+
"full_attention"
|
| 38 |
+
],
|
| 39 |
+
"max_position_embeddings": 32768,
|
| 40 |
+
"model_type": "gemma3_text",
|
| 41 |
+
"neuron": {
|
| 42 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 43 |
+
"batch_size": 1,
|
| 44 |
+
"capacity_factor": null,
|
| 45 |
+
"checkpoint_id": "unsloth/gemma-3-270m-it",
|
| 46 |
+
"checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9",
|
| 47 |
+
"continuous_batching": false,
|
| 48 |
+
"ep_degree": 1,
|
| 49 |
+
"fused_qkv": true,
|
| 50 |
+
"glu_mlp": true,
|
| 51 |
+
"local_ranks_size": 4,
|
| 52 |
+
"max_batch_size": 1,
|
| 53 |
+
"max_context_length": 8192,
|
| 54 |
+
"max_topk": 256,
|
| 55 |
+
"n_active_tokens": 8192,
|
| 56 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 57 |
+
"on_device_sampling": true,
|
| 58 |
+
"optimum_neuron_version": "0.4.6.dev3",
|
| 59 |
+
"output_logits": false,
|
| 60 |
+
"pp_degree": 1,
|
| 61 |
+
"prefill_chunk_size": 0,
|
| 62 |
+
"sequence_length": 8192,
|
| 63 |
+
"speculation_length": 0,
|
| 64 |
+
"start_rank_id": 0,
|
| 65 |
+
"target": "trn2",
|
| 66 |
+
"torch_dtype": "bfloat16",
|
| 67 |
+
"tp_degree": 4
|
| 68 |
+
},
|
| 69 |
+
"num_attention_heads": 4,
|
| 70 |
+
"num_hidden_layers": 18,
|
| 71 |
+
"num_key_value_heads": 1,
|
| 72 |
+
"query_pre_attn_scalar": 256,
|
| 73 |
+
"rms_norm_eps": 1e-06,
|
| 74 |
+
"rope_local_base_freq": 10000.0,
|
| 75 |
+
"rope_scaling": null,
|
| 76 |
+
"rope_theta": 1000000.0,
|
| 77 |
+
"sliding_window": 512,
|
| 78 |
+
"unsloth_fixed": true,
|
| 79 |
+
"use_bidirectional_attention": false,
|
| 80 |
+
"use_cache": true,
|
| 81 |
+
"vocab_size": 262144
|
| 82 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/63c09b6d5a07ca7c7660.json
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 4 |
+
"_task": "feature-extraction",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"layer_types": [
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention"
|
| 45 |
+
],
|
| 46 |
+
"max_position_embeddings": 32768,
|
| 47 |
+
"max_window_layers": 28,
|
| 48 |
+
"model_type": "qwen3",
|
| 49 |
+
"neuron": {
|
| 50 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 51 |
+
"batch_size": 6,
|
| 52 |
+
"capacity_factor": null,
|
| 53 |
+
"checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 54 |
+
"checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
|
| 55 |
+
"continuous_batching": false,
|
| 56 |
+
"ep_degree": 1,
|
| 57 |
+
"fused_qkv": true,
|
| 58 |
+
"glu_mlp": true,
|
| 59 |
+
"local_ranks_size": 4,
|
| 60 |
+
"max_batch_size": 6,
|
| 61 |
+
"max_context_length": 8192,
|
| 62 |
+
"max_topk": 256,
|
| 63 |
+
"n_active_tokens": 8192,
|
| 64 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 65 |
+
"on_device_sampling": false,
|
| 66 |
+
"optimum_neuron_version": "0.4.6.dev3",
|
| 67 |
+
"output_logits": false,
|
| 68 |
+
"pp_degree": 1,
|
| 69 |
+
"prefill_chunk_size": 1024,
|
| 70 |
+
"sequence_length": 8192,
|
| 71 |
+
"speculation_length": 0,
|
| 72 |
+
"start_rank_id": 0,
|
| 73 |
+
"target": "trn2",
|
| 74 |
+
"torch_dtype": "bfloat16",
|
| 75 |
+
"tp_degree": 4
|
| 76 |
+
},
|
| 77 |
+
"num_attention_heads": 16,
|
| 78 |
+
"num_hidden_layers": 28,
|
| 79 |
+
"num_key_value_heads": 8,
|
| 80 |
+
"rms_norm_eps": 1e-06,
|
| 81 |
+
"rope_scaling": null,
|
| 82 |
+
"rope_theta": 1000000,
|
| 83 |
+
"sliding_window": null,
|
| 84 |
+
"tie_word_embeddings": true,
|
| 85 |
+
"use_cache": true,
|
| 86 |
+
"use_sliding_window": false,
|
| 87 |
+
"vocab_size": 151669
|
| 88 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/7fc4b2b6168bb163363d.json
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 4 |
+
"_task": "feature-extraction",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"layer_types": [
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention"
|
| 45 |
+
],
|
| 46 |
+
"max_position_embeddings": 32768,
|
| 47 |
+
"max_window_layers": 28,
|
| 48 |
+
"model_type": "qwen3",
|
| 49 |
+
"neuron": {
|
| 50 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 51 |
+
"batch_size": 4,
|
| 52 |
+
"capacity_factor": null,
|
| 53 |
+
"checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 54 |
+
"checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
|
| 55 |
+
"continuous_batching": false,
|
| 56 |
+
"ep_degree": 1,
|
| 57 |
+
"fused_qkv": true,
|
| 58 |
+
"glu_mlp": true,
|
| 59 |
+
"local_ranks_size": 4,
|
| 60 |
+
"max_batch_size": 4,
|
| 61 |
+
"max_context_length": 8192,
|
| 62 |
+
"max_topk": 256,
|
| 63 |
+
"n_active_tokens": 8192,
|
| 64 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 65 |
+
"on_device_sampling": false,
|
| 66 |
+
"optimum_neuron_version": "0.4.6.dev3",
|
| 67 |
+
"output_logits": false,
|
| 68 |
+
"pp_degree": 1,
|
| 69 |
+
"prefill_chunk_size": 1024,
|
| 70 |
+
"sequence_length": 8192,
|
| 71 |
+
"speculation_length": 0,
|
| 72 |
+
"start_rank_id": 0,
|
| 73 |
+
"target": "trn2",
|
| 74 |
+
"torch_dtype": "bfloat16",
|
| 75 |
+
"tp_degree": 4
|
| 76 |
+
},
|
| 77 |
+
"num_attention_heads": 16,
|
| 78 |
+
"num_hidden_layers": 28,
|
| 79 |
+
"num_key_value_heads": 8,
|
| 80 |
+
"rms_norm_eps": 1e-06,
|
| 81 |
+
"rope_scaling": null,
|
| 82 |
+
"rope_theta": 1000000,
|
| 83 |
+
"sliding_window": null,
|
| 84 |
+
"tie_word_embeddings": true,
|
| 85 |
+
"use_cache": true,
|
| 86 |
+
"use_sliding_window": false,
|
| 87 |
+
"vocab_size": 151669
|
| 88 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/5ef0d470102a1386baac.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "float16",
|
| 11 |
+
"head_dim": 4,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 16,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 64,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 24 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 1024,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 1024,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.6.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"prefill_chunk_size": 0,
|
| 40 |
+
"sequence_length": 1024,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn2",
|
| 44 |
+
"torch_dtype": "float16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 4,
|
| 48 |
+
"num_hidden_layers": 2,
|
| 49 |
+
"num_key_value_heads": 4,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 8.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": false,
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 128256
|
| 63 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/17e5c162137a63b70992.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 4,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 8192,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 8192,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.6.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"prefill_chunk_size": 1024,
|
| 40 |
+
"sequence_length": 8192,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn2",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 4
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 32.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"unsloth_fixed": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 128256
|
| 64 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/d75242397c3ceee988e7.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 4,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 25 |
+
"continuous_batching": true,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 4,
|
| 30 |
+
"max_batch_size": 4,
|
| 31 |
+
"max_context_length": 1024,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 1024,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.6.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"prefill_chunk_size": 0,
|
| 40 |
+
"sequence_length": 1024,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn2",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 4
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 32.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"unsloth_fixed": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 128256
|
| 64 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/gemma3_text/unsloth/gemma-3-270m-it/29f87381a199b99e2f80.json
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/gemma-3-270m-it",
|
| 4 |
+
"_sliding_window_pattern": 6,
|
| 5 |
+
"_task": "text-generation",
|
| 6 |
+
"architectures": [
|
| 7 |
+
"Gemma3ForCausalLM"
|
| 8 |
+
],
|
| 9 |
+
"attention_bias": false,
|
| 10 |
+
"attention_dropout": 0.0,
|
| 11 |
+
"attn_logit_softcapping": null,
|
| 12 |
+
"dtype": "bfloat16",
|
| 13 |
+
"final_logit_softcapping": null,
|
| 14 |
+
"head_dim": 256,
|
| 15 |
+
"hidden_activation": "gelu_pytorch_tanh",
|
| 16 |
+
"hidden_size": 640,
|
| 17 |
+
"initializer_range": 0.02,
|
| 18 |
+
"intermediate_size": 2048,
|
| 19 |
+
"layer_types": [
|
| 20 |
+
"sliding_attention",
|
| 21 |
+
"sliding_attention",
|
| 22 |
+
"sliding_attention",
|
| 23 |
+
"sliding_attention",
|
| 24 |
+
"sliding_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"sliding_attention",
|
| 27 |
+
"sliding_attention",
|
| 28 |
+
"sliding_attention",
|
| 29 |
+
"sliding_attention",
|
| 30 |
+
"sliding_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"sliding_attention",
|
| 33 |
+
"sliding_attention",
|
| 34 |
+
"sliding_attention",
|
| 35 |
+
"sliding_attention",
|
| 36 |
+
"sliding_attention",
|
| 37 |
+
"full_attention"
|
| 38 |
+
],
|
| 39 |
+
"max_position_embeddings": 32768,
|
| 40 |
+
"model_type": "gemma3_text",
|
| 41 |
+
"neuron": {
|
| 42 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 43 |
+
"batch_size": 1,
|
| 44 |
+
"capacity_factor": null,
|
| 45 |
+
"checkpoint_id": "unsloth/gemma-3-270m-it",
|
| 46 |
+
"checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9",
|
| 47 |
+
"continuous_batching": false,
|
| 48 |
+
"ep_degree": 1,
|
| 49 |
+
"fused_qkv": true,
|
| 50 |
+
"glu_mlp": true,
|
| 51 |
+
"local_ranks_size": 4,
|
| 52 |
+
"max_batch_size": 1,
|
| 53 |
+
"max_context_length": 8192,
|
| 54 |
+
"max_topk": 256,
|
| 55 |
+
"n_active_tokens": 8192,
|
| 56 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 57 |
+
"on_device_sampling": true,
|
| 58 |
+
"optimum_neuron_version": "0.4.6.dev3",
|
| 59 |
+
"output_logits": false,
|
| 60 |
+
"pp_degree": 1,
|
| 61 |
+
"prefill_chunk_size": 0,
|
| 62 |
+
"sequence_length": 8192,
|
| 63 |
+
"speculation_length": 0,
|
| 64 |
+
"start_rank_id": 0,
|
| 65 |
+
"target": "trn2",
|
| 66 |
+
"torch_dtype": "bfloat16",
|
| 67 |
+
"tp_degree": 4
|
| 68 |
+
},
|
| 69 |
+
"num_attention_heads": 4,
|
| 70 |
+
"num_hidden_layers": 18,
|
| 71 |
+
"num_key_value_heads": 1,
|
| 72 |
+
"query_pre_attn_scalar": 256,
|
| 73 |
+
"rms_norm_eps": 1e-06,
|
| 74 |
+
"rope_local_base_freq": 10000.0,
|
| 75 |
+
"rope_scaling": null,
|
| 76 |
+
"rope_theta": 1000000.0,
|
| 77 |
+
"sliding_window": 512,
|
| 78 |
+
"unsloth_fixed": true,
|
| 79 |
+
"use_bidirectional_attention": false,
|
| 80 |
+
"use_cache": true,
|
| 81 |
+
"vocab_size": 262144
|
| 82 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/llama/unsloth/Llama-3.2-1B-Instruct/d75242397c3ceee988e7.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 64,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 4,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 25 |
+
"continuous_batching": true,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 4,
|
| 30 |
+
"max_batch_size": 4,
|
| 31 |
+
"max_context_length": 1024,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 1024,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.6.dev3",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"prefill_chunk_size": 0,
|
| 40 |
+
"sequence_length": 1024,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": "trn2",
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 4
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 32.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"unsloth_fixed": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 128256
|
| 64 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev3/qwen3/Qwen/Qwen3-Embedding-0.6B/7fc4b2b6168bb163363d.json
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 4 |
+
"_task": "feature-extraction",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"layer_types": [
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention"
|
| 45 |
+
],
|
| 46 |
+
"max_position_embeddings": 32768,
|
| 47 |
+
"max_window_layers": 28,
|
| 48 |
+
"model_type": "qwen3",
|
| 49 |
+
"neuron": {
|
| 50 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 51 |
+
"batch_size": 4,
|
| 52 |
+
"capacity_factor": null,
|
| 53 |
+
"checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 54 |
+
"checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
|
| 55 |
+
"continuous_batching": false,
|
| 56 |
+
"ep_degree": 1,
|
| 57 |
+
"fused_qkv": true,
|
| 58 |
+
"glu_mlp": true,
|
| 59 |
+
"local_ranks_size": 4,
|
| 60 |
+
"max_batch_size": 4,
|
| 61 |
+
"max_context_length": 8192,
|
| 62 |
+
"max_topk": 256,
|
| 63 |
+
"n_active_tokens": 8192,
|
| 64 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 65 |
+
"on_device_sampling": false,
|
| 66 |
+
"optimum_neuron_version": "0.4.6.dev3",
|
| 67 |
+
"output_logits": false,
|
| 68 |
+
"pp_degree": 1,
|
| 69 |
+
"prefill_chunk_size": 1024,
|
| 70 |
+
"sequence_length": 8192,
|
| 71 |
+
"speculation_length": 0,
|
| 72 |
+
"start_rank_id": 0,
|
| 73 |
+
"target": "trn2",
|
| 74 |
+
"torch_dtype": "bfloat16",
|
| 75 |
+
"tp_degree": 4
|
| 76 |
+
},
|
| 77 |
+
"num_attention_heads": 16,
|
| 78 |
+
"num_hidden_layers": 28,
|
| 79 |
+
"num_key_value_heads": 8,
|
| 80 |
+
"rms_norm_eps": 1e-06,
|
| 81 |
+
"rope_scaling": null,
|
| 82 |
+
"rope_theta": 1000000,
|
| 83 |
+
"sliding_window": null,
|
| 84 |
+
"tie_word_embeddings": true,
|
| 85 |
+
"use_cache": true,
|
| 86 |
+
"use_sliding_window": false,
|
| 87 |
+
"vocab_size": 151669
|
| 88 |
+
}
|
neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4c8a569a13948c3e4a8450167a8b11178df1f69856508ed0cc53cd4dbc00448
|
| 3 |
+
size 1529038
|
neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:994e356a4a4ff8f531034f17dbd0620157beebc6ce098e908be71580912920d2
|
| 3 |
+
size 2049024
|
neuronxcc-2.21.33363.0+82129205/MODULE_00c8da7c3993a0959e41+ac10809c/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07950155e53305f876bc9703f48d8d33fccacdb958f0978584100917dc14119c
|
| 3 |
+
size 2124464
|
neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn2", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_1f9e7466-84ba-4c51-ba1b-e5caabc259a5/compiler_workdir/SoftmaxNoMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e542c76cf27c3f807feac47dd3f58fd4c308e8d9e932f0732774bee19d640d8
|
| 3 |
+
size 3881
|
neuronxcc-2.21.33363.0+82129205/MODULE_0b8342fa1bedb542ad5e+db6d9813/model.neff
ADDED
|
Binary file (62.5 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn2", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_9ba2049b-0b53-464d-87de-42da72765f03/compiler_workdir/SoftmaxWithMask/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70f96487dad4bb02b98bf2c955fe59650a5fdbcf1d763fdf56ec412b62b5774c
|
| 3 |
+
size 5596
|
neuronxcc-2.21.33363.0+82129205/MODULE_0d22ff60ac5c152f941f+8baf9299/model.neff
ADDED
|
Binary file (62.5 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn2"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a1884f4dc3391ad185376ba07edda74cefa8ecf1b26e8b66b3b4b0c12093399
|
| 3 |
+
size 1564
|
neuronxcc-2.21.33363.0+82129205/MODULE_10122749290754899775+fad94d7c/model.neff
ADDED
|
Binary file (42 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn2"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aaea44379abc01256991f2d98d1d0d072ed11beb07e1c7f0f03ffd1c16b3701a
|
| 3 |
+
size 1565
|
neuronxcc-2.21.33363.0+82129205/MODULE_10574178149771668224+fad94d7c/model.neff
ADDED
|
Binary file (42 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn2"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2703ce042114f3c02aac56609ca879c405ecd38ac7dda2f88476aaa28700df1d
|
| 3 |
+
size 1564
|
neuronxcc-2.21.33363.0+82129205/MODULE_11263357253962001147+fad94d7c/model.neff
ADDED
|
Binary file (42 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_11282920422705003560+fad94d7c/model.neff
CHANGED
|
Binary files a/neuronxcc-2.21.33363.0+82129205/MODULE_11282920422705003560+fad94d7c/model.neff and b/neuronxcc-2.21.33363.0+82129205/MODULE_11282920422705003560+fad94d7c/model.neff differ
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn2"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62d12804001a43af80c6c45209bc83aa1b32227a172de286bbd0ddbb21d79111
|
| 3 |
+
size 1564
|
neuronxcc-2.21.33363.0+82129205/MODULE_1128490360109555897+fad94d7c/model.neff
ADDED
|
Binary file (42 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn2"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56e31dc60bc7f63bea4ce0161094e32e8d1c9c4d151b2b3552af7a39db0d7d31
|
| 3 |
+
size 1564
|
neuronxcc-2.21.33363.0+82129205/MODULE_11490866273502815451+fad94d7c/model.neff
ADDED
|
Binary file (42 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn2"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76af0d68508bac289b494b23a2b27ce2fe26fe94e87f53f0be842ab1121aaec9
|
| 3 |
+
size 1564
|
neuronxcc-2.21.33363.0+82129205/MODULE_1150969789231176771+fad94d7c/model.neff
ADDED
|
Binary file (42 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_11546540121525495183+fad94d7c/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn2"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_11546540121525495183+fad94d7c/model.done
ADDED
|
File without changes
|