Synchronizing local compiler cache.
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +41 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/d2b71947244dfeb32006.json +87 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/7ce147881fb65af8c501.json +95 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4815ed12326933e8588b.json +95 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/qwen3/Qwen/Qwen3-Embedding-0.6B/d2b71947244dfeb32006.json +87 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/qwen3/Qwen/Qwen3-Embedding-4B/4815ed12326933e8588b.json +95 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/qwen3/Qwen/Qwen3-Embedding-8B/7ce147881fb65af8c501.json +95 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/edf2cd849e7234866f5f.json +87 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/4725d0f98e9d733a5354.json +62 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/79d1effc3dea92153467.json +62 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/dd5885d59f14f083843e.json +62 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/llama/llamafactory/tiny-random-Llama-3/79d1effc3dea92153467.json +62 -0
- neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/edf2cd849e7234866f5f.json +87 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/wrapped_neff.hlo +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.neff +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/wrapped_neff.hlo +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.hlo_module.pb +3 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/compile_flags.json +1 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.done +0 -0
- neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.hlo_module.pb +3 -0
.gitattributes
CHANGED
|
@@ -6527,3 +6527,44 @@ neuronxcc-2.21.33363.0+82129205/MODULE_8221872293709102527+fad94d7c/model.neff f
|
|
| 6527 |
neuronxcc-2.21.33363.0+82129205/MODULE_e521a14f8c961dcc16f7+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6528 |
neuronxcc-2.21.33363.0+82129205/MODULE_5d75eac36946f6ceb5eb+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6529 |
neuronxcc-2.21.33363.0+82129205/MODULE_066fa9e2a211ec056b7c+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6527 |
neuronxcc-2.21.33363.0+82129205/MODULE_e521a14f8c961dcc16f7+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6528 |
neuronxcc-2.21.33363.0+82129205/MODULE_5d75eac36946f6ceb5eb+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6529 |
neuronxcc-2.21.33363.0+82129205/MODULE_066fa9e2a211ec056b7c+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6530 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6531 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6532 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_2848c32ef7df1e905c25+4c66bb54/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6533 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_2848c32ef7df1e905c25+4c66bb54/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6534 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_2d7bd8426bc64772d217+3ac48cd0/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6535 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_2d7bd8426bc64772d217+3ac48cd0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6536 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_38bbf770dc53dec9b3ad+ab05f199/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6537 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_38bbf770dc53dec9b3ad+ab05f199/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6538 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_3ba57e5cd85f7007611f+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6539 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_3bc44e72dfcbe8801f60+8731b5fe/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6540 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_3bc44e72dfcbe8801f60+8731b5fe/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6541 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_52d7e7948bb9b090f5a0+c6cd0101/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6542 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_52d7e7948bb9b090f5a0+c6cd0101/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6543 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_5b6831fd1af971b2989b+1f9760b2/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6544 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_5b6831fd1af971b2989b+1f9760b2/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6545 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_75515a835d4f41d9da4b+423750a9/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6546 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_75515a835d4f41d9da4b+423750a9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6547 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_82ceef97adb975a6d90e+aae46d5f/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6548 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_82ceef97adb975a6d90e+aae46d5f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6549 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_84fd59187fd6be117f3a+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6550 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_8c1d2cb1156a1d0a82e3+7b1e013e/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6551 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_8c1d2cb1156a1d0a82e3+7b1e013e/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6552 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_90b73fb771aa346bb48e+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6553 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_90b73fb771aa346bb48e+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6554 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_9d7acc11312c2c31d32e+ac382b22/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6555 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_9d7acc11312c2c31d32e+ac382b22/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6556 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_9e5c74e26f840e51f8bf+390d6e68/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6557 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_9e5c74e26f840e51f8bf+390d6e68/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6558 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_b06034c7057d53a7c643+02353d68/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6559 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_b06034c7057d53a7c643+02353d68/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6560 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_bda1a1078d90ee07a4bc+dfab41ff/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6561 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_bda1a1078d90ee07a4bc+dfab41ff/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6562 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_dd8016818616bef1560e+504b22d7/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6563 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_dd8016818616bef1560e+504b22d7/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6564 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_e7fddd20b107d5347811+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6565 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_f56ba7a38230d2656ddd+fb50064a/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6566 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_f56ba7a38230d2656ddd+fb50064a/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6567 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_fe766d9ae8b638251045+301c8a7e/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6568 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_fe766d9ae8b638251045+301c8a7e/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 6569 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_fe9e0a803d755853d3c7+60b6a716/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 6570 |
+
neuronxcc-2.21.33363.0+82129205/MODULE_fe9e0a803d755853d3c7+60b6a716/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/d2b71947244dfeb32006.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 4 |
+
"_task": "feature-extraction",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"layer_types": [
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention"
|
| 45 |
+
],
|
| 46 |
+
"max_position_embeddings": 32768,
|
| 47 |
+
"max_window_layers": 28,
|
| 48 |
+
"model_type": "qwen3",
|
| 49 |
+
"neuron": {
|
| 50 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 51 |
+
"batch_size": 1,
|
| 52 |
+
"capacity_factor": null,
|
| 53 |
+
"checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 54 |
+
"checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
|
| 55 |
+
"continuous_batching": false,
|
| 56 |
+
"ep_degree": 1,
|
| 57 |
+
"fused_qkv": true,
|
| 58 |
+
"glu_mlp": true,
|
| 59 |
+
"local_ranks_size": 1,
|
| 60 |
+
"max_batch_size": 1,
|
| 61 |
+
"max_context_length": 1024,
|
| 62 |
+
"max_topk": 256,
|
| 63 |
+
"n_active_tokens": 1024,
|
| 64 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 65 |
+
"on_device_sampling": false,
|
| 66 |
+
"optimum_neuron_version": "0.4.5",
|
| 67 |
+
"output_logits": false,
|
| 68 |
+
"pp_degree": 1,
|
| 69 |
+
"sequence_length": 1024,
|
| 70 |
+
"speculation_length": 0,
|
| 71 |
+
"start_rank_id": 0,
|
| 72 |
+
"target": "trn1",
|
| 73 |
+
"torch_dtype": "bfloat16",
|
| 74 |
+
"tp_degree": 1
|
| 75 |
+
},
|
| 76 |
+
"num_attention_heads": 16,
|
| 77 |
+
"num_hidden_layers": 28,
|
| 78 |
+
"num_key_value_heads": 8,
|
| 79 |
+
"rms_norm_eps": 1e-06,
|
| 80 |
+
"rope_scaling": null,
|
| 81 |
+
"rope_theta": 1000000,
|
| 82 |
+
"sliding_window": null,
|
| 83 |
+
"tie_word_embeddings": true,
|
| 84 |
+
"use_cache": true,
|
| 85 |
+
"use_sliding_window": false,
|
| 86 |
+
"vocab_size": 151669
|
| 87 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/af58eb15d8e02338dc2f2e880e9c6ec803a98278914b3606acdcc252e7e18429/7ce147881fb65af8c501.json
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen3-Embedding-8B",
|
| 4 |
+
"_task": "feature-extraction",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 4096,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 12288,
|
| 16 |
+
"layer_types": [
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention"
|
| 53 |
+
],
|
| 54 |
+
"max_position_embeddings": 40960,
|
| 55 |
+
"max_window_layers": 36,
|
| 56 |
+
"model_type": "qwen3",
|
| 57 |
+
"neuron": {
|
| 58 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 59 |
+
"batch_size": 1,
|
| 60 |
+
"capacity_factor": null,
|
| 61 |
+
"checkpoint_id": "Qwen/Qwen3-Embedding-8B",
|
| 62 |
+
"checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
|
| 63 |
+
"continuous_batching": false,
|
| 64 |
+
"ep_degree": 1,
|
| 65 |
+
"fused_qkv": true,
|
| 66 |
+
"glu_mlp": true,
|
| 67 |
+
"local_ranks_size": 1,
|
| 68 |
+
"max_batch_size": 1,
|
| 69 |
+
"max_context_length": 1024,
|
| 70 |
+
"max_topk": 256,
|
| 71 |
+
"n_active_tokens": 1024,
|
| 72 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 73 |
+
"on_device_sampling": false,
|
| 74 |
+
"optimum_neuron_version": "0.4.5",
|
| 75 |
+
"output_logits": false,
|
| 76 |
+
"pp_degree": 1,
|
| 77 |
+
"sequence_length": 1024,
|
| 78 |
+
"speculation_length": 0,
|
| 79 |
+
"start_rank_id": 0,
|
| 80 |
+
"target": "trn1",
|
| 81 |
+
"torch_dtype": "bfloat16",
|
| 82 |
+
"tp_degree": 1
|
| 83 |
+
},
|
| 84 |
+
"num_attention_heads": 32,
|
| 85 |
+
"num_hidden_layers": 36,
|
| 86 |
+
"num_key_value_heads": 8,
|
| 87 |
+
"rms_norm_eps": 1e-06,
|
| 88 |
+
"rope_scaling": null,
|
| 89 |
+
"rope_theta": 1000000,
|
| 90 |
+
"sliding_window": null,
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"use_cache": true,
|
| 93 |
+
"use_sliding_window": false,
|
| 94 |
+
"vocab_size": 151665
|
| 95 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/e0b6d1e2424243dcd9ff1755e02969dcc312d14df531d876c5c2892f285b2863/4815ed12326933e8588b.json
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen3-Embedding-4B",
|
| 4 |
+
"_task": "feature-extraction",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2560,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 9728,
|
| 16 |
+
"layer_types": [
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention"
|
| 53 |
+
],
|
| 54 |
+
"max_position_embeddings": 40960,
|
| 55 |
+
"max_window_layers": 36,
|
| 56 |
+
"model_type": "qwen3",
|
| 57 |
+
"neuron": {
|
| 58 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 59 |
+
"batch_size": 4,
|
| 60 |
+
"capacity_factor": null,
|
| 61 |
+
"checkpoint_id": "Qwen/Qwen3-Embedding-4B",
|
| 62 |
+
"checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
|
| 63 |
+
"continuous_batching": false,
|
| 64 |
+
"ep_degree": 1,
|
| 65 |
+
"fused_qkv": true,
|
| 66 |
+
"glu_mlp": true,
|
| 67 |
+
"local_ranks_size": 2,
|
| 68 |
+
"max_batch_size": 4,
|
| 69 |
+
"max_context_length": 1024,
|
| 70 |
+
"max_topk": 256,
|
| 71 |
+
"n_active_tokens": 1024,
|
| 72 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 73 |
+
"on_device_sampling": false,
|
| 74 |
+
"optimum_neuron_version": "0.4.5",
|
| 75 |
+
"output_logits": false,
|
| 76 |
+
"pp_degree": 1,
|
| 77 |
+
"sequence_length": 1024,
|
| 78 |
+
"speculation_length": 0,
|
| 79 |
+
"start_rank_id": 0,
|
| 80 |
+
"target": "trn1",
|
| 81 |
+
"torch_dtype": "bfloat16",
|
| 82 |
+
"tp_degree": 2
|
| 83 |
+
},
|
| 84 |
+
"num_attention_heads": 32,
|
| 85 |
+
"num_hidden_layers": 36,
|
| 86 |
+
"num_key_value_heads": 8,
|
| 87 |
+
"rms_norm_eps": 1e-06,
|
| 88 |
+
"rope_scaling": null,
|
| 89 |
+
"rope_theta": 1000000,
|
| 90 |
+
"sliding_window": null,
|
| 91 |
+
"tie_word_embeddings": true,
|
| 92 |
+
"use_cache": true,
|
| 93 |
+
"use_sliding_window": false,
|
| 94 |
+
"vocab_size": 151665
|
| 95 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/qwen3/Qwen/Qwen3-Embedding-0.6B/d2b71947244dfeb32006.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 4 |
+
"_task": "feature-extraction",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"layer_types": [
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention"
|
| 45 |
+
],
|
| 46 |
+
"max_position_embeddings": 32768,
|
| 47 |
+
"max_window_layers": 28,
|
| 48 |
+
"model_type": "qwen3",
|
| 49 |
+
"neuron": {
|
| 50 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 51 |
+
"batch_size": 1,
|
| 52 |
+
"capacity_factor": null,
|
| 53 |
+
"checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 54 |
+
"checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
|
| 55 |
+
"continuous_batching": false,
|
| 56 |
+
"ep_degree": 1,
|
| 57 |
+
"fused_qkv": true,
|
| 58 |
+
"glu_mlp": true,
|
| 59 |
+
"local_ranks_size": 1,
|
| 60 |
+
"max_batch_size": 1,
|
| 61 |
+
"max_context_length": 1024,
|
| 62 |
+
"max_topk": 256,
|
| 63 |
+
"n_active_tokens": 1024,
|
| 64 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 65 |
+
"on_device_sampling": false,
|
| 66 |
+
"optimum_neuron_version": "0.4.5",
|
| 67 |
+
"output_logits": false,
|
| 68 |
+
"pp_degree": 1,
|
| 69 |
+
"sequence_length": 1024,
|
| 70 |
+
"speculation_length": 0,
|
| 71 |
+
"start_rank_id": 0,
|
| 72 |
+
"target": "trn1",
|
| 73 |
+
"torch_dtype": "bfloat16",
|
| 74 |
+
"tp_degree": 1
|
| 75 |
+
},
|
| 76 |
+
"num_attention_heads": 16,
|
| 77 |
+
"num_hidden_layers": 28,
|
| 78 |
+
"num_key_value_heads": 8,
|
| 79 |
+
"rms_norm_eps": 1e-06,
|
| 80 |
+
"rope_scaling": null,
|
| 81 |
+
"rope_theta": 1000000,
|
| 82 |
+
"sliding_window": null,
|
| 83 |
+
"tie_word_embeddings": true,
|
| 84 |
+
"use_cache": true,
|
| 85 |
+
"use_sliding_window": false,
|
| 86 |
+
"vocab_size": 151669
|
| 87 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/qwen3/Qwen/Qwen3-Embedding-4B/4815ed12326933e8588b.json
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen3-Embedding-4B",
|
| 4 |
+
"_task": "feature-extraction",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2560,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 9728,
|
| 16 |
+
"layer_types": [
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention"
|
| 53 |
+
],
|
| 54 |
+
"max_position_embeddings": 40960,
|
| 55 |
+
"max_window_layers": 36,
|
| 56 |
+
"model_type": "qwen3",
|
| 57 |
+
"neuron": {
|
| 58 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 59 |
+
"batch_size": 4,
|
| 60 |
+
"capacity_factor": null,
|
| 61 |
+
"checkpoint_id": "Qwen/Qwen3-Embedding-4B",
|
| 62 |
+
"checkpoint_revision": "5cf2132abc99cad020ac570b19d031efec650f2b",
|
| 63 |
+
"continuous_batching": false,
|
| 64 |
+
"ep_degree": 1,
|
| 65 |
+
"fused_qkv": true,
|
| 66 |
+
"glu_mlp": true,
|
| 67 |
+
"local_ranks_size": 2,
|
| 68 |
+
"max_batch_size": 4,
|
| 69 |
+
"max_context_length": 1024,
|
| 70 |
+
"max_topk": 256,
|
| 71 |
+
"n_active_tokens": 1024,
|
| 72 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 73 |
+
"on_device_sampling": false,
|
| 74 |
+
"optimum_neuron_version": "0.4.5",
|
| 75 |
+
"output_logits": false,
|
| 76 |
+
"pp_degree": 1,
|
| 77 |
+
"sequence_length": 1024,
|
| 78 |
+
"speculation_length": 0,
|
| 79 |
+
"start_rank_id": 0,
|
| 80 |
+
"target": "trn1",
|
| 81 |
+
"torch_dtype": "bfloat16",
|
| 82 |
+
"tp_degree": 2
|
| 83 |
+
},
|
| 84 |
+
"num_attention_heads": 32,
|
| 85 |
+
"num_hidden_layers": 36,
|
| 86 |
+
"num_key_value_heads": 8,
|
| 87 |
+
"rms_norm_eps": 1e-06,
|
| 88 |
+
"rope_scaling": null,
|
| 89 |
+
"rope_theta": 1000000,
|
| 90 |
+
"sliding_window": null,
|
| 91 |
+
"tie_word_embeddings": true,
|
| 92 |
+
"use_cache": true,
|
| 93 |
+
"use_sliding_window": false,
|
| 94 |
+
"vocab_size": 151665
|
| 95 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.5/qwen3/Qwen/Qwen3-Embedding-8B/7ce147881fb65af8c501.json
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen3-Embedding-8B",
|
| 4 |
+
"_task": "feature-extraction",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 4096,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 12288,
|
| 16 |
+
"layer_types": [
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention"
|
| 53 |
+
],
|
| 54 |
+
"max_position_embeddings": 40960,
|
| 55 |
+
"max_window_layers": 36,
|
| 56 |
+
"model_type": "qwen3",
|
| 57 |
+
"neuron": {
|
| 58 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 59 |
+
"batch_size": 1,
|
| 60 |
+
"capacity_factor": null,
|
| 61 |
+
"checkpoint_id": "Qwen/Qwen3-Embedding-8B",
|
| 62 |
+
"checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
|
| 63 |
+
"continuous_batching": false,
|
| 64 |
+
"ep_degree": 1,
|
| 65 |
+
"fused_qkv": true,
|
| 66 |
+
"glu_mlp": true,
|
| 67 |
+
"local_ranks_size": 1,
|
| 68 |
+
"max_batch_size": 1,
|
| 69 |
+
"max_context_length": 1024,
|
| 70 |
+
"max_topk": 256,
|
| 71 |
+
"n_active_tokens": 1024,
|
| 72 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 73 |
+
"on_device_sampling": false,
|
| 74 |
+
"optimum_neuron_version": "0.4.5",
|
| 75 |
+
"output_logits": false,
|
| 76 |
+
"pp_degree": 1,
|
| 77 |
+
"sequence_length": 1024,
|
| 78 |
+
"speculation_length": 0,
|
| 79 |
+
"start_rank_id": 0,
|
| 80 |
+
"target": "trn1",
|
| 81 |
+
"torch_dtype": "bfloat16",
|
| 82 |
+
"tp_degree": 1
|
| 83 |
+
},
|
| 84 |
+
"num_attention_heads": 32,
|
| 85 |
+
"num_hidden_layers": 36,
|
| 86 |
+
"num_key_value_heads": 8,
|
| 87 |
+
"rms_norm_eps": 1e-06,
|
| 88 |
+
"rope_scaling": null,
|
| 89 |
+
"rope_theta": 1000000,
|
| 90 |
+
"sliding_window": null,
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"use_cache": true,
|
| 93 |
+
"use_sliding_window": false,
|
| 94 |
+
"vocab_size": 151665
|
| 95 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/60feecaa0c4c075e2f3e46a3f55d9a273f0ddd75a0ecf64e4ae27352e0819506/edf2cd849e7234866f5f.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 4 |
+
"_task": "feature-extraction",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"layer_types": [
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention"
|
| 45 |
+
],
|
| 46 |
+
"max_position_embeddings": 32768,
|
| 47 |
+
"max_window_layers": 28,
|
| 48 |
+
"model_type": "qwen3",
|
| 49 |
+
"neuron": {
|
| 50 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 51 |
+
"batch_size": 4,
|
| 52 |
+
"capacity_factor": null,
|
| 53 |
+
"checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 54 |
+
"checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
|
| 55 |
+
"continuous_batching": false,
|
| 56 |
+
"ep_degree": 1,
|
| 57 |
+
"fused_qkv": true,
|
| 58 |
+
"glu_mlp": true,
|
| 59 |
+
"local_ranks_size": 2,
|
| 60 |
+
"max_batch_size": 4,
|
| 61 |
+
"max_context_length": 8192,
|
| 62 |
+
"max_topk": 256,
|
| 63 |
+
"n_active_tokens": 8192,
|
| 64 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 65 |
+
"on_device_sampling": false,
|
| 66 |
+
"optimum_neuron_version": "0.4.6.dev1",
|
| 67 |
+
"output_logits": false,
|
| 68 |
+
"pp_degree": 1,
|
| 69 |
+
"sequence_length": 8192,
|
| 70 |
+
"speculation_length": 0,
|
| 71 |
+
"start_rank_id": 0,
|
| 72 |
+
"target": "trn1",
|
| 73 |
+
"torch_dtype": "bfloat16",
|
| 74 |
+
"tp_degree": 2
|
| 75 |
+
},
|
| 76 |
+
"num_attention_heads": 16,
|
| 77 |
+
"num_hidden_layers": 28,
|
| 78 |
+
"num_key_value_heads": 8,
|
| 79 |
+
"rms_norm_eps": 1e-06,
|
| 80 |
+
"rope_scaling": null,
|
| 81 |
+
"rope_theta": 1000000,
|
| 82 |
+
"sliding_window": null,
|
| 83 |
+
"tie_word_embeddings": true,
|
| 84 |
+
"use_cache": true,
|
| 85 |
+
"use_sliding_window": false,
|
| 86 |
+
"vocab_size": 151669
|
| 87 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/4725d0f98e9d733a5354.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "float16",
|
| 11 |
+
"head_dim": 4,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 16,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 64,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 24 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 512,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 512,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.6.dev1",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 512,
|
| 40 |
+
"speculation_length": 0,
|
| 41 |
+
"start_rank_id": 0,
|
| 42 |
+
"target": "trn1",
|
| 43 |
+
"torch_dtype": "float16",
|
| 44 |
+
"tp_degree": 2
|
| 45 |
+
},
|
| 46 |
+
"num_attention_heads": 4,
|
| 47 |
+
"num_hidden_layers": 2,
|
| 48 |
+
"num_key_value_heads": 4,
|
| 49 |
+
"pretraining_tp": 1,
|
| 50 |
+
"rms_norm_eps": 1e-05,
|
| 51 |
+
"rope_scaling": {
|
| 52 |
+
"factor": 8.0,
|
| 53 |
+
"high_freq_factor": 4.0,
|
| 54 |
+
"low_freq_factor": 1.0,
|
| 55 |
+
"original_max_position_embeddings": 8192,
|
| 56 |
+
"rope_type": "llama3"
|
| 57 |
+
},
|
| 58 |
+
"rope_theta": 500000.0,
|
| 59 |
+
"tie_word_embeddings": false,
|
| 60 |
+
"use_cache": true,
|
| 61 |
+
"vocab_size": 128256
|
| 62 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/79d1effc3dea92153467.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "float16",
|
| 11 |
+
"head_dim": 4,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 16,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 64,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 24 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 1024,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 1024,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.6.dev1",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 1024,
|
| 40 |
+
"speculation_length": 0,
|
| 41 |
+
"start_rank_id": 0,
|
| 42 |
+
"target": "trn1",
|
| 43 |
+
"torch_dtype": "float16",
|
| 44 |
+
"tp_degree": 2
|
| 45 |
+
},
|
| 46 |
+
"num_attention_heads": 4,
|
| 47 |
+
"num_hidden_layers": 2,
|
| 48 |
+
"num_key_value_heads": 4,
|
| 49 |
+
"pretraining_tp": 1,
|
| 50 |
+
"rms_norm_eps": 1e-05,
|
| 51 |
+
"rope_scaling": {
|
| 52 |
+
"factor": 8.0,
|
| 53 |
+
"high_freq_factor": 4.0,
|
| 54 |
+
"low_freq_factor": 1.0,
|
| 55 |
+
"original_max_position_embeddings": 8192,
|
| 56 |
+
"rope_type": "llama3"
|
| 57 |
+
},
|
| 58 |
+
"rope_theta": 500000.0,
|
| 59 |
+
"tie_word_embeddings": false,
|
| 60 |
+
"use_cache": true,
|
| 61 |
+
"vocab_size": 128256
|
| 62 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/6454afdf3e9d66c7226c13a575b718845c25e53b0699600ba2bb4f883e9d841b/dd5885d59f14f083843e.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "float16",
|
| 11 |
+
"head_dim": 4,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 16,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 64,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 2,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 24 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 25 |
+
"continuous_batching": true,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 2,
|
| 31 |
+
"max_context_length": 128,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 128,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.6.dev1",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 128,
|
| 40 |
+
"speculation_length": 0,
|
| 41 |
+
"start_rank_id": 0,
|
| 42 |
+
"target": "trn2",
|
| 43 |
+
"torch_dtype": "float16",
|
| 44 |
+
"tp_degree": 2
|
| 45 |
+
},
|
| 46 |
+
"num_attention_heads": 4,
|
| 47 |
+
"num_hidden_layers": 2,
|
| 48 |
+
"num_key_value_heads": 4,
|
| 49 |
+
"pretraining_tp": 1,
|
| 50 |
+
"rms_norm_eps": 1e-05,
|
| 51 |
+
"rope_scaling": {
|
| 52 |
+
"factor": 8.0,
|
| 53 |
+
"high_freq_factor": 4.0,
|
| 54 |
+
"low_freq_factor": 1.0,
|
| 55 |
+
"original_max_position_embeddings": 8192,
|
| 56 |
+
"rope_type": "llama3"
|
| 57 |
+
},
|
| 58 |
+
"rope_theta": 500000.0,
|
| 59 |
+
"tie_word_embeddings": false,
|
| 60 |
+
"use_cache": true,
|
| 61 |
+
"vocab_size": 128256
|
| 62 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/llama/llamafactory/tiny-random-Llama-3/79d1effc3dea92153467.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "float16",
|
| 11 |
+
"head_dim": 4,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 16,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 64,
|
| 16 |
+
"max_position_embeddings": 131072,
|
| 17 |
+
"mlp_bias": false,
|
| 18 |
+
"model_type": "llama",
|
| 19 |
+
"neuron": {
|
| 20 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 24 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"max_batch_size": 1,
|
| 31 |
+
"max_context_length": 1024,
|
| 32 |
+
"max_topk": 256,
|
| 33 |
+
"n_active_tokens": 1024,
|
| 34 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 35 |
+
"on_device_sampling": true,
|
| 36 |
+
"optimum_neuron_version": "0.4.6.dev1",
|
| 37 |
+
"output_logits": false,
|
| 38 |
+
"pp_degree": 1,
|
| 39 |
+
"sequence_length": 1024,
|
| 40 |
+
"speculation_length": 0,
|
| 41 |
+
"start_rank_id": 0,
|
| 42 |
+
"target": "trn1",
|
| 43 |
+
"torch_dtype": "float16",
|
| 44 |
+
"tp_degree": 2
|
| 45 |
+
},
|
| 46 |
+
"num_attention_heads": 4,
|
| 47 |
+
"num_hidden_layers": 2,
|
| 48 |
+
"num_key_value_heads": 4,
|
| 49 |
+
"pretraining_tp": 1,
|
| 50 |
+
"rms_norm_eps": 1e-05,
|
| 51 |
+
"rope_scaling": {
|
| 52 |
+
"factor": 8.0,
|
| 53 |
+
"high_freq_factor": 4.0,
|
| 54 |
+
"low_freq_factor": 1.0,
|
| 55 |
+
"original_max_position_embeddings": 8192,
|
| 56 |
+
"rope_type": "llama3"
|
| 57 |
+
},
|
| 58 |
+
"rope_theta": 500000.0,
|
| 59 |
+
"tie_word_embeddings": false,
|
| 60 |
+
"use_cache": true,
|
| 61 |
+
"vocab_size": 128256
|
| 62 |
+
}
|
neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/qwen3/Qwen/Qwen3-Embedding-0.6B/edf2cd849e7234866f5f.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 4 |
+
"_task": "feature-extraction",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"dtype": "bfloat16",
|
| 11 |
+
"head_dim": 128,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 1024,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"layer_types": [
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention"
|
| 45 |
+
],
|
| 46 |
+
"max_position_embeddings": 32768,
|
| 47 |
+
"max_window_layers": 28,
|
| 48 |
+
"model_type": "qwen3",
|
| 49 |
+
"neuron": {
|
| 50 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 51 |
+
"batch_size": 4,
|
| 52 |
+
"capacity_factor": null,
|
| 53 |
+
"checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
|
| 54 |
+
"checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
|
| 55 |
+
"continuous_batching": false,
|
| 56 |
+
"ep_degree": 1,
|
| 57 |
+
"fused_qkv": true,
|
| 58 |
+
"glu_mlp": true,
|
| 59 |
+
"local_ranks_size": 2,
|
| 60 |
+
"max_batch_size": 4,
|
| 61 |
+
"max_context_length": 8192,
|
| 62 |
+
"max_topk": 256,
|
| 63 |
+
"n_active_tokens": 8192,
|
| 64 |
+
"neuronxcc_version": "2.21.33363.0+82129205",
|
| 65 |
+
"on_device_sampling": false,
|
| 66 |
+
"optimum_neuron_version": "0.4.6.dev1",
|
| 67 |
+
"output_logits": false,
|
| 68 |
+
"pp_degree": 1,
|
| 69 |
+
"sequence_length": 8192,
|
| 70 |
+
"speculation_length": 0,
|
| 71 |
+
"start_rank_id": 0,
|
| 72 |
+
"target": "trn1",
|
| 73 |
+
"torch_dtype": "bfloat16",
|
| 74 |
+
"tp_degree": 2
|
| 75 |
+
},
|
| 76 |
+
"num_attention_heads": 16,
|
| 77 |
+
"num_hidden_layers": 28,
|
| 78 |
+
"num_key_value_heads": 8,
|
| 79 |
+
"rms_norm_eps": 1e-06,
|
| 80 |
+
"rope_scaling": null,
|
| 81 |
+
"rope_theta": 1000000,
|
| 82 |
+
"sliding_window": null,
|
| 83 |
+
"tie_word_embeddings": true,
|
| 84 |
+
"use_cache": true,
|
| 85 |
+
"use_sliding_window": false,
|
| 86 |
+
"vocab_size": 151669
|
| 87 |
+
}
|
neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_2e06d251-55b0-4587-80ed-65525f987744/compiler_workdir/ChunkedPrefillAttentionModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f759930af897434853a4b2faf85253d6dee4aa57720d02fd5206dc173982e14b
|
| 3 |
+
size 16230
|
neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/model.neff
ADDED
|
Binary file (72.7 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_023068ea127f216b3230+18260fcf/wrapped_neff.hlo
ADDED
|
Binary file (76 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_6ca317f1-4eb2-420e-ae59-e329a688937a/compiler_workdir/NeuronLlamaMLP/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:993f99d2d09d34bd152af4c7f96a1e6e1d8788e1cd4aefb845c601d2f4d5fcfb
|
| 3 |
+
size 1931
|
neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/model.neff
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9cce4a92dd89a520b72d3146ca2905b351f9fc41ea2c6420132d0e212c370fa2
|
| 3 |
+
size 134144
|
neuronxcc-2.21.33363.0+82129205/MODULE_0243dc050d4687db06f4+12f2698a/wrapped_neff.hlo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:005209541674e03d171a4d2bb9d3dbe7429f18eae8814dbc87f869181bc9fdd4
|
| 3 |
+
size 136222
|
neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d578702d0ba7a5003e02fbdec8b30f3cdaa8d03fdd29311f557bbda052ff1839
|
| 3 |
+
size 1121
|
neuronxcc-2.21.33363.0+82129205/MODULE_10217061096959125489+e30acd3a/model.neff
ADDED
|
Binary file (31.7 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0113611968c8ed0cbeaea5ff3bba2ce4eecd5fc989e2592c6b8ac5fdf09c91c2
|
| 3 |
+
size 1562
|
neuronxcc-2.21.33363.0+82129205/MODULE_10244305442015770634+e30acd3a/model.neff
ADDED
|
Binary file (31.7 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a35335d3e96ee86956cdf93862bf42ae6f83017e9b331b77531ad6942a62366d
|
| 3 |
+
size 1269
|
neuronxcc-2.21.33363.0+82129205/MODULE_10606948783918825529+e30acd3a/model.neff
ADDED
|
Binary file (31.7 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:153fdddade7e1b284f8d78df84523bd332eccc7ddfc7a0698ad8653d560c55ca
|
| 3 |
+
size 1124
|
neuronxcc-2.21.33363.0+82129205/MODULE_10645643398657092095+e30acd3a/model.neff
ADDED
|
Binary file (31.7 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7434cf1ede5dc6759a8ad5cdc472fb8e1668b1a8bd8dd7e7e08b5c09401b8065
|
| 3 |
+
size 1562
|
neuronxcc-2.21.33363.0+82129205/MODULE_10661660426924300837+e30acd3a/model.neff
ADDED
|
Binary file (31.7 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7000c9195e6c3c9abfcbac09928468ef4c204488094a046ec7abfd1b7ea8657
|
| 3 |
+
size 1563
|
neuronxcc-2.21.33363.0+82129205/MODULE_10746122569655005679+e30acd3a/model.neff
ADDED
|
Binary file (31.7 kB). View file
|
|
|
neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/compile_flags.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["--target=trn1"]
|
neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.done
ADDED
|
File without changes
|
neuronxcc-2.21.33363.0+82129205/MODULE_11086318750207148626+e30acd3a/model.hlo_module.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fcfd4f1816e23bee263009b76a032d3de2454e4937f73423fade31c7a0f0278
|
| 3 |
+
size 1562
|