diff --git a/.gitattributes b/.gitattributes index 9c0b5a850e68598ab019dc0f26a97b7139b6cd7e..259b7d70ef83c3e528118488b269c38a78210355 100644 --- a/.gitattributes +++ b/.gitattributes @@ -6766,3 +6766,43 @@ neuronxcc-2.21.33363.0+82129205/MODULE_73a7e973bf95eb244d4e+a02c3a36/model.neff neuronxcc-2.21.33363.0+82129205/MODULE_73a7e973bf95eb244d4e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_48a27d9858d7c49d242e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.33363.0+82129205/MODULE_48a27d9858d7c49d242e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_0f4258550af4e60d214f+283df001/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_194a42ec08de5c75d19f+677eeb9d/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_231deadc0dc6764f6f76+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4d2cff1b9d2ece68620a+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_5697a7f39e29ac771aa9+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_6269d627a0f5195c5a18+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_76a0aa0955a457032262+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_85f4127cb33c6c3a001b+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_edd7b34c0eb34865e1b4+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/8522c8da70b6768237a5.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/8522c8da70b6768237a5.json new file mode 100644 index 0000000000000000000000000000000000000000..c800acf339d06789b0ccf320625f1b92f179c49b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/2da7a00f0478d50ae1e7f75f085c5b2773b5f355f427c61cf34cb6febd629d96/8522c8da70b6768237a5.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/0e89b203802f7fabc982.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/0e89b203802f7fabc982.json new file mode 100644 index 0000000000000000000000000000000000000000..610af520bbac634d047de47b210d2924ab6255ba --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/441269935591cad8d370e512c0b93cdd2fce6247c40e5a4866d872ee5338b0de/0e89b203802f7fabc982.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/gemma-3-270m-it", + "checkpoint_revision": "23cf460f6bb16954176b3ddcc8d4f250501458a9", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "unsloth_fixed": true, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/9205d47535c8c8ab72fa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/9205d47535c8c8ab72fa.json new file mode 100644 index 0000000000000000000000000000000000000000..9a62f49a750b9ed656cd78c522ac3241d70eb4e3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/4ab8140bc7eb4a553d95855c5c2be2cf8c0fbab21b823d76183b6f51e98b6fc5/9205d47535c8c8ab72fa.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/597478f258749fdff84c.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/597478f258749fdff84c.json new file mode 100644 index 0000000000000000000000000000000000000000..0282b53fc9ffe329d59ff479d067513c5fa8ca8a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/73707b485eab9008c7aba7f5dad0ce2384ac685318d5f888c12fa0d81ed90b19/597478f258749fdff84c.json @@ -0,0 +1,134 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "HuggingFaceTB/SmolLM3-3B", + "_task": "text-generation", + "architectures": [ + "SmolLM3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 65536, + "max_window_layers": 28, + "mlp_bias": false, + "model_type": "smollm3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "HuggingFaceTB/SmolLM3-3B", + "checkpoint_revision": "a07cc9a04f16550a088caea529712d1d335b0ac1", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layer_interval": 4, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "pretraining_tp": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000.0, + "sliding_window": null, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/89ebc1898ab574188c6e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/89ebc1898ab574188c6e.json new file mode 100644 index 0000000000000000000000000000000000000000..7a6e428330f2e72b2f8f52a7769a323383f97825 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/7518518c7e077820070186deda960d8cc49db068cdf0ac70664098fa2b6b698c/89ebc1898ab574188c6e.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/3c1703b17314b02dcfb5.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/3c1703b17314b02dcfb5.json new file mode 100644 index 0000000000000000000000000000000000000000..acb53436bf0d5383875a3847b0260ef82087dc55 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/920f44ce6d3e004d1ce547ae06644f7be262180644b04573153aa15d98742edc/3c1703b17314b02dcfb5.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/357aab4e03a8b3b961c3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/357aab4e03a8b3b961c3.json new file mode 100644 index 0000000000000000000000000000000000000000..2defb3ec95a5dd1bb8b7814b8ca1c58af7101be4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/9c24a2080ec26e9c74d5bd90866f74131aa4d5c975415b96f5e6600f081d5647/357aab4e03a8b3b961c3.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/b02932ba1b54f85e3d84.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/b02932ba1b54f85e3d84.json new file mode 100644 index 0000000000000000000000000000000000000000..181653c6b8ffdf9be34b5b534610f2b7b3348156 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/b02932ba1b54f85e3d84.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 5, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/da6950729a5bb39e8b09.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/da6950729a5bb39e8b09.json new file mode 100644 index 0000000000000000000000000000000000000000..9394dacd4355f7db8d23bc51193db70dea7885d2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/cf6b9a360dcf294104671106bae2adbd9fd291823bb60a351883163684073231/da6950729a5bb39e8b09.json @@ -0,0 +1,63 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/1fff0e9367994bde50df.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/1fff0e9367994bde50df.json new file mode 100644 index 0000000000000000000000000000000000000000..14355f8d17d1dda1d09d6066d5e681c9d60b4291 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/d139acf64685f15794bb983ff6eb881bdd31304bae88b0ce1ed20a54c21f2265/1fff0e9367994bde50df.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/1fff0e9367994bde50df.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/1fff0e9367994bde50df.json new file mode 100644 index 0000000000000000000000000000000000000000..14355f8d17d1dda1d09d6066d5e681c9d60b4291 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/1fff0e9367994bde50df.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "dtype": "float32", + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/llama4_text/tiny-random/llama-4/357aab4e03a8b3b961c3.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/llama4_text/tiny-random/llama-4/357aab4e03a8b3b961c3.json new file mode 100644 index 0000000000000000000000000000000000000000..2defb3ec95a5dd1bb8b7814b8ca1c58af7101be4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/llama4_text/tiny-random/llama-4/357aab4e03a8b3b961c3.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "tiny-random/llama-4", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 128, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 64, + "intermediate_size_mlp": 128, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "tiny-random/llama-4", + "checkpoint_revision": "9e716f5d4d1ffe0a44a15f46f4a12b840439aba4", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 1, + "num_experts_per_tok": 1, + "num_hidden_layers": 4, + "num_key_value_heads": 1, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": true, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/mixtral/dacorvo/Mixtral-tiny/9205d47535c8c8ab72fa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/mixtral/dacorvo/Mixtral-tiny/9205d47535c8c8ab72fa.json new file mode 100644 index 0000000000000000000000000000000000000000..9a62f49a750b9ed656cd78c522ac3241d70eb4e3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/mixtral/dacorvo/Mixtral-tiny/9205d47535c8c8ab72fa.json @@ -0,0 +1,58 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "float16", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": false, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float16", + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/phi3/yujiepan/phi-4-tiny-random/8522c8da70b6768237a5.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/phi3/yujiepan/phi-4-tiny-random/8522c8da70b6768237a5.json new file mode 100644 index 0000000000000000000000000000000000000000..c800acf339d06789b0ccf320625f1b92f179c49b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/phi3/yujiepan/phi-4-tiny-random/8522c8da70b6768237a5.json @@ -0,0 +1,59 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "dtype": "bfloat16", + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/89ebc1898ab574188c6e.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/89ebc1898ab574188c6e.json new file mode 100644 index 0000000000000000000000000000000000000000..7a6e428330f2e72b2f8f52a7769a323383f97825 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/89ebc1898ab574188c6e.json @@ -0,0 +1,64 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "dtype": "bfloat16", + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "layer_types": [ + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/3c1703b17314b02dcfb5.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/3c1703b17314b02dcfb5.json new file mode 100644 index 0000000000000000000000000000000000000000..acb53436bf0d5383875a3847b0260ef82087dc55 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev1/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/3c1703b17314b02dcfb5.json @@ -0,0 +1,65 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 2, + "dtype": "float32", + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 64, + "initializer_range": 0.02, + "intermediate_size": 128, + "max_position_embeddings": 40960, + "max_window_layers": 1, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 128, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe", + "checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "n_active_tokens": 1024, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 1024, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "float32", + "tp_degree": 2 + }, + "norm_topk_prob": true, + "num_attention_heads": 2, + "num_experts": 8, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0311de55a947b4492cfe+4d5ae689/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0311de55a947b4492cfe+4d5ae689/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..88185f9a9b2844ede9ca1b201a3bc6d388ec3bab --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0311de55a947b4492cfe+4d5ae689/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_00cff404-4dfd-40a8-a03d-b7526f26a86f/compiler_workdir/MaskedSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0311de55a947b4492cfe+4d5ae689/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0311de55a947b4492cfe+4d5ae689/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0311de55a947b4492cfe+4d5ae689/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0311de55a947b4492cfe+4d5ae689/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fa630775866f5254499431d6e2b26e218269039f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0311de55a947b4492cfe+4d5ae689/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c694be3799f7af69e675b5ff12cb5cc0a37c5b497506b666f215400c90a69457 +size 11756 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0311de55a947b4492cfe+4d5ae689/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0311de55a947b4492cfe+4d5ae689/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7580d0da16efb73a766c91d9555f3c8d398b8af5 Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_0311de55a947b4492cfe+4d5ae689/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0cf5427654896cfc75b61c0b0088e9f060de590c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce1076cd5b12d1cf60a0c7dd3327e12cd2a2afae7a9fe8f28a4d3d63b2f8a257 +size 84544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8e864d81fad0b8954ca67cd78f4e7eeebad9747f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18833ba4d0c817b8e0ac0acfa094fbb357e5e1e6ed14356632c3c787d75f331f +size 246784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2eb7ff4cb65c98bb1d65feffbc249dc6e3334446 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0b0d86f7b1bf902173a2+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3ac8994d0c0a5f46d6ac3710639629670018b5519fc51a182299edbefd1807a +size 254967 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3e08ec868cd23e9346893a6ee5e47b531ec8a13f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_9c7311a4-31a5-4461-bc6f-4ecbd0b0a560/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f0e003e69046fe0ab4b6dd8d100b65aaa6e17444 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c7269fbc8f958cd8ce0b8572a40fbd51212f1657e437ff2ec3cbfa388cc5b5 +size 11280 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..48013196f9da0923eed91565a6e4e422ca352c93 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb95c53f08b7e7a2ad5de472053771920c4f07a825b70f62536290a9da9c7865 +size 1444864 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..244cd497eabca1d107e16ddac6141e2480b2a0f2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0e51d5282370df2dae08+79fc1760/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200514c9d9efacaf322bdcdcf89d6b05100a95baba51c2ebb4d390df98a6706c +size 1447845 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0f4258550af4e60d214f+283df001/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_0f4258550af4e60d214f+283df001/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..58fbf6b9366d9e28d7f19321e85acd4cd96c0614 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0f4258550af4e60d214f+283df001/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0f4258550af4e60d214f+283df001/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_0f4258550af4e60d214f+283df001/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0f4258550af4e60d214f+283df001/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_0f4258550af4e60d214f+283df001/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a5fb3a3b63003a64f29476bdf34c7e79cd335c22 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0f4258550af4e60d214f+283df001/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae786a0ef339978c47e45f50ce8508b0fc8226be77e527da3c16bcf4d0a4a149 +size 97794 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_0f4258550af4e60d214f+283df001/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_0f4258550af4e60d214f+283df001/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d6e8a43722aca776065a420414a6e366fe6dcf80 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_0f4258550af4e60d214f+283df001/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:826467da8adec414f1dbbf042c8e9f0a5b9a363a786b346726d2c34778b85cb9 +size 410624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_194a42ec08de5c75d19f+677eeb9d/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_194a42ec08de5c75d19f+677eeb9d/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..ae7e0c6790082c43cd145aae8cc7e3cc89e15ca8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_194a42ec08de5c75d19f+677eeb9d/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/speculation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_194a42ec08de5c75d19f+677eeb9d/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_194a42ec08de5c75d19f+677eeb9d/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_194a42ec08de5c75d19f+677eeb9d/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_194a42ec08de5c75d19f+677eeb9d/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f8598baa678e90279f2e591d45ecb63cff3748b4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_194a42ec08de5c75d19f+677eeb9d/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a7cc8c993bc61d1e98fe1b0ad7728aad78b3b74ad77dde376ba79b948b711ec +size 454314 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_194a42ec08de5c75d19f+677eeb9d/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_194a42ec08de5c75d19f+677eeb9d/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..93fd4597f4727fda0369e7cf8714913e6188cf4d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_194a42ec08de5c75d19f+677eeb9d/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0779608509ae4f9bd09a027b96a995f52e1464b4494024a75ccf5d6f11e2f467 +size 3984384 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1ce99b44c0f838530312a7f9d7202d0abd9370d9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09325ee281ea95427fdae297203397493d7c64e9048aa4433fe652c9943b03df +size 84543 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1682ba66fb24a58280fab0cab5132f2cb9bb2e2e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f01e40575a6aa5e2688875fb378b7b3e1da846adcf4eb6c478bd40b17ec60332 +size 216064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4dc25eee9c60ad1cb89ff93759da21ad94f886fd --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1e1e519ed590f237df27+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:321addc3100f7991f7e858f4522bda49260b71e325cb26814599a7d86122704f +size 224275 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_231deadc0dc6764f6f76+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_231deadc0dc6764f6f76+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_231deadc0dc6764f6f76+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_231deadc0dc6764f6f76+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_231deadc0dc6764f6f76+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_231deadc0dc6764f6f76+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_231deadc0dc6764f6f76+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..adfb4ea4e9a78adb072409d9fce7c1b45c4544d6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_231deadc0dc6764f6f76+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f7b8335306d8b77304eff4e596c1b03df36697b5213c4c7ea8207afa57de677 +size 761066 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_231deadc0dc6764f6f76+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_231deadc0dc6764f6f76+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ae83e0da0a3becd3deb09370ea1b8b1d2e203f53 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_231deadc0dc6764f6f76+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f62f5cde5c3ddf341cf1d809b9865042dfcdf27ebb22fe4e3e233921ce3a44f2 +size 12493824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1b2c6ce69a311276fc34a9bde98afdbc8d82871f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_ea9203cd-6289-4765-ac2f-838e1ff2b9e6/compiler_workdir/AttentionModelWrapper/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..415bdd22650c272d268c66d5a14bb3af822509e3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f719f5e55874995332544b239de0b6dc5d1c3decb7a3074ef8331817bf836ce +size 8979 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8580fff2618221d7c6d4a07a7eb21f1a7693868e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50e4ea314562e0457ebad9e8ebf2bfca808c2b29ab068799d411e361a8464e49 +size 246784 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f694dfa81003cb39af3b6b8caf37ff4198f04751 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_23956a7b8d1fb1daa936+e8482832/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5443982514a62e0853a81a485cad1bf789460cbf315e752afbd280204a2dfc85 +size 249608 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c6a684d0301ac601f365d3ad2916356d05666042 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f75c1a9e74cf35c92d14df0983537ee7d3763276d0d37fe94cbbf750be164fb1 +size 86918 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..210bd831dc609f18ac7e8d3faa6b0808b3127660 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3fc92fb5099ea9a35469b8ebd860ac17b346d7ec744dab35d80e142d038eac7 +size 287744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..07e9a7bc9e9cbda30062052b3e2dace01a1579db --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_3ce59a68e3aaa3a9a1ac+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0968eb75d1a18678fd61dc405c389dceeac8c8a7aef0e5e2ba162e4945eb632d +size 295957 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4d2cff1b9d2ece68620a+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4d2cff1b9d2ece68620a+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4d2cff1b9d2ece68620a+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4d2cff1b9d2ece68620a+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4d2cff1b9d2ece68620a+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4d2cff1b9d2ece68620a+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4d2cff1b9d2ece68620a+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6912220f0db8113e1b595d4efab793803ad7cafe --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4d2cff1b9d2ece68620a+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51a30751889f39d36f256480dac09aa40fd2740adc05ae4f46f6d2b19f9cd7e2 +size 90382 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4d2cff1b9d2ece68620a+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4d2cff1b9d2ece68620a+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b5f5521853fad84644e7622d352f470c323cec3e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4d2cff1b9d2ece68620a+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34f3ba6d5c4101af9ff2d765dcf023cd376d4907a4c7cbf4abdeb1423dcf642d +size 359424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5697a7f39e29ac771aa9+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_5697a7f39e29ac771aa9+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5697a7f39e29ac771aa9+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5697a7f39e29ac771aa9+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_5697a7f39e29ac771aa9+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5697a7f39e29ac771aa9+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_5697a7f39e29ac771aa9+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c59987d984caac334ce4be8120adc5417ead0e56 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5697a7f39e29ac771aa9+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96e437429ffc4245a97b0e5d19f247b8a4dcd744fa6bc8cd2676e29f1023e80c +size 694128 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_5697a7f39e29ac771aa9+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_5697a7f39e29ac771aa9+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4983f3520d4b4d3e9bbc8fc4fee8fa3395dda502 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_5697a7f39e29ac771aa9+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00313d158a61f879dda55b9ae47393394a66d922b94391ad9dfb7fb415a51154 +size 625664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6269d627a0f5195c5a18+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_6269d627a0f5195c5a18+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6269d627a0f5195c5a18+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6269d627a0f5195c5a18+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_6269d627a0f5195c5a18+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6269d627a0f5195c5a18+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_6269d627a0f5195c5a18+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f9bb4e196f019a89c94d32a8e39a6f28f565f995 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6269d627a0f5195c5a18+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d0a077665eb83d9a6debb0f18c7e1b1c59d228e600d873f3eb7133d8a5248b2 +size 82772 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_6269d627a0f5195c5a18+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_6269d627a0f5195c5a18+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9d7f1f99da0ee2c33c1590fdf9ec67f6e7e3e50a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_6269d627a0f5195c5a18+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2721f514ac3a3a9083e371c93a901ae9d2ed9371518e8370351d3331ef6c35cb +size 267264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76a0aa0955a457032262+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_76a0aa0955a457032262+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_76a0aa0955a457032262+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76a0aa0955a457032262+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_76a0aa0955a457032262+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76a0aa0955a457032262+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_76a0aa0955a457032262+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9e01b7dac167f874e1361d73e41941d71b09009b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_76a0aa0955a457032262+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ed058fda756a718d4e80a0c80024822aa8467b6c299af833fe14355b7ea5d55 +size 81516 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_76a0aa0955a457032262+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_76a0aa0955a457032262+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8ceda910606be49fc33c6a8321c4b5d0a21469ec --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_76a0aa0955a457032262+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce0f9fb1e7f9f0c2c1b9ab59d9732d7b1b10fe04bed68da1085ab23967378319 +size 267264 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e04e539e1f51b79f04f2eb37726d80d89e282726 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba797d8d0dc9606416d8428fa3a8d6dd193a1e5cf33421729681fe685b1fc6e +size 95345 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1c77b0e4332540a949f322b3a78746a3ad55e1d4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bc93f54a629e33f8c11abacc4d9d69e61f2620c1206a59e8f095c573b935e5d +size 277504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8332a6053ea756ac5d4d4089b36dcafd4239aadf --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_77cd695491d7f3ce2591+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbc62af336c543496675ac21a36e042440f02c03ac981dc6b608c76f9ebdbfeb +size 288898 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_85f4127cb33c6c3a001b+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_85f4127cb33c6c3a001b+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_85f4127cb33c6c3a001b+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_85f4127cb33c6c3a001b+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_85f4127cb33c6c3a001b+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_85f4127cb33c6c3a001b+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_85f4127cb33c6c3a001b+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bf76122917287c78770534baacfb8858fb09e980 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_85f4127cb33c6c3a001b+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0895178fa19904f20d0bb0f1f6fa03beb2a7b59b5180e22847fb7a4a6c0e50ca +size 532500 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_85f4127cb33c6c3a001b+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_85f4127cb33c6c3a001b+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b41389eccb7244dc6d042df6efaacbb9ca608e7a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_85f4127cb33c6c3a001b+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc91bdbd865e6d62f2205e6791bfd790ca9a5e6e9d6e8abc194554f57855d762 +size 1813504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ad16080694c0280c3e6906bb2036aa9afb7a261f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8638ee6cdffb163bb8450cbcba91a01a045d04d3b7d79a421fee73a9d441aac +size 474402 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fa59ab3035f0e66be09b4bc1d59d3e04a7e1be72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d5c7639490b29a58f59+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ed2467a687f6d6e48a0a6d16f3dbc862cc6f9f0d9ac3e4447bd0d6ff7e460ff +size 42322944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0671b70cca7258195a00b9e5d0aa83d0b3208b5f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:088b03b0c41216d6e20ae00b556cce8f042ba4199de4122ceab169bbe375cfa7 +size 1576765 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ea21d0d37b1f1a2f6a298dd3ee0ec78523f3ce32 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00d49da2bb0fcb3abb8b4cccf0883e4a88322ed91a11bea10d646c36207b3979 +size 769024 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6cab5887f4c02a9a0d517ab279223cca568d0578 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8d6187b4c32336c3810f+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce172d22cdc775c6bb72ce9a4e08d8fb3dd256b0c67499ade0b94352db9eb4e +size 776820 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e5cbaa07a553beca168de5d4e4741bd45503c927 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c7ce7b9d32f821e68f50158d41979e51194ff9220842421400cc23e83cb27f5 +size 85894 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3bcd21112228d1d899f3ca6f0c7ff2b570aa8108 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52a915f05a559447c86b913f9b6b8cc2b5d08f2544b334a971141595363301ff +size 277504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..53f032eb7c9b4e82a326b15e5263c6636eb8578c --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8f55926fdb244a19d68c+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aceee6a5f3a61359b87e97ed85e416db384402ebd05b5d9e2d2e1c01a4f84e35 +size 285717 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..65e088acccf1cd2363826e6c1a10eb51b100e53b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b53b44c50166b03707ef1675f7db2b41f48cdcc12f15ce3a1576f71c76e7d7 +size 572480 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4835a8d58ae588188e0c8a408a77aeddcc733aa1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d99495154d055b50523e9658ac63a45a9b10f1ec7221724d325a2f0427958a3 +size 1086464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ee857f693d543ce8d26201cb1d1a6a21bc98eac3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_9d2ce58b75d943fb69f6+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb223e94cdc256d7ce4335d0c2b9424a76bad3fce237d25065148f2ef7dabbd3 +size 1208853 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a0603dd176bf7a8ead16+19d6b612/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a0603dd176bf7a8ead16+19d6b612/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..fe0deed26e89bd574fcfd0d55b65e1e193f7f199 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a0603dd176bf7a8ead16+19d6b612/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--auto-cast=none", "--model-type=transformer", "-O1", "--logfile=/tmp/nxdi_test_00441da5-5c3a-447c-9ced-2b8bca750910/compiler_workdir/MaskedSoftmaxModule/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a0603dd176bf7a8ead16+19d6b612/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a0603dd176bf7a8ead16+19d6b612/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a0603dd176bf7a8ead16+19d6b612/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a0603dd176bf7a8ead16+19d6b612/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1ea68fb9527f34ed0a9b2658c8de4eb68151bc05 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a0603dd176bf7a8ead16+19d6b612/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9d0dc25b69fb5f42eb56f3dcd1dde0eaecaa712b2d7d4ad8d1b10b2868d85b5 +size 11756 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a0603dd176bf7a8ead16+19d6b612/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a0603dd176bf7a8ead16+19d6b612/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..170a0c2930215a19dbc2f5468aab50ee4625568e Binary files /dev/null and b/neuronxcc-2.21.33363.0+82129205/MODULE_a0603dd176bf7a8ead16+19d6b612/model.neff differ diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a0d539c996782cbd84898e863a1eebe525d2c753 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1544c8c7b2d0791d83b9f3723148f35ec6e3266832030b1726c04f073cf5a19a +size 441431 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..143aa9bb72571cb788b1d9b68c4edbb97e5b4a30 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d413ac87f7306856b4bcd8dbde8d6820ac3bd4e9cb1a4286dd28a372fa519a5 +size 2171904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7f24c0847aa3cb6ee24155551e8a42f044a4e2b9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a96ea0a19ecf26469614+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b800a918d783b08883e3923854beb82f5ba7323459791ce438f4688b97a41d3f +size 2245991 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3d7d85cd3d975162683053faa369dcc6259604af --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f510d2691049012b41e1d168e8248ea056e5fbe3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f32d182b8cf2eb41131dbd4adc0c37b7ced823c31eddf989a5fb2892b801338 +size 74472 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ba4273eb13aca2f25ea21ddb0b3807f9d4b30346 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:906df6a89a1478cc37c274e68caf4d58dfb23a16e965b79721e570cd92c174b8 +size 277504 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9ec7e9fe72658954e6553c6eee1d8fcda98a208e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_bea9f6407c0266e10265+80d05c3f/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d09acd3da91e05e4c82c1ee02f8dd8c949452e88f7a174ce7be047fa219ff25e +size 289434 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..a7a40c022faf4beb0fb20246d9c1f9c8f32c6900 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5543dc5c020c9305b7517d947523c6f815518e7b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9524b891faa754214fab31cf4b3bdce454254bfc8d51c06b69c0c2222d998a6 +size 92265 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2dc621c52d73ae43452748e980c8fbecb38b511a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1329f8fd5a7fa2235dca84212a69c9c2df591fcf3732be4eec7acd9dc19fa08b +size 369664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..75f1d318d4a532d89af6e6a5ae2def919fa3d8b8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c20f233bb310ac0cba49+2dde74c7/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52b28c618d03cc1be3b646d702226c9ae179230d0c65425326c9f1b19548367c +size 379225 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ce455bd77de482e418369533bc88a0e911c03d04 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d65caa43042a43520b727dc0f5d70b91ba3eb14478a1c95f5a5f34b4c0fbf122 +size 900253 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..14f656a43587c1c3f1bb467b3a5113bdec9a160a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91b6f44b92fd8dd597059fdf7f8fd4a44f13de119ad8eb0c9b7eb6aa5d61d817 +size 4946944 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ba41efbc95da2b3a92fe7fcb331453392629cf8b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e4e60218b370ea7f0c6e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9816c7728d02046dcf8214db9ab6946052925538285b6fab938a179f44d5465 +size 5113468 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..87298326a2af3fc8ec086b9f7b0af1daf7801640 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c131ab731d9027dc8c47cad247d41dafed978061ca3534333b40242db4b3193d +size 679627 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..522f59cc7f9af64c807ef8b81116a6450478550b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e402e362985771b8893b41c913c8e5fab58e927989c8027e51bf61948d2c6b6b +size 543744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..fd1eda01bbc69659695fca21cdc03f5fd4b85ace --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e81df4d9ffdfcff0bd18+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0f80dcbeeaa73fafd96531ca7b0832e4da19dc4495dbcec3c7fe467092fed9 +size 563245 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_edd7b34c0eb34865e1b4+a32116a7/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_edd7b34c0eb34865e1b4+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_edd7b34c0eb34865e1b4+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_edd7b34c0eb34865e1b4+a32116a7/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_edd7b34c0eb34865e1b4+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_edd7b34c0eb34865e1b4+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_edd7b34c0eb34865e1b4+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0e6c876d6b42667b9f5434df9734e9ff4ff2e881 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_edd7b34c0eb34865e1b4+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d115f5494511f484525e05ed8cad3d47e8517737d5089f70a3136c8899efa379 +size 1289344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_edd7b34c0eb34865e1b4+a32116a7/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_edd7b34c0eb34865e1b4+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3faa4ab70e89d2323b7b94f7d7a2aa505eeda5b1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_edd7b34c0eb34865e1b4+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ef6eed2fe95e512bd58198e0df2f13802442762d41138771f74fb4ed872d8b +size 728064 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1f8e553f42fad20757d959c8110b60401086a506 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc4d7d87845c3c8bebbe6abaf97eb843def900337305e4f6a8d0e539eaabbbc +size 403785 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..24947df8158cfaad56588faf539aca89093e1fd8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a803b498cdb7cd67c2baf358e8378eab09735df03370b177faffb304e51e969c +size 2018304 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..73eba481708db7834b39ea5597f656c5cceba53a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_f14b9d4c563064069c82+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab3247ab3cab9cb628072db249693ac470f5aaf8c3e5ac7e388defdcd43864cf +size 2111217