| { |
| "metadata": { |
| "ParamSize": 707, |
| "ParamBytes": 58394173440.0, |
| "BitsPerParam": 4.500051549843175 |
| }, |
| "records": [ |
| { |
| "dataPath": "params_shard_0.bin", |
| "format": "raw-shard", |
| "nbytes": 1572864000, |
| "records": [ |
| { |
| "name": "model.embed_tokens.q_weight", |
| "shape": [ |
| 256000, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1572864000, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ec3e21d9dd46c9a04ca52e50b2554ae4" |
| }, |
| { |
| "dataPath": "params_shard_1.bin", |
| "format": "raw-shard", |
| "nbytes": 196608000, |
| "records": [ |
| { |
| "name": "model.embed_tokens.q_scale", |
| "shape": [ |
| 256000, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 196608000, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7f8473d4d21a55c43622d82021695c80" |
| }, |
| { |
| "dataPath": "params_shard_2.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b386620a04a68323886b6222dd73c5ee" |
| }, |
| { |
| "dataPath": "params_shard_3.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "63fbbb969b9503275cfdb084ad83a61d" |
| }, |
| { |
| "dataPath": "params_shard_4.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b031aad87f5aa98937114d7439dcb673" |
| }, |
| { |
| "dataPath": "params_shard_5.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "75f7d6e3066b92c16d9be64060ec17c1" |
| }, |
| { |
| "dataPath": "params_shard_6.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "553fd8612d9cba2ac8fe146a15ed3400" |
| }, |
| { |
| "dataPath": "params_shard_7.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "31ea81a9a9af6f6c579164a492baa9e8" |
| }, |
| { |
| "dataPath": "params_shard_8.bin", |
| "format": "raw-shard", |
| "nbytes": 25976832, |
| "records": [ |
| { |
| "name": "model.layers.0.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.0.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 24576 |
| } |
| ], |
| "md5sum": "967a7087a43501119721d0f0a160a244" |
| }, |
| { |
| "dataPath": "params_shard_9.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.0.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f46db76ecacc9bb963e431b978c511ba" |
| }, |
| { |
| "dataPath": "params_shard_10.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "93c6e7fd6eb21a7c7042364618b72e71" |
| }, |
| { |
| "dataPath": "params_shard_11.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "17435e9d2a3b3c514a18afae5f9a32b2" |
| }, |
| { |
| "dataPath": "params_shard_12.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c011dbeef2a29546638cc8598c536889" |
| }, |
| { |
| "dataPath": "params_shard_13.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.1.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d8eda53030d27891d8e136f6f898104d" |
| }, |
| { |
| "dataPath": "params_shard_14.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.0.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "c480d050b908f331852f4bf0d2e24f1f" |
| }, |
| { |
| "dataPath": "params_shard_15.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "83ede47f592ad837027af82bd4218457" |
| }, |
| { |
| "dataPath": "params_shard_16.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4174829a7ba615f44d43c54c71231a8e" |
| }, |
| { |
| "dataPath": "params_shard_17.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5d293de9f28d781a61ecb31d36e3fa66" |
| }, |
| { |
| "dataPath": "params_shard_18.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f54039e7904a116d703ce4ccad0b0353" |
| }, |
| { |
| "dataPath": "params_shard_19.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "47ae51b8b2b20aea057f10a9e7ec3fb6" |
| }, |
| { |
| "dataPath": "params_shard_20.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "96896490ef738044730e56a01746dce4" |
| }, |
| { |
| "dataPath": "params_shard_21.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6f9722ea8f33a26157d2a0c06d384de2" |
| }, |
| { |
| "dataPath": "params_shard_22.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "af7eba9f6339a66c041889afb6c97218" |
| }, |
| { |
| "dataPath": "params_shard_23.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c56c67af434c97fe116c07160ebc42a0" |
| }, |
| { |
| "dataPath": "params_shard_24.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5f289de554b5b847e0c6a386e237e55f" |
| }, |
| { |
| "dataPath": "params_shard_25.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0086df2ba54b37b78ec4b76a33fc6381" |
| }, |
| { |
| "dataPath": "params_shard_26.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.2.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cd64f46407756bd18842fa8143eb409b" |
| }, |
| { |
| "dataPath": "params_shard_27.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "be5e81ab0b81bdedff0e5090cfc6d244" |
| }, |
| { |
| "dataPath": "params_shard_28.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b407bf7e60960dc7cf16d14898eee604" |
| }, |
| { |
| "dataPath": "params_shard_29.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c4f4fba5324b7736e63a141c536a1362" |
| }, |
| { |
| "dataPath": "params_shard_30.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "281ab8ad22c7ac979a1c788669f85f21" |
| }, |
| { |
| "dataPath": "params_shard_31.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ba0b92448e925db01a95bb68e71106e0" |
| }, |
| { |
| "dataPath": "params_shard_32.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f2b31655a8c2457a11ab4e6ace83bc5c" |
| }, |
| { |
| "dataPath": "params_shard_33.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "12014f5698b660199ff26e062b961f1c" |
| }, |
| { |
| "dataPath": "params_shard_34.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e7ebcfcecb832825c021413968d804c1" |
| }, |
| { |
| "dataPath": "params_shard_35.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bc8baf1473ab2527fb3330fde5666145" |
| }, |
| { |
| "dataPath": "params_shard_36.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a2556b6b6c6d8c3bf1dc2439b10999cf" |
| }, |
| { |
| "dataPath": "params_shard_37.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9268ff589f6eb2458c0795655855b687" |
| }, |
| { |
| "dataPath": "params_shard_38.bin", |
| "format": "raw-shard", |
| "nbytes": 29982720, |
| "records": [ |
| { |
| "name": "model.layers.1.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.1.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.2.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.2.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.10.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| }, |
| { |
| "name": "model.layers.11.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29958144 |
| } |
| ], |
| "md5sum": "067b00df67ddaf07508ff6b7c0669bd7" |
| }, |
| { |
| "dataPath": "params_shard_39.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.11.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0bc1682f4fe5022a6ad4eeb0cd30fe23" |
| }, |
| { |
| "dataPath": "params_shard_40.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9098f10f03ea2f6608806c19fe01642e" |
| }, |
| { |
| "dataPath": "params_shard_41.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b6e945ca5bb11bd29ae9c75b0e777cb1" |
| }, |
| { |
| "dataPath": "params_shard_42.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "aee5071b20c9633be1cc9b42fa37bf04" |
| }, |
| { |
| "dataPath": "params_shard_43.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.10.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ea9371432b276a44caba0c8186a05524" |
| }, |
| { |
| "dataPath": "params_shard_44.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.11.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "ccd9dcf2f7a655890c2abd74443bebfa" |
| }, |
| { |
| "dataPath": "params_shard_45.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ead19ae41fa6354727124cd9e34fe560" |
| }, |
| { |
| "dataPath": "params_shard_46.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8e2377317b6da59028d3eac3c0ca0a44" |
| }, |
| { |
| "dataPath": "params_shard_47.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "de50406f88042307a5fff70e4bed8e59" |
| }, |
| { |
| "dataPath": "params_shard_48.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8c558fb634dd1890b0d77d1cfb94a270" |
| }, |
| { |
| "dataPath": "params_shard_49.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "67e53ab4e41adff89710527d86506ca7" |
| }, |
| { |
| "dataPath": "params_shard_50.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "746421d78204ee57b6dd45ff01c519cd" |
| }, |
| { |
| "dataPath": "params_shard_51.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2a57f414b46fb462447c181bdaa70707" |
| }, |
| { |
| "dataPath": "params_shard_52.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.9.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "093b238d24e71d73a47ea3999e53c7e7" |
| }, |
| { |
| "dataPath": "params_shard_53.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b85e3e09c4f68991186ca4da74f5424c" |
| }, |
| { |
| "dataPath": "params_shard_54.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "600c00eb02f74988493c94830aaa3eac" |
| }, |
| { |
| "dataPath": "params_shard_55.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "addeb3aead073976c9833ecac07af09e" |
| }, |
| { |
| "dataPath": "params_shard_56.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5ea451fad9506ed9ff695f551da9b2ed" |
| }, |
| { |
| "dataPath": "params_shard_57.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3af4404c2a047d88559a36b16de1c2cc" |
| }, |
| { |
| "dataPath": "params_shard_58.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f321921a9d0cb9426388b85135999d3e" |
| }, |
| { |
| "dataPath": "params_shard_59.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b30fb969f740786bff25e5caf855df0e" |
| }, |
| { |
| "dataPath": "params_shard_60.bin", |
| "format": "raw-shard", |
| "nbytes": 29933568, |
| "records": [ |
| { |
| "name": "model.layers.10.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.9.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.9.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20471808 |
| }, |
| { |
| "name": "model.layers.12.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29908992 |
| } |
| ], |
| "md5sum": "b64eded009316e37f75fc4691b15ac03" |
| }, |
| { |
| "dataPath": "params_shard_61.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.12.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f58e1189cc3a111dc224a7d2d31d37e0" |
| }, |
| { |
| "dataPath": "params_shard_62.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2ba6c289b735b70d14d2aa084185db47" |
| }, |
| { |
| "dataPath": "params_shard_63.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1b78a5615ddfb1382ea5073ba59cacd0" |
| }, |
| { |
| "dataPath": "params_shard_64.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cea12e1bceaec5663762e2f710c6daed" |
| }, |
| { |
| "dataPath": "params_shard_65.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.13.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f3d86f199bf9f50ee089b780b6cbcdea" |
| }, |
| { |
| "dataPath": "params_shard_66.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.12.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "709a9baf03c909babf061189a29a0085" |
| }, |
| { |
| "dataPath": "params_shard_67.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0b3a0192d69d1ad1957f2bf5909d22fa" |
| }, |
| { |
| "dataPath": "params_shard_68.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "58a2db60b301dce3b88c91e5cfe822cf" |
| }, |
| { |
| "dataPath": "params_shard_69.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "82af38122b95dd45907efee9e834fc8b" |
| }, |
| { |
| "dataPath": "params_shard_70.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b4df25eb98af0b13adde61f225ade7dc" |
| }, |
| { |
| "dataPath": "params_shard_71.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "74727e35cf089fd290692eb28256503c" |
| }, |
| { |
| "dataPath": "params_shard_72.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dd6ad77a41317d19a4851a39718ffa80" |
| }, |
| { |
| "dataPath": "params_shard_73.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f13d1bcc939aa4128e16e13c168e2e16" |
| }, |
| { |
| "dataPath": "params_shard_74.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "74ef07f73c66d031a2316d9bc24d32a8" |
| }, |
| { |
| "dataPath": "params_shard_75.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3f64cf05cc4a58d8d42e1b10e0812cc7" |
| }, |
| { |
| "dataPath": "params_shard_76.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3d79c0be2b190d95add53c787e25f9f0" |
| }, |
| { |
| "dataPath": "params_shard_77.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6017717d34524831e56c08ce1d54af58" |
| }, |
| { |
| "dataPath": "params_shard_78.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.14.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "16a2c42f711e57e27a655db6dbdf0de1" |
| }, |
| { |
| "dataPath": "params_shard_79.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dc7ed3108d17135b23aba8035b5bab7d" |
| }, |
| { |
| "dataPath": "params_shard_80.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0d777bd1837aa34af01f35d812ee9f15" |
| }, |
| { |
| "dataPath": "params_shard_81.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "34fa9e6aed6a5619c6aecdfc382b7209" |
| }, |
| { |
| "dataPath": "params_shard_82.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "244744ff41027c41395aaff8a8e6d9d5" |
| }, |
| { |
| "dataPath": "params_shard_83.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "58f1638c6bee1221ad9591684d7f0ff0" |
| }, |
| { |
| "dataPath": "params_shard_84.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "310c2b53aa85f695ccfc5e9cab51e519" |
| }, |
| { |
| "dataPath": "params_shard_85.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "018d90c7bb45699a567122a4eed4cd1d" |
| }, |
| { |
| "dataPath": "params_shard_86.bin", |
| "format": "raw-shard", |
| "nbytes": 29958144, |
| "records": [ |
| { |
| "name": "model.layers.13.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.13.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.14.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.14.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.15.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| } |
| ], |
| "md5sum": "56d43da3c2453bcf52fc9894ad072d80" |
| }, |
| { |
| "dataPath": "params_shard_87.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.15.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2e1f6f72879b0c2eac772653ba6b6418" |
| }, |
| { |
| "dataPath": "params_shard_88.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dc6e98b5ae7134ed21d4f8685127a8a9" |
| }, |
| { |
| "dataPath": "params_shard_89.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4a52b1b310aaf6185ee0d34d4349e4b9" |
| }, |
| { |
| "dataPath": "params_shard_90.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.16.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6b9f62ff2338981d8b7f12dffa450cab" |
| }, |
| { |
| "dataPath": "params_shard_91.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.16.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ed9ddd86cf9a3630338bd8bd6cfa8d99" |
| }, |
| { |
| "dataPath": "params_shard_92.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.15.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.16.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "ac37a97eecbdc010a28bdf007c58e161" |
| }, |
| { |
| "dataPath": "params_shard_93.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7279ee0dc2d1449c4a775b9d72471ced" |
| }, |
| { |
| "dataPath": "params_shard_94.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e4b6d86b2c1a924965fb4456da80b63e" |
| }, |
| { |
| "dataPath": "params_shard_95.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "facadd11b2c986a386a7533f1f649860" |
| }, |
| { |
| "dataPath": "params_shard_96.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "538028970e0b2d7c361d4cfd38171cb9" |
| }, |
| { |
| "dataPath": "params_shard_97.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a10b5879a4620115d533bfb574d61c7a" |
| }, |
| { |
| "dataPath": "params_shard_98.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "acc7f74e34f3a78fac6f4b4d61614bba" |
| }, |
| { |
| "dataPath": "params_shard_99.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "663ff764c3af8810076c5cdb02bf57b2" |
| }, |
| { |
| "dataPath": "params_shard_100.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "54a8a93bdcd4317c5b99fc8642c1e2c3" |
| }, |
| { |
| "dataPath": "params_shard_101.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a6ffacf942e183c68853137f5f646ab9" |
| }, |
| { |
| "dataPath": "params_shard_102.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a600aa3d233e8e75a357963bc854176a" |
| }, |
| { |
| "dataPath": "params_shard_103.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.17.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7fd095e051a9c758078d4b997782ed92" |
| }, |
| { |
| "dataPath": "params_shard_104.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.17.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0a777217a3a259590131fbe7d436d104" |
| }, |
| { |
| "dataPath": "params_shard_105.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0cff0c652535bfcccbe565e97c410280" |
| }, |
| { |
| "dataPath": "params_shard_106.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "239e54eb33ebb2547a3438c64006071a" |
| }, |
| { |
| "dataPath": "params_shard_107.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3d1fc49da8ce01dfb5cbe452eaf7ed78" |
| }, |
| { |
| "dataPath": "params_shard_108.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e3e1b06c70a54ddf8b26da05ac79a67a" |
| }, |
| { |
| "dataPath": "params_shard_109.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "268c1a68ccc68ea31523edfe6b160137" |
| }, |
| { |
| "dataPath": "params_shard_110.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b419218f8dc2527ad7fec36caf00024a" |
| }, |
| { |
| "dataPath": "params_shard_111.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.18.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2ef94f20c0df30632c6e6f26636984de" |
| }, |
| { |
| "dataPath": "params_shard_112.bin", |
| "format": "raw-shard", |
| "nbytes": 29958144, |
| "records": [ |
| { |
| "name": "model.layers.16.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.16.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.17.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.17.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.17.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.18.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| } |
| ], |
| "md5sum": "b1439f37ee1fbe25a9047c18b1f69379" |
| }, |
| { |
| "dataPath": "params_shard_113.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.18.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "92cc95ebdcd9d0871df369fee6e8d712" |
| }, |
| { |
| "dataPath": "params_shard_114.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6d722edf4b6808bfd1c28ea88b4716ed" |
| }, |
| { |
| "dataPath": "params_shard_115.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "09c77b0ea568760c38837c5f6a1cd2ab" |
| }, |
| { |
| "dataPath": "params_shard_116.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.19.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "93c0147ec37b8ea2c3a8cb83aabc175e" |
| }, |
| { |
| "dataPath": "params_shard_117.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.19.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ddf319b6e14e4d1ff0ca7cfbc1ef9695" |
| }, |
| { |
| "dataPath": "params_shard_118.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.18.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.18.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.19.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "4c50efc4988e644c17e8205d6bdff763" |
| }, |
| { |
| "dataPath": "params_shard_119.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c8e389d4fa7f753dd741670a08ee3487" |
| }, |
| { |
| "dataPath": "params_shard_120.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e5e7c28a4452c5c2903478d3c1b81553" |
| }, |
| { |
| "dataPath": "params_shard_121.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "897f1866f1b1e120cbf1daa28a454307" |
| }, |
| { |
| "dataPath": "params_shard_122.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "977a0db30044f2e1ded99e196481467d" |
| }, |
| { |
| "dataPath": "params_shard_123.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a058f3bbb1eb576614becd2a5e6da47c" |
| }, |
| { |
| "dataPath": "params_shard_124.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e026934393042ee7bdaf55924d6a4c0a" |
| }, |
| { |
| "dataPath": "params_shard_125.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7c9c31f3b253a00e41918da3fdbcc72c" |
| }, |
| { |
| "dataPath": "params_shard_126.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "211dd755031f864326c91b3486e3c1d4" |
| }, |
| { |
| "dataPath": "params_shard_127.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d6e4c05708300ab8998515cb1d23a8ef" |
| }, |
| { |
| "dataPath": "params_shard_128.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "96186e7467e0b1dba12457ba60cc727a" |
| }, |
| { |
| "dataPath": "params_shard_129.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.20.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "acaeee20c428c03b34a3d0a41cfb06cc" |
| }, |
| { |
| "dataPath": "params_shard_130.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.20.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bfcaa3f315a8ac10c6c9b8067def368a" |
| }, |
| { |
| "dataPath": "params_shard_131.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "aa3201dba79a7a8cf46275b0873b0586" |
| }, |
| { |
| "dataPath": "params_shard_132.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1caa371db867cc2f2e5bb02534b58417" |
| }, |
| { |
| "dataPath": "params_shard_133.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2f96828e86071595ee1f45feb0de2885" |
| }, |
| { |
| "dataPath": "params_shard_134.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5805268a28657764c8c5c9d226177ee9" |
| }, |
| { |
| "dataPath": "params_shard_135.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6706ff2f22f471c5de0f838f5595ff3b" |
| }, |
| { |
| "dataPath": "params_shard_136.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dec3c3439f93840b681bbf38afd77177" |
| }, |
| { |
| "dataPath": "params_shard_137.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.21.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "edbf9683ce19feb9f2ccbc03920d4e66" |
| }, |
| { |
| "dataPath": "params_shard_138.bin", |
| "format": "raw-shard", |
| "nbytes": 29958144, |
| "records": [ |
| { |
| "name": "model.layers.19.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.19.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.20.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.20.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.20.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.21.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| } |
| ], |
| "md5sum": "d6d2a853982d08d2382052845055027d" |
| }, |
| { |
| "dataPath": "params_shard_139.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.21.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ad0a40c1870c9e64f01a74c5a302e0d6" |
| }, |
| { |
| "dataPath": "params_shard_140.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "abff5f64e0546aebf2b8629739a3238a" |
| }, |
| { |
| "dataPath": "params_shard_141.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2ee811090b83009f41654cbd11cda7f4" |
| }, |
| { |
| "dataPath": "params_shard_142.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.22.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a550fecbd732181336b051ecdb365e47" |
| }, |
| { |
| "dataPath": "params_shard_143.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.22.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fd8532ff0f275c6f977cb60bb619fbf4" |
| }, |
| { |
| "dataPath": "params_shard_144.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.21.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.21.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.22.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "8731465c7fa627d4cb4439fa24f69859" |
| }, |
| { |
| "dataPath": "params_shard_145.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7aa5f7201b3b4cd195d92a654541cbd1" |
| }, |
| { |
| "dataPath": "params_shard_146.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ae5947385511e0a70248f916e3fffed9" |
| }, |
| { |
| "dataPath": "params_shard_147.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4c51b537e45867661cedf535ed4742ce" |
| }, |
| { |
| "dataPath": "params_shard_148.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "50984c8779f21e28c50622cba9d14a78" |
| }, |
| { |
| "dataPath": "params_shard_149.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "88723d3839901e1bc1f9f32e645f9bad" |
| }, |
| { |
| "dataPath": "params_shard_150.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "32feda2f0fd8d1b7f20dbfc2003bea56" |
| }, |
| { |
| "dataPath": "params_shard_151.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ea11316f193afac4ef3bb1775a80379a" |
| }, |
| { |
| "dataPath": "params_shard_152.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a7d1a3064c02c936c57860ce6cfa7839" |
| }, |
| { |
| "dataPath": "params_shard_153.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6f364102a7116550da37bd8a06e9e83b" |
| }, |
| { |
| "dataPath": "params_shard_154.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "eadd8104c86b91eb64b3aaa281927485" |
| }, |
| { |
| "dataPath": "params_shard_155.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.23.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9a8a2443b7e680e437b0a1e3bb78fce3" |
| }, |
| { |
| "dataPath": "params_shard_156.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.23.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f7990fec3c6cc1e87ba20c334d9c21d6" |
| }, |
| { |
| "dataPath": "params_shard_157.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ab019ad6ec2a8cc2f2533da128201540" |
| }, |
| { |
| "dataPath": "params_shard_158.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ed55c839b3b73a68ea7abd8c80fbd757" |
| }, |
| { |
| "dataPath": "params_shard_159.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fb72180ff9a01685d79f1de086aca226" |
| }, |
| { |
| "dataPath": "params_shard_160.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "746d7ef977050c92f0aad0f70b84e800" |
| }, |
| { |
| "dataPath": "params_shard_161.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a5eef9601587704fd8978ca31405f9fc" |
| }, |
| { |
| "dataPath": "params_shard_162.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3fe53945bddb5e7f13a9cce8e9b2502a" |
| }, |
| { |
| "dataPath": "params_shard_163.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.24.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a2ea17c00453b8f60a3bfaa1108e86bd" |
| }, |
| { |
| "dataPath": "params_shard_164.bin", |
| "format": "raw-shard", |
| "nbytes": 29958144, |
| "records": [ |
| { |
| "name": "model.layers.22.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.22.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.23.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.23.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.23.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.24.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| } |
| ], |
| "md5sum": "43cb47a94f06dc673303661d1d259f41" |
| }, |
| { |
| "dataPath": "params_shard_165.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.24.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "07ce162f1b66ef6fd862829d1258f702" |
| }, |
| { |
| "dataPath": "params_shard_166.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fe6b4ce20f089cd123890a7df5780e93" |
| }, |
| { |
| "dataPath": "params_shard_167.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "39d4879a63344000e35e5495872e8621" |
| }, |
| { |
| "dataPath": "params_shard_168.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.25.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cde77b645aed75dc1414706ba1e9d31c" |
| }, |
| { |
| "dataPath": "params_shard_169.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.25.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e11d58a147c67f8460ace4a93bd7f5dc" |
| }, |
| { |
| "dataPath": "params_shard_170.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.24.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.24.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.25.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "2d0e29ca4cb253c75aeb150755d1bdb5" |
| }, |
| { |
| "dataPath": "params_shard_171.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5f328b90b1178eb59e93dd65981804ff" |
| }, |
| { |
| "dataPath": "params_shard_172.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8fe205ccb41315d06f5175431cb37dcd" |
| }, |
| { |
| "dataPath": "params_shard_173.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e0376daca6b247c7457102f3c6df8bbd" |
| }, |
| { |
| "dataPath": "params_shard_174.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c081d720feee12ad47c5a24e23f201fa" |
| }, |
| { |
| "dataPath": "params_shard_175.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a11558595aba80651117cae9a6c9240f" |
| }, |
| { |
| "dataPath": "params_shard_176.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2696b13f85b90c83815eb5f8195116fd" |
| }, |
| { |
| "dataPath": "params_shard_177.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3e2e86d35468e9548076f1aa021b5ccf" |
| }, |
| { |
| "dataPath": "params_shard_178.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bf5d68e283fbb6655749a201db2194f1" |
| }, |
| { |
| "dataPath": "params_shard_179.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b9cf51c41e081c7eddf5ff97971ae52f" |
| }, |
| { |
| "dataPath": "params_shard_180.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "25dfc2a80c1727c353b3c6e558817fa1" |
| }, |
| { |
| "dataPath": "params_shard_181.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.26.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bf2d6e07f9f11b0108b39e0474fb523e" |
| }, |
| { |
| "dataPath": "params_shard_182.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.26.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "47042032ec8a5adbc830b7b7a6a30c80" |
| }, |
| { |
| "dataPath": "params_shard_183.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c46f570ab76ac2ba000431c672319626" |
| }, |
| { |
| "dataPath": "params_shard_184.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1685ac905edeb3dcfc7b12d12dde4455" |
| }, |
| { |
| "dataPath": "params_shard_185.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2a59f6b72002d0e7535eac11ab9cd4e4" |
| }, |
| { |
| "dataPath": "params_shard_186.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dedad700407a7b0ea1cab6adfb3e508f" |
| }, |
| { |
| "dataPath": "params_shard_187.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a47ec34ce816600f9043b75c25432bc6" |
| }, |
| { |
| "dataPath": "params_shard_188.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0301acdbfc7d2788602785548bd1d505" |
| }, |
| { |
| "dataPath": "params_shard_189.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.27.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f2c983673363dbda3ffce2ebc11e73f3" |
| }, |
| { |
| "dataPath": "params_shard_190.bin", |
| "format": "raw-shard", |
| "nbytes": 29958144, |
| "records": [ |
| { |
| "name": "model.layers.25.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.25.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.26.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.26.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.26.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.27.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| } |
| ], |
| "md5sum": "462b66df7b536755eb357508c715c01f" |
| }, |
| { |
| "dataPath": "params_shard_191.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.27.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a6cca03b84c1a51e47ff941c35f25337" |
| }, |
| { |
| "dataPath": "params_shard_192.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "40cbad82edbdc6a5009ef92e833d097f" |
| }, |
| { |
| "dataPath": "params_shard_193.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5e2876dadade116f332cacc02b6147b5" |
| }, |
| { |
| "dataPath": "params_shard_194.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.28.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "57b6e2e02af3a4243cdf4ede1fd1fbea" |
| }, |
| { |
| "dataPath": "params_shard_195.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.28.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "22879f161ba3091898746d1ab6ae7f42" |
| }, |
| { |
| "dataPath": "params_shard_196.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.27.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.27.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.28.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "504829fcb9f612e147abe6cef3cc7f37" |
| }, |
| { |
| "dataPath": "params_shard_197.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5d705c4fa9b83f8d0f7d3e959ad8146a" |
| }, |
| { |
| "dataPath": "params_shard_198.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "afa06629da701c601438454d98ee6cb7" |
| }, |
| { |
| "dataPath": "params_shard_199.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9841a00eedd2c955fa1a3dc176d6abf6" |
| }, |
| { |
| "dataPath": "params_shard_200.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "304254181c4521ad6d606d4b39268d7c" |
| }, |
| { |
| "dataPath": "params_shard_201.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "84813e1a219466b88c70ffb75d5b2b11" |
| }, |
| { |
| "dataPath": "params_shard_202.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c1ddbb40fe8d1a5b75c46f2f20c6c62a" |
| }, |
| { |
| "dataPath": "params_shard_203.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "79317253d213b73b4a3a3da637e7e6bb" |
| }, |
| { |
| "dataPath": "params_shard_204.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f97e4e0a4d7fa8cbb461647b60c839b7" |
| }, |
| { |
| "dataPath": "params_shard_205.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "20e1871c096392e8eb895c917c82fdb5" |
| }, |
| { |
| "dataPath": "params_shard_206.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d71830c5f326e0d004e2b2d14422899d" |
| }, |
| { |
| "dataPath": "params_shard_207.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.29.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c338bf88561e61117055e03418bc88c8" |
| }, |
| { |
| "dataPath": "params_shard_208.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.29.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e8859a9a0ae4f181eeb3e7944e6e1a62" |
| }, |
| { |
| "dataPath": "params_shard_209.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "491314408d19cc403b3ac0ee2153737a" |
| }, |
| { |
| "dataPath": "params_shard_210.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3dc760aa520344215921d84f95f426ee" |
| }, |
| { |
| "dataPath": "params_shard_211.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d2e964571c181cd196d6899ac554d390" |
| }, |
| { |
| "dataPath": "params_shard_212.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3a25926b72fe5a166193dca0564ffc97" |
| }, |
| { |
| "dataPath": "params_shard_213.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "64cf46d6a395b851915668d8dfb7e8e8" |
| }, |
| { |
| "dataPath": "params_shard_214.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4c31a7ef1ffe706b8e97a8bd89070b05" |
| }, |
| { |
| "dataPath": "params_shard_215.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "28cc3e6730e7995188b987b8021fa1ae" |
| }, |
| { |
| "dataPath": "params_shard_216.bin", |
| "format": "raw-shard", |
| "nbytes": 29958144, |
| "records": [ |
| { |
| "name": "model.layers.28.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.28.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.29.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.29.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.29.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.3.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| } |
| ], |
| "md5sum": "3d2b6cfa1cd50ad6c6535d3fabfe5b77" |
| }, |
| { |
| "dataPath": "params_shard_217.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.3.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6a8e49b66f828f88b946b8948b39998c" |
| }, |
| { |
| "dataPath": "params_shard_218.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ee9ac03c0bc8456c2e1ad21378b89d65" |
| }, |
| { |
| "dataPath": "params_shard_219.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b94bb40539045fe1f2b40995ccda86fb" |
| }, |
| { |
| "dataPath": "params_shard_220.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "62c7ba308242db5dab6128f274709bb8" |
| }, |
| { |
| "dataPath": "params_shard_221.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.4.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b7f64f96f8aeecddd7af0ee26b8f722e" |
| }, |
| { |
| "dataPath": "params_shard_222.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.3.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "547f48033461ef06eb60dddfd79f414f" |
| }, |
| { |
| "dataPath": "params_shard_223.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3225b904d8366f529ec46a72cee9385e" |
| }, |
| { |
| "dataPath": "params_shard_224.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5a3a437e74ad65d3434abe8cb49f51dd" |
| }, |
| { |
| "dataPath": "params_shard_225.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "636edc4d5e8d72c74475d77c5e594ba3" |
| }, |
| { |
| "dataPath": "params_shard_226.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0ad548e726e63840376f1eb5b0b097a8" |
| }, |
| { |
| "dataPath": "params_shard_227.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9d6eb955c6cc1b73b3244c75b2f147f4" |
| }, |
| { |
| "dataPath": "params_shard_228.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fec40f17475d224417bed31fc9970e67" |
| }, |
| { |
| "dataPath": "params_shard_229.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.30.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b35ffda485966e9ff243ccecf34a4ba1" |
| }, |
| { |
| "dataPath": "params_shard_230.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.30.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "24ea39fca537ac171034193392f3b27c" |
| }, |
| { |
| "dataPath": "params_shard_231.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e0a1637ed73a9be72b84ba82b1e0eaba" |
| }, |
| { |
| "dataPath": "params_shard_232.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e799156c4382c3924f71f83bb404428b" |
| }, |
| { |
| "dataPath": "params_shard_233.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.31.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f76e151e828ef5c5d5fbadbb18307e87" |
| }, |
| { |
| "dataPath": "params_shard_234.bin", |
| "format": "raw-shard", |
| "nbytes": 29908992, |
| "records": [ |
| { |
| "name": "model.layers.4.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.30.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.30.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.30.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20471808 |
| } |
| ], |
| "md5sum": "b9a9c4f1d709a45273b8d4f08eef51fa" |
| }, |
| { |
| "dataPath": "params_shard_235.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.31.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cb7eaf48c7fd67d004ac43fb5e207d90" |
| }, |
| { |
| "dataPath": "params_shard_236.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f73e10472e7ef7a7dc8a31ba15a6c51e" |
| }, |
| { |
| "dataPath": "params_shard_237.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a2b6e26e2073654a024a56fea0e09d75" |
| }, |
| { |
| "dataPath": "params_shard_238.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8af35cde65993d30c3d228af87caec7c" |
| }, |
| { |
| "dataPath": "params_shard_239.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7082889a891244f4ae2838025290f4bf" |
| }, |
| { |
| "dataPath": "params_shard_240.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.32.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1bf619a7835bba54bbf3597f3ac88c35" |
| }, |
| { |
| "dataPath": "params_shard_241.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.32.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7c3dc4dd1be5d07f41aa717810ba8d38" |
| }, |
| { |
| "dataPath": "params_shard_242.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.32.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a9c8896941bc42fd33a6afd09ffa8001" |
| }, |
| { |
| "dataPath": "params_shard_243.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.32.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "beb91fcc28cd0149a8c73adecd7939e3" |
| }, |
| { |
| "dataPath": "params_shard_244.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.32.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b583380b2879ef4708f928c269670374" |
| }, |
| { |
| "dataPath": "params_shard_245.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.32.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4fa767b8a9b1d30ff4d4f4d3b2944373" |
| }, |
| { |
| "dataPath": "params_shard_246.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.32.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "12f3e69f3177ea44698ce11381cce25c" |
| }, |
| { |
| "dataPath": "params_shard_247.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.32.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "821fbf6e7e4c2fdc9293df9ea1e670c7" |
| }, |
| { |
| "dataPath": "params_shard_248.bin", |
| "format": "raw-shard", |
| "nbytes": 31506432, |
| "records": [ |
| { |
| "name": "model.layers.31.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.31.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.31.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 20447232 |
| }, |
| { |
| "name": "model.layers.32.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 20471808 |
| }, |
| { |
| "name": "model.layers.32.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20496384 |
| } |
| ], |
| "md5sum": "bbcdde45125753fb51216bbfebc2eb83" |
| }, |
| { |
| "dataPath": "params_shard_249.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.33.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0b542fc9ba3866070d42113cd5cdfed8" |
| }, |
| { |
| "dataPath": "params_shard_250.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.33.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6a121504ca3ad9aaca2f2994772a89d1" |
| }, |
| { |
| "dataPath": "params_shard_251.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.33.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "af4e0b97045942332cd8c1223da5e62c" |
| }, |
| { |
| "dataPath": "params_shard_252.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.33.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "48722335911c07189b328bf60df92acd" |
| }, |
| { |
| "dataPath": "params_shard_253.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.33.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cd66ecd873b43b9586596d2a5bd29e22" |
| }, |
| { |
| "dataPath": "params_shard_254.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.33.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5e04cf9f744802f483148adfb8f625b7" |
| }, |
| { |
| "dataPath": "params_shard_255.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.33.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fc72da85b179253b492d3ac0a7a8e1e4" |
| }, |
| { |
| "dataPath": "params_shard_256.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.33.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "86e0f35a2383d3467a0400058802a28a" |
| }, |
| { |
| "dataPath": "params_shard_257.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.34.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c826afa4cef4122e236b1722beabc8a5" |
| }, |
| { |
| "dataPath": "params_shard_258.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.34.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b65640f813fccbf415bdd7c88eedba9e" |
| }, |
| { |
| "dataPath": "params_shard_259.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.34.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "eb582a96c95c9e411dc804b14e69fe8a" |
| }, |
| { |
| "dataPath": "params_shard_260.bin", |
| "format": "raw-shard", |
| "nbytes": 29908992, |
| "records": [ |
| { |
| "name": "model.layers.32.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.33.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.33.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.33.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20471808 |
| } |
| ], |
| "md5sum": "fdee6096523b1ba89a1556ca9db532bd" |
| }, |
| { |
| "dataPath": "params_shard_261.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.34.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b3790c836ecc7f42f5215d6fb522cc99" |
| }, |
| { |
| "dataPath": "params_shard_262.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.34.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "58a4793a5c276654edb55abc9ca68e41" |
| }, |
| { |
| "dataPath": "params_shard_263.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.34.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "67e1991564eaa5fe4f870125f065dd39" |
| }, |
| { |
| "dataPath": "params_shard_264.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.34.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d8a129ef0c75d908d5747e2dce0728b5" |
| }, |
| { |
| "dataPath": "params_shard_265.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.34.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "41818ee01ae1c415beb74261470fbc2f" |
| }, |
| { |
| "dataPath": "params_shard_266.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.35.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b06a84ef7f569dba6c20a34417c187ca" |
| }, |
| { |
| "dataPath": "params_shard_267.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.35.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "552ee07c797059d6159846eeba30ed11" |
| }, |
| { |
| "dataPath": "params_shard_268.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.35.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "06214a15dca6b3cd0f5854f5f3218a2f" |
| }, |
| { |
| "dataPath": "params_shard_269.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.35.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "013677fe051a8244e513a62e57954dd6" |
| }, |
| { |
| "dataPath": "params_shard_270.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.35.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "16e7546cca17b0be3d17d7276968c42e" |
| }, |
| { |
| "dataPath": "params_shard_271.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.35.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4a80e46954e2233913482105abd0eaf7" |
| }, |
| { |
| "dataPath": "params_shard_272.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.35.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4ba68b534bfc7f34c266b36dcdd8d512" |
| }, |
| { |
| "dataPath": "params_shard_273.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.35.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "733d0022bfcfc10c5a135492e4870078" |
| }, |
| { |
| "dataPath": "params_shard_274.bin", |
| "format": "raw-shard", |
| "nbytes": 31506432, |
| "records": [ |
| { |
| "name": "model.layers.34.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.34.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.34.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 20447232 |
| }, |
| { |
| "name": "model.layers.35.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 20471808 |
| }, |
| { |
| "name": "model.layers.35.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20496384 |
| } |
| ], |
| "md5sum": "2d8561d4da66d19e2f1b8bc7ed118915" |
| }, |
| { |
| "dataPath": "params_shard_275.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.36.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2792b2854afc8b076f424c8b3f9db042" |
| }, |
| { |
| "dataPath": "params_shard_276.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.36.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "761bbee468e9f19282f91a0c450d1774" |
| }, |
| { |
| "dataPath": "params_shard_277.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.36.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "57e5dc1d5bf9e1f61e6d72df484bce52" |
| }, |
| { |
| "dataPath": "params_shard_278.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.36.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "16bf98100a7deb1737ae6e62c9f8a0a3" |
| }, |
| { |
| "dataPath": "params_shard_279.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.36.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "930c0912230876af1dd850ab481a5331" |
| }, |
| { |
| "dataPath": "params_shard_280.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.36.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "422c5e35cb818386cceab5e67caa323c" |
| }, |
| { |
| "dataPath": "params_shard_281.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.36.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cd6c9d67e17b0678b81492a8a6c16ced" |
| }, |
| { |
| "dataPath": "params_shard_282.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.36.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9213635957e5fff7fe3b6f5d9cc233de" |
| }, |
| { |
| "dataPath": "params_shard_283.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.37.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "27c5f18b723c1a69e0abc102498253e8" |
| }, |
| { |
| "dataPath": "params_shard_284.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.37.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d380f3a1d5cf67496c6e5847f1ced93a" |
| }, |
| { |
| "dataPath": "params_shard_285.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.37.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c8ef23890d1a7f5454047c46e145772d" |
| }, |
| { |
| "dataPath": "params_shard_286.bin", |
| "format": "raw-shard", |
| "nbytes": 29908992, |
| "records": [ |
| { |
| "name": "model.layers.35.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.36.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.36.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.36.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20471808 |
| } |
| ], |
| "md5sum": "3a2e2fc756869dc306492b120634bcfd" |
| }, |
| { |
| "dataPath": "params_shard_287.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.37.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7dd0cc7501832c7cc1955e12090b75dc" |
| }, |
| { |
| "dataPath": "params_shard_288.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.37.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "01914ca6f760a351fdd479007c027d6e" |
| }, |
| { |
| "dataPath": "params_shard_289.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.37.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e272a30c99d88d9d0ee25e0621f36de8" |
| }, |
| { |
| "dataPath": "params_shard_290.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.37.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4e4a41b5419d9ca6ad695bb8b533530e" |
| }, |
| { |
| "dataPath": "params_shard_291.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.37.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "86125340bfb512352935f78f0f23a789" |
| }, |
| { |
| "dataPath": "params_shard_292.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.38.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "323b6f6d066ea6aa7b74bc915722f64b" |
| }, |
| { |
| "dataPath": "params_shard_293.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.38.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6150b8bea9420c6b9836ae91288123fb" |
| }, |
| { |
| "dataPath": "params_shard_294.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.38.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a0dda61afacc19e7bba18d094ccb0371" |
| }, |
| { |
| "dataPath": "params_shard_295.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.38.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4a3330cac1c22f256ed55a7b6ec7d27f" |
| }, |
| { |
| "dataPath": "params_shard_296.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.38.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "41e0bf2f651a0eddaf02564e674cae98" |
| }, |
| { |
| "dataPath": "params_shard_297.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.38.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6cbf19fb4eea022885103e3ab1388ddf" |
| }, |
| { |
| "dataPath": "params_shard_298.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.38.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "902391c708f29ed56430ace0de2007e5" |
| }, |
| { |
| "dataPath": "params_shard_299.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.38.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b9f063ef5e259e8003d4e44e51ecac41" |
| }, |
| { |
| "dataPath": "params_shard_300.bin", |
| "format": "raw-shard", |
| "nbytes": 31506432, |
| "records": [ |
| { |
| "name": "model.layers.37.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.37.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.37.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 20447232 |
| }, |
| { |
| "name": "model.layers.38.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 20471808 |
| }, |
| { |
| "name": "model.layers.38.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20496384 |
| } |
| ], |
| "md5sum": "9a3308fb15891fbc6c52ef676b496646" |
| }, |
| { |
| "dataPath": "params_shard_301.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.39.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5078bd0ef4c2162019cc8ccc100ffb4d" |
| }, |
| { |
| "dataPath": "params_shard_302.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.39.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c9f100f19e5c8d5405dd693a77d6e9bb" |
| }, |
| { |
| "dataPath": "params_shard_303.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.39.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e915aead37d8d0f59eedadcd3b3e18df" |
| }, |
| { |
| "dataPath": "params_shard_304.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.39.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "16a3f36860a6b64ff1bfb9a0f0dd7ba3" |
| }, |
| { |
| "dataPath": "params_shard_305.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.39.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dac704a2b9321c3ebc7b37b220a459bf" |
| }, |
| { |
| "dataPath": "params_shard_306.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.39.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dfbd6cac746ae26754d7571c8fc0bda3" |
| }, |
| { |
| "dataPath": "params_shard_307.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.39.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "04ebefb2db9de8d52d741f4e3f339813" |
| }, |
| { |
| "dataPath": "params_shard_308.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.39.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c53e4498af24a081ec3aefc8b65a0752" |
| }, |
| { |
| "dataPath": "params_shard_309.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.40.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8a037a14fbdb43a46c3288fca97eb170" |
| }, |
| { |
| "dataPath": "params_shard_310.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.40.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2aeb9990465fc42f41f6b4e5ce8bfc39" |
| }, |
| { |
| "dataPath": "params_shard_311.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.40.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "055c6be23675ce021ac4b4442a77dc1a" |
| }, |
| { |
| "dataPath": "params_shard_312.bin", |
| "format": "raw-shard", |
| "nbytes": 29908992, |
| "records": [ |
| { |
| "name": "model.layers.38.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.39.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.39.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.39.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20471808 |
| } |
| ], |
| "md5sum": "e0ca62e5e9a6fdaef96929a7c8147099" |
| }, |
| { |
| "dataPath": "params_shard_313.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.40.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "eabda0606507bc28a78e8dcbdcfb86cb" |
| }, |
| { |
| "dataPath": "params_shard_314.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9d33137fc6483b01f5b06d35e26076a9" |
| }, |
| { |
| "dataPath": "params_shard_315.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4f6ee53184f7051aa4230102c19da7e0" |
| }, |
| { |
| "dataPath": "params_shard_316.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1946c68b5999456f4839b25daf0915a4" |
| }, |
| { |
| "dataPath": "params_shard_317.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c5ca60926bc0acd879e34b48f90d0695" |
| }, |
| { |
| "dataPath": "params_shard_318.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "94c860761bdb35bab5557be25bb8ef5e" |
| }, |
| { |
| "dataPath": "params_shard_319.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d7e23a2cc0f79b8665fe0f75e06f9956" |
| }, |
| { |
| "dataPath": "params_shard_320.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7e68395707ab66621037e60dd42c491e" |
| }, |
| { |
| "dataPath": "params_shard_321.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cf9afcf20774be6176b9fc5bf80a8e1f" |
| }, |
| { |
| "dataPath": "params_shard_322.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d97693955b534197ba7b33d45b674de8" |
| }, |
| { |
| "dataPath": "params_shard_323.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "093653e8f65e2273813dba1ac84189c2" |
| }, |
| { |
| "dataPath": "params_shard_324.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4cdb17cbdefbefc50f15a93bc331875e" |
| }, |
| { |
| "dataPath": "params_shard_325.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.5.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f18f687f1f7325efe029587557d6f042" |
| }, |
| { |
| "dataPath": "params_shard_326.bin", |
| "format": "raw-shard", |
| "nbytes": 31506432, |
| "records": [ |
| { |
| "name": "model.layers.40.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.40.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.4.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 20447232 |
| }, |
| { |
| "name": "model.layers.5.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 20471808 |
| }, |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20496384 |
| } |
| ], |
| "md5sum": "41bcf4a82e064c40d748eb281bd7e99f" |
| }, |
| { |
| "dataPath": "params_shard_327.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.40.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3617274f0d984dc3ba1f15c9fe9ba92b" |
| }, |
| { |
| "dataPath": "params_shard_328.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.40.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2d0212569eecbd27fa0ee29061aa332d" |
| }, |
| { |
| "dataPath": "params_shard_329.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.40.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f34b81a2b33f78069c933b5344d1b8ad" |
| }, |
| { |
| "dataPath": "params_shard_330.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.40.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "eaf680b459d3fe1c575206f4dc572bfd" |
| }, |
| { |
| "dataPath": "params_shard_331.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.41.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "23cc0d056d493c623b46044346121e40" |
| }, |
| { |
| "dataPath": "params_shard_332.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.41.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f668d22b5b7d92a166b6a5b8fcd1fb57" |
| }, |
| { |
| "dataPath": "params_shard_333.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.41.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ef6f84f7b168a70ace6d96080ad081c9" |
| }, |
| { |
| "dataPath": "params_shard_334.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.41.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "560357686bbc89829ba022e3c51b3ef6" |
| }, |
| { |
| "dataPath": "params_shard_335.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.41.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "daea47b41b3590f1a7ab8559b5c0fe61" |
| }, |
| { |
| "dataPath": "params_shard_336.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.41.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a4e40ba9e841745bab45c97a8a432e9f" |
| }, |
| { |
| "dataPath": "params_shard_337.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.41.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3260b5d5756d685bb304ce4f0f9c5373" |
| }, |
| { |
| "dataPath": "params_shard_338.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.41.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9a5f5c90fa9b0dafc4beb75755200393" |
| }, |
| { |
| "dataPath": "params_shard_339.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.42.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5eb7093fa43f5dd845516dd23d343ae3" |
| }, |
| { |
| "dataPath": "params_shard_340.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.42.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "998119c1a588ff880f53dc755e5526e3" |
| }, |
| { |
| "dataPath": "params_shard_341.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.42.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2068c3a37186c9338a2fc27b33529706" |
| }, |
| { |
| "dataPath": "params_shard_342.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.42.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "74c06fa9dac3c9d2c56a0008b6a6776d" |
| }, |
| { |
| "dataPath": "params_shard_343.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.42.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "939a9175212b3343df6afad686bdddb7" |
| }, |
| { |
| "dataPath": "params_shard_344.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.42.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fb82383b0a7d7983e3961824c8c98b42" |
| }, |
| { |
| "dataPath": "params_shard_345.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.42.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a7317192c2c3ed9ef36db190b343c8e3" |
| }, |
| { |
| "dataPath": "params_shard_346.bin", |
| "format": "raw-shard", |
| "nbytes": 29958144, |
| "records": [ |
| { |
| "name": "model.layers.5.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.40.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.41.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.41.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.41.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.42.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| } |
| ], |
| "md5sum": "25a2ab6476e23299131432ecc8f066df" |
| }, |
| { |
| "dataPath": "params_shard_347.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.42.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1d07622606d9f74f6316c79a347eabe8" |
| }, |
| { |
| "dataPath": "params_shard_348.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.43.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "aadd32888d240c898af1a4df2f314a88" |
| }, |
| { |
| "dataPath": "params_shard_349.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.43.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ab565f20cbaa84be4fba4c7e3b7b76f9" |
| }, |
| { |
| "dataPath": "params_shard_350.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.43.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ed151471e41abb00d3adb4f3691a9a06" |
| }, |
| { |
| "dataPath": "params_shard_351.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.43.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "232c4edb3848f05696a3a581da59d943" |
| }, |
| { |
| "dataPath": "params_shard_352.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.42.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.42.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.43.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "507c21547e318b8e81955b510fe1adee" |
| }, |
| { |
| "dataPath": "params_shard_353.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.43.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "76d49795e2692067a57c54f175a4c0ad" |
| }, |
| { |
| "dataPath": "params_shard_354.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.43.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "baeafad621dd744a78fa60482ff3f45a" |
| }, |
| { |
| "dataPath": "params_shard_355.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.43.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b22639b963be117f6c64ca87a3cc762b" |
| }, |
| { |
| "dataPath": "params_shard_356.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.43.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "abf54024ba05d614539472860ae2b1c9" |
| }, |
| { |
| "dataPath": "params_shard_357.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.44.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "26fd6520c5a01339c64b40c6e764c1f6" |
| }, |
| { |
| "dataPath": "params_shard_358.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.44.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9424d1104408be407d6d530633ccce83" |
| }, |
| { |
| "dataPath": "params_shard_359.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.44.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3bec24f753e5ee10f6631dc70f9e9570" |
| }, |
| { |
| "dataPath": "params_shard_360.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.44.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ccc2d4d2949a0f4a0f4eed64ff911939" |
| }, |
| { |
| "dataPath": "params_shard_361.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.44.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5bff97807714a5cfe4af8be1eb15e860" |
| }, |
| { |
| "dataPath": "params_shard_362.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.44.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e1627960af7488c76fad84a040699a24" |
| }, |
| { |
| "dataPath": "params_shard_363.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.44.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f895ca8d689814c2501c884bce1fec11" |
| }, |
| { |
| "dataPath": "params_shard_364.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.44.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e1beee862288032a0ce321cd95a5a27e" |
| }, |
| { |
| "dataPath": "params_shard_365.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.45.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "87e1a0b23d022d3f82e386ce918ffe13" |
| }, |
| { |
| "dataPath": "params_shard_366.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.45.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e75543c26e2406fc1601a8f81a47a8a1" |
| }, |
| { |
| "dataPath": "params_shard_367.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.45.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "88019c293fe2ba25bad0d49e58f549e4" |
| }, |
| { |
| "dataPath": "params_shard_368.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.45.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2a3e709b60654792fabf8292b125169b" |
| }, |
| { |
| "dataPath": "params_shard_369.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.45.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "caca1d6d30a7ee769ceeb2f18624f6b0" |
| }, |
| { |
| "dataPath": "params_shard_370.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.45.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "db09aaea1bd5e99f32947c0286ef8e42" |
| }, |
| { |
| "dataPath": "params_shard_371.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.45.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cf481dec12c0edf1e902c72ea15e679a" |
| }, |
| { |
| "dataPath": "params_shard_372.bin", |
| "format": "raw-shard", |
| "nbytes": 29958144, |
| "records": [ |
| { |
| "name": "model.layers.43.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.43.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.44.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.44.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.44.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.45.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| } |
| ], |
| "md5sum": "8a2d8e395e78e03bc1aecc94a081ce83" |
| }, |
| { |
| "dataPath": "params_shard_373.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.45.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1efa314545ae07cfb489a6626fb8e12b" |
| }, |
| { |
| "dataPath": "params_shard_374.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.46.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9ccd3514808ffa5640c176c928aa6b40" |
| }, |
| { |
| "dataPath": "params_shard_375.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.46.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a6b803a6bd8772abe5d1f7da7889ca70" |
| }, |
| { |
| "dataPath": "params_shard_376.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.46.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1f8fac97a287a3f2e5d4b58a3288d1da" |
| }, |
| { |
| "dataPath": "params_shard_377.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.46.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d64d0c220a7f3e11466a42180b53f688" |
| }, |
| { |
| "dataPath": "params_shard_378.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.45.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.45.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.46.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "792ecff8f70ebe5373c9377ca683a22b" |
| }, |
| { |
| "dataPath": "params_shard_379.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.46.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0f8119ddb4cfac56c3aa143e2a80f2c5" |
| }, |
| { |
| "dataPath": "params_shard_380.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.46.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e95b73fce557947cbe1bb3ad07b4c336" |
| }, |
| { |
| "dataPath": "params_shard_381.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.46.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ab56fea06d8b5ae16fe851ed7c0e7799" |
| }, |
| { |
| "dataPath": "params_shard_382.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.46.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0eed1362f0c4d5f9e7c0eb44aa86d5f4" |
| }, |
| { |
| "dataPath": "params_shard_383.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.47.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6cf6758412b7692bcf89befaac158a94" |
| }, |
| { |
| "dataPath": "params_shard_384.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.47.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "671378bfa1bb85b7b5c677333f2531d8" |
| }, |
| { |
| "dataPath": "params_shard_385.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.47.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2ac739d8a8050edd24196289e5557d8a" |
| }, |
| { |
| "dataPath": "params_shard_386.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.47.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d32a890d2d2611896d097bad532f5aaa" |
| }, |
| { |
| "dataPath": "params_shard_387.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.47.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "86a4796647d1a941e44269284efb7d8d" |
| }, |
| { |
| "dataPath": "params_shard_388.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.47.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "07fcb3e0c852413a5cb36212ea82de8a" |
| }, |
| { |
| "dataPath": "params_shard_389.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.47.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7ed7a76fb1f42b05513e2a4515d05e60" |
| }, |
| { |
| "dataPath": "params_shard_390.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.47.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6d6d6edc259184cdeda1437183a3c6b1" |
| }, |
| { |
| "dataPath": "params_shard_391.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.48.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2477b73fa5c20553abdc0643c95ede0e" |
| }, |
| { |
| "dataPath": "params_shard_392.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.48.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6929075f09e8b289084857d123b15d92" |
| }, |
| { |
| "dataPath": "params_shard_393.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.48.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8e775f6ed5f09a9ce882c6f125e2689a" |
| }, |
| { |
| "dataPath": "params_shard_394.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.48.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "67c4a2b4a9fbd62c83441cd591df0e6a" |
| }, |
| { |
| "dataPath": "params_shard_395.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.48.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "460c47c7714e62220a4c8e2c919e3342" |
| }, |
| { |
| "dataPath": "params_shard_396.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.48.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "89ea78df9854e1fc5df712f9c5a30f24" |
| }, |
| { |
| "dataPath": "params_shard_397.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.48.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4c2db0987ca722473f86177d730e0a97" |
| }, |
| { |
| "dataPath": "params_shard_398.bin", |
| "format": "raw-shard", |
| "nbytes": 29958144, |
| "records": [ |
| { |
| "name": "model.layers.46.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.46.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.47.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.47.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.47.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.48.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| } |
| ], |
| "md5sum": "9fcd05d7689788f6a35319739a84d1e6" |
| }, |
| { |
| "dataPath": "params_shard_399.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.48.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d4430ae9d4baa3f14798cfe8a187b174" |
| }, |
| { |
| "dataPath": "params_shard_400.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.49.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6db99860d7f15fb1e79078834ffd13da" |
| }, |
| { |
| "dataPath": "params_shard_401.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.49.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d118f994afcf9da4365bcba508500c16" |
| }, |
| { |
| "dataPath": "params_shard_402.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.49.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "11e85edb6856ad787969744f46258748" |
| }, |
| { |
| "dataPath": "params_shard_403.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.49.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "11e517be43057f2a84567f9d91b6f8cd" |
| }, |
| { |
| "dataPath": "params_shard_404.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.48.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.48.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.49.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "7b581f03d7c11d52d94ad7a272b9caa0" |
| }, |
| { |
| "dataPath": "params_shard_405.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.49.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ab72770824f9a110e63964a0f6b375b4" |
| }, |
| { |
| "dataPath": "params_shard_406.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.49.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3529ae39ab077bba3507c586dda784c5" |
| }, |
| { |
| "dataPath": "params_shard_407.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.49.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5fee79b9d542cbc435a407dd5bad403a" |
| }, |
| { |
| "dataPath": "params_shard_408.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.49.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7d539d56ba7d9be6b8a57cf12fa34b87" |
| }, |
| { |
| "dataPath": "params_shard_409.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.50.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c1a209047da42909f9a952809b6a43a9" |
| }, |
| { |
| "dataPath": "params_shard_410.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.50.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9fe1eb771200819b29e98d959496120e" |
| }, |
| { |
| "dataPath": "params_shard_411.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.50.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fb5e2152c09a5a8934a7ef297097344a" |
| }, |
| { |
| "dataPath": "params_shard_412.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.50.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6553f1b2e1c01f322edb5280f80fb509" |
| }, |
| { |
| "dataPath": "params_shard_413.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.50.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1c39b745b6a96a0a26fb5dce54b8ad69" |
| }, |
| { |
| "dataPath": "params_shard_414.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.50.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b3b7fe018371f111cf67b84bbcf594ac" |
| }, |
| { |
| "dataPath": "params_shard_415.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.50.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f6f4f62bacb1c254e5829ac64ca6a303" |
| }, |
| { |
| "dataPath": "params_shard_416.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.50.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "71d8f8a04954f34f964b1f0700415c60" |
| }, |
| { |
| "dataPath": "params_shard_417.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.51.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1885e703a8d45e11ca2b4118e7201e34" |
| }, |
| { |
| "dataPath": "params_shard_418.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.51.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b8267e6ab37daec06a20bfdaa5449add" |
| }, |
| { |
| "dataPath": "params_shard_419.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.51.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "02506ac1efc5ee0cacf69471e24683e7" |
| }, |
| { |
| "dataPath": "params_shard_420.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.51.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a98e94324c474dadff5224d7197011c0" |
| }, |
| { |
| "dataPath": "params_shard_421.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.51.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c4a4aa36f568dc607a32e7a4a6934b14" |
| }, |
| { |
| "dataPath": "params_shard_422.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.51.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b5fd70817dc914f8999ca65b262e18f7" |
| }, |
| { |
| "dataPath": "params_shard_423.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.51.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "36bfc19c1130b7946da59bb071c55860" |
| }, |
| { |
| "dataPath": "params_shard_424.bin", |
| "format": "raw-shard", |
| "nbytes": 29958144, |
| "records": [ |
| { |
| "name": "model.layers.49.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.49.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.50.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.50.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.50.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.51.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| } |
| ], |
| "md5sum": "133a966a3b83d4e511860c61912aa940" |
| }, |
| { |
| "dataPath": "params_shard_425.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.51.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "24c3687ce4e8260e797f67c1e1407772" |
| }, |
| { |
| "dataPath": "params_shard_426.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.52.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "863e1e2a6b2811438ec83c8d25159c9e" |
| }, |
| { |
| "dataPath": "params_shard_427.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.52.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dc7110dc0114855504d389766a7ef27d" |
| }, |
| { |
| "dataPath": "params_shard_428.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.52.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "85e558ed89f101064cf4120f7ac35799" |
| }, |
| { |
| "dataPath": "params_shard_429.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.52.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d78d33ae729136c7a4786319f1382e69" |
| }, |
| { |
| "dataPath": "params_shard_430.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.51.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.51.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.52.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "f9cbab190a010bd223d35dee1d5e207a" |
| }, |
| { |
| "dataPath": "params_shard_431.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.52.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "141b1bfb3c58bf5b933defbddd9a66e0" |
| }, |
| { |
| "dataPath": "params_shard_432.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.52.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a3d669b18876cee06e9a1e53b3b8f2e7" |
| }, |
| { |
| "dataPath": "params_shard_433.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.52.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8dce1d1dab439098969324fa9b82ce36" |
| }, |
| { |
| "dataPath": "params_shard_434.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.52.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a03bc1dfe0bca244c79f9873a3f13b50" |
| }, |
| { |
| "dataPath": "params_shard_435.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.53.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ba1fd1884985987f8402b2017ffff780" |
| }, |
| { |
| "dataPath": "params_shard_436.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.53.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f18d1174ccb98bf6f2908e1213266a4b" |
| }, |
| { |
| "dataPath": "params_shard_437.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.53.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6cf5f7697a91e8a756d823654d24cd7a" |
| }, |
| { |
| "dataPath": "params_shard_438.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.53.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5a1ccccd4132ff98b1fd03f7df1e7f3c" |
| }, |
| { |
| "dataPath": "params_shard_439.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.53.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ccf531e0ce593314bfe3e660d603df0a" |
| }, |
| { |
| "dataPath": "params_shard_440.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.53.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3d80f75490c360202f8989515dfb7657" |
| }, |
| { |
| "dataPath": "params_shard_441.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.53.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f93d62ec4c9e66bf8d0d1bee1f570f0f" |
| }, |
| { |
| "dataPath": "params_shard_442.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.53.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9db325d6500aad2fc7b03544685f08bb" |
| }, |
| { |
| "dataPath": "params_shard_443.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.54.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7fb796bbe1c3dd90f39043aee991302f" |
| }, |
| { |
| "dataPath": "params_shard_444.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.54.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4d16282616b64e064d4f88bf26a6ee0b" |
| }, |
| { |
| "dataPath": "params_shard_445.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.54.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dcfb59c8649eba1bbd43c300142b767a" |
| }, |
| { |
| "dataPath": "params_shard_446.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.54.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "574cdbdac63f4bce2a36adf6d1bac6b4" |
| }, |
| { |
| "dataPath": "params_shard_447.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.54.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b5795336e1f44ad239ae7d6a17f1854c" |
| }, |
| { |
| "dataPath": "params_shard_448.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.54.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e37c7f400db490cb07f976dd40814a32" |
| }, |
| { |
| "dataPath": "params_shard_449.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.54.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "25de9d8fef1c5de7412bc35c21f535b4" |
| }, |
| { |
| "dataPath": "params_shard_450.bin", |
| "format": "raw-shard", |
| "nbytes": 29958144, |
| "records": [ |
| { |
| "name": "model.layers.52.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.52.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.53.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.53.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.53.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.54.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| } |
| ], |
| "md5sum": "aaab30880c644cba3238161c157e624a" |
| }, |
| { |
| "dataPath": "params_shard_451.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.54.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d2d2d19d8d36a324ab42b9f92d6cfc25" |
| }, |
| { |
| "dataPath": "params_shard_452.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.55.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f8937c0f7708eaa5ec8199c833582aa0" |
| }, |
| { |
| "dataPath": "params_shard_453.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.55.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "656d2c40f7481df75b278c2d5881ba46" |
| }, |
| { |
| "dataPath": "params_shard_454.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.55.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "91ceb71505ad6f6d9ea4638ea8fac3cf" |
| }, |
| { |
| "dataPath": "params_shard_455.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.55.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "143c328d64bde3972adc9fffc8718c46" |
| }, |
| { |
| "dataPath": "params_shard_456.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.54.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.54.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.55.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "3e7451d1cb1068f044cbf625749bf17d" |
| }, |
| { |
| "dataPath": "params_shard_457.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.55.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f226999e7a99cd26e26557a36ff9cdb0" |
| }, |
| { |
| "dataPath": "params_shard_458.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.55.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1f99f0b03f69dd92f2db0d086775497a" |
| }, |
| { |
| "dataPath": "params_shard_459.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.55.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4b7f09d8d61e0dd6505f2bef6aea343b" |
| }, |
| { |
| "dataPath": "params_shard_460.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.55.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "27b9078118f337f3f4d027dc88522500" |
| }, |
| { |
| "dataPath": "params_shard_461.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.56.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bd22bd16a48aef7ebf1920a9d5b6c007" |
| }, |
| { |
| "dataPath": "params_shard_462.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.56.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e695c64aa41f2137c241acbad5904c8b" |
| }, |
| { |
| "dataPath": "params_shard_463.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.56.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ee91cabb08cc843d0fe02ad358b9225c" |
| }, |
| { |
| "dataPath": "params_shard_464.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.56.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9782ca3d205795207a0d102822ab91e6" |
| }, |
| { |
| "dataPath": "params_shard_465.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.56.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cd03dd52846d1031c55bb3b84468067b" |
| }, |
| { |
| "dataPath": "params_shard_466.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.56.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "100f76964d0ea01c6940689805d212ae" |
| }, |
| { |
| "dataPath": "params_shard_467.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.56.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "24717e4fa232edcd91035e028e270e4c" |
| }, |
| { |
| "dataPath": "params_shard_468.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.56.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7e63396fb6f837f90f633b9fb49197ed" |
| }, |
| { |
| "dataPath": "params_shard_469.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.57.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5c33842b66a5ec2dfb2c4624c635efe1" |
| }, |
| { |
| "dataPath": "params_shard_470.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.57.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f02259e4015843f2bf069d0f430aed89" |
| }, |
| { |
| "dataPath": "params_shard_471.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.57.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e5ea0dd672cb7cf69eca26c2c8d3c8d9" |
| }, |
| { |
| "dataPath": "params_shard_472.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.57.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4eda25ff3065877f6736c75020d4df37" |
| }, |
| { |
| "dataPath": "params_shard_473.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.57.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1c6b689b55a8f7f2fdf2f26bdafff307" |
| }, |
| { |
| "dataPath": "params_shard_474.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.57.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c27ce626d3e3962b0ae75231ef020697" |
| }, |
| { |
| "dataPath": "params_shard_475.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.57.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1f0470ac2182e1e8b4febbb30508bc94" |
| }, |
| { |
| "dataPath": "params_shard_476.bin", |
| "format": "raw-shard", |
| "nbytes": 29958144, |
| "records": [ |
| { |
| "name": "model.layers.55.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.55.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.56.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.56.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.56.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.57.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| } |
| ], |
| "md5sum": "1f28762870df1c52252d2cbe6eabaacd" |
| }, |
| { |
| "dataPath": "params_shard_477.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.57.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3b5cb8e191a6db26c121d5a25b708137" |
| }, |
| { |
| "dataPath": "params_shard_478.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.58.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0cf5895820a1ee6de792fd86bb1d1351" |
| }, |
| { |
| "dataPath": "params_shard_479.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.58.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1a238148fe51fa24d9ce8e93efa137c4" |
| }, |
| { |
| "dataPath": "params_shard_480.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.58.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c60436cdd0dc67f751fca6422e5d56e3" |
| }, |
| { |
| "dataPath": "params_shard_481.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.58.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8037a7b4a247b0f535bc7b77b492f50a" |
| }, |
| { |
| "dataPath": "params_shard_482.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.57.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.57.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.58.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "d489a2d80cf97ae7f6cbfef05240520d" |
| }, |
| { |
| "dataPath": "params_shard_483.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.58.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "279b461c741428cf6cf50a31f9fee116" |
| }, |
| { |
| "dataPath": "params_shard_484.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.58.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "540742f6c4ca04b38519b23c88106869" |
| }, |
| { |
| "dataPath": "params_shard_485.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.58.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b4d5a3fefacd71b2182f27be7ae7955b" |
| }, |
| { |
| "dataPath": "params_shard_486.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.58.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a0ff855f3ca463ff1cdbea46a26b45aa" |
| }, |
| { |
| "dataPath": "params_shard_487.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.59.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "478fcc39b483e29a333f78fd7c06d53a" |
| }, |
| { |
| "dataPath": "params_shard_488.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.59.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3052d7f0bf79e0a902e94386d8024980" |
| }, |
| { |
| "dataPath": "params_shard_489.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.59.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ef0f30584bbe372308c492e9d5903157" |
| }, |
| { |
| "dataPath": "params_shard_490.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.59.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "913d0c8de0b073dbc688ac94cedd8f8d" |
| }, |
| { |
| "dataPath": "params_shard_491.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.59.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "aef2ff545dc729ffa18c5b50fdac4142" |
| }, |
| { |
| "dataPath": "params_shard_492.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.59.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "245c77d394f1d4d7a5c4ba267ef3e047" |
| }, |
| { |
| "dataPath": "params_shard_493.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.59.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "56da5b036dc412292a68b6dd29ba2932" |
| }, |
| { |
| "dataPath": "params_shard_494.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.59.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "82c036f61c97409f6be0090de298f987" |
| }, |
| { |
| "dataPath": "params_shard_495.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c0885b44efa72f20b4798d38bf07f7dd" |
| }, |
| { |
| "dataPath": "params_shard_496.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d5aecc2a16697a3606cd6d2ae68cc8c2" |
| }, |
| { |
| "dataPath": "params_shard_497.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cfcb849be40c42c7d8e40aaf9a18cf3a" |
| }, |
| { |
| "dataPath": "params_shard_498.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2e88dc4c4b643098b1a7eb3c139fd0b1" |
| }, |
| { |
| "dataPath": "params_shard_499.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ce794544afffe1dc0870f7825cb2a443" |
| }, |
| { |
| "dataPath": "params_shard_500.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4c5ce2597fc5ccd1e7ad6085febccd88" |
| }, |
| { |
| "dataPath": "params_shard_501.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8774b3b95784113f663a63c6a63c7db0" |
| }, |
| { |
| "dataPath": "params_shard_502.bin", |
| "format": "raw-shard", |
| "nbytes": 29958144, |
| "records": [ |
| { |
| "name": "model.layers.58.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.58.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.59.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.59.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9486336 |
| }, |
| { |
| "name": "model.layers.59.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20496384 |
| }, |
| { |
| "name": "model.layers.6.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| } |
| ], |
| "md5sum": "2e9e15f42129396c9762a193643649f5" |
| }, |
| { |
| "dataPath": "params_shard_503.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.6.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4a01810fe9e6618ac23675502535ccf3" |
| }, |
| { |
| "dataPath": "params_shard_504.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1a36ef21024257a44aadd3f7d59ca169" |
| }, |
| { |
| "dataPath": "params_shard_505.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cb24fd728c67f0ac0c54101e498852bd" |
| }, |
| { |
| "dataPath": "params_shard_506.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bb90765cfbe106de8c4a74cf93ccd269" |
| }, |
| { |
| "dataPath": "params_shard_507.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.7.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4b303c1fa119863ba8bb7c80ae9177a7" |
| }, |
| { |
| "dataPath": "params_shard_508.bin", |
| "format": "raw-shard", |
| "nbytes": 31457280, |
| "records": [ |
| { |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.6.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20447232 |
| } |
| ], |
| "md5sum": "992749d0429e39b4533f8986d8b94091" |
| }, |
| { |
| "dataPath": "params_shard_509.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.60.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8c9127168ed527f51a09deffa4be63e2" |
| }, |
| { |
| "dataPath": "params_shard_510.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.60.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5b0b81a4158ad4f2212af8f3f12e5501" |
| }, |
| { |
| "dataPath": "params_shard_511.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.60.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dbf4205a31594d0e0e8a75618ae9c9d7" |
| }, |
| { |
| "dataPath": "params_shard_512.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.60.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3c79de4dc413c12dd41e14ebaf0689eb" |
| }, |
| { |
| "dataPath": "params_shard_513.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.60.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fbc49432582556de296143eff64421a6" |
| }, |
| { |
| "dataPath": "params_shard_514.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.60.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f66f2c416f5cd81e1f8689bb5cdba484" |
| }, |
| { |
| "dataPath": "params_shard_515.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.60.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ed22974d964945a660cc1e878cc7c66f" |
| }, |
| { |
| "dataPath": "params_shard_516.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.60.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "769294211d6dcb6c784f1219217a7676" |
| }, |
| { |
| "dataPath": "params_shard_517.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.61.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "84ed064d3e3b94c114f92ad9b9507dde" |
| }, |
| { |
| "dataPath": "params_shard_518.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.61.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "14ab5e32cf12aa09f4a0287858d44260" |
| }, |
| { |
| "dataPath": "params_shard_519.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.61.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "550dfdf57c271ddcfab6162aa6f7d971" |
| }, |
| { |
| "dataPath": "params_shard_520.bin", |
| "format": "raw-shard", |
| "nbytes": 29908992, |
| "records": [ |
| { |
| "name": "model.layers.7.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.60.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.60.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.60.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20471808 |
| } |
| ], |
| "md5sum": "e8dbd9216fba8a1a81ea35c25b6d6c6f" |
| }, |
| { |
| "dataPath": "params_shard_521.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.61.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8eb5a19878ad4c030f434c6037db49ed" |
| }, |
| { |
| "dataPath": "params_shard_522.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.61.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ee5124a168fa145ca7b6dd73fbf5b6d5" |
| }, |
| { |
| "dataPath": "params_shard_523.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.61.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a2afe21446b0f14eb27f0e41fca2be5d" |
| }, |
| { |
| "dataPath": "params_shard_524.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.61.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "671fd9b5f6b9e926f6a12f12ab6837db" |
| }, |
| { |
| "dataPath": "params_shard_525.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.61.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5688612df5305de8f432c2af323e9bf2" |
| }, |
| { |
| "dataPath": "params_shard_526.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.62.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4bc158c6ec280f38a5d728f0197150b9" |
| }, |
| { |
| "dataPath": "params_shard_527.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.62.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "429cc2a64ace72a4668f325f49f9d9b1" |
| }, |
| { |
| "dataPath": "params_shard_528.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.62.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7c65bdf9da2c1e3bc669229b2a34052c" |
| }, |
| { |
| "dataPath": "params_shard_529.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.62.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0a618f4b843e6e28332a11206e0af172" |
| }, |
| { |
| "dataPath": "params_shard_530.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.62.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "73ec3424a3582840698c271fdb63954c" |
| }, |
| { |
| "dataPath": "params_shard_531.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.62.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cf65dc64ea7a01bb44f82e5eb1ec9a1b" |
| }, |
| { |
| "dataPath": "params_shard_532.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.62.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ce504560e8d5199b01f88c80aaccf730" |
| }, |
| { |
| "dataPath": "params_shard_533.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.62.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "12d63bc702d49f106f94bd2e584a4447" |
| }, |
| { |
| "dataPath": "params_shard_534.bin", |
| "format": "raw-shard", |
| "nbytes": 31506432, |
| "records": [ |
| { |
| "name": "model.layers.61.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.61.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| }, |
| { |
| "name": "model.layers.61.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 20447232 |
| }, |
| { |
| "name": "model.layers.62.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 20471808 |
| }, |
| { |
| "name": "model.layers.62.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 20496384 |
| } |
| ], |
| "md5sum": "048b671ae952d503393d37873e317680" |
| }, |
| { |
| "dataPath": "params_shard_535.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.63.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "303dc851631cda20cffb7d15a7d52740" |
| }, |
| { |
| "dataPath": "params_shard_536.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.63.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e06298408845ad4417f56d6f2df7c16e" |
| }, |
| { |
| "dataPath": "params_shard_537.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.63.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fa910ba5e9a1ac294ee34f26bf28bdf1" |
| }, |
| { |
| "dataPath": "params_shard_538.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.63.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2bb20544d2556eaee9208a7f50c5ae6e" |
| }, |
| { |
| "dataPath": "params_shard_539.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.63.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3da579463edd4e89a78a3a3ef87ae1e5" |
| }, |
| { |
| "dataPath": "params_shard_540.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.63.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ad2910312a529397aab746d073a10d25" |
| }, |
| { |
| "dataPath": "params_shard_541.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.63.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e3efac10169a5b6bc555879c397d4d48" |
| }, |
| { |
| "dataPath": "params_shard_542.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.63.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f7ad74b98365ba9f0c955da44f7c5cef" |
| }, |
| { |
| "dataPath": "params_shard_543.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9de20d00727d5a3bab44ee20b22d4815" |
| }, |
| { |
| "dataPath": "params_shard_544.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9470e2c4fab5eb2e045f1d240354a916" |
| }, |
| { |
| "dataPath": "params_shard_545.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "37054acc6abe8b0fe9f1b91eb4585a40" |
| }, |
| { |
| "dataPath": "params_shard_546.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ab0dc3a9528eae81e5d6eee7ce92430b" |
| }, |
| { |
| "dataPath": "params_shard_547.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.down_proj.q_weight", |
| "shape": [ |
| 12288, |
| 4224 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cecff68a6a804beac5a2dbc911853cce" |
| }, |
| { |
| "dataPath": "params_shard_548.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.down_proj.q_scale", |
| "shape": [ |
| 12288, |
| 1056 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "761a37c179492819c88dec4818d8b214" |
| }, |
| { |
| "dataPath": "params_shard_549.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.gate_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cea415b20c751595da2bf6a7aa25720e" |
| }, |
| { |
| "dataPath": "params_shard_550.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.gate_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "130be3fee135ed7112c7e990bb50e982" |
| }, |
| { |
| "dataPath": "params_shard_551.bin", |
| "format": "raw-shard", |
| "nbytes": 207618048, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.up_proj.q_weight", |
| "shape": [ |
| 33792, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 207618048, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0c5a4ed789995113659a30ec85f22090" |
| }, |
| { |
| "dataPath": "params_shard_552.bin", |
| "format": "raw-shard", |
| "nbytes": 25952256, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.up_proj.q_scale", |
| "shape": [ |
| 33792, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 25952256, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7eaf0ccffa4afbc17285f9104aa01731" |
| }, |
| { |
| "dataPath": "params_shard_553.bin", |
| "format": "raw-shard", |
| "nbytes": 88080384, |
| "records": [ |
| { |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 14336, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 88080384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "25aa631285fb8c1e422e2906e0019918" |
| }, |
| { |
| "dataPath": "params_shard_554.bin", |
| "format": "raw-shard", |
| "nbytes": 29982720, |
| "records": [ |
| { |
| "name": "model.layers.62.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.63.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.63.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 9461760 |
| }, |
| { |
| "name": "model.layers.63.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 20471808 |
| }, |
| { |
| "name": "model.norm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29908992 |
| }, |
| { |
| "name": "model.layers.7.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29933568 |
| }, |
| { |
| "name": "model.layers.8.input_layernorm.weight", |
| "shape": [ |
| 12288 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 29958144 |
| } |
| ], |
| "md5sum": "5b93b8a0ad4b7fcff64e2283d0a9aba0" |
| }, |
| { |
| "dataPath": "params_shard_555.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "model.layers.8.self_attn.out_proj.q_weight", |
| "shape": [ |
| 12288, |
| 1536 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7a6f8d6a32ccde6586fbb5f354b87040" |
| }, |
| { |
| "dataPath": "params_shard_556.bin", |
| "format": "raw-shard", |
| "nbytes": 20447232, |
| "records": [ |
| { |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 14336, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 11010048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.8.self_attn.out_proj.q_scale", |
| "shape": [ |
| 12288, |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11010048 |
| } |
| ], |
| "md5sum": "d696d3a1cde821e670e3548a0e2b7c4d" |
| } |
| ] |
| } |