File tree Expand file tree Collapse file tree 1 file changed +9
-5
lines changed Expand file tree Collapse file tree 1 file changed +9
-5
lines changed Original file line number Diff line number Diff line change @@ -1502,15 +1502,19 @@ def write_tensors(self):
1502
1502
class MambaModel (Model ):
1503
1503
def set_gguf_parameters (self ):
1504
1504
d_model = self .hparams ["d_model" ]
1505
+ d_inner = self .hparams .get ("d_inner" , 2 * d_model )
1506
+ # Fail early for models which don't have a block expansion factor of 2
1507
+ assert d_inner == 2 * d_model
1508
+
1505
1509
self .gguf_writer .add_name (self .dir_model .name )
1506
- self .gguf_writer .add_context_length (128 ) # arbitrary value; it shouldn't be important for Mamba
1510
+ self .gguf_writer .add_context_length (2 ** 20 ) # arbitrary value; for those who use the default
1507
1511
self .gguf_writer .add_embedding_length (d_model )
1508
1512
self .gguf_writer .add_feed_forward_length (0 ) # unused, but seemingly required when loading
1509
- self .gguf_writer .add_head_count (2 * d_model ) # d_inner
1513
+ self .gguf_writer .add_head_count (d_inner )
1510
1514
self .gguf_writer .add_block_count (self .hparams ["n_layer" ])
1511
- self .gguf_writer .add_layer_norm_rms_eps (1e-5 )
1512
- self .gguf_writer .add_key_length (4 ) # d_conv
1513
- self .gguf_writer .add_value_length (16 ) # d_state
1515
+ self .gguf_writer .add_layer_norm_rms_eps (self . hparams . get ( "rms_norm_eps" , 1e-5 ) )
1516
+ self .gguf_writer .add_key_length (self . hparams . get ( "d_conv" , 4 ))
1517
+ self .gguf_writer .add_value_length (self . hparams . get ( "d_state" , 16 ))
1514
1518
self .gguf_writer .add_file_type (self .ftype )
1515
1519
1516
1520
def write_tensors (self ):
You can’t perform that action at this time.
0 commit comments