{
  "_name_or_path": "NanoChat-0.3B-base/",
  "architectures": [
    "NanoChatLM"
  ],
  "attention": "GQA",
  "auto_map": {
    "AutoConfig": "LMConfig.LMConfig",
    "AutoModel": "model.NanoChatLM",
    "AutoModelForCausalLM": "model.NanoChatLM"
  },
  "aux_loss_alpha": 0.1,
  "dim": 896,
  "dropout": 0.0,
  "flash_attn": true,
  "hidden_dim": 2432,
  "kv_lora_rank": 512,
  "max_seq_len": 512,
  "model_type": "nanochat",
  "multiple_of": 64,
  "n_heads": 14,
  "n_kv_heads": 2,
  "n_layers": 24,
  "n_routed_experts": 4,
  "n_shared_experts": true,
  "norm_eps": 1e-05,
  "norm_topk_prob": true,
  "num_experts_per_tok": 2,
  "q_lora_rank": 0,
  "qk_nope_head_dim": 64,
  "qk_rope_head_dim": 64,
  "rope_theta": 1000000.0,
  "scoring_func": "softmax",
  "seq_aux": true,
  "torch_dtype": "float32",
  "transformers_version": "4.42.4",
  "use_moe": false,
  "v_head_dim": 64,
  "vocab_size": 151650
}