{ "activation_type": "gelu", "afrac": 0.25, "afrac_loctypes": "attn_in,attn_out,mlp_in,mlp_out,mlp_neuron,attn_v,attn_k,attn_q", "architectures": [ "CircuitGPTForCausalLM" ], "auto_map": { "AutoConfig": "config.CircuitGPTConfig", "AutoModelForCausalLM": "modeling_circuitgpt.CircuitGPTForCausalLM" }, "bias": true, "bigram_table_rank": null, "block_size": 1024, "bos_token_id": null, "d_head": 16, "d_mlp": 8192, "d_model": 2048, "d_pos_emb": 32, "dropout": 0.0, "dropout_cat_pos_emb": false, "enable_bigram_table": true, "eos_token_id": 2047, "flash": true, "is_decoder": true, "learnable_bigram_table": true, "ln_bias": true, "max_position_embeddings": 1024, "model_type": "circuitgpt", "n_head": 128, "n_layer": 8, "pad_token_id": null, "residual_activation_type": "identity", "rms_norm": true, "sink": true, "sinusoidal_cat_pos_emb": false, "tie_word_embeddings": false, "tied_unembed": false, "torch_dtype": "float32", "transformers_version": "4.49.0", "unembed_rank": null, "use_position_embeddings": true, "vocab_size": 2048 }