pyvene.models.gpt_oss.modelings_intervenable_gpt_oss.gpt_oss_type_to_dimension_mapping
- gpt_oss_type_to_dimension_mapping = {'attention_input': ('hidden_size',), 'attention_output': ('hidden_size',), 'attention_value_output': ('hidden_size',), 'block_input': ('hidden_size',), 'block_output': ('hidden_size',), 'expert_input': ('hidden_size',), 'expert_output': ('hidden_size',), 'head_attention_value_output': ('head_dim', 'hidden_size/num_attention_heads'), 'head_key_output': ('head_dim', 'hidden_size/num_key_value_heads'), 'head_query_output': ('head_dim', 'hidden_size/num_attention_heads'), 'head_value_output': ('head_dim', 'hidden_size/num_key_value_heads'), 'key_output': ('num_key_value_heads*hidden_size/num_attention_heads',), 'mlp_input': ('hidden_size',), 'mlp_output': ('hidden_size',), 'num_attention_heads': ('num_attention_heads',), 'num_experts_per_tok': ('num_experts_per_tok',), 'num_key_value_heads': ('num_key_value_heads',), 'num_local_experts': ('num_local_experts',), 'query_output': ('hidden_size',), 'router_input': ('hidden_size',), 'router_output': ('num_local_experts',), 'value_output': ('num_key_value_heads*hidden_size/num_attention_heads',)}
gpt-oss model with LM head