{
  "model_type": "glm_ocr",
  "params": {
    "vision_param": "glm_ocr_vision_encoder.ncnn.param",
    "vision_bin": "glm_ocr_vision_encoder.ncnn.bin",
    "text_embed_param": "glm_ocr_text_embed.ncnn.param",
    "text_embed_bin": "glm_ocr_text_embed.ncnn.bin",
    "text_decoder_param": "glm_ocr_text_decoder.ncnn.param",
    "text_decoder_bin": "glm_ocr_text_decoder.ncnn.bin",
    "lm_head_param": "glm_ocr_lm_head.ncnn.param",
    "lm_head_bin": "glm_ocr_lm_head.ncnn.bin"
  },
  "tokenizer": {
    "type": "bbpe",
    "vocab_file": "vocab.txt",
    "merges_file": "merges.txt",
    "bos": "<s>",
    "eos": "<|endoftext|>",
    "pad": "<|endoftext|>",
    "unk": "<unk>",
    "cls": "<s>",
    "sep": "</s>",
    "mask": "<mask>",
    "additional_special_tokens": [
      "<|endoftext|>",
      "[MASK]",
      "[gMASK]",
      "[sMASK]",
      "<sop>",
      "<eop>",
      "<|system|>",
      "<|user|>",
      "<|assistant|>",
      "<|observation|>",
      "<|begin_of_image|>",
      "<|end_of_image|>",
      "<|begin_of_video|>",
      "<|end_of_video|>",
      "<|begin_of_audio|>",
      "<|end_of_audio|>",
      "<|begin_of_transcription|>",
      "<|end_of_transcription|>",
      "<|code_prefix|>",
      "<|code_middle|>",
      "<|code_suffix|>",
      "<think>",
      "</think>",
      "<tool_call>",
      "</tool_call>",
      "<tool_response>",
      "</tool_response>",
      "<arg_key>",
      "</arg_key>",
      "<arg_value>",
      "</arg_value>",
      "/nothink",
      "<|begin_of_box|>",
      "<|end_of_box|>",
      "<|image|>",
      "<|video|>"
    ]
  },
  "setting": {
    "attn_cnt": 16,
    "hidden_size": 1536,
    "head_dim": 128,
    "num_attention_heads": 16,
    "num_key_value_heads": 8,
    "rope": {
      "type": "mRoPE",
      "rope_head_dim": 128,
      "rope_theta": 10000.0,
      "mrope_section": [16, 24, 24]
    },
    "image_token_id": 59280,
    "vision": {
      "type": "glm_ocr",
      "patch_size": 14,
      "spatial_merge_size": 2,
      "vision_hidden_size": 1024,
      "vision_out_hidden_size": 1536,
      "vision_head_dim": 64,
      "vision_num_heads": 16,
      "min_pixels": 12544,
      "max_pixels": 9633792,
      "rope": {
        "type": "mRoPE",
        "rope_theta": 10000.0,
        "rope_head_dim": 32,
        "mrope_section": [16, 16]
      },
      "image_mean": [0.48145466, 0.4578275, 0.40821073],
      "image_std": [0.26862954, 0.26130258, 0.27577711]
    }
  }
}
