---
# Training / evaluation configuration for the UniMERNet recognition model
# (Architecture.model_type: rec, Architecture.algorithm: UniMERNet).
#
# Top-level sections: Global, Optimizer, Architecture, Loss, PostProcess,
# Metric, Train, Eval.

Global:
  model_name: UniMERNet  # To use static model for inference.
  use_gpu: true
  epoch_num: 40
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/rec/unimernet/
  save_epoch_step: 5
  # evaluation is run every 37880 iterations after the 0th iteration
  eval_batch_step: [0, 37880]
  cal_metric_during_train: true
  # The next three keys are intentionally left unset (null).
  pretrained_model: null
  checkpoints: null
  save_inference_dir: null
  use_visualdl: false
  infer_img: doc/datasets/pme_demo/0000013.png
  infer_mode: false
  use_space_char: false
  # The three anchors below are aliased by PostProcess and by the Train/Eval
  # transform pipelines, so each value is defined only once in this file.
  rec_char_dict_path: &rec_char_dict_path ppocr/utils/dict/unimernet_tokenizer
  input_size: &input_size [192, 672]
  max_seq_len: &max_seq_len 1024
  save_res_path: ./output/rec/predicts_unimernet.txt
  allow_resize_largeImg: false
  # NOTE(review): the last two entries equal input_size; presumably this is
  # (channels, height, width) — TODO confirm against the consumer.
  d2s_train_image_shape: [1, 192, 672]

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  weight_decay: 0.05
  lr:
    name: LinearWarmupCosine
    # NOTE(review): exponent literals without a decimal point (1e-4, 1e-5,
    # 1e-8) are loaded as strings, not floats, by YAML 1.1 parsers such as
    # PyYAML. The values are kept exactly as written; confirm that the
    # consumer converts them to float.
    learning_rate: 1e-4
    start_lr: 1e-5
    min_lr: 1e-8
    warmup_steps: 5000

Architecture:
  model_type: rec
  algorithm: UniMERNet
  in_channels: 3
  # No transform stage is configured (null).
  Transform: null
  Backbone:
    name: DonutSwinModel
    hidden_size: 1024
    num_layers: 4
    # Four entries, one per num_layers above — presumably heads per stage;
    # TODO confirm.
    num_heads: [4, 8, 16, 32]
    add_pooling_layer: true
    use_mask_token: false
  Head:
    name: UniMERNetHead
    max_new_tokens: 1536
    decoder_start_token_id: 0
    temperature: 0.2
    do_sample: false
    top_p: 0.95
    # Same value as Backbone.hidden_size.
    encoder_hidden_size: 1024
    is_export: false
    length_aware: true

Loss:
  name: UniMERNetLoss

PostProcess:
  name: UniMERNetDecode
  rec_char_dict_path: *rec_char_dict_path

Metric:
  name: LaTeXOCRMetric
  main_indicator: exp_rate
  cal_bleu_score: true

Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/UniMERNet/
    label_file_list: ["./train_data/UniMERNet/train_unimernet_1M.txt"]
    # Each list item is a single-key mapping: the key names the transform and
    # its value holds that transform's parameters (null when none are given).
    transforms:
      - UniMERNetImgDecode:
          input_size: *input_size
      - UniMERNetTrainTransform: null
      - UniMERNetImageFormat: null
      - UniMERNetLabelEncode:
          rec_char_dict_path: *rec_char_dict_path
          max_seq_len: *max_seq_len
      - KeepKeys:
          keep_keys: ['image', 'label', 'attention_mask']
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 7
    num_workers: 0
    collate_fn: UniMERNetCollator

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/UniMERNet/UniMER-Test/cpe
    label_file_list: ["./train_data/UniMERNet/test_unimernet_cpe.txt"]
    # Same pipeline shape as Train, with UniMERNetTestTransform in place of
    # UniMERNetTrainTransform.
    transforms:
      - UniMERNetImgDecode:
          input_size: *input_size
      - UniMERNetTestTransform: null
      - UniMERNetImageFormat: null
      - UniMERNetLabelEncode:
          max_seq_len: *max_seq_len
          rec_char_dict_path: *rec_char_dict_path
      - KeepKeys:
          keep_keys: ['image', 'label', 'attention_mask']
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 30
    num_workers: 0
    collate_fn: UniMERNetCollator