| 1234567891011121314151617181920212223242526272829303132333435363738394041 |
# Training configuration for a ColPali model: processor, model, datasets,
# loss function, trainer arguments and LoRA (PEFT) settings.
# NOTE(review): the nesting indentation appears to have been lost in this copy
# (every line is flat and prefixed with "- "); the section comments below follow
# key order only — confirm the real nesting against the original file.
# NOTE(review): "():" presumably names the class/callable the config loader
# instantiates for a section, and !path / !ext / !import are custom loader tags
# (path, external Python object, included YAML file) — confirm against the loader.
- config:
- (): colpali_engine.trainer.colmodel_training.ColModelTrainingConfig
- output_dir: !path ../../../models/right_pad/train_colpali-3b-mix-448
# Processor section: an AllPurposeWrapper around ColPaliProcessor, pointed at a
# local checkpoint directory. The trailing comment on the checkpoint line records
# an alternative checkpoint path.
- processor:
- (): colpali_engine.utils.transformers_wrappers.AllPurposeWrapper
- class_to_instanciate: !ext colpali_engine.models.ColPaliProcessor
- pretrained_model_name_or_path: "./models/colpaligemma-3b-mix-448-base" # "./models/paligemma-3b-mix-448"
- max_length: 50
# Model section: an AllPurposeWrapper around ColPali, using the same local
# checkpoint path as the processor and torch.bfloat16 as torch_dtype.
- model:
- (): colpali_engine.utils.transformers_wrappers.AllPurposeWrapper
- class_to_instanciate: !ext colpali_engine.models.ColPali
- pretrained_model_name_or_path: "./models/colpaligemma-3b-mix-448-base"
- torch_dtype: !ext torch.bfloat16
# Disabled options: automatic device map and a 4-bit BitsAndBytesConfig
# (nf4 quant type, bfloat16 compute dtype, double quantization).
- # device_map: "auto"
- # quantization_config:
- # (): transformers.BitsAndBytesConfig
- # load_in_4bit: true
- # bnb_4bit_quant_type: "nf4"
- # bnb_4bit_compute_dtype: "bfloat16"
- # bnb_4bit_use_double_quant: true
# Datasets: the training set comes from load_train_set; the evaluation set is
# pulled in from ../data/test_data.yaml.
- train_dataset:
- (): colpali_engine.utils.dataset_transformation.load_train_set
- eval_dataset: !import ../data/test_data.yaml
# Second max_length entry, same value (50) as the one in the processor section.
# NOTE(review): presumably a field of the training config itself rather than of
# the processor — confirm once the nesting is restored.
- max_length: 50
- run_eval: true
# Loss: ColbertPairwiseCELoss from the late-interaction losses module.
- loss_func:
- (): colpali_engine.loss.late_interaction_losses.ColbertPairwiseCELoss
# Trainer arguments are kept in a separate file, ../tr_args/default_tr_args.yaml.
- tr_args: !import ../tr_args/default_tr_args.yaml
# LoRA adapter settings passed to peft.LoraConfig: r 32, lora_alpha 32,
# dropout 0.1, gaussian weight init, no bias terms.
- peft_config:
- (): peft.LoraConfig
- r: 32
- lora_alpha: 32
- lora_dropout: 0.1
- init_lora_weights: "gaussian"
- bias: "none"
- task_type: "FEATURE_EXTRACTION"
# target_modules is one regex string: it matches names containing
# "language_model" followed by one of the listed projection names
# (down/gate/up/k/q/v/o _proj), or names containing "custom_text_proj".
- target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)'
# The commented-out line below is character-for-character the same pattern as
# the active target_modules value above.
- # target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)'
|