HF Doc Builder style #3498

Draft · wants to merge 6 commits into main
1 change: 1 addition & 0 deletions Makefile
@@ -11,6 +11,7 @@ test:
precommit:
python scripts/add_copyrights.py
pre-commit run --all-files
doc-builder style trl tests docs/source --max_len 119

slow_tests:
pytest -m "slow" tests/ $(if $(IS_GITHUB_CI),--report-log "slow_tests.log",)
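Note: the new precommit step runs Hugging Face's doc-builder style formatter over the library, tests, and docs with a 119-character line limit, which is what produces the docstring reflows in the files below. A minimal sketch of reproducing the check locally (assuming the doc-builder CLI is installed, e.g. from the hf-doc-builder package):

    # Same command as the new Makefile step:
    doc-builder style trl tests docs/source --max_len 119
    # Or run it together with the other hooks through the modified target:
    make precommit
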
3 changes: 2 additions & 1 deletion tests/slow/test_dpo_slow.py
@@ -117,7 +117,8 @@ def test_dpo_bare_model(self, model_id, loss_type, pre_compute_logits):
@require_peft
def test_dpo_peft_model(self, model_id, loss_type, pre_compute_logits, gradient_checkpointing_kwargs):
"""
A test that tests the simple usage of `DPOTrainer` using a peft model in full precision + different scenarios of gradient checkpointing.
A test that tests the simple usage of `DPOTrainer` using a peft model in full precision + different scenarios
of gradient checkpointing.
"""
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
38 changes: 18 additions & 20 deletions tests/slow/test_sft_slow.py
@@ -66,8 +66,7 @@ def tearDown(self):
@parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
def test_sft_trainer_str(self, model_name, packing):
"""
Simply tests if passing a simple str to `SFTTrainer` loads and runs the trainer
as expected.
Simply tests if passing a simple str to `SFTTrainer` loads and runs the trainer as expected.
"""
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = SFTConfig(
@@ -92,8 +91,7 @@ def test_sft_trainer_str(self, model_name, packing):
@parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
def test_sft_trainer_transformers(self, model_name, packing):
"""
Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer
as expected.
Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer as expected.
"""
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = SFTConfig(
@@ -125,8 +123,8 @@ def test_sft_trainer_transformers(self, model_name, packing):
@require_peft
def test_sft_trainer_peft(self, model_name, packing):
"""
Simply tests if passing a transformers model + peft config to `SFTTrainer` loads and runs the trainer
as expected.
Simply tests if passing a transformers model + peft config to `SFTTrainer` loads and runs the trainer as
expected.
"""
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = SFTConfig(
@@ -161,8 +159,8 @@ def test_sft_trainer_peft(self, model_name, packing):
@parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS)))
def test_sft_trainer_transformers_mp(self, model_name, packing):
"""
Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer
as expected in mixed precision.
Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer as expected in mixed
precision.
"""
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = SFTConfig(
@@ -194,8 +192,8 @@ def test_sft_trainer_transformers_mp(self, model_name, packing):
@parameterized.expand(list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS)))
def test_sft_trainer_transformers_mp_gc(self, model_name, packing, gradient_checkpointing_kwargs):
"""
Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer
as expected in mixed precision + different scenarios of gradient_checkpointing.
Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer as expected in mixed
precision + different scenarios of gradient_checkpointing.
"""
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = SFTConfig(
@@ -230,8 +228,8 @@ def test_sft_trainer_transformers_mp_gc(self, model_name, packing, gradient_checkpointing_kwargs):
@require_peft
def test_sft_trainer_transformers_mp_gc_peft(self, model_name, packing, gradient_checkpointing_kwargs):
"""
Simply tests if passing a transformers model + PEFT to `SFTTrainer` loads and runs the trainer
as expected in mixed precision + different scenarios of gradient_checkpointing.
Simply tests if passing a transformers model + PEFT to `SFTTrainer` loads and runs the trainer as expected in
mixed precision + different scenarios of gradient_checkpointing.
"""
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = SFTConfig(
@@ -273,8 +271,8 @@ def test_sft_trainer_transformers_mp_gc_device_map(
self, model_name, packing, gradient_checkpointing_kwargs, device_map
):
"""
Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer
as expected in mixed precision + different scenarios of gradient_checkpointing (single, multi-gpu, etc).
Simply tests if passing a transformers model to `SFTTrainer` loads and runs the trainer as expected in mixed
precision + different scenarios of gradient_checkpointing (single, multi-gpu, etc).
"""
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = SFTConfig(
@@ -310,8 +308,8 @@ def test_sft_trainer_transformers_mp_gc_device_map(
@require_bitsandbytes
def test_sft_trainer_transformers_mp_gc_peft_qlora(self, model_name, packing, gradient_checkpointing_kwargs):
"""
Simply tests if passing a transformers model + PEFT + bnb to `SFTTrainer` loads and runs the trainer
as expected in mixed precision + different scenarios of gradient_checkpointing.
Simply tests if passing a transformers model + PEFT + bnb to `SFTTrainer` loads and runs the trainer as
expected in mixed precision + different scenarios of gradient_checkpointing.
"""
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = SFTConfig(
@@ -352,8 +350,8 @@ def test_sft_trainer_transformers_mp_gc_peft_qlora(self, model_name, packing, gradient_checkpointing_kwargs):
@require_bitsandbytes
def test_sft_trainer_with_chat_format_qlora(self, model_name, packing):
"""
Simply tests if using setup_chat_format with a transformers model + peft + bnb config to `SFTTrainer` loads and runs the trainer
as expected.
Simply tests if using setup_chat_format with a transformers model + peft + bnb config to `SFTTrainer` loads and
runs the trainer as expected.
"""
with tempfile.TemporaryDirectory() as tmp_dir:
train_dataset = load_dataset("trl-internal-testing/dolly-chatml-sft", split="train")
@@ -395,8 +393,8 @@ def test_sft_trainer_with_chat_format_qlora(self, model_name, packing):
@require_liger_kernel
def test_sft_trainer_with_liger(self, model_name, packing):
"""
Tests if passing use_liger=True to SFTConfig loads and runs the trainer
with AutoLigerKernelForCausalLM as expected.
Tests if passing use_liger=True to SFTConfig loads and runs the trainer with AutoLigerKernelForCausalLM as
expected.
"""
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = SFTConfig(
41 changes: 17 additions & 24 deletions tests/test_modeling_value_head.py
@@ -72,8 +72,7 @@ def test_value_head_shape(self):

def test_value_head_init_random(self):
r"""
Test if the v-head has been randomly initialized.
We can check that by making sure the bias is different
Test if the v-head has been randomly initialized. We can check that by making sure the bias is different
than zeros by default.
"""
for model_name in self.all_model_names:
@@ -84,8 +83,8 @@ def test_value_head_init_random(self):

def test_value_head_not_str(self):
r"""
Test if the v-head is added to the model successfully, by passing a non `PretrainedModel`
as an argument to `from_pretrained`.
Test if the v-head is added to the model successfully, by passing a non `PretrainedModel` as an argument to
`from_pretrained`.
"""
for model_name in self.all_model_names:
pretrained_model = self.transformers_model_class.from_pretrained(model_name)
@@ -94,8 +93,8 @@ def test_value_head_not_str(self):

def test_from_save_trl(self):
"""
Test if the model can be saved and loaded from a directory and get the same weights
Including the additional modules (e.g. v_head)
Test if the model can be saved and loaded from a directory and get the same weights, including the
additional modules (e.g. v_head)
"""
for model_name in self.all_model_names:
model = self.trl_model_class.from_pretrained(model_name)
@@ -150,8 +149,8 @@ def test_from_save_transformers_sharded(self):

def test_from_save_transformers(self):
"""
Test if the model can be saved and loaded using transformers and get the same weights.
We override the test of the super class to check if the weights are the same.
Test if the model can be saved and loaded using transformers and get the same weights. We override the test
of the super class to check if the weights are the same.
"""
for model_name in self.all_model_names:
transformers_model = self.trl_model_class.transformers_parent_class.from_pretrained(model_name)
@@ -220,8 +219,7 @@ def test_inference(self):

def test_dropout_config(self):
r"""
Test if we instantiate a model by adding `summary_drop_prob` to the config
it will be added to the v_head
Test if we instantiate a model by adding `summary_drop_prob` to the config it will be added to the v_head
"""
for model_name in self.all_model_names:
pretrained_model = self.transformers_model_class.from_pretrained(model_name)
@@ -233,8 +231,7 @@ def test_dropout_config(self):

def test_dropout_kwargs(self):
r"""
Test if we instantiate a model by adding `summary_drop_prob` to the config
it will be added to the v_head
Test if we instantiate a model by adding `summary_drop_prob` to the config it will be added to the v_head
"""
for model_name in self.all_model_names:
v_head_kwargs = {"summary_dropout_prob": 0.5}
@@ -263,10 +260,9 @@ def test_generate(self, model_name):

def test_transformers_bf16_kwargs(self):
r"""
Test if the transformers kwargs are correctly passed
Here we check that loading a model in half precision works as expected, i.e. the weights of
the `pretrained_model` attribute is loaded in half precision and you can run a dummy
forward pass without any issue.
Test if the transformers kwargs are correctly passed. Here we check that loading a model in half precision
works as expected, i.e. the weights of the `pretrained_model` attribute is loaded in half precision and you can
run a dummy forward pass without any issue.
"""
for model_name in self.all_model_names:
trl_model = self.trl_model_class.from_pretrained(model_name, torch_dtype=torch.bfloat16)
@@ -339,8 +335,7 @@ def test_inference(self):

def test_dropout_config(self):
r"""
Test if we instantiate a model by adding `summary_drop_prob` to the config
it will be added to the v_head
Test if we instantiate a model by adding `summary_drop_prob` to the config it will be added to the v_head
"""
for model_name in self.all_model_names:
pretrained_model = self.transformers_model_class.from_pretrained(model_name)
@@ -352,8 +347,7 @@ def test_dropout_config(self):

def test_dropout_kwargs(self):
r"""
Test if we instantiate a model by adding `summary_drop_prob` to the config
it will be added to the v_head
Test if we instantiate a model by adding `summary_drop_prob` to the config it will be added to the v_head
"""
for model_name in self.all_model_names:
v_head_kwargs = {"summary_dropout_prob": 0.5}
@@ -402,10 +396,9 @@ def test_push_to_hub(self):

def test_transformers_bf16_kwargs(self):
r"""
Test if the transformers kwargs are correctly passed
Here we check that loading a model in half precision works as expected, i.e. the weights of
the `pretrained_model` attribute is loaded in half precision and you can run a dummy
forward pass without any issue.
Test if the transformers kwargs are correctly passed. Here we check that loading a model in half precision
works as expected, i.e. the weights of the `pretrained_model` attribute is loaded in half precision and you can
run a dummy forward pass without any issue.
"""
for model_name in self.all_model_names:
trl_model = self.trl_model_class.from_pretrained(model_name, torch_dtype=torch.bfloat16)
16 changes: 12 additions & 4 deletions tests/test_sft_trainer.py
@@ -524,7 +524,8 @@ def test_data_collator_completion_lm_with_multiple_text(self):
data_collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer, mlm=False)

text1 = """\n\n### Instructions:\nHello all this should be masked\n\n### Response:\nI have not been masked correctly."""
text2 = """\n\n### Instructions:\nThis is another longer text that should also be masked. This text is significantly longer than the previous one.\n\n### Response:\nI have not been masked correctly."""
text2 = """\n\n### Instructions:\nThis is another longer text that should also be masked. This text is significantly longer than
the previous one.\n\n### Response:\nI have not been masked correctly."""

encoded_text1 = tokenizer(text1)
encoded_text2 = tokenizer(text2)
@@ -549,7 +550,8 @@ def test_data_collator_chat_completion_lm(self):
mlm=False,
)

text = """### Human: Hello all this should be masked.### Assistant: I should not be masked.### Human: All this should be masked too.### Assistant: I should not be masked too."""
text = """### Human: Hello all this should be masked.### Assistant: I should not be masked.### Human: All this should be masked
too.### Assistant: I should not be masked too."""
encoded_text = self.tokenizer(text)

examples = [encoded_text]
@@ -574,7 +576,8 @@ def test_data_collator_chat_completion_lm_with_multiple_text(self):
)

text1 = """### Human: Hello all this should be masked.### Assistant: I should not be masked."""
text2 = """### Human: Hello all this should be masked.### Assistant: I should not be masked.### Human: All this should be masked too.### Assistant: I should not be masked too."""
text2 = """### Human: Hello all this should be masked.### Assistant: I should not be masked.### Human: All this should be masked
too.### Assistant: I should not be masked too."""
encoded_text1 = tokenizer(text1)
encoded_text2 = tokenizer(text2)

@@ -915,7 +918,12 @@ def test_llava(self):
)
processor = AutoProcessor.from_pretrained("trl-internal-testing/tiny-LlavaForConditionalGeneration")

processor.chat_template = """{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. {% for message in messages %}{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<image>{% endif %}{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %}"""
processor.chat_template = """{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}A chat between a curious
user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's
questions. {% for message in messages %}{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for
item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image'
%}<image>{% endif %}{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if
add_generation_prompt %}ASSISTANT: {% endif %}"""

def collate_fn(examples):
# Get the texts and images, and apply the chat template
4 changes: 2 additions & 2 deletions tests/test_utils.py
@@ -537,7 +537,7 @@ def test_print_output(self, mock_stdout):
│ │ The sun is │ in the sky. │ 0.46 │ 0.10 │ │
│ └────────────┴──────────────┴─────────────┴────────┘ │
╰──────────────────────────────────────────────────────╯
""")
""") # docstyle-ignore
self.assertEqual(output, expected_output)

@patch("sys.stdout", new_callable=StringIO)
@@ -569,5 +569,5 @@ def test_num_samples(self, mock_stdout):
│ └────────┴────────────┴───────┘ │
╰─────────────────────────────────╯
"""),
]
] # docstyle-ignore
self.assertIn(output, possible_outputs)
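
For context, the trailing # docstyle-ignore markers added in this file presumably tell the doc-builder style pass to skip these strings: the expected outputs are box-drawn tables whose layout a 119-character reflow would corrupt. A minimal, hypothetical Python sketch of the pattern (names and table contents are illustrative, not from this PR):

    import textwrap

    # Keep a carefully drawn table out of the automatic reflow; the marker on the
    # closing line is assumed to make doc-builder style leave the string alone.
    expected_output = textwrap.dedent("""\
        ╭─ Totals ────────╮
        │ samples │    17 │
        ╰─────────────────╯
        """)  # docstyle-ignore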