Merge pull request #62 from MaleicAcid/master

zh-plus · web-flow · commit 52a3c4180512 · 2024-12-12T20:44:47.000+08:00
Handle videos with no speech
diff --git a/openlrc/openlrc.py b/openlrc/openlrc.py
@@ -424,12 +424,19 @@ def to_json(segments: List[Segment], name, lang):
             'segments': []
         }
 
-        for segment in segments:
+        if not segments:
             result['segments'].append({
-                'start': segment.start,
-                'end': segment.end,
-                'text': segment.text
+                'start': 0.0,
+                'end': 5.0,
+                'text': "no speech found"
             })
+        else:
+            for segment in segments:
+                result['segments'].append({
+                    'start': segment.start,
+                    'end': segment.end,
+                    'text': segment.text
+                })
 
         with open(name, 'w', encoding='utf-8') as f:
             json.dump(result, f, ensure_ascii=False, indent=4)
diff --git a/openlrc/transcribe.py b/openlrc/transcribe.py
@@ -98,10 +98,12 @@ def transcribe(self, audio_path: Union[str, Path], language: Optional[str] = Non
             if timestamps < info.duration:  # silence at the end of the audio
                 pbar.update(info.duration - timestamps)
 
-        assert segments, f'No voice found for {audio_path}'
-
-        with Timer('Sentence Segmentation'):
-            result = self.sentence_split(segments, info.language)
+        if not segments:
+            logger.warning(f'No speech found for {audio_path}')
+            result = []
+        else:
+            with Timer('Sentence Segmentation'):
+                result = self.sentence_split(segments, info.language)
 
         info = TranscriptionInfo(language=info.language, duration=get_audio_duration(audio_path),
                                  duration_after_vad=info.duration_after_vad)
diff --git a/tests/data/test_nospeech_video.mp4 b/tests/data/test_nospeech_video.mp4
diff --git a/tests/test_openlrc.py b/tests/test_openlrc.py
@@ -31,6 +31,7 @@ class TestLRCer(unittest.TestCase):
     def setUp(self) -> None:
         self.audio_path = Path('data/test_audio.wav')
         self.video_path = Path('data/test_video.mp4')
+        self.nospeech_video_path = Path('data/test_nospeech_video.mp4')
 
     def tearDown(self) -> None:
         def clear_paths(input_path):
@@ -78,6 +79,11 @@ def test_video_file_transcription_translation(self):
         result = lrcer.run('data/test_video.mp4')
         self.assertTrue(result)
 
+    def test_nospeech_video_file_transcription_translation(self):
+        lrcer = LRCer(whisper_model='tiny', device='cpu', compute_type='default')
+        result = lrcer.run('data/test_nospeech_video.mp4')
+        self.assertTrue(result)
+
     @patch('openlrc.translate.LLMTranslator.translate', MagicMock(side_effect=Exception('test exception')))
     def test_translation_error(self):
         lrcer = LRCer(whisper_model='tiny', device='cpu', compute_type='default')