Skip to content

Commit 52a3c41

Browse files
authored
Merge pull request #62 from MaleicAcid/master
Handling no speech video
2 parents 51dd6e4 + f14eadf commit 52a3c41

File tree

4 files changed

+23
-8
lines changed

4 files changed

+23
-8
lines changed

openlrc/openlrc.py

+11 -4
Original file line number | Diff line number | Diff line change
@@ -424,12 +424,19 @@ def to_json(segments: List[Segment], name, lang):
424424
'segments': []
425425
}
426426

427-
for segment in segments:
427+
if not segments:
428428
result['segments'].append({
429-
'start': segment.start,
430-
'end': segment.end,
431-
'text': segment.text
429+
'start': 0.0,
430+
'end': 5.0,
431+
'text': "no speech found"
432432
})
433+
else:
434+
for segment in segments:
435+
result['segments'].append({
436+
'start': segment.start,
437+
'end': segment.end,
438+
'text': segment.text
439+
})
433440

434441
with open(name, 'w', encoding='utf-8') as f:
435442
json.dump(result, f, ensure_ascii=False, indent=4)

openlrc/transcribe.py

+6 -4
Original file line number | Diff line number | Diff line change
@@ -98,10 +98,12 @@ def transcribe(self, audio_path: Union[str, Path], language: Optional[str] = Non
9898
if timestamps < info.duration: # silence at the end of the audio
9999
pbar.update(info.duration - timestamps)
100100

101-
assert segments, f'No voice found for {audio_path}'
102-
103-
with Timer('Sentence Segmentation'):
104-
result = self.sentence_split(segments, info.language)
101+
if not segments:
102+
logger.warning(f'No speech found for {audio_path}')
103+
result = []
104+
else:
105+
with Timer('Sentence Segmentation'):
106+
result = self.sentence_split(segments, info.language)
105107

106108
info = TranscriptionInfo(language=info.language, duration=get_audio_duration(audio_path),
107109
duration_after_vad=info.duration_after_vad)

tests/data/test_nospeech_video.mp4

1.48 MB
Binary file not shown.

tests/test_openlrc.py

+6
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@ class TestLRCer(unittest.TestCase):
3131
def setUp(self) -> None:
3232
self.audio_path = Path('data/test_audio.wav')
3333
self.video_path = Path('data/test_video.mp4')
34+
self.nospeech_video_path = Path('data/test_nospeech_video.mp4')
3435

3536
def tearDown(self) -> None:
3637
def clear_paths(input_path):
@@ -78,6 +79,11 @@ def test_video_file_transcription_translation(self):
7879
result = lrcer.run('data/test_video.mp4')
7980
self.assertTrue(result)
8081

82+
def test_nospeech_video_file_transcription_translation(self):
83+
lrcer = LRCer(whisper_model='tiny', device='cpu', compute_type='default')
84+
result = lrcer.run('data/test_nospeech_video.mp4')
85+
self.assertTrue(result)
86+
8187
@patch('openlrc.translate.LLMTranslator.translate', MagicMock(side_effect=Exception('test exception')))
8288
def test_translation_error(self):
8389
lrcer = LRCer(whisper_model='tiny', device='cpu', compute_type='default')

0 commit comments

Comments
 (0)