Skip to content

Commit 1ab051e

Browse files
committed
Remove blank segment feature
Signed-off-by: makaveli10 <[email protected]>
1 parent 617f587 commit 1ab051e

File tree

3 files changed

+1
-67
lines changed

3 files changed

+1
-67
lines changed

whisper_live/backend/base.py

+1 -34
Original file line number | Diff line number | Diff line change
@@ -20,19 +20,13 @@ def __init__(self, client_uid, websocket):
2020
self.text = []
2121
self.current_out = ''
2222
self.prev_out = ''
23-
self.t_start = None
2423
self.exit = False
2524
self.same_output_count = 0
26-
self.show_prev_out_thresh = 5 # if pause(no output from whisper) show previous output for 5 seconds
27-
self.add_pause_thresh = 3 # add a blank to segment list as a pause(no speech) for 3 seconds
2825
self.transcript = []
2926
self.send_last_n_segments = 10
3027
self.no_speech_thresh = 0.45
3128
self.clip_audio = False
3229

33-
# text formatting
34-
self.pick_previous_segments = 2
35-
3630
# threading
3731
self.lock = threading.Lock()
3832

@@ -85,7 +79,7 @@ def speech_to_text(self):
8579
def transcribe_audio(self):
8680
raise NotImplementedError
8781

88-
def handle_transcription_output(self):
82+
def handle_transcription_output(self, result, duration):
8983
raise NotImplementedError
9084

9185
def format_segment(self, start, end, text, completed=False):
@@ -228,33 +222,6 @@ def send_transcription_to_client(self, segments):
228222
except Exception as e:
229223
logging.error(f"[ERROR]: Sending data to client: {e}")
230224

231-
def get_previous_output(self):
232-
"""
233-
Retrieves previously generated transcription outputs if no new transcription is available
234-
from the current audio chunks.
235-
236-
Checks the time since the last transcription output and, if it is within a specified
237-
threshold, returns the most recent segments of transcribed text. It also manages
238-
adding a pause (blank segment) to indicate a significant gap in speech based on a defined
239-
threshold.
240-
241-
Returns:
242-
segments (list): A list of transcription segments. This may include the most recent
243-
transcribed text segments or a blank segment to indicate a pause
244-
in speech.
245-
"""
246-
segments = []
247-
if self.t_start is None:
248-
self.t_start = time.time()
249-
if time.time() - self.t_start < self.show_prev_out_thresh:
250-
segments = self.prepare_segments()
251-
252-
# add a blank if there is no speech for 3 seconds
253-
if len(self.text) and self.text[-1] != '':
254-
if time.time() - self.t_start > self.add_pause_thresh:
255-
self.text.append('')
256-
return segments
257-
258225
def disconnect(self):
259226
"""
260227
Notify the client of disconnection and send a disconnect message.

whisper_live/backend/faster_whisper_backend.py

-30
Original file line number | Diff line number | Diff line change
@@ -175,33 +175,6 @@ def transcribe_audio(self, input_sample):
175175
self.set_language(info)
176176
return result
177177

178-
def get_previous_output(self):
179-
"""
180-
Retrieves previously generated transcription outputs if no new transcription is available
181-
from the current audio chunks.
182-
183-
Checks the time since the last transcription output and, if it is within a specified
184-
threshold, returns the most recent segments of transcribed text. It also manages
185-
adding a pause (blank segment) to indicate a significant gap in speech based on a defined
186-
threshold.
187-
188-
Returns:
189-
segments (list): A list of transcription segments. This may include the most recent
190-
transcribed text segments or a blank segment to indicate a pause
191-
in speech.
192-
"""
193-
segments = []
194-
if self.t_start is None:
195-
self.t_start = time.time()
196-
if time.time() - self.t_start < self.show_prev_out_thresh:
197-
segments = self.prepare_segments()
198-
199-
# add a blank if there is no speech for 3 seconds
200-
if len(self.text) and self.text[-1] != '':
201-
if time.time() - self.t_start > self.add_pause_thresh:
202-
self.text.append('')
203-
return segments
204-
205178
def handle_transcription_output(self, result, duration):
206179
"""
207180
Handle the transcription output, updating the transcript and sending data to the client.
@@ -215,9 +188,6 @@ def handle_transcription_output(self, result, duration):
215188
self.t_start = None
216189
last_segment = self.update_segments(result, duration)
217190
segments = self.prepare_segments(last_segment)
218-
else:
219-
# show previous output if there is pause i.e. no output from whisper
220-
segments = self.get_previous_output()
221191

222192
if len(segments):
223193
self.send_transcription_to_client(segments)

whisper_live/backend/openvino_backend.py

-3
Original file line number | Diff line number | Diff line change
@@ -118,9 +118,6 @@ def handle_transcription_output(self, result, duration):
118118
self.t_start = None
119119
last_segment = self.update_segments(result, duration)
120120
segments = self.prepare_segments(last_segment)
121-
else:
122-
# show previous output if there is pause i.e. no output from whisper
123-
segments = self.get_previous_output()
124121

125122
if len(segments):
126123
self.send_transcription_to_client(segments)

0 commit comments

Comments
 (0)