Skip to content

Audio resampler adds noise in some audio. #389

Closed
@ChenghaoMou

Description

@ChenghaoMou

We encountered an issue where the audio resampler adds noise to some audio. Here is the code to reproduce the issue:

import numpy as np
from livekit import rtc
from numpy.typing import NDArray
from scipy import signal
from scipy.io import wavfile

# Sample rate that every resampling path in this script converts to.
output_rate = 16000

# wavfile.read returns the file's sample rate first, then the sample array.
input_rate, data = wavfile.read("normal.wav")
print(
    f"data shape: {data.shape}, data dtype: {data.dtype}, data endianness: {data.dtype.byteorder}"
)

# One shared resampler, configured for single-channel input at the
# VERY_HIGH quality setting.
resampler_config = {
    "input_rate": input_rate,
    "output_rate": output_rate,
    "num_channels": 1,
    "quality": rtc.AudioResamplerQuality.VERY_HIGH,
}
resampler = rtc.AudioResampler(**resampler_config)


def array2frames(data: np.ndarray) -> list[rtc.AudioFrame]:
    """Split ``data`` into consecutive ``rtc.AudioFrame`` objects.

    Each frame carries up to ``input_rate`` entries taken along the first
    axis of ``data`` (``input_rate`` is the module-level rate read from the
    WAV file); the last frame holds whatever remains. Every frame is
    declared as single-channel at ``input_rate``.
    """
    samples_per_frame = input_rate
    chunks = (
        data[start : start + samples_per_frame]
        for start in range(0, len(data), samples_per_frame)
    )
    return [
        rtc.AudioFrame(
            # Serialize the slice as a column of samples in its native dtype.
            data=chunk.reshape(-1, 1).tobytes(),
            sample_rate=input_rate,
            num_channels=1,
            samples_per_channel=len(chunk),
        )
        for chunk in chunks
    ]


def resample_frames(frames: list[rtc.AudioFrame]) -> list[rtc.AudioFrame]:
    """Run ``frames`` through the module-level ``resampler``.

    Every input frame is pushed in order and the frames returned by each
    push are collected. Once all input has been pushed, the resampler is
    flushed and the frames it returns are appended to the result.
    """
    resampled = []
    for source_frame in frames:
        resampled.extend(resampler.push(source_frame))
    resampled.extend(resampler.flush())
    return resampled


def frames2array(frames: list[rtc.AudioFrame]) -> np.ndarray:
    """Concatenate the payloads of ``frames`` into one flat int16 array.

    Each frame's ``data`` buffer is reinterpreted as int16 samples and the
    per-frame arrays are joined in order.

    Parameters
    ----------
    frames : list[rtc.AudioFrame]
        Frames whose ``data`` buffers are read as int16 samples.

    Returns
    -------
    np.ndarray
        One-dimensional int16 array of all samples; empty when ``frames``
        is empty.
    """
    chunks = [np.frombuffer(frame.data, dtype=np.int16).reshape(-1) for frame in frames]
    if not chunks:
        # np.concatenate raises ValueError on an empty sequence; an empty
        # result keeps the caller's write path working when no frames exist.
        return np.empty(0, dtype=np.int16)
    return np.concatenate(chunks)


# Round trip without resampling: split into frames, join them again and
# write the result. This output is fine.
frames = array2frames(data)
unchanged_array = frames2array(frames)
wavfile.write("output_unchanged.wav", input_rate, unchanged_array)

# The same frames pushed through the resampler. This output is not fine.
resampled_frames = resample_frames(frames)
resampled_array = frames2array(resampled_frames)
print(
    f"resampled array shape: {resampled_array.shape}, resampled array dtype: {resampled_array.dtype}, resampled array endianness: {resampled_array.dtype.byteorder}"
)
wavfile.write("output_resampled.wav", output_rate, resampled_array)


# custom resampling works fine
def resample(
    data: NDArray,
    current_sample_rate: int,
    target_sample_rate: int,
) -> NDArray:
    """
    Resample the audio data to the target sample rate.

    The output length is the input length scaled by
    ``target_sample_rate / current_sample_rate`` and rounded to the nearest
    integer. Resampling is done along the first axis with
    ``scipy.signal.resample`` and the result is converted back to the
    input dtype.

    Parameters
    ----------
    data : NDArray
        The audio data to resample.
    current_sample_rate : int
        The current sample rate of the audio data.
    target_sample_rate : int
        The target sample rate to resample the audio data to.

    Returns
    -------
    NDArray
        The resampled audio data, with the same dtype as ``data``.
    """
    n_samples = round(data.shape[0] * float(target_sample_rate) / current_sample_rate)
    resampled = signal.resample(data, n_samples, axis=0)
    if np.issubdtype(data.dtype, np.integer):
        # signal.resample produces floating-point output that can overshoot
        # the integer range near sharp transitions. A bare astype would
        # truncate toward zero and does not saturate out-of-range values,
        # so round to nearest and clip to the dtype limits first.
        limits = np.iinfo(data.dtype)
        resampled = np.clip(np.rint(resampled), limits.min, limits.max)
    return resampled.astype(data.dtype)


# Reference path: resample the whole clip with SciPy and write it out so it
# can be compared against the resampler output.
resampled_data = resample(
    data,
    current_sample_rate=input_rate,
    target_sample_rate=output_rate,
)
wavfile.write("output_resampled_scipy.wav", output_rate, resampled_data)

Warning

The noise can be very loud. Be sure to adjust your volume before opening the files.

Here are the files:

I have tried other audio files with the same attributes (int16, 24 kHz) but different voices, and they all worked fine with the same code. The original audio was generated with Cartesia. Any ideas?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions