Created
January 13, 2023 10:39
-
-
Save rob-smallshire/dfe72178ffc2bc29d4f7a40b94d919bd to your computer and use it in GitHub Desktop.
Audio power from a WAV file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dataclasses import dataclass | |
import librosa | |
import numpy as np | |
import matplotlib.pyplot as plt | |
@dataclass
class Audio:
    """Audio signal: the sample data together with its sampling rate."""

    # Sample values of the signal, as produced by the loader in read_wav().
    samples: np.ndarray
    # Sampling rate in samples per second; used to convert between sample
    # counts and seconds.
    rate: int
def read_wav(filename: str) -> "Audio":
    """Read a wav file into an Audio record.

    Args:
        filename (str): Path to the wav file.

    Returns:
        An Audio whose ``samples`` is a numpy array with floating point
        values between -1 and 1 and whose ``rate`` is the sampling rate
        reported by the loader.
    """
    # sr=None asks librosa not to resample, so the rate returned here is the
    # file's own sampling rate.
    samples, rate = librosa.load(filename, sr=None)
    return Audio(samples, rate)
def rms(samples, window_size):
    """Compute the root mean square of a signal with a moving window.

    RMS is closely related to audio power.

    Args:
        samples: One-dimensional sequence or array of sample values. Integer
            input (e.g. 16-bit PCM) is accepted.
        window_size (int): Length of the averaging window, in samples. Must
            be at least 1.

    Returns:
        A float64 numpy array with exactly one RMS value per input sample.
        The window is centred on each sample and the signal is treated as
        zero outside its bounds, so values near the edges are attenuated.

    Raises:
        ValueError: If window_size is less than 1.
    """
    if window_size < 1:
        raise ValueError(
            f"window_size must be a positive integer, got {window_size!r}"
        )

    # Promote to float64 *before* squaring: squaring integer PCM samples in
    # their own dtype (e.g. int16) silently overflows.
    signal = np.asarray(samples, dtype=np.float64)
    if signal.size == 0:
        # np.convolve rejects empty input; an empty signal has an empty RMS.
        return np.zeros(0, dtype=np.float64)

    squared = np.square(signal)
    kernel = np.ones(window_size) / float(window_size)

    # mode="same" returns max(len(signal), window_size) values, which is
    # longer than the signal when the window exceeds it.  Take the centred
    # slice of the full convolution instead: it is identical to "same" for
    # window_size <= len(signal) and stays aligned with the input otherwise.
    full = np.convolve(squared, kernel, mode="full")
    start = (window_size - 1) // 2
    mean_square = full[start:start + signal.size]
    return np.sqrt(mean_square)
if __name__ == "__main__":
    # Plot the moving-window RMS of the start of a sample clip.
    audio = read_wav("data/clip-05.wav")
    truncate_seconds = 20  # First twenty seconds
    window_seconds = 0.3  # 300 ms window

    clip = audio.samples[:audio.rate * truncate_seconds]
    # Keep the window at least one sample long even for very low rates.
    window_size = max(1, int(window_seconds * audio.rate))

    # Bind the result to its own name: assigning it to "rms" would replace
    # the rms() function with an array for the rest of the module.
    power = rms(clip, window_size)

    # Sample i occurs at i / rate seconds.  (linspace with its default
    # inclusive endpoint would stretch the axis by a factor N / (N - 1).)
    t = np.arange(len(power)) / audio.rate

    plt.figure(1)
    plt.title("Signal Power...")
    plt.plot(t, power)
    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment