-
-
Save voodoohop/2089c61218605f758289cada102c1b9e to your computer and use it in GitHub Desktop.
Computes a pseudo-CQT (a constant-Q transform approximated from the STFT) in TensorFlow.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import librosa | |
import tensorflow as tf | |
import numpy as np | |
# librosa's private helper that builds the frequency-domain CQT filter bank.
# It is fetched by string at module level: the name starts with a double
# underscore, which Python would name-mangle if it were written as an
# attribute access inside a class body.
cqt_filter_fft = getattr(librosa.constantq, "__cqt_filter_fft")

# Small constant added under the square root in PseudoCqt.forward; without
# it, backpropagating through the transform can yield NaN.
EPS = 1e-4
class PseudoCqt():
    """Compute a pseudo-CQT (constant-Q transform built on the STFT) in TensorFlow.

    Written by Keunwoo Choi and adapted to TensorFlow by Thomas Haferlach.
    The API (and implementation) follows librosa
    (https://librosa.github.io/librosa/generated/librosa.core.pseudo_cqt.html).

    The STFT magnitudes of the input are projected onto the magnitude of a
    CQT filter bank that is precomputed once in ``__init__``.

    Usage (the input must carry a leading batch axis)::

        src, _ = librosa.load(filename)
        cqt_calculator = PseudoCqt()
        cqt_calculator(src[np.newaxis, :])  # -> (1, n_bins, time)
    """

    def __init__(self, sr=22050, hop_length=512, fmin=None, n_bins=84,
                 bins_per_octave=12, filter_scale=1,
                 norm=1, sparsity=0.01, window='hann', scale=True,
                 pad_mode='reflect'):
        """Precompute the CQT filter bank and store the STFT settings.

        Args:
            sr: Sampling rate, forwarded to librosa's filter-bank helper.
            hop_length: STFT frame step in samples.
            fmin: Lowest filter frequency; defaults to 2 * 32.703195 (C2).
            n_bins: Number of CQT bins.
            bins_per_octave: Number of bins per octave.
            filter_scale: Forwarded to librosa's filter-bank helper.
            norm: Forwarded to librosa's filter-bank helper.
            sparsity: Forwarded to librosa's filter-bank helper.
            window: Must be ``"hann"``; no other window is implemented.
            scale: Must be truthy; the output is always scaled by
                ``1 / sqrt(n_fft)``.
            pad_mode: Stored on the instance but not used by ``forward``,
                which pads the end of the signal via ``pad_end=True``.

        Raises:
            AssertionError: If ``scale`` is falsy or ``window`` is not "hann".
        """
        assert scale, "only scale=True is supported"
        assert window == "hann", "only the 'hann' window is supported"
        if fmin is None:
            fmin = 2 * 32.703195  # note_to_hz('C2') because C1 is too low

        fft_basis, n_fft, _ = cqt_filter_fft(sr, fmin, n_bins, bins_per_octave,
                                             filter_scale, norm, sparsity,
                                             hop_length=hop_length, window=window)

        # The helper returns a sparse, complex-valued basis. Take its
        # magnitude BEFORE casting to float32: casting a complex matrix to a
        # real dtype first would silently drop the imaginary part.
        basis_magnitude = abs(fft_basis).astype(np.float32).toarray()

        # Leading axis of size 1 so the basis broadcasts over the batch in
        # tf.matmul: (1, n_bins, n_freq).
        self.fft_basis = tf.expand_dims(tf.convert_to_tensor(basis_magnitude), 0)

        self.n_fft = n_fft
        self.hop_length = hop_length
        self.pad_mode = pad_mode
        self.scale = scale
        self.window = tf.signal.hann_window
        self.npdtype = np.float32

    def __call__(self, y):
        """Alias for ``forward`` so the instance can be called directly."""
        return self.forward(y)

    def forward(self, y):
        """Return the pseudo-CQT of a batch of signals.

        Args:
            y: Batched audio of shape (batch, samples). The batch axis is
                required by the fixed transpose permutation below.

        Returns:
            Tensor of shape (batch, n_bins, time).
        """
        # NOTE(review): the analysis window is hop_length * 4 samples long,
        # zero-padded to n_fft by the FFT; librosa's pseudo_cqt appears to
        # use a window of the full n_fft length -- confirm this is intended.
        spectrum = tf.signal.stft(y,
                                  frame_length=self.hop_length * 4,
                                  frame_step=self.hop_length,
                                  fft_length=self.n_fft,
                                  window_fn=self.window,
                                  pad_end=True)  # (batch, time, n_freq), complex

        # Magnitude from both the real and imaginary parts. It is written as
        # sqrt(re^2 + im^2 + EPS) because, without EPS, backpropagating
        # through the CQT can yield NaN.
        power = (tf.math.square(tf.math.real(spectrum))
                 + tf.math.square(tf.math.imag(spectrum)))
        magnitudes = tf.math.sqrt(power + EPS)
        D = tf.transpose(magnitudes, perm=[0, 2, 1])  # (batch, n_freq, time)

        # Project onto the pseudo-cqt basis
        C = tf.matmul(self.fft_basis, D)  # (batch, n_bins, time)
        C /= tf.math.sqrt(float(self.n_fft))  # because `scale` is always True
        return C
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
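Usage:
The audio must be passed in batches, i.e. as a tensor of shape (batch, samples); a single 1-D signal needs a leading batch axis added first.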