Created
July 25, 2022 10:34
-
-
Save kuguma/7fe941951c2d73325b323c3b7f1a26c0 to your computer and use it in GitHub Desktop.
waveファイルを無音区間で分割して吐き出すやつ
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from struct import unpack | |
from scipy.io import wavfile | |
import scipy.io | |
import numpy as np | |
import os, sys | |
import glob | |
# Largest sample value that is still treated as silence by split_wav.
threshold = 0
# Minimum number of consecutive silent frames that counts as a split point;
# split_wav also pads both ends of the signal with this many silent frames.
silence_min_frames = 1024
def _silent_mask(frames, limit):
    """Return a boolean array marking frames where every channel has |sample| <= limit."""
    # Widen signed integers first: abs() of the most negative value overflows
    # in the sample's own dtype (e.g. abs(int16(-32768)) stays negative).
    if frames.dtype.kind == "i":
        frames = frames.astype(np.int64)
    return np.all(np.abs(frames) <= limit, axis=1)


def _silence_runs(silent, min_frames):
    """Return inclusive (start, end) pairs for each run of True at least min_frames long."""
    # Sentinel False values on both sides turn every run into one rising and
    # one falling edge, including runs that touch the ends of the array.
    flags = np.concatenate(([False], silent, [False])).astype(np.int8)
    edges = np.diff(flags)
    starts = np.flatnonzero(edges == 1)
    ends = np.flatnonzero(edges == -1) - 1
    return [
        (int(start), int(end))
        for start, end in zip(starts, ends)
        if end - start + 1 >= min_frames
    ]


def split_wav(wavfilepath, output_dir, silence_threshold=None, min_silence_frames=None):
    """Split a WAV file at its long silent stretches and write each audio part.

    Every part is written to ``{output_dir}/{name} partNN{ext}``, where
    ``name``/``ext`` come from ``wavfilepath`` and ``NN`` counts from 01.
    Silent stretches shorter than ``min_silence_frames`` stay inside the
    surrounding audio part.

    Args:
        wavfilepath: Path of the WAV file to read (any number of channels).
        output_dir: Existing directory that receives the parts.
        silence_threshold: A frame is silent when the absolute value of every
            channel is <= this. Defaults to the module-level ``threshold``.
        min_silence_frames: Minimum length, in frames, of a silent stretch
            that separates two parts. Defaults to the module-level
            ``silence_min_frames``.

    Raises:
        RuntimeError: If the file is entirely silent, or if it contains only
            one audio part and therefore does not need to be split.
    """
    if silence_threshold is None:
        silence_threshold = threshold
    if min_silence_frames is None:
        min_silence_frames = silence_min_frames

    print("-----------------------------------------")
    sr, rdata = wavfile.read(wavfilepath, mmap=True)
    # Mono files come back 1-D; view them as (frames, 1) for the analysis only.
    frames = rdata.reshape(-1, 1) if rdata.ndim == 1 else rdata
    n_ch = frames.shape[1]
    n_frames = frames.shape[0]
    length_sec = n_frames / sr
    print(f"{wavfilepath} | Fs = {sr} | ch = {n_ch} | frames = {n_frames} | length = {length_sec}")

    # To keep the logic simple, pretend the signal starts and ends with a
    # silent stretch that is long enough to count. Only the boolean mask is
    # padded, so the (memory-mapped) sample data is never copied as a whole.
    # NOTE(review): unsigned 8-bit PCM is centred on 128 rather than 0; such
    # files are not re-centred here - confirm whether they need support.
    pad = max(int(min_silence_frames), 1)
    edge = np.ones(pad, dtype=bool)
    silent = np.concatenate((edge, _silent_mask(frames, silence_threshold), edge))

    print("START")

    # Silence detection. Indices are relative to the padded signal.
    silence_area = _silence_runs(silent, min_silence_frames)
    for silence_start, silence_end in silence_area:
        print(f"silence_area : {silence_start} to {silence_end}")

    # Sanity checks
    if len(silence_area) == 1:
        raise RuntimeError("This is silence file")  # completely silent file
    if len(silence_area) == 2:
        raise RuntimeError("This file does not need to be split")  # single audio part

    # Convert to audio stretches: whatever lies between two silent stretches.
    audio_area = [
        (before[1] + 1, after[0] - 1)
        for before, after in zip(silence_area, silence_area[1:])
    ]

    # Output
    name, ext = os.path.splitext(os.path.basename(wavfilepath))
    for i, area in enumerate(audio_area):
        output_fname = f"{output_dir}/{name} part{i+1:0=2}{ext}"
        # Shift back from padded to file coordinates; copy just this part out
        # of the memory map before handing it to the writer.
        segment = np.array(rdata[area[0] - pad:area[1] + 1 - pad])
        wavfile.write(output_fname, sr, segment)
        print(f"output : {output_fname} | {area[0]} to {area[1]}")

    print("END")
    print("-----------------------------------------")
def main():
    """Split every ``*.wav`` file directly inside the directory named by ``sys.argv[1]``.

    The parts are written to a ``split_data`` sub-directory of that
    directory, which is created if missing. A file for which ``split_wav``
    raises ``RuntimeError`` is reported and skipped, so it does not stop the
    remaining files from being processed.

    Raises:
        SystemExit: If no target directory was given on the command line.
    """
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} TARGET_DIR")

    target_dir = sys.argv[1]
    output_dir = f"{target_dir}/split_data"
    os.makedirs(output_dir, exist_ok=True)

    # Escape the directory so characters such as "[" in its name are not
    # read as glob syntax; sort for a reproducible processing order.
    pattern = os.path.join(glob.escape(target_dir), "*.wav")
    for path in sorted(glob.glob(pattern)):
        try:
            split_wav(path, output_dir)
        except RuntimeError as err:
            print(f"skipped : {path} | {err}")


# Run only when executed as a script, so importing this module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment