Created
October 25, 2025 20:48
-
-
Save indiejoseph/583c8c4a6e7271f4f5c7492805702476 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from glob import glob | |
| import os | |
| import argparse | |
| from tqdm import tqdm | |
| from pathlib import Path | |
| from PolUVR.separator import Separator | |
| import torch | |
# Module-level separator handles. They start out empty and are assigned by
# main() once the command-line arguments are known; the stage functions read
# them as globals.
separator1 = separator2 = None

# Constants
# Run on the GPU when torch reports CUDA support, otherwise fall back to CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"

# Autocast is only switched on together with the CUDA device.
USE_AUTOCAST = (DEVICE == "cuda")
def _separate_vocals(separator, f: str, output_dir: Path) -> str:
    """Run one separation pass over ``f`` and keep only the vocal output.

    The separator is asked to name the vocal stem after the input file's stem
    and the instrumental stem ``<stem>(Instrumental)``. Every instrumental
    output is deleted; the path of the first remaining output is returned.

    Args:
        separator: Loaded separator object exposing ``separate()``.
        f: Path of the audio file to process.
        output_dir: Directory the separator writes into. The names returned
            by ``separate()`` are joined onto it (assumes they are relative
            to this directory, as the original code did).

    Returns:
        The path of the vocal file, as a string.

    Raises:
        RuntimeError: If the separator has not been initialised, or if the
            separation produced no vocal output.
    """
    if separator is None:
        raise RuntimeError(
            "Separator is not initialised; load the models before separating."
        )

    stem = Path(f).stem
    outputs = separator.separate(
        f,
        custom_output_names={
            "vocals": stem,
            "instrumental": stem + "(Instrumental)",
        },
    )

    vocal_path = None
    for file in outputs:
        name = Path(file).name
        # Classify using only the part of the name that follows the input
        # stem. Otherwise an input such as "Song Instrumental.mp3" would make
        # its own vocal output look like an instrumental and get deleted.
        tail = name[len(stem):] if name.startswith(stem) else name
        path = output_dir / file
        if "Instrumental" in tail:
            # Remove instrumental file
            os.remove(str(path))
        elif vocal_path is None:
            vocal_path = path

    if vocal_path is None:
        raise RuntimeError(
            f"Separation of {f!r} produced no vocal output (got: {outputs!r})"
        )
    return str(vocal_path)


def svs_stage1(f: str, output_dir: Path) -> str:
    """First SVS stage: separate ``f`` with ``separator1``.

    Args:
        f: Path of the audio file to process.
        output_dir: Directory ``separator1`` writes its outputs into.

    Returns:
        Path of the vocal file produced by this stage; the instrumental
        output is deleted.
    """
    return _separate_vocals(separator1, f, output_dir)


def svs_stage2(f: str, output_dir: Path) -> str:
    """Second SVS stage: separate ``f`` with ``separator2``.

    Args:
        f: Path of the audio file to process (in ``main`` this is the vocal
            file returned by stage 1).
        output_dir: Directory ``separator2`` writes its outputs into.

    Returns:
        Path of the vocal file produced by this stage; the instrumental
        output is deleted.
    """
    return _separate_vocals(separator2, f, output_dir)
def main():
    """Command-line entry point for two-stage singing voice separation.

    Parses ``audio_dir`` and ``output_dir`` from the command line, loads the
    two separation models into the module-level ``separator1`` and
    ``separator2``, then runs every ``*.mp3`` file in ``audio_dir`` through
    stage 1 and feeds the resulting vocal file into stage 2.

    Exits through ``parser.error`` when ``audio_dir`` is not a directory.
    Returns early, without loading any model, when it contains no ``.mp3``
    file.
    """
    global separator1, separator2

    parser = argparse.ArgumentParser(description="Two-Stage Singing Voice Separation")
    parser.add_argument(
        "audio_dir",
        type=str,
        help="Directory containing audio files for SVS processing",
    )
    parser.add_argument(
        "output_dir",
        type=str,
        help="Directory to save the processed vocal files",
    )
    args = parser.parse_args()

    audio_dir = Path(args.audio_dir)
    output_dir = Path(args.output_dir)

    # Fail fast on a bad input path instead of loading two models and then
    # silently processing nothing.
    if not audio_dir.is_dir():
        parser.error(f"audio_dir is not a directory: {audio_dir}")

    output_dir.mkdir(parents=True, exist_ok=True)

    # Path.glob applies the pattern to entry names only, so glob
    # metacharacters in the directory path itself (e.g. "album [live]") are
    # taken literally. Sorting makes the processing order reproducible.
    files = sorted(str(path) for path in audio_dir.glob("*.mp3"))
    if not files:
        print(f"No .mp3 files found in {audio_dir}")
        return

    separator1 = Separator(
        output_dir=str(output_dir),
        use_autocast=USE_AUTOCAST,
    )
    separator1.load_model("melband_roformer_instvoc_duality_v2.ckpt")

    separator2 = Separator(
        output_dir=str(output_dir),
        use_autocast=USE_AUTOCAST,
    )
    separator2.load_model("6_HP-Karaoke-UVR.pth")

    for f in tqdm(files):
        # tqdm.write prints the message without breaking the progress bar.
        tqdm.write(f"Processing file: {f}")

        # Stage 1 SVS
        vocal_file_stage1 = svs_stage1(f, output_dir)
        tqdm.write(f"Stage 1 complete. Vocal file: {vocal_file_stage1}")

        # Stage 2 SVS
        vocal_file_stage2 = svs_stage2(vocal_file_stage1, output_dir)
        tqdm.write(f"Stage 2 complete. Final vocal file: {vocal_file_stage2}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment