# Source: GitHub Gist by @kenenbek (gist 9c7d8ecf1280938f16609d8e4b83386a),
# created May 30, 2025 12:23.
import os
import shutil
from math import ceil
# Configuration: defaults used by distribute_files() when called without arguments.
SOURCE_DIR = 'my_source_files_test'  # Path to source files
OUTPUT_DIR = 'output'  # Path to output worker folders
NUM_LABELERS = 5  # Number of workers
NUM_COPIES = 3  # Number of workers each file should be copied to


def distribute_files(
    source_dir=SOURCE_DIR,
    output_dir=OUTPUT_DIR,
    num_labelers=NUM_LABELERS,
    num_copies=NUM_COPIES
):
    """Copy every regular file in *source_dir* into several worker folders.

    Files are taken in sorted name order and handed out round-robin: the file
    at position ``idx`` goes to ``copies`` consecutive workers starting at
    ``(idx * copies) % num_labelers``. One folder ``worker_<i>`` is created
    under *output_dir* for each worker, even if it ends up empty.

    Args:
        source_dir: Directory whose regular files are distributed.
            Sub-directories are ignored.
        output_dir: Directory under which the ``worker_<i>`` folders are
            created (existing folders are reused).
        num_labelers: Number of worker folders; must be at least 1.
        num_copies: Number of distinct workers each file should be copied to;
            must not be negative. A value larger than *num_labelers* is
            capped, because a file can be placed at most once per worker.

    Raises:
        ValueError: If *num_labelers* is less than 1 or *num_copies* is
            negative.
        OSError: Propagated from ``os.listdir``, ``os.makedirs`` or
            ``shutil.copy2`` (for example when *source_dir* does not exist).
    """
    # Validate before touching the filesystem so bad arguments fail with a
    # clear message instead of a modulo-by-zero / index error mid-run.
    if num_labelers < 1:
        raise ValueError(f"num_labelers must be at least 1, got {num_labelers}")
    if num_copies < 0:
        raise ValueError(f"num_copies must not be negative, got {num_copies}")

    # A file cannot land in more distinct workers than exist. Capping here
    # keeps the consecutive worker ids below distinct, so no de-duplication
    # pass is needed and the summary reports the real number of copies.
    copies = min(num_copies, num_labelers)

    files = sorted(
        f for f in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, f))
    )
    num_files = len(files)

    # Create output directories for each worker
    worker_dirs = [
        os.path.join(output_dir, f'worker_{worker_id}')
        for worker_id in range(num_labelers)
    ]
    for worker_dir in worker_dirs:
        os.makedirs(worker_dir, exist_ok=True)

    # Distribute files to workers (round-robin, each file to `copies` workers)
    for idx, filename in enumerate(files):
        src = os.path.join(source_dir, filename)
        for p in range(copies):
            worker_id = (idx * copies + p) % num_labelers
            shutil.copy2(src, os.path.join(worker_dirs[worker_id], filename))

    print(f"Distributed {num_files} files to {num_labelers} workers, each file copied to {copies} workers.")
# Run the distribution with the module-level defaults when executed as a
# script; importing this module does not trigger any copying.
if __name__ == "__main__":
    distribute_files()
# End of gist (GitHub page footer removed).