Skip to content

Instantly share code, notes, and snippets.

@Lvdwardt
Last active April 17, 2025 10:34
Show Gist options
  • Save Lvdwardt/a2e98a308543cd6e4749bb2b625d9553 to your computer and use it in GitHub Desktop.
Save Lvdwardt/a2e98a308543cd6e4749bb2b625d9553 to your computer and use it in GitHub Desktop.
/**
* This script dumps the contents of a directory into a single text file.
* The default output file is <directory name>.txt, written to the current working directory.
*
* Usage:
*   npx ts-node codedump.ts [directory] [-o <output file>] [--type=<type>] [--top-n <topN>]
*
* - directory: The directory to dump the contents of (default: the current directory).
* - -o, --output <output file>: The file to write the contents to.
* - --type=<type>: The type of output to generate (short flags: -l, -n, -v, -m).
*     - list: Only list file paths
*     - normal: Include file contents with formatting (default)
*     - verbose: Include file contents with additional metadata
*     - minify: Include file contents with minimal whitespace
* - --top-n <topN>: The number of largest files to list in the summary at the top of the output (default: 10).
*
* Examples:
*   npx ts-node codedump.ts .
*   npx ts-node codedump.ts . --type=minify
*   npx ts-node codedump.ts -m
*/
// @ts-ignore
import * as fs from "fs";
// @ts-ignore
import * as path from "path";
/**
 * Metadata describing one file, as produced by `getFileInfo`.
 * Only `size` and `lastModified` are always present; the rest is best-effort.
 */
interface FileInfo {
  /** File size in bytes (taken from `fs.statSync`). */
  size: number;
  /** Modification time rendered as `YYYY-MM-DD HH:MM:SS`, derived from the ISO (UTC) timestamp. */
  lastModified: string;
  /** File classification (source, config, etc.). */
  fileType?: string;
  /** Programming language, looked up from the file extension. */
  language?: string;
  /** Total number of lines in the file. */
  lineCount?: number;
  /** List of imports/dependencies found in the file. */
  imports?: string[];
}
/**
 * File extensions whose files are included in the dump.
 *
 * Lookups are performed with a lowercased extension, so every entry here MUST
 * be lowercase (a mixed-case entry such as ".Rmd" can never match).
 * Each extension is listed once, under the first language group it belongs to.
 *
 * NOTE(review): ".pyc", ".pyo", ".pyd" and ".mat" are usually binary artifacts;
 * they are kept for backward compatibility — confirm whether they are wanted.
 */
const ALLOWED_EXTENSIONS = new Set([
  // General
  ".txt",
  ".md",
  ".markdown",
  ".json",
  ".xml",
  ".yaml", // also used by Ansible
  ".yml",
  ".toml",
  ".ini",
  ".cfg",
  ".conf",
  ".sql",
  ".graphql",
  ".proto",
  // Python
  ".py",
  ".pyx",
  ".pyd",
  ".pyo",
  ".pyc",
  ".pyw",
  ".pyi",
  // C and C++
  ".c",
  ".h",
  ".i",
  ".cpp",
  ".hpp",
  ".cc",
  ".hh",
  ".cxx",
  ".hxx",
  // Julia
  ".jl",
  // JavaScript and TypeScript
  ".js",
  ".jsx",
  ".ts",
  ".tsx",
  ".mjs",
  ".cjs",
  // Web
  ".html",
  ".htm",
  ".css",
  ".scss",
  ".sass",
  ".less",
  // Java and JVM languages (Kotlin, Groovy, Scala, Clojure)
  ".java",
  ".kt",
  ".kts",
  ".groovy",
  ".scala",
  ".sc",
  ".clj",
  ".cljs",
  // .NET languages (C#, F#, VB)
  ".cs",
  ".fs",
  ".fsi",
  ".fsx",
  ".vb",
  // Ruby
  ".rb",
  ".rake",
  ".gemspec",
  // PHP
  ".php",
  ".phtml",
  ".php3",
  ".php4",
  ".php5",
  ".phps",
  // Go
  ".go",
  // Rust
  ".rs",
  // Swift
  ".swift",
  // Shell scripting
  ".sh",
  ".bash",
  ".zsh",
  ".fish",
  // PowerShell
  ".ps1",
  ".psm1",
  ".psd1",
  // Perl
  ".pl",
  ".pm",
  // Lua
  ".lua",
  // Haskell
  ".hs",
  ".lhs",
  // R (".R" and ".Rmd" files match through the lowercased lookup)
  ".r",
  ".rmd",
  // Dart
  ".dart",
  // Objective-C and MATLAB/Octave (both use ".m")
  ".m",
  ".mm",
  ".mat",
  // Elm
  ".elm",
  // Elixir
  ".ex",
  ".exs",
  // Erlang
  ".erl",
  ".hrl",
  // Lisp dialects
  ".lisp",
  ".cl",
  ".el",
  // Fortran
  ".f",
  ".for",
  ".f90",
  ".f95",
  ".f03",
  ".f08",
  // Terraform
  ".tf",
  ".tfvars",
  // LaTeX
  ".tex",
  ".sty",
  ".cls",
]);
/**
 * Exact file names that are included in the dump even when their extension is
 * not in the allowed-extension list (or when they are dotfiles).
 *
 * Lookups are performed with a lowercased file name, so every entry here MUST
 * be lowercase (e.g. `Jenkinsfile` is listed as "jenkinsfile").
 */
const ALLOWED_FILENAMES = new Set([
  // General
  // "readme",
  "license",
  "dockerfile",
  "makefile",
  ".gitignore",
  ".dockerignore",
  ".editorconfig",
  ".env",
  "requirements.txt",
  "package.json",
  "tsconfig.json",
  // Python
  "setup.py",
  "setup.cfg",
  "pyproject.toml",
  "pipfile",
  "manifest.in",
  ".pylintrc",
  ".flake8",
  "pytest.ini",
  "tox.ini",
  // C/C++ (CMake's file is "CMakeLists.txt")
  "cmakelists.txt",
  // Julia
  "project.toml",
  "manifest.toml",
  "juliaconfig.toml",
  // JavaScript/TypeScript
  ".npmignore",
  ".babelrc",
  ".eslintrc",
  ".prettierrc",
  "tslint.json",
  "webpack.config.js",
  "package-lock.json",
  "yarn.lock",
  // Ruby
  "gemfile",
  "rakefile",
  // PHP
  "composer.json",
  "composer.lock",
  // Go
  "go.mod",
  "go.sum",
  // Rust
  "cargo.toml",
  "cargo.lock",
  // .NET
  "packages.config",
  "nuget.config",
  // Java
  "pom.xml",
  "build.gradle",
  "build.gradle.kts",
  "settings.gradle",
  "settings.gradle.kts",
  // Docker
  "docker-compose.yml",
  "docker-compose.yaml",
  // Git
  ".gitattributes",
  // CI/CD (Jenkins pipelines live in an extensionless "Jenkinsfile")
  ".travis.yml",
  ".gitlab-ci.yml",
  "jenkinsfile",
  "azure-pipelines.yml",
  // Editor/IDE
  ".vscode",
  ".idea",
  // Elm
  "elm.json",
  // F#
  "paket.dependencies",
  "paket.lock",
  // Elixir
  "mix.exs",
  "mix.lock",
  // Erlang
  "rebar.config",
  // MATLAB/Octave
  ".octaverc",
  // Scala
  "build.sbt",
  // Terraform
  ".terraform.lock.hcl",
  // Ansible
  "ansible.cfg",
  "hosts",
  // LaTeX
  "latexmkrc",
]);
/**
 * Directory names that are never descended into.
 * Names are compared exactly (case-sensitive) against the directory's base name.
 */
const SKIP_DIRECTORIES = new Set<string>([
  // Dependency folders, virtual environments and caches
  ...["__pycache__", "node_modules", "venv", "env", ".venv", ".env", ".cache"],
  // Build and compilation output
  ...["build", "dist", "target", "out", "bin", "obj"],
  // Version-control metadata
  ...[".git", ".svn", ".hg"],
  // Editor/IDE settings
  ...[".idea", ".vscode"],
  // Logs, generated output, framework caches and database migrations
  ...["logs", "output", ".next", ".turbo", "migrations"],
]);
/**
 * Regular expressions tested against a directory's base name;
 * a directory matching any of them is not descended into.
 */
const SKIP_DIRECTORY_PATTERNS: RegExp[] = [
  // Python packaging metadata, e.g. "mypkg.egg-info"
  /\.egg-info$/,
  // Names ending in ".yarn"
  /\.yarn$/,
];
/**
 * Exact file names that are always excluded from the dump.
 *
 * The set is queried with a lowercased file name, so every entry MUST be
 * lowercase (macOS's ".DS_Store" is therefore listed as ".ds_store").
 */
const SKIP_FILENAMES = new Set([
  "package-lock.json",
  ".ds_store",
  ".eslintcache",
  "thumbs.db",
  ".npmrc",
  ".prettierignore",
  ".eslintignore",
  ".gitkeep",
  ".browserslistrc",
  "tsconfig.tsbuildinfo",
  ".node-version",
  ".nvmrc",
  "desktop.ini",
  "npm-debug.log",
  "sdk.ts",
]);
/**
 * Regular expressions tested against a (lowercased) file name;
 * a file matching any of them is excluded from the dump.
 */
const SKIP_PATTERNS: RegExp[] = [
  // Log files: "*.log", rotated "*.log.1", "*.log.2", ... and "log.*" (log.txt, log.old, ...)
  /\.log(\.[0-9]+)?$/,
  /^log\./,
  // Backups, temporary files and editor swap files
  /\.bak$/,
  /\.tmp$/,
  /\.temp$/,
  /\.swp$/,
  /~$/,
  // Package-manager lock files
  /pnpm-lock\.yaml$/,
  /yarn\.lock$/,
  // Generated GraphQL cache typings
  /graphql-cache\.d\.ts$/,
];
// Absolute path of the dump file being written; empty until `main` sets it.
// `shouldSkip` compares against it so the dump never includes itself.
let outputFilePath = "";
/**
 * Minimal `.gitignore` evaluator.
 *
 * Rules are loaded per directory with `loadGitignoreFile` and evaluated with
 * `shouldIgnore`. Supported syntax: comments (`#`), negation (`!`), leading
 * `/` anchoring, anchoring of patterns that contain a `/` in the middle,
 * trailing `/` (directory-only), `*`, `?`, `**` and backslash escapes.
 * Character classes (`[a-z]`) are NOT supported; brackets are matched literally.
 */
class GitignoreManager {
  // Rules keyed by the absolute path of the directory containing the .gitignore.
  private ignoreRulesByDir: Map<
    string,
    Array<{ pattern: string; isNegated: boolean }>
  > = new Map();

  // Compiled patterns, so each pattern is converted to a RegExp only once.
  private regexCache: Map<string, RegExp> = new Map();

  constructor() {}

  /**
   * Loads `<directory>/.gitignore` (if present) and associates its rules with
   * that directory. Read errors are reported with `console.warn` and otherwise
   * ignored, so a broken `.gitignore` never aborts the walk.
   *
   * @param directory Directory that may contain a `.gitignore` file.
   */
  loadGitignoreFile(directory: string): void {
    const gitignorePath = path.join(directory, ".gitignore");
    try {
      if (!fs.existsSync(gitignorePath)) return;

      const content = fs.readFileSync(gitignorePath, "utf-8");
      const rules: Array<{ pattern: string; isNegated: boolean }> = [];
      for (const line of content.split(/\r?\n/)) {
        const trimmedLine = line.trim();
        if (!trimmedLine || line.startsWith("#")) continue;

        const isNegated = trimmedLine.startsWith("!");
        const pattern = isNegated ? trimmedLine.substring(1) : trimmedLine;
        // A pattern made only of slashes (or nothing, e.g. a lone "!") carries
        // no name to match and would otherwise match every path.
        if (pattern.replace(/^\/+|\/+$/g, "") === "") continue;

        rules.push({ pattern, isNegated });
      }
      this.ignoreRulesByDir.set(path.resolve(directory), rules);
    } catch (error) {
      console.warn(`Error reading .gitignore at ${gitignorePath}:`, error);
    }
  }

  /**
   * Decides whether a path is ignored by the loaded `.gitignore` rules.
   *
   * Rule files are applied from the outermost directory down to the directory
   * closest to the path, and within a file in order, with the last matching
   * rule winning — so a nested `.gitignore` overrides its ancestors.
   *
   * @param filePath Path to test (absolute, or relative to the working directory).
   * @returns `true` when the last matching rule is a non-negated one.
   */
  shouldIgnore(filePath: string): boolean {
    if (this.ignoreRulesByDir.size === 0) return false;

    const absolutePath = path.resolve(filePath);

    // Collect the rule sets of every ancestor directory, closest first.
    const ruleSets: Array<{
      dir: string;
      rules: Array<{ pattern: string; isNegated: boolean }>;
    }> = [];
    let currentDir = path.dirname(absolutePath);
    for (;;) {
      const rules = this.ignoreRulesByDir.get(currentDir);
      if (rules && rules.length > 0) ruleSets.push({ dir: currentDir, rules });
      const parentDir = path.dirname(currentDir);
      if (parentDir === currentDir) break; // reached the filesystem root
      currentDir = parentDir;
    }
    if (ruleSets.length === 0) return false;

    // Stat once per path (not once per rule). A path that cannot be stat'ed
    // is treated as a non-directory instead of aborting the caller.
    let isDirectory = false;
    try {
      isDirectory = fs.statSync(absolutePath).isDirectory();
    } catch (error) {
      isDirectory = false;
    }

    let shouldBeIgnored = false;
    // Outermost directory first, so that deeper .gitignore files take precedence.
    for (const { dir, rules } of ruleSets.reverse()) {
      // gitignore patterns always use "/" as the separator.
      const relativePath = path
        .relative(dir, absolutePath)
        .split(path.sep)
        .join("/");
      if (!relativePath) continue;

      for (const { pattern, isNegated } of rules) {
        if (this.matchesGitignorePattern(relativePath, pattern, isDirectory)) {
          shouldBeIgnored = !isNegated;
        }
      }
    }
    return shouldBeIgnored;
  }

  /**
   * Tests one pattern against a "/"-separated path relative to the
   * `.gitignore` directory.
   *
   * A directory-only pattern (trailing `/`) matches directories, and also
   * files located inside a matching directory.
   */
  private matchesGitignorePattern(
    relativePath: string,
    pattern: string,
    isDirectory: boolean
  ): boolean {
    const directoryOnly = pattern.endsWith("/");
    const cleanPattern = directoryOnly ? pattern.slice(0, -1) : pattern;
    const regex = this.gitignorePatternToRegex(cleanPattern);

    if (directoryOnly && !isDirectory) {
      // The file itself cannot match; test the directory that contains it.
      const lastSlash = relativePath.lastIndexOf("/");
      return lastSlash > 0 && regex.test(relativePath.slice(0, lastSlash));
    }
    return regex.test(relativePath);
  }

  /**
   * Converts a gitignore pattern (without trailing slash) to a RegExp that
   * matches the named entry itself or anything below it.
   */
  private gitignorePatternToRegex(pattern: string): RegExp {
    const cached = this.regexCache.get(pattern);
    if (cached) return cached;

    // A slash at the start or in the middle anchors the pattern to the
    // directory of the .gitignore file; otherwise it may match at any depth.
    const anchored = pattern.includes("/");
    const source = pattern.startsWith("/") ? pattern.substring(1) : pattern;
    const escapeLiteral = (ch: string): string =>
      ch.replace(/[\\^$.|?*+()[\]{}]/g, "\\$&");

    let body = "";
    let i = 0;
    while (i < source.length) {
      const ch = source.charAt(i);
      if (ch === "\\" && i + 1 < source.length) {
        // Backslash escapes the next character (e.g. "\#", "\!", "\*").
        body += escapeLiteral(source.charAt(i + 1));
        i += 2;
      } else if (ch === "*" && source.charAt(i + 1) === "*") {
        const atSegmentStart = i === 0 || source.charAt(i - 1) === "/";
        if (atSegmentStart && source.charAt(i + 2) === "/") {
          // "**/" matches zero or more leading directories.
          body += "(?:.*/)?";
          i += 3;
        } else {
          // Any other "**" (including a trailing "/**") matches across directories.
          body += ".*";
          i += 2;
        }
      } else if (ch === "*") {
        body += "[^/]*"; // anything except a path separator
        i += 1;
      } else if (ch === "?") {
        body += "[^/]"; // any single character except a path separator
        i += 1;
      } else {
        body += escapeLiteral(ch);
        i += 1;
      }
    }

    const prefix = anchored ? "^" : "(?:^|.*/)";
    // Match the entry exactly, or any path underneath it.
    const regex = new RegExp(prefix + body + "(?:/.*)?$");
    this.regexCache.set(pattern, regex);
    return regex;
  }
}
// Single shared GitignoreManager instance; the directory walkers and
// `shouldSkip` load rules into it and query it.
const gitignoreManager: GitignoreManager = new GitignoreManager();
/**
 * Collects metadata for a single file.
 *
 * Size and modification time come from `fs.statSync`; language and file type
 * are derived from the path. For files under 1 MiB whose extension is not a
 * known binary one, the file is read to count lines and — for JS/TS sources —
 * to extract imports. Failures while reading the content are ignored.
 *
 * @param filePath Path of the file to inspect.
 * @returns The collected metadata.
 * @throws Whatever `fs.statSync` throws when the file cannot be stat'ed.
 */
function getFileInfo(filePath: string): FileInfo {
  const maxAnalyzedBytes = 1024 * 1024;
  const scriptExtensions = [".js", ".jsx", ".ts", ".tsx"];

  const stats = fs.statSync(filePath);
  const isoTimestamp = new Date(stats.mtime).toISOString();
  const info: FileInfo = {
    size: stats.size,
    // "2024-01-31T12:34:56.789Z" -> "2024-01-31 12:34:56"
    lastModified: isoTimestamp.replace("T", " ").substring(0, 19),
  };

  const extension = path.extname(filePath).toLowerCase();
  info.language = detectLanguage(extension);
  info.fileType = classifyFileType(filePath);

  const worthReading =
    stats.size < maxAnalyzedBytes && !isBinaryExtension(extension);
  if (!worthReading) {
    return info;
  }

  try {
    const text = fs.readFileSync(filePath, "utf-8");
    info.lineCount = text.split("\n").length;
    if (scriptExtensions.includes(extension)) {
      info.imports = extractImports(text);
    }
  } catch (e) {
    // Line count and imports are optional extras; leave them unset on read errors.
  }
  return info;
}
/**
 * Maps a file extension to a human-readable language name.
 *
 * @param extension Extension including the leading dot. The lookup is
 *   case-sensitive, so callers are expected to pass it lowercased.
 * @returns The language name, or "Unknown" when the extension is not listed.
 */
function detectLanguage(extension: string): string {
  const namesByExtension: { [key: string]: string } = {
    // JavaScript / TypeScript
    ".js": "JavaScript",
    ".jsx": "JavaScript (React)",
    ".ts": "TypeScript",
    ".tsx": "TypeScript (React)",
    // General-purpose languages
    ".py": "Python",
    ".rb": "Ruby",
    ".java": "Java",
    ".cs": "C#",
    ".go": "Go",
    ".rs": "Rust",
    ".php": "PHP",
    ".swift": "Swift",
    ".kt": "Kotlin",
    ".c": "C",
    ".cpp": "C++",
    ".h": "C/C++ Header",
    ".sh": "Shell",
    // Data and markup
    ".json": "JSON",
    ".xml": "XML",
    ".yaml": "YAML",
    ".yml": "YAML",
    ".md": "Markdown",
    ".html": "HTML",
    ".css": "CSS",
    ".scss": "SCSS",
    // Query languages
    ".sql": "SQL",
    ".graphql": "GraphQL",
  };

  const languageName = namesByExtension[extension];
  if (languageName) {
    return languageName;
  }
  return "Unknown";
}
/**
 * Classifies a file by its name and location using simple heuristics.
 *
 * Checks run in this order, first match wins:
 *  1. "Package Management" — well-known dependency manifests. This runs first
 *     because these names would otherwise be claimed by the broader
 *     configuration/documentation rules (e.g. `package.json` is a `.json`
 *     file, `requirements.txt` is a `.txt` file).
 *  2. "Configuration" — names containing "config", rc files, `.env`, or
 *     data-format files that do not live under a `src`/`app`-like directory.
 *  3. "Test" — `.test.`/`.spec.` names or a `test`/`tests`/`__tests__` segment.
 *  4. "Documentation" — text/markdown-like extensions, `contributing`, `license`.
 *  5. "Source" — located under a common source directory.
 *  6. "Other".
 *
 * @param filePath Path of the file; it is split on the platform separator.
 * @returns One of the category names listed above.
 */
function classifyFileType(filePath: string): string {
  const fileName = path.basename(filePath).toLowerCase();
  const extension = path.extname(fileName).toLowerCase();
  const pathSegments = filePath.split(path.sep);
  // Directory part only, so a file name such as "mapping.json" is not mistaken
  // for living in an "app" directory.
  const directorySegments = pathSegments.slice(0, -1);

  // Package management
  if (
    ["package.json", "requirements.txt", "cargo.toml", "go.mod"].includes(
      fileName
    )
  ) {
    return "Package Management";
  }

  // rc files: ".eslintrc", ".eslintrc.json", "latexmkrc", ... — but not every
  // name that merely contains the letters "rc" (e.g. "search.ts", "src.ts").
  const stem = extension ? fileName.slice(0, -extension.length) : fileName;
  const isRcFile =
    stem.length > 2 &&
    stem.endsWith("rc") &&
    (fileName.startsWith(".") || extension === "");

  // Configuration files
  if (
    fileName.includes("config") ||
    isRcFile ||
    fileName === ".env" ||
    ([".json", ".yaml", ".yml", ".toml", ".ini"].includes(extension) &&
      !directorySegments.some(
        (seg) => seg.includes("src") || seg.includes("app")
      ))
  ) {
    return "Configuration";
  }

  // Test files
  if (
    fileName.includes(".test.") ||
    fileName.includes(".spec.") ||
    pathSegments.some(
      (seg) => seg === "test" || seg === "tests" || seg === "__tests__"
    )
  ) {
    return "Test";
  }

  // Documentation
  if (
    [".md", ".markdown", ".txt", ".doc", ".pdf"].includes(extension) ||
    // fileName === "readme" ||
    fileName === "contributing" ||
    fileName === "license"
  ) {
    return "Documentation";
  }

  // Sources based on common source dirs
  if (
    pathSegments.some((seg) =>
      ["src", "app", "lib", "components", "services"].includes(seg)
    )
  ) {
    return "Source";
  }

  return "Other";
}
/**
 * Reports whether an extension belongs to a format that is most likely binary
 * (executables/libraries, object files, archives, images, PDF).
 *
 * @param extension Extension including the leading dot; compared
 *   case-sensitively against lowercase entries.
 * @returns `true` when the extension is in the known-binary list.
 */
function isBinaryExtension(extension: string): boolean {
  const knownBinary = new Set<string>([
    // Executables, libraries and object files
    ...[".exe", ".dll", ".so", ".dylib", ".bin", ".o", ".obj"],
    // Archives
    ...[".zip", ".tar", ".gz", ".rar", ".7z"],
    // Images
    ...[".jpg", ".jpeg", ".png", ".gif", ".bmp", ".ico"],
    // Documents
    ...[".pdf"],
  ]);
  return knownBinary.has(extension);
}
/**
 * Extracts the module specifiers a JS/TS source depends on.
 *
 * Recognized forms:
 *  - static ES imports with any import clause: default, named, namespace
 *    (`* as x`), combined (`a, { b }`), `import type`, and side-effect
 *    imports (`import "x"`);
 *  - CommonJS `require("x")` calls, whether or not the result is assigned.
 *
 * Matching is regex-based, so commented-out imports are reported too, and
 * dynamic `import("x")` / `export ... from "x"` are not detected.
 *
 * @param content Source text to scan.
 * @returns Unique specifiers: ES imports in source order, followed by
 *   `require` specifiers in source order.
 */
function extractImports(content: string): string[] {
  const imports: string[] = [];
  // The optional group is the import clause; its character class excludes
  // quotes, parentheses and semicolons so it cannot run past the statement.
  const importRegex =
    /\bimport\b\s*(?:[\w$*\s{},]+?\bfrom\s*)?['"]([^'"]+)['"]/g;
  const requireRegex = /\brequire\s*\(\s*['"]([^'"]+)['"]\s*\)/g;

  for (const regex of [importRegex, requireRegex]) {
    let match: RegExpExecArray | null;
    while ((match = regex.exec(content)) !== null) {
      const specifier = match[1];
      if (specifier !== undefined) imports.push(specifier);
    }
  }
  return [...new Set(imports)]; // Remove duplicates, keeping first occurrence order
}
/**
 * Decides whether a path must be left out of the dump.
 *
 * A path is skipped when it is the dump's own output file, when the loaded
 * gitignore rules ignore it, or:
 *  - for directories: when its name is in the skip list or matches a skip
 *    pattern (the directory's own `.gitignore` is loaded as a side effect);
 *  - for files: when its name is in the skip list, matches a skip pattern, or
 *    is neither an allowed file name nor carries an allowed extension
 *    (dotfiles are only kept when explicitly allowed by name).
 *
 * @param pathname Path of an existing file or directory.
 * @returns `true` when the path should be skipped.
 * @throws Whatever `fs.statSync` throws when the path cannot be stat'ed.
 */
function shouldSkip(pathname: string): boolean {
  const name = path.basename(pathname);
  const nameLower = name.toLowerCase();
  const extension = path.extname(name);

  // Never include the dump file in itself (absolute paths are compared).
  const isOutputFile =
    Boolean(outputFilePath) && path.resolve(pathname) === outputFilePath;
  if (isOutputFile || gitignoreManager.shouldIgnore(pathname)) {
    return true;
  }

  if (fs.statSync(pathname).isDirectory()) {
    // Pick up this directory's own .gitignore before its children are visited.
    gitignoreManager.loadGitignoreFile(pathname);
    if (SKIP_DIRECTORIES.has(name)) {
      return true;
    }
    for (const directoryPattern of SKIP_DIRECTORY_PATTERNS) {
      if (directoryPattern.test(name)) {
        return true;
      }
    }
    return false;
  }

  if (SKIP_FILENAMES.has(nameLower)) {
    return true;
  }
  for (const filePattern of SKIP_PATTERNS) {
    if (filePattern.test(nameLower)) {
      return true;
    }
  }

  // Explicitly allowed names are always kept; everything else needs to be a
  // non-dotfile with an allowed extension.
  if (ALLOWED_FILENAMES.has(nameLower)) {
    return false;
  }
  return (
    name.startsWith(".") || !ALLOWED_EXTENSIONS.has(extension.toLowerCase())
  );
}
/**
 * Walks a directory tree and concatenates the files that `shouldSkip` lets
 * through into one string.
 *
 * Output per `type`:
 *  - "list": one file path per entry, no content;
 *  - "normal": a 60-character banner with the path, then the raw content;
 *  - "verbose": like "normal", with size/language/type/lines/imports/mtime
 *    added to the banner;
 *  - "minify": a 10-character banner, then the content with whitespace
 *    collapsed; pieces are joined without separators.
 *
 * Files larger than 1 MiB are replaced by a placeholder line. Unreadable files
 * and directories are reported inline in the output instead of throwing.
 * Full metadata (which requires reading the file) is only gathered in
 * "verbose" mode; the other modes read each file at most once.
 *
 * @param directory Root directory to walk (default ".").
 * @param type Output flavour (default "normal").
 * @returns The concatenated dump.
 */
function concatenateFiles(
  directory: string = ".",
  type: "list" | "normal" | "verbose" | "minify" = "normal"
): string {
  const maxContentBytes = 1024 * 1024; // 1MB threshold
  const output: string[] = [];
  const processedPaths = new Set<string>(); // Track processed paths to avoid duplicates

  // Collapses blank-line runs, space/tab runs and trailing whitespace.
  const minifyContent = (content: string): string =>
    content
      .replace(/\n\s*\n\s*\n/g, "\n\n") // Remove extra blank lines
      .replace(/[ \t]+/g, " ") // Collapse multiple spaces/tabs
      .replace(/\s+$/gm, "") // Remove trailing whitespace
      .trim(); // Trim leading/trailing whitespace

  // Appends a file's content, a placeholder for oversized files, or the read error.
  const pushContent = (
    fullPath: string,
    size: number,
    transform?: (content: string) => string
  ): void => {
    try {
      if (size > maxContentBytes) {
        output.push(`[File content truncated - file exceeds 1MB]`);
      } else {
        const content = fs.readFileSync(fullPath, "utf-8");
        output.push(transform ? transform(content) : content);
      }
    } catch (e) {
      output.push(
        `Error reading file: ${e instanceof Error ? e.message : String(e)}`
      );
    }
  };

  // Appends the banner and content for one file according to `type`.
  const emitFile = (fullPath: string): void => {
    if (type === "list") {
      output.push(`${fullPath}`);
      return;
    }

    // Resolve metadata before writing anything, so a failing stat leaves no
    // half-written banner behind.
    const fileInfo = type === "verbose" ? getFileInfo(fullPath) : undefined;
    const size = fileInfo ? fileInfo.size : fs.statSync(fullPath).size;

    if (type === "minify") {
      output.push(`\n\n${"=".repeat(10)}`);
      output.push(`File: ${fullPath}`);
      output.push("=".repeat(10) + "\n");
      pushContent(fullPath, size, minifyContent);
      return;
    }

    output.push(`\n\n${"=".repeat(60)}`);
    output.push(`File: ${fullPath}`);
    if (fileInfo) {
      output.push(`Size: ${formatSize(fileInfo.size)}`);
      output.push(`Language: ${fileInfo.language || "Unknown"}`);
      output.push(`Type: ${fileInfo.fileType || "Unknown"}`);
      if (fileInfo.lineCount) output.push(`Lines: ${fileInfo.lineCount}`);
      if (fileInfo.imports && fileInfo.imports.length > 0)
        output.push(`Imports: ${fileInfo.imports.join(", ")}`);
      output.push(`Last Modified: ${fileInfo.lastModified}`);
    }
    output.push("=".repeat(60) + "\n");
    pushContent(fullPath, size);
  };

  function walkDir(dir: string): void {
    try {
      // Load gitignore file at this directory level
      gitignoreManager.loadGitignoreFile(dir);
      const entries = fs.readdirSync(dir, { withFileTypes: true });
      // Directories first, then files; alphabetical within each group
      const sortedEntries = [...entries].sort((a, b) => {
        if (a.isDirectory() && !b.isDirectory()) return -1;
        if (!a.isDirectory() && b.isDirectory()) return 1;
        return a.name.localeCompare(b.name);
      });
      for (const entry of sortedEntries) {
        const fullPath = path.join(dir, entry.name);
        const canonicalPath = path.resolve(fullPath);
        // Skip paths that were already visited.
        // NOTE(review): path.resolve does not resolve symlinks; symlinked
        // entries appear to be skipped anyway because neither isDirectory()
        // nor isFile() is true for them — confirm.
        if (processedPaths.has(canonicalPath)) continue;
        processedPaths.add(canonicalPath);
        if (entry.isDirectory()) {
          if (!shouldSkip(fullPath)) {
            walkDir(fullPath);
          }
        } else if (entry.isFile() && !shouldSkip(fullPath)) {
          emitFile(fullPath);
        }
      }
    } catch (error) {
      output.push(
        `Error accessing directory ${dir}: ${error instanceof Error ? error.message : String(error)}`
      );
    }
  }

  walkDir(directory);
  return output.join(type === "minify" ? "" : "\n");
}
/**
 * Finds the largest files under a directory, honouring the same skip rules as
 * the dump itself (`shouldSkip`).
 *
 * Directories and files that cannot be accessed are skipped silently: this
 * summary is best-effort and must not abort the dump.
 *
 * @param directory Root directory to walk (default ".").
 * @param topN Maximum number of entries to return (default 10). Negative
 *   values are treated as 0.
 * @returns `[sizeInBytes, path]` tuples, largest first; files of equal size
 *   are ordered by path so the result does not depend on traversal order.
 */
function getLargestFiles(
  directory: string = ".",
  topN: number = 10
): [number, string][] {
  const fileSizes: [number, string][] = [];
  const processedPaths = new Set<string>(); // Track processed paths to avoid duplicates

  function walkDir(dir: string): void {
    try {
      // Load gitignore file at this directory level
      gitignoreManager.loadGitignoreFile(dir);
      const entries = fs.readdirSync(dir, { withFileTypes: true });
      for (const entry of entries) {
        const fullPath = path.join(dir, entry.name);
        const canonicalPath = path.resolve(fullPath);
        // Skip paths that were already visited
        if (processedPaths.has(canonicalPath)) continue;
        processedPaths.add(canonicalPath);
        if (entry.isDirectory()) {
          if (!shouldSkip(fullPath)) {
            walkDir(fullPath);
          }
        } else if (entry.isFile() && !shouldSkip(fullPath)) {
          try {
            const size = fs.statSync(fullPath).size;
            fileSizes.push([size, fullPath]);
          } catch (e) {
            // Skip files we can't access
          }
        }
      }
    } catch (error) {
      // Silent fail on directories we can't access
    }
  }

  walkDir(directory);

  // A negative count would make slice() drop entries from the end instead of
  // returning none, so clamp it.
  const limit = Math.max(0, topN);
  return fileSizes
    .sort((a, b) => b[0] - a[0] || a[1].localeCompare(b[1]))
    .slice(0, limit);
}
/**
 * Formats a byte count with a binary unit (1 KB = 1024 B) and two decimals,
 * e.g. `1536` -> `"1.50 KB"`. Values beyond the terabyte range stay in TB.
 *
 * @param size Size in bytes.
 * @returns The formatted size, such as "0.00 B" or "1.00 MB".
 */
function formatSize(size: number): string {
  const units = ["B", "KB", "MB", "GB", "TB"];
  const largestUnit = units.length - 1;

  let value = size;
  let unit = 0;
  for (; unit < largestUnit && value >= 1024; unit++) {
    value /= 1024;
  }
  return `${value.toFixed(2)} ${units[unit]}`;
}
/**
 * CLI entry point: parses the arguments, builds the "largest files" summary
 * and the dump, writes both to the output file and prints the summary.
 *
 * Exits the process with status 1 when the target does not exist, is not a
 * directory, or any other error occurs.
 */
function main(): void {
  try {
    // Parse command line arguments
    const args = parseArgs();
    // Get the absolute path of the directory
    const absDirectory = path.resolve(args.directory);
    // Verify directory exists
    if (!fs.existsSync(absDirectory)) {
      console.error(`Error: Directory '${args.directory}' does not exist.`);
      // @ts-ignore
      process.exit(1);
    }
    // A regular file would otherwise yield an empty summary and a dump that
    // only contains a directory-access error.
    if (!fs.statSync(absDirectory).isDirectory()) {
      console.error(`Error: '${args.directory}' is not a directory.`);
      // @ts-ignore
      process.exit(1);
    }
    // If output file is not specified, name it after the directory.
    // The resolved path already ends with the real directory name (even for
    // "."), except for a filesystem root, whose base name is empty.
    if (!args.output) {
      const dirName = path.basename(absDirectory) || "root";
      args.output = `${dirName}.txt`;
    }
    // Store output file path so the walkers never include the dump in itself
    outputFilePath = path.resolve(args.output);
    // Get and display largest files
    const largestFiles = getLargestFiles(args.directory, args.topN);
    let largestFilesText =
      "\nLargest files in directory:\n" + "=".repeat(80) + "\n";
    for (const [size, filePath] of largestFiles) {
      largestFilesText += `${formatSize(size).padEnd(10)} ${filePath}\n`;
    }
    largestFilesText += "=".repeat(80) + "\n\n";
    // Get main content
    const result = concatenateFiles(args.directory, args.type);
    // Combine largest files summary with main content
    const fullOutput = largestFilesText + result;
    // Write to file
    fs.writeFileSync(args.output, fullOutput, "utf-8");
    console.log(largestFilesText); // Show largest files in console
    console.log(
      `\nOutput for directory '${args.directory}' has been written to '${args.output}'`
    );
  } catch (error) {
    console.error(
      `Error: ${error instanceof Error ? error.message : String(error)}`
    );
    // @ts-ignore
    process.exit(1);
  }
}
/**
 * Parses `process.argv` into the script's options.
 *
 * Flags:
 *  - `-l` / `-v` / `-m`, `--type=<type>` or `--type <type>`: output type
 *    (`list`, `normal`, `verbose`, `minify`);
 *  - `-n`: when followed by a non-negative integer it sets the number of
 *    largest files (`-n 5`); on its own it selects the `normal` type;
 *  - `-o <file>`, `--output <file>` or `--output=<file>`: output file;
 *  - `--top-n <N>` or `--top-n=<N>`: number of largest files to list.
 *
 * Positional arguments follow the documented order
 * `<directory> <output file> <type> <topN>`; a value given through a flag
 * takes precedence over the positional one. Unknown flags and invalid values
 * are ignored and the defaults are kept.
 *
 * @returns The parsed options: `directory` (default "."), `type` (default
 *   "normal"), `output` (default "", meaning "derive from the directory
 *   name") and `topN` (default 10).
 */
function parseArgs() {
  const args = {
    directory: ".",
    type: "normal" as "list" | "normal" | "verbose" | "minify",
    output: "",
    topN: 10,
  };
  const validTypes = ["list", "normal", "verbose", "minify"];

  // Returns the value as an output type, or undefined when it is not one.
  const asType = (value: string | undefined) =>
    value !== undefined && validTypes.includes(value)
      ? (value as typeof args.type)
      : undefined;
  // Returns the value as a non-negative integer, or undefined when it is not one.
  const asCount = (value: string | undefined) =>
    value !== undefined && /^\d+$/.test(value)
      ? parseInt(value, 10)
      : undefined;

  // Tracks which options were set by a flag, so positionals do not override them.
  const setByFlag = { type: false, output: false, topN: false };
  const setType = (value: typeof args.type | undefined): void => {
    if (value === undefined) return;
    args.type = value;
    setByFlag.type = true;
  };
  const setTopN = (value: number | undefined): void => {
    if (value === undefined) return;
    args.topN = value;
    setByFlag.topN = true;
  };
  const setOutput = (value: string): void => {
    args.output = value;
    setByFlag.output = true;
  };

  // Simple argument parser
  // @ts-ignore
  const argv: string[] = process.argv.slice(2);
  const positionals: string[] = [];

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === undefined) continue;
    const next = argv[i + 1];

    if (arg === "-l") {
      setType("list");
    } else if (arg === "-v") {
      setType("verbose");
    } else if (arg === "-m") {
      setType("minify");
    } else if (arg === "-n") {
      // "-n" is shared by "normal" and "top-n": a numeric value disambiguates.
      const count = asCount(next);
      if (count !== undefined) {
        setTopN(count);
        i++;
      } else {
        setType("normal");
      }
    } else if (arg.startsWith("--type=")) {
      setType(asType(arg.substring("--type=".length)));
    } else if (arg === "--type") {
      const requested = asType(next);
      if (requested !== undefined) {
        setType(requested);
        i++;
      }
    } else if (arg === "-o" || arg === "--output") {
      if (next !== undefined) {
        setOutput(next);
        i++;
      }
    } else if (arg.startsWith("--output=")) {
      setOutput(arg.substring("--output=".length));
    } else if (arg === "--top-n") {
      if (next !== undefined) {
        // The option always consumes its value, even when it is invalid.
        setTopN(asCount(next));
        i++;
      }
    } else if (arg.startsWith("--top-n=")) {
      setTopN(asCount(arg.substring("--top-n=".length)));
    } else if (!arg.startsWith("-")) {
      positionals.push(arg);
    }
  }

  const [directory, output, type, topN] = positionals;
  if (directory !== undefined) args.directory = directory;
  if (output !== undefined && !setByFlag.output) args.output = output;
  const positionalType = asType(type);
  if (positionalType !== undefined && !setByFlag.type) {
    args.type = positionalType;
  }
  const positionalTopN = asCount(topN);
  if (positionalTopN !== undefined && !setByFlag.topN) {
    args.topN = positionalTopN;
  }
  return args;
}
// Only run the CLI when this file is executed directly, not when it is
// imported as a module.
// @ts-ignore
const isEntryPoint = require.main === module;
if (isEntryPoint) {
  main();
}
// Export functions for use as a module.
// Only these five functions are exported; the other helpers in this file
// (language detection, file classification, import extraction, argument
// parsing) stay module-private.
export {
getFileInfo,
shouldSkip,
concatenateFiles,
getLargestFiles,
formatSize,
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment