Skip to content

Instantly share code, notes, and snippets.

@jlewin
Created June 27, 2013 18:23

Revisions

  1. jlewin created this gist Jun 27, 2013.
    20 changes: 20 additions & 0 deletions ListDuplicateFiles.cs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,20 @@
    /// <summary>
    /// Scans the files directly inside a directory and prints every set of files
    /// whose content hash (as computed by <c>FileHash.CalculateFromFile</c>) is identical.
    /// </summary>
    /// <param name="directory">The directory whose files are compared</param>
    private void ListDuplicateFiles(string directory)
    {
        // Bucket the file paths by content hash and keep only the buckets
        // holding more than one file, i.e. the actual duplicates.
        var duplicateSets =
            from path in Directory.GetFiles(directory)
            group path by FileHash.CalculateFromFile(path) into sameContent
            where sameContent.Count() > 1
            select sameContent;

        foreach (var sameContent in duplicateSets)
        {
            // Header line carrying the hash shared by every file in the set
            Console.WriteLine("--- Dupe ({0})-------------------------------", sameContent.Key);

            // One line per duplicate, file name only (directory part stripped)
            foreach (var path in sameContent)
            {
                Console.WriteLine("\t {0}", Path.GetFileName(path));
            }
        }
    }
    21 changes: 21 additions & 0 deletions Supporting-FileHash.cs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,21 @@
    /// <summary>
    /// Helpers for fingerprinting the content of files.
    /// </summary>
    public static class FileHash
    {
        /// <summary>
        /// Generate an MD5 hash of the given file
        /// </summary>
        /// <param name="filename">Path of the file whose content is hashed</param>
        /// <returns>The MD5 digest of the file content, encoded as a Base64 string</returns>
        /// <remarks>
        /// MD5 serves here as a content fingerprint only; it must not be relied on
        /// for anything security sensitive.
        /// </remarks>
        public static string CalculateFromFile(string filename)
        {
            // Stream the file through the hash instead of loading it fully into
            // memory, so large files are handled without a matching allocation.
            // Both the stream and the hash algorithm are released deterministically.
            using (FileStream stream = File.OpenRead(filename))
            using (MD5 md5 = MD5.Create())
            {
                byte[] digest = md5.ComputeHash(stream);
                return Convert.ToBase64String(digest);
            }
        }
    }