Created
June 11, 2025 14:29
-
-
Save adoublef/45ec386fcba65e615b56ad7aeff02d04 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"context" | |
"crypto/md5" | |
"fmt" | |
"log" | |
"os" | |
"path/filepath" | |
"golang.org/x/sync/errgroup" | |
) | |
func main() { | |
m, err := MD5All(context.Background(), ".") | |
if err != nil { | |
log.Fatal(err) | |
} | |
for k, sum := range m { | |
fmt.Printf("%s:\t%x\n", k, sum) | |
} | |
} | |
// MD5All reads all the files in the file tree rooted at root and returns a map | |
// from file path to the MD5 sum of the file's contents. If the directory walk | |
// fails or any read operation fails, MD5All returns an error. | |
func MD5All(ctx context.Context, root string) (map[string][md5.Size]byte, error) { | |
// ctx is canceled when g.Wait() returns. When this version of MD5All returns | |
// - even in case of error! - we know that all of the goroutines have finished | |
// and the memory they were using can be garbage-collected. | |
g, ctx := errgroup.WithContext(ctx) | |
g.SetLimit(10) | |
paths := make(chan string) | |
g.Go(func() error { | |
defer close(paths) | |
return filepath.Walk(root, func(path string, info os.FileInfo, err error) error { | |
if err != nil { | |
return err | |
} | |
if !info.Mode().IsRegular() { | |
return nil | |
} | |
select { | |
case paths <- path: | |
case <-ctx.Done(): | |
return ctx.Err() | |
} | |
return nil | |
}) | |
}) | |
// Start a fixed number of goroutines to read and digest files. | |
c := make(chan result) | |
const numDigesters = 20 | |
for i := 0; i < numDigesters; i++ { | |
g.Go(func() error { | |
for path := range paths { | |
data, err := os.ReadFile(path) | |
if err != nil { | |
return err | |
} | |
select { | |
case c <- result{path, md5.Sum(data)}: | |
case <-ctx.Done(): | |
return ctx.Err() | |
} | |
} | |
return nil | |
}) | |
} | |
go func() { | |
g.Wait() | |
close(c) | |
}() | |
m := make(map[string][md5.Size]byte) | |
for r := range c { | |
m[r.path] = r.sum | |
} | |
// Check whether any of the goroutines failed. Since g is accumulating the | |
// errors, we don't need to send them (or check for them) in the individual | |
// results sent on the channel. | |
if err := g.Wait(); err != nil { | |
return nil, err | |
} | |
return m, nil | |
} | |
// result pairs a file path with the MD5 digest of that file's contents. It is
// the value passed from the digester goroutines to the collector in MD5All.
//
// Field order matters: MD5All builds values with an unkeyed composite literal.
type result struct {
	// path is the file path as reported by the directory walk.
	path string
	// sum is the MD5 checksum of the file's bytes.
	sum [md5.Size]byte
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment