Skip to content

Instantly share code, notes, and snippets.

@corlinp
Last active January 24, 2025 14:36

Revisions

  1. corlinp revised this gist Jan 4, 2024. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion 1brc_corlinp.go
    Original file line number Diff line number Diff line change
    @@ -12,10 +12,11 @@ Still, it's currently 2X faster than the best Java implementation (on my machine
    Script Time (s)
    calculate_average_corlinp.sh 5.092
    calculate_average_warpspeedlabs.sh 8.183 (Go)
    calculate_average_ddimtirov.sh 10.183
    calculate_average_ebarlas.sh 11.625
    calculate_average_royvanrijn.sh 11.970
    calculate_average_AlexanderYastrebov.sh 14.114
    calculate_average_AlexanderYastrebov.sh 14.114 (Go)
    calculate_average_filiphr.sh 15.038
    calculate_average_palmr.sh 15.359
    calculate_average_spullara.sh 16.350
  2. corlinp revised this gist Jan 4, 2024. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions 1brc_corlinp.go
    Original file line number Diff line number Diff line change
    @@ -129,6 +129,7 @@ type CityName [64]byte
    // [3] = max temperature
    type CityTemp *[4]int64

    // worker parses the lines in a chunk and performs some calculations
    func worker(validChunks <-chan []byte, results chan<- map[CityName]CityTemp) {
    cityTemps := make(map[CityName]CityTemp, 512)
    var city CityName
  3. corlinp revised this gist Jan 4, 2024. 1 changed file with 15 additions and 15 deletions.
    30 changes: 15 additions & 15 deletions 1brc_corlinp.go
    Original file line number Diff line number Diff line change
    @@ -11,30 +11,30 @@ A fair amount of optimization has been done to reduce memory allocations.
    Still, it's currently 2X faster than the best Java implementation (on my machine, a Macbook M3 Pro).
    Script Time (s)
    calculate_average_corlinp.sh 5.092
    calculate_average_ddimtirov.sh 10.183
    calculate_average_ebarlas.sh 11.625
    calculate_average_corlinp.sh 5.092
    calculate_average_ddimtirov.sh 10.183
    calculate_average_ebarlas.sh 11.625
    calculate_average_royvanrijn.sh 11.970
    calculate_average_AlexanderYastrebov.sh 14.114
    calculate_average_filiphr.sh 15.038
    calculate_average_palmr.sh 15.359
    calculate_average_spullara.sh 16.350
    calculate_average_seijikun.sh 20.214
    calculate_average_padreati.sh 22.460
    calculate_average_richardstartin.sh 22.496
    calculate_average_bjhara.sh 23.166
    calculate_average_filiphr.sh 15.038
    calculate_average_palmr.sh 15.359
    calculate_average_spullara.sh 16.350
    calculate_average_seijikun.sh 20.214
    calculate_average_padreati.sh 22.460
    calculate_average_richardstartin.sh 22.496
    calculate_average_bjhara.sh 23.166
    calculate_average_criccomini.sh 23.378
    calculate_average_truelive.sh 25.741
    calculate_average_truelive.sh 25.741
    calculate_average_khmarbaise.sh 41.079
    calculate_average_kuduwa-keshavram.sh 45.390
    calculate_average_itaske.sh 50.250
    calculate_average.sh 162.420
    calculate_average_kuduwa-keshavram.sh 45.390
    calculate_average_itaske.sh 50.250
    calculate_average.sh 162.420
    For some fun comparisons, it's even faster than wc -l, but still slower than piping it to /dev/null:
    cat measurements.txt > /dev/null 3.522
    wc -l measurements.txt 11.892
    wc -l measurements.txt 11.892
    */

  4. corlinp created this gist Jan 4, 2024.
    270 changes: 270 additions & 0 deletions 1brc_corlinp.go
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,270 @@
    /*
    Corlin Palmer's Go solution to the 1BRC coding challenge: https://github.com/gunnarmorling/1brc
    - This solution reads the file sequentially as fast as possible (reader)
    - It passes off the job of ensuring that each chunk ends with a complete line to another goroutine (lineSupervisor)
    - The lineSupervisor sends valid chunks to a pool of worker goroutines (worker) which parse the data and calculate the results
    - The results from the workers are collected in a map and then sorted before printing the final results
    A fair amount of optimization has been done to reduce memory allocations.
    Still, it's currently 2X faster than the best Java implementation (on my machine, a Macbook M3 Pro).
    Script Time (s)
    calculate_average_corlinp.sh 5.092
    calculate_average_ddimtirov.sh 10.183
    calculate_average_ebarlas.sh 11.625
    calculate_average_royvanrijn.sh 11.970
    calculate_average_AlexanderYastrebov.sh 14.114
    calculate_average_filiphr.sh 15.038
    calculate_average_palmr.sh 15.359
    calculate_average_spullara.sh 16.350
    calculate_average_seijikun.sh 20.214
    calculate_average_padreati.sh 22.460
    calculate_average_richardstartin.sh 22.496
    calculate_average_bjhara.sh 23.166
    calculate_average_criccomini.sh 23.378
    calculate_average_truelive.sh 25.741
    calculate_average_khmarbaise.sh 41.079
    calculate_average_kuduwa-keshavram.sh 45.390
    calculate_average_itaske.sh 50.250
    calculate_average.sh 162.420
    For some fun comparisons, it's even faster than wc -l, but still slower than piping it to /dev/null:
    cat measurements.txt > /dev/null 3.522
    wc -l measurements.txt 11.892
    */

    package main

    import (
    "bytes"
    "fmt"
    "io"
    "os"
    "sort"
    "sync"
    )

    // reader reads raw chunks from the file and sends them to the rawChunks channel.
    func reader(file *os.File, rawChunks chan<- []byte) {
    const chunkSize = 256 * 1024
    buf := make([]byte, chunkSize)

    for {
    bytesRead, err := file.Read(buf)
    if bytesRead > 0 {
    chunk := make([]byte, bytesRead)
    copy(chunk, buf[:bytesRead])
    rawChunks <- chunk
    }
    if err != nil {
    if err != io.EOF {
    fmt.Printf("Error reading file: %v\n", err)
    }
    break
    }
    }
    close(rawChunks)
    }

    // processChunk processes a single chunk and returns the valid part and the leftover part.
    func processChunk(chunk, leftover []byte) (validPart, newLeftover []byte) {
    // Find the first and last newline to determine the valid part of the chunk
    firstNewline := bytes.Index(chunk, []byte{'\n'})
    lastNewline := bytes.LastIndex(chunk, []byte{'\n'})

    if firstNewline != -1 {
    // There's a complete line at the beginning of the chunk
    validPart = append(leftover, chunk[:firstNewline+1]...)
    leftover = leftover[:0] // Clear the leftover
    } else {
    // No complete line at the start, append the whole chunk to the leftover
    leftover = append(leftover, chunk...)
    }

    if lastNewline != -1 && firstNewline != lastNewline {
    // There's at least one complete line in this chunk
    // Include the middle part of the chunk, which contains only complete lines
    validPart = append(validPart, chunk[firstNewline+1:lastNewline+1]...)

    // Store the dangling end (if any) in newLeftover for the next chunk
    newLeftover = append(newLeftover, chunk[lastNewline+1:]...)
    } else {
    // No complete lines or only one complete line in this chunk
    newLeftover = leftover
    }

    return validPart, newLeftover
    }

    // lineSupervisor takes raw chunks, ensures they end with complete lines, and sends valid chunks to the workers.
    func lineSupervisor(rawChunks <-chan []byte, validChunks chan<- []byte) {
    buffer := make([]byte, 0) // Buffer to hold the dangling ends

    for chunk := range rawChunks {
    validPart, newBuffer := processChunk(chunk, buffer)
    if len(validPart) > 0 {
    validChunks <- validPart
    }
    buffer = newBuffer
    }

    // Handle any data left in the buffer after all chunks have been processed
    if len(buffer) > 0 {
    validChunks <- buffer
    }

    close(validChunks)
    }

    type CityName [64]byte

    // [0] = sum of all temperatures
    // [1] = number of temperatures
    // [2] = min temperature
    // [3] = max temperature
    type CityTemp *[4]int64

    func worker(validChunks <-chan []byte, results chan<- map[CityName]CityTemp) {
    cityTemps := make(map[CityName]CityTemp, 512)
    var city CityName
    cityLen := 0
    var temp int64
    var isNegative bool

    for chunk := range validChunks {
    for _, b := range chunk {
    switch b {
    case ';': // Delimiter between city and temperature
    cityLen = 64
    isNegative = false
    case '\n':
    if isNegative {
    temp = -temp
    }
    ct := cityTemps[city]
    if ct == nil {
    minMaxTemp := temp
    if isNegative {
    minMaxTemp = -minMaxTemp
    }
    cityTemps[city] = &[4]int64{temp, 1, minMaxTemp, minMaxTemp}
    } else {
    ct[0] += temp
    ct[1]++
    if temp < ct[2] {
    ct[2] = temp
    }
    if temp > ct[3] {
    ct[3] = temp
    }
    }
    // Reset for the next line
    city = CityName{}
    cityLen = 0
    temp = 0
    isNegative = false
    case '.':
    continue // Skip the decimal point, we know all numbers have 1 decimal place
    case '-':
    isNegative = true // Next number will be negative
    default:
    if cityLen < 44 {
    city[cityLen] = b
    cityLen++
    } else {
    // Inline parsing of temperature
    temp = temp*10 + int64(b-'0')
    }
    }
    }
    }

    results <- cityTemps
    }

    func main() {
    if len(os.Args) < 2 {
    fmt.Println("Error: First arg should be the measurements.txt file path")
    return
    }

    filePath := os.Args[1]
    file, err := os.Open(filePath)
    if err != nil {
    fmt.Printf("Error opening file: %v\n", err)
    return
    }
    defer file.Close()

    numWorkers := 256
    rawChunks := make(chan []byte, 16384) // buffered channel for raw chunks from the file
    validChunks := make(chan []byte, 16384) // buffered channel for valid chunks ending with a complete line
    results := make(chan map[CityName]CityTemp, numWorkers) // buffered channel for results from the workers

    // Start the reader
    go reader(file, rawChunks)

    // Start the line supervisor
    go lineSupervisor(rawChunks, validChunks)

    var wg sync.WaitGroup

    // Start worker goroutines
    for i := 0; i < numWorkers; i++ {
    wg.Add(1)
    go func() {
    defer wg.Done()
    worker(validChunks, results)
    }()
    }

    // Collect results
    go func() {
    wg.Wait()
    close(results)
    }()

    finalResults := make(map[CityName]CityTemp)
    for cityTemps := range results {
    for city, ct := range cityTemps {
    finalCt := finalResults[city]
    if finalCt == nil {
    finalCt = new([4]int64)
    }
    finalCt[0] += ct[0]
    finalCt[1] += ct[1]
    if ct[2] < finalCt[2] {
    finalCt[2] = ct[2]
    }
    if ct[3] > finalCt[3] {
    finalCt[3] = ct[3]
    }
    finalResults[city] = finalCt
    }
    }

    // Sort results
    allCities := make([]CityName, 0, len(finalResults))
    for city := range finalResults {
    allCities = append(allCities, city)
    }
    sort.Slice(allCities, func(i, j int) bool {
    return bytes.Compare(allCities[i][:], allCities[j][:]) < 0
    })

    // Calculate and print the final results
    fmt.Print("{")
    for i, city := range allCities {
    ct := finalResults[city]
    fmt.Printf("%s=%.1f/%.1f/%.1f", city[:], float64(ct[2])/10, float64(ct[0])/float64(ct[1])/10, float64(ct[3])/10)
    if i < len(allCities)-1 {
    fmt.Print(", ")
    }
    }
    fmt.Println("}")
    }