Last active
December 10, 2024 17:16
-
-
Save Tetralux/60a6f8079a55a5e76ba53e3122510d0d to your computer and use it in GitHub Desktop.
A way to serialise strings along with a blob
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
A short demonstration of how atomized strings (strings turned into single ints to make comparison faster and storage simpler) can be serialised into a stream (or file),
alongside the blobs that refer to them, without doing anything more than just memcpy'ing everything into the stream.
It achieves this using the following techniques:
1) Atoms are stored in the Person structs only. They are i32s.
2) The Atomizer turns strings into i32s by copying their length (u32le) into a buffer, followed by the string data. The buffer offset at which that length prefix begins is the atom for that string.
3) Both the slice of Persons and the atomizer's buffer can be written directly into a stream without alteration. See the comment in main() that annotates the output.
Written by Tetralux 2024-10-12.
*/
package main | |
import "core:bytes"
import "core:encoding/endian"
import "core:fmt"
import "core:io"
import "core:mem"
import "core:slice"
import "core:strings"
// Person is the fixed-size record that gets copied byte-for-byte into the stream.
// It holds no pointers: the name is referenced indirectly through an Atom.
Person :: struct {
	// Atom identifying this person's name in the Atomizer's buffer.
	name: Atom,
	// One-byte race tag; see Race.
	race: Race,
}
// Race is stored as a single byte inside Person.
// The values are spelled out explicitly because they are written to the stream as-is.
Race :: enum u8 {
	UNKNOWN = 0,
	HUMAN   = 1,
	ORC     = 2,
	DRAGON  = 3,
	CAT     = 4,
}
// Atom stands in for a string: it is the byte offset of that string's
// length-prefixed record inside Atomizer.data.
Atom :: i32
// Atomizer interns strings and hands out an Atom for each distinct one.
Atomizer :: struct {
	// Back-to-back records, each a u32le length followed by that many bytes of string data.
	// This buffer is what gets written to the stream verbatim.
	data:  [dynamic]byte,
	// Lookup from a string to the Atom already assigned to it.
	names: map[string]Atom,
}
// atomize_safe returns the Atom for `s`, interning it first if it has not been seen before.
//
// A new string is appended to `x.data` as a u32le length followed by the string bytes;
// the Atom is the offset at which that length prefix starts.
//
// Inputs:
// - x: the atomizer to look up / intern into
// - s: the string to atomize; it is only borrowed for the duration of the call
//
// Returns:
// - atom: the Atom for `s` (only meaningful when `err` is nil)
// - err:  an allocator error if growing the buffer or copying the key failed
//
// NOTE: the length is stored as a u32 and the Atom is an i32, so strings of 4 GiB or more and
// buffers of 2 GiB or more are not representable.
atomize_safe :: proc(x: ^Atomizer, s: string) -> (atom: Atom, err: mem.Allocator_Error) {
	if existing, found := x.names[s]; found {
		return existing, nil
	}

	// Do every fallible allocation before touching the buffer, so that a failure can never
	// leave a length prefix in `x.data` without its string data.
	reserve(&x.data, len(x.data) + size_of(u32le) + len(s)) or_return

	// A map does not copy the bytes of a string key, so store a private copy rather than
	// the caller's string, which may not outlive this call.
	key := strings.clone(s) or_return

	atom = Atom(len(x.data))

	length := u32le(len(s))
	length_bytes := transmute([size_of(u32le)]byte)length
	append(&x.data, ..length_bytes[:]) or_return

	data := transmute([]byte)s
	append(&x.data, ..data) or_return

	x.names[key] = atom
	return atom, nil
}
// atomize is the convenience form of atomize_safe: it returns the Atom for `s`
// and asserts that no allocator error occurred.
atomize :: proc(x: ^Atomizer, s: string) -> Atom {
	result, alloc_err := atomize_safe(x, s)
	assert(alloc_err == nil)
	return result
}
// deatomize_safe looks up the string that `atom` refers to.
//
// Inputs:
// - x:    the atomizer whose buffer the atom indexes into
// - atom: an offset previously returned by atomize / atomize_safe (or loaded from a stream)
//
// Returns:
// - s:     the string stored at that offset; it is a view into `x.data`, not a copy,
//          so it is only valid until the buffer next grows or is cleared
// - found: false if the atom is negative, lies outside the buffer, or the record it points
//          at does not fit inside the buffer
//
// NOTE: an atom that points into the middle of a record cannot be detected; it will decode
// whatever bytes happen to be there.
deatomize_safe :: proc(x: ^Atomizer, atom: Atom) -> (s: string, found: bool) {
	if atom < 0 || int(atom) >= len(x.data) {
		return "", false
	}

	record := x.data[atom:]

	// Fails when fewer than four bytes remain, i.e. the length prefix itself is truncated.
	length, ok := endian.get_u32(record, .Little)
	if !ok {
		return "", false
	}

	// Reject a stored length that would run past the end of the buffer instead of
	// tripping a bounds check. Compared as u64 so the check is also correct where int is 32 bits.
	body := record[size_of(u32le):]
	if u64(length) > u64(len(body)) {
		return "", false
	}

	return string(body[:length]), true
}
// deatomize is the convenience form of deatomize_safe: it returns the string for `atom`
// and asserts that the lookup succeeded.
deatomize :: proc(x: ^Atomizer, atom: Atom) -> string {
	text, ok := deatomize_safe(x, atom)
	assert(ok)
	return text
}
// main round-trips three Persons and their names through an in-memory stream:
// it writes the Person records followed by the atomizer's buffer, prints the raw bytes,
// then reads both back and prints what was recovered.
main :: proc() {
	atomizer: Atomizer

	// Intern the names up front, in the same order the records use them.
	sarah     := atomize(&atomizer, "Sarah")
	stephanie := atomize(&atomizer, "Stephanie")
	bob       := atomize(&atomizer, "Bob")

	persons := []Person {
		{ name = sarah,     race = .DRAGON },
		{ name = stephanie, race = .CAT },
		{ name = bob,       race = .HUMAN },
	}

	buffer: bytes.Buffer
	stream := bytes.buffer_to_stream(&buffer)

	write_persons_to_stream(stream, persons)
	write_name_data(stream, &atomizer)

	fmt.printfln("%v", bytes.buffer_to_bytes(&buffer))
	// [3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 9, 0, 0, 0, 4, 0, 0, 0, 22, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 83, 97, 114, 97, 104, 9, 0, 0, 0, 83, 116, 101, 112, 104, 97, 110, 105, 101, 3, 0, 0, 0, 66, 111, 98]
	//
	// Reading that output left to right:
	//   3, 0, 0, 0                                    len(persons)
	//   0, 0, 0, 0  |  3  |  0, 0, 0                  atom 0   | race DRAGON | padding
	//   9, 0, 0, 0  |  4  |  0, 0, 0                  atom 9   | race CAT    | padding
	//   22, 0, 0, 0 |  1  |  0, 0, 0                  atom 22  | race HUMAN  | padding
	//   5, 0, 0, 0                                    len(Sarah)
	//   83, 97, 114, 97, 104                          "Sarah"
	//   9, 0, 0, 0                                    len(Stephanie)
	//   83, 116, 101, 112, 104, 97, 110, 105, 101     "Stephanie"
	//   3, 0, 0, 0                                    len(Bob)
	//   66, 111, 98                                   "Bob"

	// Read everything back from the same stream, in the order it was written.
	new_persons, _, load_persons_err := read_persons_from_stream(stream)
	assert(load_persons_err == nil)

	new_atomizer: Atomizer
	_, load_names_err := read_name_data(stream, &new_atomizer)
	assert(load_names_err == nil)

	fmt.printfln("new_persons=%v atoms=%v", new_persons, new_atomizer.names)
	// new_persons=[Person{name = 0, race = "DRAGON"}, Person{name = 9, race = "CAT"}, Person{name = 22, race = "HUMAN"}] atoms=map[Bob=22, Sarah=0, Stephanie=9]
}
// write_persons_to_stream writes `persons` to `w` as a u32le element count followed by the
// raw in-memory bytes of the slice (padding included).
//
// Inputs:
// - w:       the stream to write to
// - persons: the records to write
//
// Returns:
// - written: total number of bytes written, including the 4-byte count prefix and any
//            bytes that went out before an error occurred
// - err:     the first write error encountered, or nil
//
// NOTE(review): the records are copied exactly as they sit in memory, so the stream presumably
// has to be read back by a build with the same struct layout and byte order - confirm before
// using this as an interchange format.
write_persons_to_stream :: proc(w: io.Stream, persons: []Person) -> (written: int, err: io.Error) {
	count := u32le(len(persons))
	count_bytes := transmute([size_of(u32le)]byte)count

	n: int
	n, err = io.write_full(w, count_bytes[:])
	written += n
	if err != nil {
		return
	}

	n, err = io.write_full(w, slice.to_bytes(persons))
	written += n
	return
}
// write_name_data writes the atomizer's whole buffer to `w` unchanged.
// Returns the number of bytes written and any error reported by the stream.
write_name_data :: proc(w: io.Stream, x: ^Atomizer) -> (written: int, err: io.Error) {
	return io.write_full(w, x.data[:])
}
// read_persons_from_stream reads back what write_persons_to_stream wrote: a u32le element
// count followed by that many raw Person records.
//
// Inputs:
// - r:         the stream to read from
// - allocator: used to allocate the returned slice
//
// Returns:
// - persons:    the records read; owned by the caller (free with `delete(persons, allocator)`).
//               nil whenever `err` is not nil
// - bytes_read: total number of bytes consumed from the stream, including the count prefix
//               and any partial data read before an error
// - err:        .EOF if the stream ended before anything could be read, .Unexpected_EOF if it
//               ended part-way through, .Unknown if the slice could not be allocated,
//               otherwise whatever the stream reported
read_persons_from_stream :: proc(r: io.Stream, allocator := context.allocator) -> (persons: []Person, bytes_read: int, err: io.Error) {
	count_bytes: [size_of(u32le)]byte

	n: int
	n, err = io.read_full(r, count_bytes[:])
	bytes_read += n
	if err != nil {
		return
	}
	count, _ := endian.get_u32(count_bytes[:], .Little)

	people, alloc_err := make([]Person, int(count), allocator)
	if alloc_err != nil {
		// io.Error has no out-of-memory value; report the failure rather than
		// returning an empty slice as if the stream had contained no records.
		err = .Unknown
		return
	}

	// The records are contiguous in the stream, so fill the whole slice with a single read.
	n, err = io.read_full(r, slice.to_bytes(people))
	bytes_read += n
	if err != nil {
		// Do not leak the allocation or hand back half-filled records.
		delete(people, allocator)
		return
	}

	persons = people
	return
}
// read_name_data rebuilds an Atomizer from a stream of back-to-back records, each a u32le
// length followed by that many bytes of string data, reading until the stream ends.
//
// Any existing contents of `x` are cleared first. The bytes are stored in `x.data` exactly as
// they appear in the stream, so atoms saved alongside them remain valid offsets.
//
// Inputs:
// - r: the stream to read from
// - x: the atomizer to fill
//
// Returns:
// - bytes_read: total number of bytes consumed from the stream, including string data and
//               any partial data read before an error
// - err:        nil if the stream ended cleanly on a record boundary, .Unexpected_EOF if it
//               ended inside a record, .Unknown if memory could not be allocated, otherwise
//               whatever the stream reported. Records completed before an error are kept.
//
// NOTE: keys that were in `x.names` before the call are dropped but not freed, because this
// procedure cannot tell whether the atomizer owned them.
read_name_data :: proc(r: io.Stream, x: ^Atomizer) -> (bytes_read: int, err: io.Error) {
	PREFIX_SIZE :: size_of(u32le)

	clear(&x.data)
	clear(&x.names)

	for {
		length_bytes: [PREFIX_SIZE]byte
		n, read_err := io.read_full(r, length_bytes[:])
		bytes_read += n
		if read_err == .EOF {
			// Nothing left to read at a record boundary: this is the normal way out.
			break
		}
		if read_err != nil {
			err = read_err
			return
		}
		length, _ := endian.get_u32(length_bytes[:], .Little)

		// The atom is the offset at which this record's length prefix starts.
		atom := len(x.data)
		if resize(&x.data, atom + PREFIX_SIZE + int(length)) != nil {
			// io.Error has no out-of-memory value.
			err = .Unknown
			return
		}
		copy(x.data[atom:], length_bytes[:])

		dst := x.data[atom + PREFIX_SIZE:]
		n, read_err = io.read_full(r, dst)
		bytes_read += n
		if read_err != nil {
			// Drop the half-read record so the buffer only ever holds complete ones.
			resize(&x.data, atom)
			if read_err == .EOF {
				// A length prefix with no data behind it means the stream was cut short.
				read_err = .Unexpected_EOF
			}
			err = read_err
			return
		}

		// The key must not point into x.data: that buffer is reallocated as it grows, which
		// would leave every earlier key dangling. Store an independent copy instead.
		key, key_err := strings.clone(string(dst))
		if key_err != nil {
			resize(&x.data, atom)
			err = .Unknown
			return
		}
		x.names[key] = Atom(atom)
	}
	return
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment