Last active
May 28, 2023 00:18
-
-
Save Stef-Gijsberts/f1f9abe0a082b050c9fba8f9d6b6f924 to your computer and use it in GitHub Desktop.
A stub for a parser combinator in zig
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// (Use zig 0.11) | |
// | |
// Idea for further exploration: remove inferable arguments from all functions. | |
// For example the types in the 'map' function. Or the type in the 'match' | |
// function. I think this is doable, but it produces a lot of code and is a bit | |
// error-prone. So maybe I'll do it at the end, after expanding the parser. | |
const std = @import("std"); | |
const testing = std.testing; | |
fn POut(comptime T: type) type { | |
return struct { res: T, rem: []const u8 }; | |
} | |
fn P(comptime T: type) type { | |
return fn (s: []const u8) anyerror!POut(T); | |
} | |
fn one() P(u8) { | |
return struct { | |
fn parse(s: []const u8) !POut(u8) { | |
if (s.len == 0) { | |
return error.unexpectedEoi; | |
} | |
return .{ .res = s[0], .rem = s[1..] }; | |
} | |
}.parse; | |
} | |
fn map(comptime T: type, comptime U: type, comptime f: fn (T) anyerror!U, comptime p: P(T)) P(U) { | |
return struct { | |
fn parse(s: []const u8) !POut(U) { | |
const out1 = try p(s); | |
const result = try f(out1.res); | |
return .{ .res = result, .rem = out1.rem }; | |
} | |
}.parse; | |
} | |
fn match(comptime T: type, needle: T, comptime p: P(T)) P(T) { | |
const f = struct { | |
fn call(x: T) !T { | |
if (x != needle) { | |
return error.noMatch; | |
} | |
return x; | |
} | |
}.call; | |
return map(T, T, f, p); | |
} | |
fn seq(comptime T: type, comptime N: comptime_int, comptime parsers: [N]P(T)) P([N]T) { | |
return struct { | |
fn parse(s: []const u8) !POut([N]T) { | |
var results: [N]T = undefined; | |
var remainder = s; | |
inline for (parsers, 0..) |parser, i| { | |
const out = try parser(remainder); | |
results[i] = out.res; | |
remainder = out.rem; | |
} | |
return .{ .res = results, .rem = remainder }; | |
} | |
}.parse; | |
} | |
test "'one' parser" { | |
const parser = one(); | |
const out = try parser("abcd"); | |
try testing.expectEqual(@as(u8, 'a'), out.res); | |
try testing.expectEqualStrings("bcd", out.rem); | |
} | |
test "'one' parser with map" { | |
// Anonymous function 'f' that maps from u8 to i16 | |
const f = struct { | |
fn call(x: u8) !i16 { | |
return @as(i16, x); | |
} | |
}.call; | |
const parser = map(u8, i16, f, one()); | |
const out = try parser("abcd"); | |
try testing.expectEqual(@as(i16, 'a'), out.res); | |
try testing.expectEqualStrings("bcd", out.rem); | |
} | |
test "'one' parser on empty string" { | |
const parser = one(); | |
const out = parser(""); | |
try testing.expectError(error.unexpectedEoi, out); | |
} | |
test "'one' parser twice" { | |
const parser = seq(u8, 2, .{ one(), one() }); | |
const out = try parser("abcd"); | |
try testing.expectEqualSlices(u8, &[_]u8{ 'a', 'b' }, &out.res); | |
try testing.expectEqualStrings("cd", out.rem); | |
} | |
test "'one' parser three times" { | |
const parser = seq(u8, 3, .{ one(), one(), one() }); | |
const out = try parser("abcd"); | |
try testing.expectEqualSlices(u8, &[_]u8{ 'a', 'b', 'c' }, &out.res); | |
try testing.expectEqualStrings("d", out.rem); | |
} | |
test "'one' parser three times on string with length 2" { | |
const parser = seq(u8, 3, .{ one(), one(), one() }); | |
const out = parser("ab"); | |
try testing.expectError(error.unexpectedEoi, out); | |
} | |
test "matching single char" { | |
const parser = match(u8, 'a', one()); | |
const out = try parser("abcd"); | |
try testing.expectEqual(@as(u8, 'a'), out.res); | |
try testing.expectEqualStrings("bcd", out.rem); | |
} | |
test "not matching single char" { | |
const parser = match(u8, 'z', one()); | |
const out = parser("abcd"); | |
try testing.expectError(error.noMatch, out); | |
} | |
/// Some extra function (fun to look at this in compiler explorer!) | |
export fn b_in_the_middle(cs: [*c] u8) isize { | |
const parser = seq(u8, 3, .{ one(), match(u8, 'b', one()), one() }); | |
const s = std.mem.span(cs); | |
const out = parser(s) catch return -1; | |
return out.res.len; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment