Created
June 2, 2025 15:47
-
-
Save skeeto/41a6cc059cc590a691fa87577eca8632 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://old.reddit.com/r/C_Programming/comments/1l1hxeu | |
#include <assert.h> | |
#include <stddef.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
// Str: a counted string, i.e. a pointer plus an explicit length. Nothing
// in this file relies on a terminating NUL.
typedef struct {
    char     *data;  // first byte of the string
    ptrdiff_t len;   // number of bytes at data
} Str;

// S("literal"): wrap a string literal as a Str. sizeof counts the
// literal's terminating NUL, hence the -1. Only valid for literals or
// char arrays, never for plain pointers (sizeof would give pointer size).
#define S(s) (Str){s, (ptrdiff_t)(sizeof(s)-1)}
// Return 1 when a and b have the same length and the same bytes,
// otherwise 0.
static _Bool equals(Str a, Str b)
{
    if (a.len != b.len) {
        return 0;
    }
    // Two empty strings are equal without looking at data; this also
    // keeps memcmp from ever being called with a zero length, where data
    // may be a null pointer (e.g. a Str initialized with {0}).
    return !a.len || !memcmp(a.data, b.data, (size_t)a.len);
}
// Return the substring s[beg, end): it starts at byte offset beg and has
// end-beg bytes. No bytes are copied; the result points into s.
//
// Requires 0 <= beg <= end <= s.len, which is checked with assert().
static Str slice(Str s, ptrdiff_t beg, ptrdiff_t end)
{
    assert(beg>=0 && beg<=end && end<=s.len);
    return (Str){s.data+beg, end-beg};
}
// Return 1 when c is a space, tab, line feed, or carriage return,
// otherwise 0. No other characters are treated as whitespace.
static _Bool whitespace(char c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return 1;
    default:
        return 0;
    }
}
// Return s with all leading whitespace (as defined by whitespace())
// removed. The result points into s; nothing is copied.
static Str lstrip(Str s)
{
    while (s.len && whitespace(*s.data)) {
        s = slice(s, 1, s.len);  // drop the first byte
    }
    return s;
}
static Str readfile(FILE *f) | |
{ | |
Str r = {0}; | |
for (;;) { | |
int grow = 1<<14; // amount to read at a time | |
void *new = realloc(r.data, r.len+grow); | |
if (!new) { | |
return r; // TODO: handle allocation failure | |
} | |
r.data = new; | |
ptrdiff_t n = fread(r.data+r.len, 1, grow, f); | |
r.len += n; | |
if (n < grow) { | |
return r; // TODO: ferror() to check read error | |
} | |
} | |
} | |
typedef struct { | |
Str token; | |
Str tail; | |
} Result; | |
// Return 1 when c ends a word token: whitespace, '"', '<', '>', or '='.
// (The '"' comparison used to be written twice; the duplicate was
// redundant and has been dropped without changing the result.)
static _Bool special(char c)
{
    return whitespace(c) || c=='"' || c=='<' || c=='>' || c=='=';
}
// Split the next token off the front of s.
//
// Leading whitespace is skipped first. If nothing remains, a zeroed
// Result is returned (token.len == 0), which signals the end of input.
// Otherwise the token is one of:
//   - "<!" or "</"  when '<' is directly followed by '!' or '/'
//   - "<", ">", or "\""  as a single-character token
//   - any other character, followed by every character up to (but not
//     including) the next special() character
// Note that '=' has no case of its own: a token that begins with '='
// also takes any non-special characters directly after it (e.g. "=foo"),
// and is exactly "=" only when followed by a special character or the
// end of input.
//
// Both token and tail point into s; nothing is copied.
static Result tokenize(Str s)
{
    Result r = {0};

    s = lstrip(s);
    if (!s.len) {
        return r;  // done
    }

    ptrdiff_t toklen = 1;
    switch (*s.data) {
    case '<':
        if (s.len > 1 && (s.data[1]=='!' || s.data[1]=='/')) {
            toklen = 2;
        }
        break;
    case '>':
    case '"':
        break;
    default:
        while (toklen<s.len && !special(s.data[toklen])) {
            toklen++;
        }
        break;
    }

    r.token = slice(s, 0, toklen);
    r.tail  = slice(s, toklen, s.len);
    return r;
}
int main(void) | |
{ | |
Result r = {0}; | |
r.tail = readfile(stdin); | |
for (;;) { | |
r = tokenize(r.tail); | |
if (!r.token.len) break; | |
if (!equals(r.token, S("class"))) { | |
continue; | |
} | |
r = tokenize(r.tail); | |
if (!r.token.len) break; | |
if (!equals(r.token, S("="))) { | |
continue; | |
} | |
r = tokenize(r.tail); | |
if (!r.token.len) break; | |
if (!equals(r.token, S("\""))) { | |
continue; | |
} | |
for (;;) { | |
r = tokenize(r.tail); | |
if (!r.token.len || equals(r.token, S("\""))) break; | |
fwrite(r.token.data, 1, r.token.len, stdout); | |
putchar('\n'); | |
} | |
} | |
fflush(stdout); | |
return ferror(stdout); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment