A simplistic tool to search HTML files.
/* ******************************************************** | |
* Copyright ©2024 Rundata Systems. All rights reserved. | |
* This project is licensed under the GPLv3 License. You | |
* can find a copy of this license at: | |
* https://www.gnu.org/licenses/gpl-3.0.en.html | |
*/ | |
#warning TODO: Split this into multiple files | |
#warning INCOMPLETE: Implement searching using compiled query | |
/* ******************************************************** | |
* I call this program from shell scripts that scrape web-pages. | |
* | |
* It pairs quite nicely with curl: fetch a URL with curl and | |
* search it with htmlq. | |
*/ | |
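/* ********************************************************
* Example invocation (an illustrative sketch only: the URL, file name
* and query string below are placeholders, and query execution is still
* incomplete, as the #warning above notes):
*
*    curl -s https://example.com/page.html > page.html
*    ./htmlq -f page.html -q 'div > .myClass'
*/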
/* ******************************************************** | |
* Compiled with: | |
* gcc -W -Wall -Wextra -g htmlq.c -o htmlq | |
* | |
* The easiest way to compile is to copy and paste the command
* above into the command-line.
*/ | |
// Standard headers | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdbool.h> | |
#include <stdint.h> | |
#include <inttypes.h> | |
#include <string.h> | |
#include <ctype.h> | |
#include <stdarg.h> | |
#define FPRINTF(f,...) do {\
fprintf (f, "%s:%i in %s(): ", __FILE__, __LINE__, __func__);\
fprintf (f, __VA_ARGS__);\
} while (0)
#define VERSION "0.0.1" | |
/* ******************************************************** | |
* util.c module | |
*/ | |
static bool tprintf (char **dst, const char *fmts, ...)
{
va_list ap, ap_copy;
va_start (ap, fmts);
va_copy (ap_copy, ap);
size_t curlen = *dst ? strlen (*dst) : 0;
int newlen = vsnprintf (NULL, 0, fmts, ap_copy);
va_end (ap_copy);
if (newlen < 0) {
FPRINTF (stderr, "Encoding error while formatting string\n");
va_end (ap);
return false;
}
char *tmp = realloc (*dst, curlen + newlen + 10);
if (!tmp) {
FPRINTF (stderr, "OOM error reallocating formatted string\n");
va_end (ap);
return false;
}
*dst = tmp;
vsprintf (&tmp[curlen], fmts, ap);
va_end (ap);
return true;
}
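/* ********************************************************
* Usage sketch for tprintf(): the same pattern the test functions at the
* bottom of this file use. Formatted text is appended to a growing,
* heap-allocated string which is freed once at the end.
*
*    char *out = NULL;
*    tprintf (&out, "count=%zu\n", (size_t)42);
*    tprintf (&out, "name=%s\n", "example");
*    // ... use out ...
*    free (out);
*/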
static char *sstrdup (const char *src) | |
{ | |
if (!src) | |
return NULL; | |
char *ret = NULL; | |
size_t nbytes = strlen (src) + 1; | |
if (!(ret = malloc (nbytes))) { | |
FPRINTF (stderr, "OOM error allocating new string from [%s]\n", src); | |
return NULL; | |
} | |
return strcpy (ret, src); | |
} | |
size_t strdiff (const char **lhs, const char **rhs)
{
size_t count = 0;
if (!lhs || !rhs || !*lhs || !*rhs) {
return 0;
}
while (**lhs && **rhs && **lhs == **rhs) {
(*lhs)++;
(*rhs)++;
count++;
}
// Identical strings end at the same time and compare as no difference.
if (**lhs == **rhs) {
return 0;
}
return count;
}
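/* ********************************************************
* Behaviour sketch for strdiff(), as selector_test() below uses it: both
* pointers are advanced past the common prefix; the prefix length is
* returned when the strings differ, and 0 when they are identical.
*
*    const char *a = "abcX";
*    const char *b = "abcY";
*    size_t n = strdiff (&a, &b);   // n == 3, a points at "X", b at "Y"
*/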
static bool sstricmp (const char *lhs, const char *rhs) | |
{ | |
if (!lhs || !rhs) | |
return false; | |
size_t len1 = strlen (lhs), | |
len2 = strlen (rhs); | |
if (len1 != len2) | |
return false; | |
for (size_t i=0; i<len1; i++) { | |
if ((tolower ((unsigned char)lhs[i]) != tolower ((unsigned char)rhs[i])))
return false; | |
} | |
return true; | |
} | |
static bool is_space (int c)
{
return isspace ((unsigned char)c) ? true : false;
}
static bool not_space (int c)
{
return isspace ((unsigned char)c) || c == 0 ? false : true;
}
static char *file_slurp (const char *fname, size_t *len) | |
{ | |
bool error = true; | |
char *ret = NULL; | |
FILE *inf = NULL; | |
if (!(inf = fopen (fname, "r"))) { | |
FPRINTF (stderr, "Failed to open [%s] for reading: %m\n", fname); | |
goto cleanup; | |
} | |
if ((fseek (inf, 0, SEEK_END)) != 0) { | |
FPRINTF (stderr, "Error setting file position: %m\n"); | |
goto cleanup; | |
} | |
long flen = ftell (inf);
if (flen < 0) {
FPRINTF (stderr, "Error reading file position: %m\n");
goto cleanup;
}
if (!(ret = calloc (flen + 1, 1))) {
FPRINTF (stderr, "OOM error allocating buffer for file\n");
goto cleanup;
}
if ((fseek (inf, 0, SEEK_SET)) != 0) { | |
FPRINTF (stderr, "Error setting file position: %m\n"); | |
goto cleanup; | |
} | |
size_t nbytes = fread (ret, 1, flen, inf); | |
if (nbytes != (size_t)flen) { | |
FPRINTF (stderr, "Unexpected number of bytes read in [%zu vs %li]: %m\n", | |
nbytes, flen); | |
goto cleanup; | |
} | |
if (len) { | |
*len = nbytes; | |
} | |
error = false; | |
cleanup: | |
if (inf) { | |
fclose (inf); | |
} | |
if (error) { | |
free (ret); | |
ret = NULL; | |
} | |
return ret; | |
} | |
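/* ********************************************************
* Usage sketch for file_slurp(), mirroring node_read() below (the file
* name is a placeholder): slurp a whole file into a nul-terminated heap
* buffer, optionally retrieving its length.
*
*    size_t len = 0;
*    char *text = file_slurp ("input.html", &len);
*    if (text) {
*       // ... use text[0..len-1] ...
*       free (text);
*    }
*/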
/* ******************************************************** | |
* list.c module | |
* A datatype for arrays of *things* ... with a deallocate | |
* function compatible with `free`. | |
* | |
* The list is strictly ordered. The order stored and returned is | |
* exactly the same as the order of `append` calls. | |
*/ | |
typedef struct { | |
void **items; | |
size_t nitems; | |
void (*dealloc) (void *); | |
} list_t; | |
static void list_free (list_t **list) | |
{ | |
if (!list || !*list) | |
return; | |
for (size_t i=0; (*list)->dealloc && i < (*list)->nitems; i++) { | |
(*list)->dealloc ((void *)(*list)->items[i]); | |
} | |
free ((void *)(*list)->items); | |
free (*list); | |
*list = NULL; | |
} | |
static list_t *list_new (void (*dealloc) (void *)) | |
{ | |
list_t *ret = calloc (1, sizeof *ret); | |
if (!ret) { | |
FPRINTF (stderr, "OOM error allocating list_t\n"); | |
return NULL; | |
} | |
ret->dealloc = dealloc; | |
return ret; | |
} | |
static const void *list_append (list_t *list, const void *item) | |
{ | |
void **tmp = realloc (list->items, (list->nitems + 1) * sizeof *list->items); | |
if (!tmp) { | |
FPRINTF (stderr, "OOM error reallocating list_t array\n"); | |
return NULL; | |
} | |
list->items = tmp; | |
list->items[list->nitems++] = (void *)item; | |
return item; | |
} | |
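/* ********************************************************
* Usage sketch for list_t, the same pattern main() uses for its `files`
* and `queries` lists: create with a deallocator, append items, then free
* the list and every stored item in one call.
*
*    list_t *names = list_new (free);
*    list_append (names, sstrdup ("first"));
*    list_append (names, sstrdup ("second"));
*    // names->items[] preserves append order, names->nitems == 2
*    list_free (&names);   // frees the items too, then sets names to NULL
*/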
/* ******************************************************** | |
* node.c module | |
* A datatype for trees of nodes. There is no difference | |
* between a `tree` and a `node` - they're the same thing. | |
* | |
* A node stores an item from an HTML page, and is created from | |
* parsing literal HTML passed to the `_new()` function. | |
* | |
* Children of a node have a strict ordering, and this is the | |
* order that recursion on the structure is guaranteed. | |
*/ | |
enum node_type_t { | |
node_type_NODE, | |
node_type_STRING, | |
}; | |
typedef struct node_t node_t; | |
struct node_t { | |
enum node_type_t type; | |
char *tag; | |
node_t *parent; | |
list_t *keys; // char *, keys[i] maps to values [i] | |
list_t *values; // char *, values[i] maps to keys [i] | |
list_t *children; // node_t *, Children, stored in order of appearance. | |
}; | |
char *node_ancestry (char **dst, node_t *node) | |
{ | |
char *ret = NULL; | |
node_t *tmp = node; | |
size_t nbytes = 0; | |
while (tmp) { | |
nbytes += strlen (tmp->tag) + strlen ("] --> [") + 3; | |
tmp = tmp->parent; | |
} | |
nbytes++; | |
if (!(ret = calloc (nbytes, 1))) { | |
FPRINTF (stderr, "OOM error allocating space for ancestry\n"); | |
return NULL; | |
} | |
tmp = node; | |
size_t index = 0; | |
const char *delim = ""; | |
while (tmp) { | |
sprintf (&ret[index], "%s[%s]", delim, tmp->tag); | |
delim = " --> "; | |
index += strlen (&ret[index]); | |
tmp = tmp->parent; | |
} | |
free (*dst); | |
*dst = ret; | |
return ret; | |
} | |
static void print_node (node_t *node, size_t depth, void *outf) | |
{ | |
FILE *f = outf; | |
if (!f) | |
f = stdout; | |
if (node->type == node_type_STRING) { | |
fprintf (f, "%s ", node->tag); | |
return; | |
} | |
#define INDENT for (size_t i=0; i<depth; i++) { fprintf (f, " "); } | |
INDENT; | |
fprintf (f, "<%s ", node->tag); | |
for (size_t i=0; i<node->keys->nitems; i++) { | |
fprintf (f, "%s='%s' ", | |
(char *)node->keys->items[i], (char *)node->values->items[i]); | |
} | |
fprintf (f, ">\n"); | |
} | |
static void node_del (node_t *node) | |
{ | |
if (!node) | |
return; | |
free (node->tag); | |
list_free (&node->keys); | |
list_free (&node->values); | |
list_free (&node->children); | |
free (node); | |
} | |
static bool node_add_child (node_t *parent, node_t *child) | |
{ | |
if (!parent) | |
return true; | |
if (!child) | |
return false; | |
if (parent->type != node_type_NODE) { | |
FPRINTF (stderr, "Attempt to append a child onto a data element is invalid\n");
return false; | |
} | |
child->parent = parent; | |
if (!(list_append (parent->children, child))) { | |
FPRINTF (stderr, "Failed to attach child to parent\n"); | |
return false; | |
} | |
return true; | |
} | |
static bool node_add_attr (node_t *node, char *attr) | |
{ | |
char *key = attr; | |
char *value = strchr (key, '='); | |
if (!value) { | |
value = ""; | |
} else { | |
*value++ = 0; | |
} | |
if (value[0] == '\'' || value[0] == '"') { | |
*value++ = 0; | |
value[strlen (value) - 1] = 0; | |
} | |
const char *akey, *avalue; | |
if (!(akey = list_append (node->keys, sstrdup (key))) | |
|| !(avalue = list_append (node->values, sstrdup (value)))) { | |
FPRINTF (stderr, "Failed to store [%s=%s] for node [%s]\n", | |
key, value, node->tag); | |
return false; | |
} | |
return true; | |
} | |
node_t *node_new (node_t *parent, enum node_type_t type, const char *data) | |
{ | |
bool error = true; | |
node_t *ret = calloc (1, sizeof *ret); | |
if (!ret) { | |
FPRINTF (stderr, "OOM error allocating node [%s]\n", data); | |
goto cleanup; | |
} | |
if (!(node_add_child (parent, ret))) { | |
FPRINTF (stderr, "Failed to add child to parent [%s]\n", data); | |
goto cleanup; | |
} | |
ret->type = type; | |
if (!(ret->tag = sstrdup (data))) { | |
FPRINTF (stderr, "Failed to allocate tag [%s]", data); | |
goto cleanup; | |
} | |
if (type == node_type_NODE) { | |
ret->keys = list_new (free); | |
ret->values = list_new (free); | |
ret->children = list_new ((void (*) (void *))node_del); | |
if (!ret->keys || !ret->values || !ret->children) { | |
FPRINTF (stderr, "Failed to allocate fields [keys:values:children] " | |
"[%p:%p:%p]\n", ret->keys, ret->values, ret->children); | |
goto cleanup; | |
} | |
} | |
error = false; | |
cleanup: | |
if (error) { | |
node_del (ret); | |
ret = NULL; | |
} | |
return ret; | |
} | |
static void node_visit (struct node_t *node, size_t depth, void *parg, | |
void (*fptr) (struct node_t *, size_t, void *)) | |
{ | |
if (!node) | |
return; | |
fptr (node, depth, parg); | |
if (node->type == node_type_STRING) | |
return; | |
for (size_t i=0; i < node->children->nitems; i++) { | |
node_visit (node->children->items[i], depth + 1, parg, fptr); | |
} | |
} | |
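/* ********************************************************
* Usage sketch for the node_t tree, the same shape node_read() below
* follows: create a root, let parsing attach children to it, then walk
* the tree in document order with node_visit().
*
*    node_t *root = node_new (NULL, node_type_NODE, "example-root");
*    // ... node_new (root, ...) attaches children in order ...
*    node_visit (root, 0, stdout, print_node);
*    node_del (root);   // recursively frees the children as well
*/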
/* ************************************************************************* | |
* token.c module | |
* The token datatype, and some operations for it. | |
*/ | |
enum token_type_t { | |
token_END, | |
token_TAGOPEN, | |
token_TAGCLOSE, | |
token_IGNOPEN, | |
token_IGNCLOSE, | |
token_TEXT, | |
token_KP, | |
token_GT, | |
token_SELFCLOSING, | |
}; | |
static const char *token_type_name (enum token_type_t type) | |
{ | |
static const struct { | |
enum token_type_t t; | |
const char *s; | |
} types[] = { | |
#define TOKEN_NAME(x) { x, #x } | |
TOKEN_NAME(token_END), | |
TOKEN_NAME(token_TAGOPEN), | |
TOKEN_NAME(token_TAGCLOSE), | |
TOKEN_NAME(token_IGNOPEN), | |
TOKEN_NAME(token_IGNCLOSE), | |
TOKEN_NAME(token_TEXT), | |
TOKEN_NAME(token_KP), | |
TOKEN_NAME(token_GT), | |
TOKEN_NAME(token_SELFCLOSING), | |
}; | |
#undef TOKEN_NAME | |
static size_t ntypes = sizeof types/sizeof types[0]; | |
for (size_t i=0; i<ntypes; i++) { | |
if (types[i].t == type) | |
return types[i].s; | |
} | |
static char unknown[55]; | |
snprintf (unknown, sizeof unknown, "Unknown token type %i\n", type); | |
return unknown; | |
} | |
struct token_t { | |
enum token_type_t type; | |
char *text; | |
}; | |
static struct token_t *token_new (enum token_type_t type, char *start, char *end) | |
{ | |
struct token_t *ret = calloc (1, sizeof *ret); | |
if (!ret) { | |
FPRINTF (stderr, "OOM error allocating token_t\n"); | |
return NULL; | |
} | |
if (!start || !end) { | |
ret->text = sstrdup (""); | |
ret->type = type; | |
return ret; | |
} | |
size_t slen = end - start; | |
if (!(ret->text = calloc (slen + 1, 1))) { | |
FPRINTF (stderr, "OOM error allocating token->text\n"); | |
free (ret); | |
return NULL; | |
} | |
memcpy (ret->text, start, slen); | |
ret->type = type; | |
return ret; | |
} | |
static void token_del (struct token_t *token) | |
{ | |
if (!token) | |
return; | |
free (token->text); | |
free (token); | |
} | |
/* ************************************************************************* | |
* The actual tokeniser. | |
*/ | |
static void token_set_ignoretag (struct token_t *token) | |
{ | |
static const char *items[] = { | |
"SCRIPT", "STYLE", | |
}; | |
static const size_t nitems = sizeof items/sizeof items[0]; | |
if (!token) | |
return; | |
for (size_t i=0; i < nitems; i++) { | |
if ((sstricmp (items[i], token->text))) { | |
if (token->type == token_TAGOPEN) | |
token->type = token_IGNOPEN; | |
if (token->type == token_TAGCLOSE) | |
token->type = token_IGNCLOSE; | |
break; | |
} | |
} | |
} | |
static struct token_t *read_token_tag (char *in, size_t *idx) | |
{ | |
(*idx)++; | |
char *start = &in[*idx];
while ((not_space(in[*idx])) && in[*idx] != '>') { | |
(*idx)++; | |
} | |
if ((&in[*idx] - start) < 1) { | |
FPRINTF (stderr, "Empty tag found, aborting\n"); | |
return NULL; | |
} | |
if (in[(*idx) - 1] == '/') { | |
(*idx)--; | |
} | |
enum token_type_t tagtype = token_TAGOPEN; | |
if (start[0] == '/') { | |
tagtype = token_TAGCLOSE; | |
start++; | |
while ((is_space (in[*idx]))) { | |
in[*idx] = 0; | |
(*idx)++; | |
} | |
in[*idx] = 0; | |
(*idx)++; | |
} | |
struct token_t *ret = token_new (tagtype, start, &in[*idx]); | |
token_set_ignoretag (ret); | |
return ret; | |
} | |
static bool read_ffwd_quote (char *in, size_t *idx) | |
{ | |
int quotec = in[*idx]; | |
while (1) { | |
(*idx)++; | |
int c = in[*idx]; | |
if (c == '\\') { | |
(*idx)++; | |
if (in[*idx] == 0) | |
return false; | |
(*idx)++; | |
continue; | |
} | |
if (c == 0) | |
return false; | |
if (c == quotec) { | |
(*idx)++; | |
return true; | |
} | |
} | |
return false; | |
} | |
static struct token_t *read_token_char (enum token_type_t type, | |
char *in, size_t *idx) | |
{ | |
(*idx)++; | |
return token_new (type, &in[(*idx) - 1], &in[*idx]); | |
} | |
static struct token_t *read_token_text (char *in, size_t *idx) | |
{ | |
static const char *delims = "<=>"; | |
char *start = &in[*idx]; | |
while ((not_space (in[*idx])) && !(strchr (delims, in[*idx]))) { | |
(*idx)++; | |
} | |
if ((is_space (in[*idx]))) { | |
return token_new (token_TEXT, start, &in[*idx]); | |
} | |
if (in[*idx] == '=') { | |
(*idx)++; | |
if (in[*idx] == '"' || in[*idx] == '\'') { | |
if (!(read_ffwd_quote (in, idx))) { | |
return NULL; | |
} | |
return token_new (token_KP, start, &in[*idx]); | |
} | |
while ((not_space (in[*idx])) | |
&& in[*idx] != '>' | |
&& in[*idx] != '/') { | |
(*idx)++; | |
} | |
return token_new (token_TEXT, start, &in[*idx]); | |
} | |
return token_new (token_TEXT, start, &in[*idx]); | |
} | |
static struct token_t *read_token_selfclose (char *in, size_t *idx) | |
{ | |
char *start = &in[*idx]; | |
char *end = &in[(*idx) + 1]; | |
while ((is_space (*end))) { | |
end++; | |
} | |
if (*end != '>') { | |
return read_token_text (in, idx); | |
} | |
(*idx) += (end - start) + 1; | |
return token_new (token_SELFCLOSING, start, &in[*idx]); | |
} | |
static char *parse_string (char *in, size_t *idx)
{
char qc = in[*idx];
(*idx)++;
char *start = &in[*idx];
while (1) {
int c = in[*idx];
if (c == 0) {
FPRINTF (stderr, "Unterminated string near [%s]\n", start);
return NULL;
}
if (c == '\\') {
// Skip the backslash; stop if the input ends immediately after it.
(*idx)++;
if (in[*idx] == 0) {
FPRINTF (stderr, "Unterminated string near [%s]\n", start);
return NULL;
}
(*idx)++;
continue;
}
if (c == qc) {
break;
}
(*idx)++;
}
in[*idx] = 0;
(*idx)++;
return start;
}
static struct token_t *read_token_string (char *in, size_t *idx)
{
char *s = parse_string (in, idx);
if (!s) {
return NULL;
}
char *e = &s[strlen(s)];
return token_new (token_TEXT, s, e);
}
static struct token_t *next_token (char *in, size_t *idx) | |
{ | |
// Swallow whitespace | |
while (is_space (in[*idx])) | |
(*idx)++; | |
// The first character indicates what we are dealing with. | |
switch (in[*idx]) { | |
case 0: | |
return token_new (token_END, NULL, NULL); | |
case '<': return read_token_tag (in, idx); | |
case '>': return read_token_char (token_GT, in, idx); | |
case '/': return read_token_selfclose (in, idx); | |
case '`': // Fallthrough | |
case '\'': // Fallthrough | |
case '"': return read_token_string (in, idx); | |
default: return read_token_text (in, idx); | |
} | |
return NULL; | |
} | |
// This is a very quick-n-dirty tokeniser/parser combination. A "nice"
// tokeniser/parser is not hard to write, but it is tedious: each character
// has to be examined in order to keep track of character positions within
// a line and line positions within the input.
//
// The trade-off of the quick-n-dirty approach is, unfortunately, poor
// error reporting when parsing (because no character position or line
// number information is available).
//
// In the absence of line number and character positions, we can still
// report "error near 'foo bar blaz ...'" and let the user perform a
// search for that string.
// | |
enum recurse_action_t { | |
recurse_action_SUCCESS = 1, | |
recurse_action_ERROR, | |
recurse_action_SKIP, | |
}; | |
static enum recurse_action_t node_read_recurse (const char *rootname, bool preamble, | |
node_t *parent, char *input, | |
size_t *index, int *nerrors) | |
{ | |
int nerrs = 0; | |
struct token_t *token = NULL; | |
enum recurse_action_t action; | |
char *ancestors = NULL; | |
/* Parsing HTML into a tree is stupidly tedious. Here is every
* type of token we will ever see:
* END: Dummy token returned on end-of-input | |
* TAGOPEN: Token of a tag. Strips out '<' and '>' | |
* TAGCLOSE: Token of a /tag. Strips out "</" and '>' | |
* IGNOPEN: Token of a tag we will ignore. Strips out '<' and '>' | |
* IGNCLOSE: Token of a /tag we will ignore. Strips out "</" and '>' | |
* TEXT: Normal content encountered | |
* KP: keypair value (either k=v, k='v' or k="v") | |
* GT: The '>' character | |
* SELFCLOSING: The "/>" two-character string. | |
* | |
* Here are the actions we will take for each token encountered:
* | |
* END: | |
* We end processing immediately, signalling error if parent is not | |
* named "<root>" | |
* TAGOPEN: | |
* If preamble, signal error and return | |
* If !preamble, create a newnode, call recurse(newnode) | |
* TAGCLOSE: | |
* if preamble, signal error and return | |
* If !preamble | |
* If token->text != parent->tag | |
* warn, then return success | |
* else | |
* return success | |
* IGNOPEN: | |
* If preamble, signal error and return | |
* If !preamble, read and discard tokens until IGNCLOSE | |
* IGNCLOSE: | |
* signal error and return | |
* TEXT and KP | |
* If preamble we attach token as attr to parent | |
* If !preamble we attach token as text/content to parent | |
* GT: | |
* If preamble | |
* If token->text does not start with a '!'
* we set preamble to false | |
* else | |
* we inform caller to SKIP | |
* If !preamble we signal error and return | |
* SELFCLOSING: | |
* If preamble we return success | |
* if !preamble we signal error and return | |
*/ | |
while ((token = next_token (input, index)) | |
&& token->type != token_END) { | |
node_t *newnode = NULL; | |
FPRINTF (stdout, "<%s> (%i) %s[%s]\n", | |
parent->tag, preamble, token_type_name (token->type), token->text); | |
if (preamble) { | |
switch (token->type) { | |
case token_END: | |
FPRINTF (stderr, "%s: Encountered EOF within attrs of [%s]\n", | |
node_ancestry(&ancestors, parent), parent->tag); | |
nerrs++; | |
goto cleanup; | |
case token_TAGOPEN: // Fallthrough | |
case token_IGNOPEN: | |
FPRINTF (stderr, "%s: Encountered tag [%s] within attrs of [%s]\n", | |
node_ancestry (&ancestors, parent), | |
token->text, parent->tag); | |
nerrs++; | |
goto cleanup; | |
case token_TAGCLOSE: // Fallthrough | |
case token_IGNCLOSE: | |
FPRINTF (stderr, "%s: Encountered '</%s>' within attrs of [%s]\n", | |
node_ancestry (&ancestors, parent), | |
token->text, parent->tag); | |
nerrs++; | |
goto cleanup; | |
case token_TEXT: // Fallthrough | |
case token_KP: | |
if (!(node_add_attr (parent, token->text))) { | |
FPRINTF (stderr, "%s: Failed to add attr [%s] to [%s]\n", | |
node_ancestry (&ancestors, parent), | |
token->text, parent->tag); | |
nerrs++; | |
goto cleanup; | |
} | |
break; | |
case token_GT: | |
if (parent->tag[0] == '!') { | |
nerrs = -1; | |
goto cleanup; | |
} else { | |
preamble = false; | |
} | |
break; | |
case token_SELFCLOSING: | |
goto cleanup; | |
} | |
} else { | |
switch (token->type) { | |
case token_END: | |
if ((strcmp (parent->tag, "root")) != 0) { | |
FPRINTF (stderr, "%s: Encountered EOF while processing [%s]\n", | |
node_ancestry (&ancestors, parent), | |
parent->tag); | |
nerrs++; | |
} | |
goto cleanup; | |
case token_TAGOPEN: | |
if (!(newnode = node_new (parent, node_type_NODE, token->text))) { | |
FPRINTF (stderr, "%s: Failed to create new node [%s]\n", | |
node_ancestry (&ancestors, parent), | |
token->text); | |
nerrs++; | |
goto cleanup; | |
} | |
action = node_read_recurse (rootname, true, | |
newnode, input, index, nerrors); | |
if (action == recurse_action_ERROR) { | |
FPRINTF (stderr, "%s: Failed to read node [%s]\n", | |
node_ancestry (&ancestors, parent), | |
newnode->tag); | |
nerrs++; | |
goto cleanup; | |
} | |
if (action == recurse_action_SKIP) { | |
} | |
break; | |
case token_TAGCLOSE: | |
if ((strcmp (token->text, parent->tag)) != 0) { | |
FPRINTF (stderr, "WARNING: %s: Expected </%s>, got </%s>\n", | |
node_ancestry (&ancestors, parent), | |
token->text, parent->tag); | |
} | |
goto cleanup; | |
case token_IGNOPEN: | |
token_del (token); | |
while ((token = (next_token (input, index)))) { | |
if (token->type == token_END) { | |
FPRINTF (stderr, "%s: Unexpected end of input\n", | |
node_ancestry (&ancestors, parent)); | |
nerrs++; | |
goto cleanup; | |
} | |
FPRINTF (stdout, "TAG_IGNOPEN: <%s> (%i) %s[%s]\n", | |
parent->tag, | |
preamble, | |
token_type_name (token->type), token->text); | |
if (token->type == token_IGNCLOSE) { | |
break; | |
} | |
token_del (token); | |
token = NULL; | |
} | |
if (!token) { | |
FPRINTF (stderr, "%s: Unexpected end of input looking for " | |
"ignoretag", | |
node_ancestry (&ancestors, parent)); | |
nerrs++; | |
goto cleanup; | |
} | |
break; | |
case token_IGNCLOSE: | |
FPRINTF (stderr, "%s: unexpected </%s>\n", | |
node_ancestry (&ancestors, parent), token->text); | |
nerrs++; | |
goto cleanup; | |
case token_TEXT: // Fallthrough | |
case token_KP: | |
if (!(newnode = node_new (parent, node_type_STRING, token->text))) { | |
FPRINTF (stderr, "%s: Failed to create new text [%s]\n", | |
node_ancestry (&ancestors, parent), | |
token->text); | |
nerrs++; | |
goto cleanup; | |
} | |
break; | |
case token_GT: // Fallthrough | |
case token_SELFCLOSING: | |
FPRINTF (stderr, "%s: Unexpected [%s]\n", | |
node_ancestry (&ancestors, parent), | |
token->text); | |
nerrs++; | |
goto cleanup; | |
} | |
} | |
token_del (token); | |
} | |
if (nerrors) { | |
*nerrors = *nerrors + nerrs; | |
} | |
cleanup: | |
free (ancestors); | |
token_del (token); | |
if (nerrs > 0) { | |
return recurse_action_ERROR; | |
} | |
if (nerrs < 0) { | |
return recurse_action_SKIP; | |
} | |
return recurse_action_SUCCESS; | |
} | |
static node_t *node_read (const char *fname, int *nerrors) | |
{ | |
int nerrs = 0; | |
node_t *root = NULL; | |
size_t input_len = 0; | |
char *input = file_slurp (fname, &input_len); | |
if (!input) { | |
FPRINTF (stderr, "Failed to read file into memory, aborting file read\n"); | |
nerrs++; | |
goto cleanup; | |
} | |
size_t index = 0; | |
if (!(root = node_new (NULL, node_type_NODE, fname))) { | |
FPRINTF (stderr, "Failed to create a root node for [%s]\n", fname); | |
goto cleanup; | |
} | |
enum recurse_action_t action = recurse_action_SKIP; | |
while ((action = node_read_recurse (fname, false, root, input, &index, &nerrs)) | |
== recurse_action_SKIP) { | |
; | |
} | |
if (action == recurse_action_ERROR) { | |
FPRINTF (stderr, "Failure parsing file [%s]\n", fname); | |
nerrs++; | |
goto cleanup; | |
} | |
node_visit (root, 0, stdout, print_node); | |
cleanup: | |
if (nerrs) { | |
node_del (root); | |
root = NULL; | |
} | |
if (nerrors) { | |
*nerrors = nerrs; | |
} | |
free (input); | |
return root; | |
} | |
/* ******************************************************** | |
* selector.c module | |
* Selector functions. | |
* | |
* The goal is to locate, in the input HTML, specific data, and extract | |
* that data in plain text form. | |
* | |
* While it would be nice to eventually support any expression supported | |
* by `querySelectorAll()` due to existing webdev familiarity, that's a | |
* large (and probably painful) undertaking. | |
* | |
* Selectors should look like this: | |
* "div > id^='-test' ~ .myClass" | |
* | |
*/ | |
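/* **************************************************************
* Sketch of how a simple selector decomposes (taken from the expected
* output of selector_test() at the bottom of this file): selector_parse()
* turns the query string into a flat list of selector_t elements.
*
*    selector_parse (".two")  =>  [selector_OPERAND:class]
*                                 [selector_OPERATOR:=]
*                                 [selector_STRING:two]
*                                 [selector_END:...]
*/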
enum selector_type_t { | |
selector_END = -2, | |
selector_ERROR = -1, | |
selector_OPERAND = 1, | |
selector_OPERATOR, | |
selector_STRING, | |
}; | |
const char *selector_type_name (enum selector_type_t type) | |
{ | |
static const struct { | |
enum selector_type_t type; | |
const char *name; | |
} names[] = { | |
#define SEL(x) { x, #x } | |
SEL (selector_END), | |
SEL (selector_ERROR), | |
SEL (selector_OPERAND), | |
SEL (selector_OPERATOR), | |
SEL (selector_STRING), | |
#undef SEL | |
}; | |
static const size_t nnames = sizeof names/sizeof names[0]; | |
for (size_t i=0; i<nnames; i++) { | |
if (type == names[i].type) { | |
return names[i].name; | |
} | |
} | |
static char unknown[55]; | |
snprintf (unknown, sizeof unknown, "Unknown selector type: %i", type); | |
return unknown; | |
} | |
struct selector_t { | |
/* ********************************************** | |
* A selector_t element is one of _END, _OPERAND | |
* or _OPERATOR | |
*/ | |
enum selector_type_t type; | |
union { | |
/* ********************************************** | |
* Attribute we want to match. There are some | |
* that are reserved for internal use and cannot | |
* be used by the HTML we are processing. | |
* 1. \x02tagname | |
* 2. \x02content | |
* In general any string starting with \x02 is | |
* reserved for internal use. HTML pages using | |
* tags, classnames or attribute names that | |
* start with \x02 will have incorrect search | |
* results. | |
*/ | |
char *_operand; | |
/* ********************************************** | |
* Operators: | |
* , Logical OR | |
* ~ Any sibling | |
* + Adjacent sibling | |
* > Direct child | |
* $ Wildcard for end of string | |
* ^ Wildcard for beginning of string | |
* * Wildcard for 'anywhere in string' | |
* = Match exact | |
* \x03 Descendent | |
*/ | |
int _operator; | |
/* ********************************************** | |
* Store strings that would get matched against | |
*/ | |
char *_string; | |
} u; | |
}; | |
static void selector_dump (struct selector_t *s, char **dst) | |
{ | |
if (!s) { | |
FPRINTF (stderr, "Got NULL selector_t object\n"); | |
return; | |
} | |
tprintf (dst, "[%s:", selector_type_name (s->type)); | |
switch (s->type) { | |
case selector_END: | |
case selector_ERROR: tprintf (dst, "..."); break; | |
case selector_OPERAND: tprintf (dst, "%s", s->u._operand); break; | |
case selector_STRING: tprintf (dst, "%s", s->u._string); break; | |
case selector_OPERATOR: tprintf (dst, "%c", s->u._operator); break; | |
} | |
tprintf (dst, "]\n"); | |
} | |
static void selector_del (struct selector_t *sel) | |
{ | |
if (!sel) | |
return; | |
if (sel->type == selector_OPERAND) { | |
free (sel->u._operand); | |
} | |
if (sel->type == selector_STRING) { | |
free (sel->u._string); | |
} | |
free (sel); | |
} | |
static struct selector_t *selector_new (enum selector_type_t type, | |
int operator, | |
const char *operand) | |
{ | |
struct selector_t *ret = calloc (1, sizeof *ret); | |
if (!ret) { | |
FPRINTF (stderr, "OOM error allocating selector_t object [%s]\n", operand); | |
return NULL; | |
} | |
ret->type = type; | |
switch (ret->type) { | |
case selector_OPERATOR: | |
ret->u._operator = operator; | |
break; | |
case selector_STRING: | |
case selector_OPERAND: | |
if (!(ret->u._string = sstrdup (operand))) { | |
FPRINTF (stderr, "OOM error allocating operand field [%s]\n", operand); | |
free (ret); | |
return NULL; | |
} | |
break; | |
case selector_ERROR: | |
case selector_END: | |
break; | |
} | |
return ret; | |
} | |
static char *delimited_string (char *src, size_t *index, const char *delims) | |
{ | |
char *start = &src[*index]; | |
char *end = start; | |
while (*end && (!(strchr (delims, *end)))) { | |
end++; | |
} | |
*end = 0; | |
size_t slen = end - start; | |
(*index) += slen; | |
return start; | |
} | |
#define SOPERATORS ".#[,~>+$*^=" | |
#define SDELIMS "'\" \t\n\r" SOPERATORS | |
static enum selector_type_t _selector_read_attr (list_t *dst, | |
const char *attr, | |
char *s, size_t *index) | |
{ | |
if (!(list_append (dst, selector_new (selector_OPERAND, 0, attr))) | |
|| !(list_append (dst, selector_new (selector_OPERATOR, '=', NULL))) | |
|| !(list_append (dst, | |
selector_new (selector_STRING, 0, | |
delimited_string (s, index, SDELIMS))))) { | |
FPRINTF (stderr, "Failed to read direct attr ref [%s]\n", attr); | |
return false; | |
} | |
return true; | |
} | |
static enum selector_type_t selector_read_class (list_t *dst, | |
char *s, size_t *index) | |
{ | |
// e.g. ".myClassName" | |
(*index)++; | |
return _selector_read_attr (dst, "class", s, index); | |
} | |
static enum selector_type_t selector_read_id (list_t *dst, | |
char *s, size_t *index) | |
{ | |
// e.g. "#myId" | |
(*index)++; | |
return _selector_read_attr(dst, "id", s, index); | |
} | |
static enum selector_type_t selector_read_attrname (list_t *dst, | |
char *s, size_t *index) | |
{ | |
// e.g. "[someAttr=somevalue]" | |
// ^^^^^^^^ <------ consume and returns only that | |
(*index)++; | |
return | |
_selector_read_attr (dst, delimited_string (s, index, SDELIMS), s, index); | |
} | |
static enum selector_type_t selector_read_op (list_t *dst, | |
char *s, size_t *index) | |
{ | |
// e.g. "s1 > s2" | |
// ^ <---- consume and returns only that | |
// e.g. "[someAttr ~ someValue]" | |
// ^ <-------- consume and returns only that | |
if (!(list_append(dst, selector_new (selector_OPERATOR, s[*index], NULL)))) { | |
FPRINTF (stderr, "Failed to create operator %c\n", s[*index]); | |
return false; | |
} | |
(*index)++; | |
return true; | |
} | |
static enum selector_type_t selector_read_match (list_t *dst, | |
char *s, size_t *index) | |
{ | |
// e.g. [someAttr^=someValue] | |
// ^ <-------------- returns only that | |
// Consumes ----> ^^ | |
if (!(list_append(dst, selector_new (selector_OPERATOR, s[*index], NULL)))) { | |
FPRINTF (stderr, "Failed to create operator %c\n", s[*index]); | |
return false; | |
} | |
(*index)++; | |
return true; | |
} | |
static enum selector_type_t selector_read_string (list_t *dst, | |
char *s, size_t *index) | |
{ | |
char *string = parse_string (s, index); | |
if (!(list_append (dst, selector_new (selector_STRING, 0, string)))) { | |
FPRINTF (stderr, "Failed to read string into selector_t list\n"); | |
return false; | |
} | |
return true; | |
} | |
static enum selector_type_t selector_read_tag (list_t *dst, | |
char *s, size_t *index) | |
{ | |
return _selector_read_attr (dst, "\x02tagname", s, index); | |
} | |
/* ************************************************************** | |
* Reads exactly one selector_t object, and inserts it into the | |
* provided list. | |
*/ | |
static enum selector_type_t _selector_read_next (list_t *dst, | |
char *sq, size_t *index) | |
{ | |
// Swallow whitespace | |
while ((is_space (sq[*index]))) { | |
(*index)++; | |
} | |
// Reached end of input? | |
if (!(sq[*index])) { | |
return list_append (dst, selector_new (selector_END, 0, NULL)) | |
? selector_END | |
: selector_ERROR; | |
} | |
enum selector_type_t type = selector_ERROR;
// Determine what we are dealing with | |
switch (sq[*index]) { | |
case '.': type = selector_read_class (dst, sq, index); break; | |
case '#': type = selector_read_id (dst, sq, index); break; | |
case '[': type = selector_read_attrname (dst, sq, index); break; | |
case ',': // Fallthrough | |
case '~': // Fallthrough | |
case '>': // Fallthrough | |
case '=': // Fallthrough | |
case '+': type = selector_read_op (dst, sq, index); break; | |
case '$': // Fallthrough | |
case '*': // Fallthrough | |
case '^': type = selector_read_match (dst, sq, index); break; | |
case '"': // Fallthrough | |
case '\'': type = selector_read_string (dst, sq, index); break; | |
default: type = selector_read_tag (dst, sq, index); break; | |
} | |
return type; | |
} | |
static list_t *selector_parse (const char *sq) | |
{ | |
bool error = true; | |
size_t index = 0; | |
char *copy = sstrdup (sq); | |
if (!copy) { | |
FPRINTF (stderr, "Failed to create a copy of the input\n"); | |
return NULL; | |
} | |
list_t *ret = list_new ((void (*) (void *))selector_del); | |
if (!ret) { | |
FPRINTF (stderr, "Failed to create list of selector operations for [%s]\n", | |
copy); | |
goto cleanup; | |
} | |
enum selector_type_t type; | |
while ((type = _selector_read_next (ret, copy, &index)) > 0) { | |
; | |
} | |
if (type != selector_END) { | |
FPRINTF (stderr, "Encountered errors while processing [%s]\n", copy); | |
goto cleanup; | |
} | |
error = false; | |
cleanup: | |
free (copy); | |
if (error) { | |
list_free (&ret); | |
} | |
return ret; | |
} | |
/* ************************************************************** | |
* The main program module that contains `int main()`. | |
*/ | |
static int process_query (const char *query, const node_t *tree)
{
// Stub: searching with a compiled query is not implemented yet (see the
// #warning at the top of the file). Every call currently reports one error.
(void)query;
(void)tree;
return 1;
}
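/* **************************************************************
* A possible shape for process_query() once query compilation is done
* (an assumption, not the author's final design): compile the query with
* selector_parse(), walk the tree with node_visit() using a matcher
* callback, and return the number of errors encountered.
*
*    list_t *compiled = selector_parse (query);
*    if (!compiled) {
*       return 1;
*    }
*    // node_visit (tree, 0, compiled, match_and_print); // hypothetical matcher
*    list_free (&compiled);
*    return 0;
*/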
static int process_file (const char *fname, list_t *queries) | |
{ | |
int nerrs = 0; | |
node_t *tree = NULL; | |
if (!(tree = node_read (fname, &nerrs))) { | |
FPRINTF (stderr, "Failed to parse input from [%s]\n", fname); | |
nerrs++; | |
goto cleanup; | |
} | |
for (size_t i=0; i<queries->nitems; i++) { | |
// Parallelisation also easily possible right here. This is | |
// a better spot than the loop in `main()`. | |
int rc = process_query ((const char *)queries->items[i], tree); | |
if (rc) { | |
const char *s_err = "error"; | |
if (rc > 1) { | |
s_err = "errors"; | |
} | |
FPRINTF (stderr, "%i %s found while processing [%s:%s]. Ignoring\n", | |
rc, s_err, fname, (const char *)queries->items[i]); | |
} | |
} | |
cleanup: | |
node_del (tree); | |
return nerrs; | |
} | |
static void print_helpmsg (void) | |
{ | |
static const char *helpmsg[] = {
"NAME",
" HtmlQuery: a program to perform queries and minimal transformations",
" on HTML input.",
"",
"SYNOPSIS",
" htmlq [[-q <query-string>] ...] [-f <filename>] ...",
" htmlq [[-q <query-string>] ...]",
" htmlq -h",
"",
"DESCRIPTION",
" Zero or more query expressions can be specified with [-q <query-string>].",
" When no query expressions are given, nothing is returned. Zero or more",
" input files can be specified with [-f <filename>]. When no input files",
" are specified, input is read from stdin.",
"",
" HtmlQuery returns the number of errors encountered to the caller.",
"",
"OPTIONS",
" -f <filename> Path to the input file. This option can be repeated as",
" many times as necessary to process multiple files at the",
" same time.",
" -q <query-string> Specify the query-string to match. This option can be",
" repeated to specify multiple query-strings. A match is", | |
" made when *any* query-string matches.", | |
" -h Print this message and exit with a zero exit code.", | |
" -v Print the program version and exit with a zero exit code.", | |
"", | |
"BUGS", | |
" Very likely. Send bug reports to [email protected], with the title set", | |
" to 'bug-report: htmlq' or similar.", | |
"", | |
}; | |
for (size_t i=0; i<sizeof helpmsg/sizeof helpmsg[0]; i++) {
printf ("%s\n", helpmsg[i]); | |
} | |
} | |
int main (int argc, char **argv) | |
{ | |
int ret = EXIT_FAILURE; | |
list_t *files = list_new (free); | |
list_t *queries = list_new (free); | |
if (!files || !queries) { | |
FPRINTF (stderr, "Failed to allocate arrays [files:queries] [%p:%p]\n", | |
files, queries); | |
goto cleanup; | |
} | |
int tokeniser_test (void); | |
int selector_test (void); | |
// Parse all the options | |
int argv_index = 0; | |
for (argv_index=1; argv_index<argc && argv[argv_index]; argv_index++) { | |
if ((argv[argv_index][0]) != '-') { | |
break; | |
} | |
switch (argv[argv_index][1]) { | |
case '1': | |
ret = tokeniser_test (); | |
goto cleanup; | |
case '2': | |
ret = selector_test (); | |
goto cleanup; | |
case 'f': | |
if (!(list_append (files, sstrdup (argv[++argv_index])))) { | |
FPRINTF (stderr, "OOM error storing option [-f %s]\n", | |
argv[argv_index]); | |
goto cleanup; | |
} | |
break; | |
case 'q': | |
if (!(list_append (queries, sstrdup (argv[++argv_index])))) { | |
FPRINTF (stderr, "OOM error storing query-string [-q %s]\n",
argv[argv_index]); | |
goto cleanup; | |
} | |
break; | |
case 'h': | |
print_helpmsg (); | |
ret = EXIT_SUCCESS; | |
goto cleanup; | |
case 'v':
// The help text documents -v as "print the program version and exit".
printf ("htmlq %s\n", VERSION);
ret = EXIT_SUCCESS;
goto cleanup;
default: | |
FPRINTF (stderr, "Unrecognised option flag '%s'\n", argv[argv_index]); | |
goto cleanup; | |
} | |
} | |
ret = 0; | |
for (size_t i=0; i<files->nitems; i++) { | |
// Can process this in parallel, but why bother? | |
int nerrs = process_file ((const char *)files->items[i], queries); | |
if (nerrs) { | |
const char *s_err = "error"; | |
if (nerrs > 1) { | |
s_err = "errors"; | |
} | |
FPRINTF (stderr, "%i %s processing file [%s]. Ignoring\n", | |
nerrs, s_err, (const char *)files->items[i]); | |
ret += nerrs; | |
} | |
} | |
cleanup: | |
list_free (&files); | |
list_free (&queries); | |
return ret; | |
} | |
int tokeniser_test (void) | |
{ | |
char *input = sstrdup ( | |
"<!DOCTYPE attr1 attr2=\"value2\">" | |
"<html >" | |
" <body attr3='value3'>" | |
" <!-- this is a comment" | |
" -->" | |
" <tag1 attr4='value4'/>" | |
" <tag2 attr5='value5' />" | |
" <tag3 attr6=value6/>" | |
" <tag4 > some text goes here <tag5> more text </tag5> </tag4>" | |
" <tag6 attr7> </tag6>" | |
" <tag7 attr8=value8> </tag7>" | |
" <script type=module src=where.js>" | |
" let tmp = \" </script> \";" | |
" let tmp = '\"</script>\"';" | |
" </script>" | |
" <tag8 attr9=value-10 > </tag8>" | |
" <tag9/>" | |
" <p>" | |
" </body>" | |
" <!--another comment-->" | |
"</html >" | |
); | |
static const char *expected = | |
"T token_TAGOPEN[!DOCTYPE]\n" | |
"T token_TEXT[attr1]\n" | |
"T token_KP[attr2=\"value2\"]\n" | |
"T token_GT[>]\n" | |
"T token_TAGOPEN[html]\n" | |
"T token_GT[>]\n" | |
"T token_TAGOPEN[body]\n" | |
"T token_KP[attr3='value3']\n" | |
"T token_GT[>]\n" | |
"T token_TAGOPEN[!--]\n" | |
"T token_TEXT[this]\n" | |
"T token_TEXT[is]\n" | |
"T token_TEXT[a]\n" | |
"T token_TEXT[comment]\n" | |
"T token_TEXT[--]\n" | |
"T token_GT[>]\n" | |
"T token_TAGOPEN[tag1]\n" | |
"T token_KP[attr4='value4']\n" | |
"T token_SELFCLOSING[/>]\n" | |
"T token_TAGOPEN[tag2]\n" | |
"T token_KP[attr5='value5']\n" | |
"T token_SELFCLOSING[/>]\n" | |
"T token_TAGOPEN[tag3]\n" | |
"T token_TEXT[attr6=value6]\n" | |
"T token_SELFCLOSING[/>]\n" | |
"T token_TAGOPEN[tag4]\n" | |
"T token_GT[>]\n" | |
"T token_TEXT[some]\n" | |
"T token_TEXT[text]\n" | |
"T token_TEXT[goes]\n" | |
"T token_TEXT[here]\n" | |
"T token_TAGOPEN[tag5]\n" | |
"T token_GT[>]\n" | |
"T token_TEXT[more]\n" | |
"T token_TEXT[text]\n" | |
"T token_TAGCLOSE[tag5]\n" | |
"T token_TAGCLOSE[tag4]\n" | |
"T token_TAGOPEN[tag6]\n" | |
"T token_TEXT[attr7]\n" | |
"T token_GT[>]\n" | |
"T token_TAGCLOSE[tag6]\n" | |
"T token_TAGOPEN[tag7]\n" | |
"T token_TEXT[attr8=value8]\n" | |
"T token_GT[>]\n" | |
"T token_TAGCLOSE[tag7]\n" | |
"T token_IGNOPEN[script]\n" | |
"T token_TEXT[type=module]\n" | |
"T token_TEXT[src=where.js]\n" | |
"T token_GT[>]\n" | |
"T token_TEXT[let]\n" | |
"T token_TEXT[tmp]\n" | |
"T token_TEXT[=]\n" | |
"T token_TEXT[ </script> ]\n" | |
"T token_TEXT[;]\n" | |
"T token_TEXT[let]\n" | |
"T token_TEXT[tmp]\n" | |
"T token_TEXT[=]\n" | |
"T token_TEXT[\"</script>\"]\n" | |
"T token_TEXT[;]\n" | |
"T token_IGNCLOSE[script]\n" | |
"T token_TAGOPEN[tag8]\n" | |
"T token_TEXT[attr9=value-10]\n" | |
"T token_GT[>]\n" | |
"T token_TAGCLOSE[tag8]\n" | |
"T token_TAGOPEN[tag9]\n" | |
"T token_SELFCLOSING[/>]\n" | |
"T token_TAGOPEN[p]\n" | |
"T token_GT[>]\n" | |
"T token_TAGCLOSE[body]\n" | |
"T token_TAGOPEN[!--another]\n" | |
"T token_TEXT[comment--]\n" | |
"T token_GT[>]\n" | |
"T token_TAGCLOSE[html]\n" | |
"Found 73 tokens\n"; | |
char *output = NULL; | |
struct token_t *token = NULL; | |
size_t index = 0; | |
size_t ntokens = 0; | |
while ((token = next_token (input, &index)) | |
&& token->type != token_END) { | |
ntokens++; | |
tprintf (&output, "T %s[%s]\n", token_type_name (token->type), token->text); | |
token_del (token); | |
} | |
tprintf (&output, "Found %zu tokens\n", ntokens); | |
int ret = EXIT_FAILURE; | |
if (token && token->type == token_END) { | |
FPRINTF (stderr, "TEST: Success\n"); | |
ret = EXIT_SUCCESS; | |
} else { | |
FPRINTF (stderr, "TEST: Failure\n"); | |
} | |
token_del (token); | |
free (input); | |
if ((strcmp (expected, output)) != 0) { | |
FPRINTF (stderr, "Unexpected output. Expected:\n%s\nGot:\n%s\n", | |
expected, output); | |
} | |
free (output); | |
return ret; | |
} | |
int selector_test (void) | |
{ | |
int ret = EXIT_FAILURE; | |
static const char *tests[] = { | |
"one", | |
".two", | |
"#three", | |
"four > five", | |
"#six ~ .seven", | |
}; | |
static const char *expected = | |
"[selector_OPERAND:\x02tagname]\n" | |
"[selector_OPERATOR:=]\n" | |
"[selector_STRING:one]\n" | |
"[selector_END:...]\n" | |
"[selector_OPERAND:class]\n" | |
"[selector_OPERATOR:=]\n" | |
"[selector_STRING:two]\n" | |
"[selector_END:...]\n" | |
"[selector_OPERAND:id]\n" | |
"[selector_OPERATOR:=]\n" | |
"[selector_STRING:three]\n" | |
"[selector_END:...]\n" | |
"[selector_OPERAND:\x02tagname]\n" | |
"[selector_OPERATOR:=]\n" | |
"[selector_STRING:four]\n" | |
"[selector_END:...]\n" | |
"[selector_OPERAND:id]\n" | |
"[selector_OPERATOR:=]\n" | |
"[selector_STRING:six]\n" | |
"[selector_END:...]\n"; | |
static const size_t ntests = sizeof tests / sizeof tests[0]; | |
char *output = NULL; | |
list_t *selectors = NULL; | |
for (size_t i=0; i<ntests; i++) { | |
list_free (&selectors); | |
selectors = selector_parse (tests[i]); | |
if (!selectors) { | |
FPRINTF (stderr, "Failed to create list for selector objects\n"); | |
goto cleanup; | |
} | |
for (size_t j=0; j<selectors->nitems; j++) { | |
selector_dump (selectors->items[j], &output); | |
} | |
} | |
const char *tmp = output; | |
size_t processed = strdiff (&expected, &tmp); | |
if (processed) { | |
FPRINTF (stderr, "processed before error: %zu characters\n", processed); | |
FPRINTF (stderr, "Unexpected output. Expected:\n%s\nGot:\n%s\n", | |
expected, tmp); | |
} | |
ret = EXIT_SUCCESS; | |
cleanup: | |
free (output); | |
list_free (&selectors); | |
return ret; | |
} |