Skip to content

Instantly share code, notes, and snippets.

@vinicius507
Last active March 14, 2022 15:39
Show Gist options
  • Save vinicius507/85be669e13f8debbf5099836d3267476 to your computer and use it in GitHub Desktop.
Save vinicius507/85be669e13f8debbf5099836d3267476 to your computer and use it in GitHub Desktop.
Prova de Conceito AST básica
#define _GNU_SOURCE // asprintf needs it
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#define ASSERT(EXPR, MSG) do {if (!EXPR) printf("Assertion error: %s, %s\n", #EXPR, MSG), exit(1);} while (0)
#define L_PIPE "|"
const char *raw_input = "ls -1 | sort | uniq -u | wc -l";
typedef enum e_type
{
WORD,
OPERATOR,
TOKEN_COUNT
} t_type;
typedef struct s_token
{
t_type type;
const char *value;
struct s_token *prev;
struct s_token *next;
} t_token;
typedef enum e_ast_type
{
COMMAND,
PIPE,
AST_COUNT
} t_ast_type;
typedef struct s_command
{
const char *value;
} t_command;
typedef struct s_ast
{
t_ast_type type;
union u_command {
t_command *command;
struct s_child {
struct s_ast *lhs;
struct s_ast *rhs;
} child;
} data;
} t_ast;
static t_token *new_token(t_type token_type, t_token *prev)
{
t_token *token;
token = calloc(1, sizeof(t_token));
ASSERT((token != NULL), "Failed to alloc token");
token->prev = prev;
if (token->prev != NULL)
token->prev->next = token;
token->type = token_type;
return (token);
}
static char is_space(char c)
{
if (c == ' ')
return (1);
if (c == '\t')
return (1);
if (c == '\n')
return (1);
return (0);
}
static char is_metachar(char c)
{
// Se os redirects forem lidados direto no tokenizer, adicionar metachar
if (c == '|')
return (1);
if (c == '&')
return (1);
return (is_space(c));
}
static char *word(const char *input, size_t *offset)
{
size_t len;
char quote;
char *value;
const char *start;
size_t counter;
quote = '\0';
start = (input + *offset);
if (*start == '"' || *start == '\'')
{
counter = 0;
// Se não houver uma quote no terminando o LITERAL, tratar como
// caractere
while (start[++counter] != '\0')
{
if (start[counter] == *start)
{
quote = *start;
start += 1;
*offset += 1;
}
}
}
len = 0;
while (input[*offset] != '\0')
{
// Se quote não tiver setado, separar WORDs por metachars/espaços
// Se estiver, ir até o final do quote
if ((quote == '\0'
&& (is_metachar(input[*offset])
|| is_space(input[*offset])))
|| (input[*offset] == quote))
break ;
len++;
*offset += 1;
}
value = calloc(1, len + 1);
ASSERT((value != NULL), "Failed to alloc value");
strncpy(value, start, len);
return (value);
}
static char *operator(const char *input, size_t *offset)
{
size_t len;
char operator;
char *value;
const char *start;
start = input + *offset;
len = 1;
operator = *start;
if (operator == start[1])
{
len++;
*offset += 1;
}
value = calloc(1, len + 1);
ASSERT((value != NULL), "Failed to alloc value");
strncpy(value, start, len);
return (value);
}
static const char *value(t_token *token, const char *input, size_t *offset)
{
if (token->type == WORD)
return (word(input, offset));
if (token->type == OPERATOR)
return (operator(input, offset));
return (NULL);
}
static t_token *tokenize(const char *input)
{
t_type type;
t_token *start;
t_token *token;
size_t offset;
offset = 0;
start = NULL;
token = NULL;
while (input[offset] != '\0')
{
// Espaço só pula
if ((is_space(input[offset]) != 0) && ++offset)
continue ;
// Tratar & como OPERATOR somente se for &&
if (input[offset] == '|' || (strncmp("&&", input + offset, 2) == 0))
type = OPERATOR;
else
type = WORD;
// Geramos um token com o tipo e depois pegamos o valor
token = new_token(type, token);
ASSERT((token != NULL), "Could not parse token");
token->value = value(token, input, &offset);
if (start == NULL)
start = token;
if (input[offset] != '\0')
offset++;
}
return start;
}
static t_ast *new_node(t_ast_type type, t_ast *lhs)
{
t_ast *node;
node = calloc(1, sizeof(t_ast));
ASSERT((node != NULL), "Failed to alloc node");
node->type = type;
if (node->type != COMMAND)
node->data.child.lhs = lhs;
return (node);
}
static void parse_command(t_ast *node, t_token **token)
{
size_t len;
size_t copied;
t_token *start;
len = 0;
start = *token;
while (*token != NULL)
{
len += strlen((*token)->value);
if ((*token)->next == NULL || (*token)->next->type != WORD)
break ;
len += 1;
(*token) = (*token)->next;
}
node->data.command = calloc(1, sizeof(t_command));
node->data.command->value = calloc(1, len + 1);
copied = 0;
while (start != NULL)
{
strncpy((char *)node->data.command->value + copied, start->value, strlen(start->value));
copied += strlen(start->value);
start = start->next;
if (start == (*token)->next)
break ;
strncpy((char *)node->data.command->value + copied, " ", 1);
copied += 1;
}
}
static t_ast_type operator_type(t_token *token)
{
if (strcmp(L_PIPE, token->value) == 0)
return (PIPE);
ASSERT(0, "Not implemented");
return (AST_COUNT);
}
static t_ast *build_ast(t_token *head)
{
t_ast *ast;
t_ast *node;
t_token *token;
t_ast_type type;
ast = NULL;
node = NULL;
token = head;
while (token != NULL)
{
if (token->type == WORD)
type = COMMAND;
else if (token->type == OPERATOR)
type = operator_type(token);
node = new_node(type, ast);
if (node->type == COMMAND)
{
parse_command(node, &token);
if (ast != NULL && ast->type == PIPE)
ast->data.child.rhs = node;
}
if (ast == NULL || node->type == PIPE)
ast = node;
token = token->next;
}
return (ast);
}
static const char *token_type(t_token *token)
{
if (token->type == WORD)
return ("WORD");
if (token->type == OPERATOR)
return ("OPERATOR");
ASSERT(0, "Unreachable");
return (NULL);
}
static const char *node_type(t_ast *node)
{
if (node->type == COMMAND)
return ("COMMAND");
if (node->type == PIPE)
return ("PIPE");
ASSERT(0, "Unreachable");
return (NULL);
}
static const char *ast_value(t_ast *node)
{
char *str;
char *lhs;
char *rhs;
if (node == NULL)
return ("NULL");
if (node->type == COMMAND)
asprintf(&str, "COMMAND '%s'", node->data.command->value);
else if (node->type == PIPE)
asprintf(&str, "PIPE '%s' TO '%s'", ast_value(node->data.child.lhs), ast_value(node->data.child.rhs));
return (str);
}
int main()
{
t_token *token;
t_ast *ast;
token = tokenize(raw_input);
ast = build_ast(token);
printf("%s\n", ast_value(ast));
return (0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment