Last active
March 14, 2022 15:39
-
-
Save vinicius507/85be669e13f8debbf5099836d3267476 to your computer and use it in GitHub Desktop.
Prova de Conceito AST básica
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _GNU_SOURCE // asprintf needs it | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <ctype.h> | |
#include <string.h> | |
#define ASSERT(EXPR, MSG) do {if (!EXPR) printf("Assertion error: %s, %s\n", #EXPR, MSG), exit(1);} while (0) | |
#define L_PIPE "|" | |
const char *raw_input = "ls -1 | sort | uniq -u | wc -l"; | |
typedef enum e_type | |
{ | |
WORD, | |
OPERATOR, | |
TOKEN_COUNT | |
} t_type; | |
typedef struct s_token | |
{ | |
t_type type; | |
const char *value; | |
struct s_token *prev; | |
struct s_token *next; | |
} t_token; | |
typedef enum e_ast_type | |
{ | |
COMMAND, | |
PIPE, | |
AST_COUNT | |
} t_ast_type; | |
typedef struct s_command | |
{ | |
const char *value; | |
} t_command; | |
typedef struct s_ast | |
{ | |
t_ast_type type; | |
union u_command { | |
t_command *command; | |
struct s_child { | |
struct s_ast *lhs; | |
struct s_ast *rhs; | |
} child; | |
} data; | |
} t_ast; | |
static t_token *new_token(t_type token_type, t_token *prev) | |
{ | |
t_token *token; | |
token = calloc(1, sizeof(t_token)); | |
ASSERT((token != NULL), "Failed to alloc token"); | |
token->prev = prev; | |
if (token->prev != NULL) | |
token->prev->next = token; | |
token->type = token_type; | |
return (token); | |
} | |
static char is_space(char c) | |
{ | |
if (c == ' ') | |
return (1); | |
if (c == '\t') | |
return (1); | |
if (c == '\n') | |
return (1); | |
return (0); | |
} | |
static char is_metachar(char c) | |
{ | |
// Se os redirects forem lidados direto no tokenizer, adicionar metachar | |
if (c == '|') | |
return (1); | |
if (c == '&') | |
return (1); | |
return (is_space(c)); | |
} | |
static char *word(const char *input, size_t *offset) | |
{ | |
size_t len; | |
char quote; | |
char *value; | |
const char *start; | |
size_t counter; | |
quote = '\0'; | |
start = (input + *offset); | |
if (*start == '"' || *start == '\'') | |
{ | |
counter = 0; | |
// Se não houver uma quote no terminando o LITERAL, tratar como | |
// caractere | |
while (start[++counter] != '\0') | |
{ | |
if (start[counter] == *start) | |
{ | |
quote = *start; | |
start += 1; | |
*offset += 1; | |
} | |
} | |
} | |
len = 0; | |
while (input[*offset] != '\0') | |
{ | |
// Se quote não tiver setado, separar WORDs por metachars/espaços | |
// Se estiver, ir até o final do quote | |
if ((quote == '\0' | |
&& (is_metachar(input[*offset]) | |
|| is_space(input[*offset]))) | |
|| (input[*offset] == quote)) | |
break ; | |
len++; | |
*offset += 1; | |
} | |
value = calloc(1, len + 1); | |
ASSERT((value != NULL), "Failed to alloc value"); | |
strncpy(value, start, len); | |
return (value); | |
} | |
static char *operator(const char *input, size_t *offset) | |
{ | |
size_t len; | |
char operator; | |
char *value; | |
const char *start; | |
start = input + *offset; | |
len = 1; | |
operator = *start; | |
if (operator == start[1]) | |
{ | |
len++; | |
*offset += 1; | |
} | |
value = calloc(1, len + 1); | |
ASSERT((value != NULL), "Failed to alloc value"); | |
strncpy(value, start, len); | |
return (value); | |
} | |
static const char *value(t_token *token, const char *input, size_t *offset) | |
{ | |
if (token->type == WORD) | |
return (word(input, offset)); | |
if (token->type == OPERATOR) | |
return (operator(input, offset)); | |
return (NULL); | |
} | |
static t_token *tokenize(const char *input) | |
{ | |
t_type type; | |
t_token *start; | |
t_token *token; | |
size_t offset; | |
offset = 0; | |
start = NULL; | |
token = NULL; | |
while (input[offset] != '\0') | |
{ | |
// Espaço só pula | |
if ((is_space(input[offset]) != 0) && ++offset) | |
continue ; | |
// Tratar & como OPERATOR somente se for && | |
if (input[offset] == '|' || (strncmp("&&", input + offset, 2) == 0)) | |
type = OPERATOR; | |
else | |
type = WORD; | |
// Geramos um token com o tipo e depois pegamos o valor | |
token = new_token(type, token); | |
ASSERT((token != NULL), "Could not parse token"); | |
token->value = value(token, input, &offset); | |
if (start == NULL) | |
start = token; | |
if (input[offset] != '\0') | |
offset++; | |
} | |
return start; | |
} | |
static t_ast *new_node(t_ast_type type, t_ast *lhs) | |
{ | |
t_ast *node; | |
node = calloc(1, sizeof(t_ast)); | |
ASSERT((node != NULL), "Failed to alloc node"); | |
node->type = type; | |
if (node->type != COMMAND) | |
node->data.child.lhs = lhs; | |
return (node); | |
} | |
static void parse_command(t_ast *node, t_token **token) | |
{ | |
size_t len; | |
size_t copied; | |
t_token *start; | |
len = 0; | |
start = *token; | |
while (*token != NULL) | |
{ | |
len += strlen((*token)->value); | |
if ((*token)->next == NULL || (*token)->next->type != WORD) | |
break ; | |
len += 1; | |
(*token) = (*token)->next; | |
} | |
node->data.command = calloc(1, sizeof(t_command)); | |
node->data.command->value = calloc(1, len + 1); | |
copied = 0; | |
while (start != NULL) | |
{ | |
strncpy((char *)node->data.command->value + copied, start->value, strlen(start->value)); | |
copied += strlen(start->value); | |
start = start->next; | |
if (start == (*token)->next) | |
break ; | |
strncpy((char *)node->data.command->value + copied, " ", 1); | |
copied += 1; | |
} | |
} | |
static t_ast_type operator_type(t_token *token) | |
{ | |
if (strcmp(L_PIPE, token->value) == 0) | |
return (PIPE); | |
ASSERT(0, "Not implemented"); | |
return (AST_COUNT); | |
} | |
static t_ast *build_ast(t_token *head) | |
{ | |
t_ast *ast; | |
t_ast *node; | |
t_token *token; | |
t_ast_type type; | |
ast = NULL; | |
node = NULL; | |
token = head; | |
while (token != NULL) | |
{ | |
if (token->type == WORD) | |
type = COMMAND; | |
else if (token->type == OPERATOR) | |
type = operator_type(token); | |
node = new_node(type, ast); | |
if (node->type == COMMAND) | |
{ | |
parse_command(node, &token); | |
if (ast != NULL && ast->type == PIPE) | |
ast->data.child.rhs = node; | |
} | |
if (ast == NULL || node->type == PIPE) | |
ast = node; | |
token = token->next; | |
} | |
return (ast); | |
} | |
static const char *token_type(t_token *token) | |
{ | |
if (token->type == WORD) | |
return ("WORD"); | |
if (token->type == OPERATOR) | |
return ("OPERATOR"); | |
ASSERT(0, "Unreachable"); | |
return (NULL); | |
} | |
static const char *node_type(t_ast *node) | |
{ | |
if (node->type == COMMAND) | |
return ("COMMAND"); | |
if (node->type == PIPE) | |
return ("PIPE"); | |
ASSERT(0, "Unreachable"); | |
return (NULL); | |
} | |
static const char *ast_value(t_ast *node) | |
{ | |
char *str; | |
char *lhs; | |
char *rhs; | |
if (node == NULL) | |
return ("NULL"); | |
if (node->type == COMMAND) | |
asprintf(&str, "COMMAND '%s'", node->data.command->value); | |
else if (node->type == PIPE) | |
asprintf(&str, "PIPE '%s' TO '%s'", ast_value(node->data.child.lhs), ast_value(node->data.child.rhs)); | |
return (str); | |
} | |
int main() | |
{ | |
t_token *token; | |
t_ast *ast; | |
token = tokenize(raw_input); | |
ast = build_ast(token); | |
printf("%s\n", ast_value(ast)); | |
return (0); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment