vinicius507 · March 14, 2022 15:39
diff --git a/main.c b/main.c
 #define _GNU_SOURCE // asprintf needs it

 #include <stdio.h>
 #include <stdlib.h>
 #include <ctype.h>
 #include <string.h>

 #define ASSERT(EXPR, MSG) do {if (!EXPR) printf("Assertion error: %s, %s\n", #EXPR, MSG), exit(1);} while (0)

 #define L_PIPE "|"

 const char *raw_input = "ls -1 | sort | uniq -u | wc -l";

 typedef enum e_type
 {
    WORD,
    OPERATOR,
    TOKEN_COUNT
 }	t_type;

 typedef struct s_token
 {
    t_type			type;
    const char		*value;
    struct s_token	*prev;
    struct s_token	*next;
 }	t_token;

 typedef enum e_ast_type
 {
  COMMAND,
  PIPE,
  AST_COUNT
 } t_ast_type;

 typedef struct s_command
 {
    const char	*value;
 }	t_command;

 typedef struct s_ast
 {
    t_ast_type	type;
    union	u_command {
        t_command	*command;
        struct s_child {
            struct s_ast *lhs;
            struct s_ast *rhs;
        } child;
    } data;
 }	t_ast;

 static t_token	*new_token(t_type token_type, t_token *prev)
 {
    t_token	*token;

    token = calloc(1, sizeof(t_token));
    ASSERT((token != NULL), "Failed to alloc token");
    token->prev = prev;
    if (token->prev != NULL)
        token->prev->next = token;
    token->type = token_type;
    return (token);
 }

 static char is_space(char c)
 {
    if (c == ' ')
        return (1);
    if (c == '\t')
        return (1);
    if (c == '\n')
        return (1);
    return (0);
 }

 static char	is_metachar(char c)
 {
    // Se os redirects forem lidados direto no tokenizer, adicionar metachar
    if (c == '|')
        return (1);
    if (c == '&')
        return (1);
    return (is_space(c));
 }

 static char	*word(const char *input, size_t *offset)
 {
    size_t		len;
    char			quote;
    char			*value;
    const char	*start;
    size_t		counter;

    quote = '\0';
    start = (input + *offset);
    if (*start == '"' || *start == '\'')
    {
        counter = 0;
        // Se não houver uma quote no terminando o LITERAL, tratar como
        // caractere
        while (start[++counter] != '\0')
        {
            if (start[counter] == *start)
            {
                quote = *start;
                start += 1;
                *offset += 1;
            }
        }
    }
    len = 0;
    while (input[*offset] != '\0')
    {
        // Se quote não tiver setado, separar WORDs por metachars/espaços
        // Se estiver, ir até o final do quote
        if ((quote == '\0'
             && (is_metachar(input[*offset])
                 || is_space(input[*offset])))
            || (input[*offset] == quote))
            break ;
        len++;
        *offset += 1;
    }
    value = calloc(1, len + 1);
    ASSERT((value != NULL), "Failed to alloc value");
    strncpy(value, start, len);
    return (value);
 }

 static char *operator(const char *input, size_t *offset)
 {
    size_t		len;
    char			operator;
    char			*value;
    const char	*start;

    start = input + *offset;
    len = 1;
    operator = *start;
    if (operator == start[1])
    {
        len++;
        *offset += 1;
    }
    value = calloc(1, len + 1);
    ASSERT((value != NULL), "Failed to alloc value");
    strncpy(value, start, len);
    return (value);
 }

 static const char	*value(t_token *token, const char *input, size_t *offset)
 {
    if (token->type == WORD)
        return (word(input, offset));
    if (token->type == OPERATOR)
        return (operator(input, offset));
    return (NULL);
 }

 static t_token	*tokenize(const char *input)
 {
    t_type	type;
    t_token	*start;
    t_token	*token;
    size_t	offset;

    offset = 0;
    start = NULL;
    token = NULL;
    while (input[offset] != '\0')
    {
        // Espaço só pula
        if ((is_space(input[offset]) != 0) && ++offset)
            continue ;
        // Tratar & como OPERATOR somente se for &&
        if (input[offset] == '|' || (strncmp("&&", input + offset, 2) == 0))
            type = OPERATOR;
        else
            type = WORD;
        // Geramos um token com o tipo e depois pegamos o valor
        token = new_token(type, token);
        ASSERT((token != NULL), "Could not parse token");
        token->value = value(token, input, &offset);
        if (start == NULL)
            start = token;
        if (input[offset] != '\0')
            offset++;
    }
    return start;
 }

 static t_ast	*new_node(t_ast_type type, t_ast *lhs)
 {
    t_ast	*node;

    node = calloc(1, sizeof(t_ast));
    ASSERT((node != NULL), "Failed to alloc node");
    node->type = type;
    if (node->type != COMMAND)
        node->data.child.lhs = lhs;
    return (node);
 }

 static void	parse_command(t_ast *node, t_token **token)
 {
    size_t	len;
    size_t	copied;
    t_token	*start;

    len = 0;
    start = *token;
    while (*token != NULL)
    {
        len += strlen((*token)->value);
        if ((*token)->next == NULL || (*token)->next->type != WORD)
            break ;
        len += 1;
        (*token) = (*token)->next;
    }
    node->data.command = calloc(1, sizeof(t_command));
    node->data.command->value = calloc(1, len + 1);
    copied = 0;
    while (start != NULL)
    {
        strncpy((char *)node->data.command->value + copied, start->value, strlen(start->value));
        copied += strlen(start->value);
        start = start->next;
        if (start == (*token)->next)
            break ;
        strncpy((char *)node->data.command->value + copied, " ", 1);
        copied += 1;
    }
 }

 static t_ast_type operator_type(t_token *token)
 {
    if (strcmp(L_PIPE, token->value) == 0)
        return (PIPE);
    ASSERT(0, "Not implemented");
    return (AST_COUNT);
 }

 static t_ast	*build_ast(t_token *head)
 {
    t_ast			*ast;
    t_ast			*node;
    t_token		*token;
    t_ast_type	type;

    ast = NULL;
    node = NULL;
    token = head;
    while (token != NULL)
    {
        if (token->type == WORD)
            type = COMMAND;
        else if (token->type == OPERATOR)
            type = operator_type(token);
        node = new_node(type, ast);
        if (node->type == COMMAND)
        {
          parse_command(node, &token);
          if (ast != NULL && ast->type == PIPE)
            ast->data.child.rhs = node;
        }
        if (ast == NULL || node->type == PIPE)
            ast = node;
        token = token->next;
    }
    return (ast);
 }

 static const char	*token_type(t_token *token)
 {
    if (token->type == WORD)
        return ("WORD");
    if (token->type == OPERATOR)
        return ("OPERATOR");
    ASSERT(0, "Unreachable");
    return (NULL);
 }

 static const char	*node_type(t_ast *node)
 {
    if (node->type == COMMAND)
        return ("COMMAND");
    if (node->type == PIPE)
        return ("PIPE");
    ASSERT(0, "Unreachable");
    return (NULL);
 }

 static const char	*ast_value(t_ast *node)
 {
    char	*str;
    char	*lhs;
    char	*rhs;

    if (node == NULL)
        return ("NULL");
    if (node->type == COMMAND)
        asprintf(&str, "COMMAND '%s'", node->data.command->value);
    else if (node->type == PIPE)
      asprintf(&str, "PIPE '%s' TO '%s'", ast_value(node->data.child.lhs), ast_value(node->data.child.rhs));
    return (str);
 }

 int main()
 {
    t_token	*token;
    t_ast		*ast;

    token = tokenize(raw_input);
    ast = build_ast(token);
    printf("%s\n", ast_value(ast));
    return (0);
 }
	#define _GNU_SOURCE // asprintf needs it

	#include <stdio.h>
	#include <stdlib.h>
	#include <ctype.h>
	#include <string.h>

	#define ASSERT(EXPR, MSG) do {if (!EXPR) printf("Assertion error: %s, %s\n", #EXPR, MSG), exit(1);} while (0)

	#define L_PIPE "\|"

	const char *raw_input = "ls -1 \| sort \| uniq -u \| wc -l";

	typedef enum e_type
	{
	WORD,
	OPERATOR,
	TOKEN_COUNT
	} t_type;

	typedef struct s_token
	{
	t_type type;
	const char *value;
	struct s_token *prev;
	struct s_token *next;
	} t_token;

	typedef enum e_ast_type
	{
	COMMAND,
	PIPE,
	AST_COUNT
	} t_ast_type;

	typedef struct s_command
	{
	const char *value;
	} t_command;

	typedef struct s_ast
	{
	t_ast_type type;
	union u_command {
	t_command *command;
	struct s_child {
	struct s_ast *lhs;
	struct s_ast *rhs;
	} child;
	} data;
	} t_ast;

	static t_token new_token(t_type token_type, t_token prev)
	{
	t_token *token;

	token = calloc(1, sizeof(t_token));
	ASSERT((token != NULL), "Failed to alloc token");
	token->prev = prev;
	if (token->prev != NULL)
	token->prev->next = token;
	token->type = token_type;
	return (token);
	}

	static char is_space(char c)
	{
	if (c == ' ')
	return (1);
	if (c == '\t')
	return (1);
	if (c == '\n')
	return (1);
	return (0);
	}

	static char is_metachar(char c)
	{
	// Se os redirects forem lidados direto no tokenizer, adicionar metachar
	if (c == '\|')
	return (1);
	if (c == '&')
	return (1);
	return (is_space(c));
	}

	static char word(const char input, size_t *offset)
	{
	size_t len;
	char quote;
	char *value;
	const char *start;
	size_t counter;

	quote = '\0';
	start = (input + *offset);
	if (start == '"' \|\| start == '\'')
	{
	counter = 0;
	// Se não houver uma quote no terminando o LITERAL, tratar como
	// caractere
	while (start[++counter] != '\0')
	{
	if (start[counter] == *start)
	{
	quote = *start;
	start += 1;
	*offset += 1;
	}
	}
	}
	len = 0;
	while (input[*offset] != '\0')
	{
	// Se quote não tiver setado, separar WORDs por metachars/espaços
	// Se estiver, ir até o final do quote
	if ((quote == '\0'
	&& (is_metachar(input[*offset])
	\|\| is_space(input[*offset])))
	\|\| (input[*offset] == quote))
	break ;
	len++;
	*offset += 1;
	}
	value = calloc(1, len + 1);
	ASSERT((value != NULL), "Failed to alloc value");
	strncpy(value, start, len);
	return (value);
	}

	static char operator(const char input, size_t *offset)
	{
	size_t len;
	char operator;
	char *value;
	const char *start;

	start = input + *offset;
	len = 1;
	operator = *start;
	if (operator == start[1])
	{
	len++;
	*offset += 1;
	}
	value = calloc(1, len + 1);
	ASSERT((value != NULL), "Failed to alloc value");
	strncpy(value, start, len);
	return (value);
	}

	static const char value(t_token token, const char input, size_t offset)
	{
	if (token->type == WORD)
	return (word(input, offset));
	if (token->type == OPERATOR)
	return (operator(input, offset));
	return (NULL);
	}

	static t_token tokenize(const char input)
	{
	t_type type;
	t_token *start;
	t_token *token;
	size_t offset;

	offset = 0;
	start = NULL;
	token = NULL;
	while (input[offset] != '\0')
	{
	// Espaço só pula
	if ((is_space(input[offset]) != 0) && ++offset)
	continue ;
	// Tratar & como OPERATOR somente se for &&
	if (input[offset] == '\|' \|\| (strncmp("&&", input + offset, 2) == 0))
	type = OPERATOR;
	else
	type = WORD;
	// Geramos um token com o tipo e depois pegamos o valor
	token = new_token(type, token);
	ASSERT((token != NULL), "Could not parse token");
	token->value = value(token, input, &offset);
	if (start == NULL)
	start = token;
	if (input[offset] != '\0')
	offset++;
	}
	return start;
	}

	static t_ast new_node(t_ast_type type, t_ast lhs)
	{
	t_ast *node;

	node = calloc(1, sizeof(t_ast));
	ASSERT((node != NULL), "Failed to alloc node");
	node->type = type;
	if (node->type != COMMAND)
	node->data.child.lhs = lhs;
	return (node);
	}

	static void parse_command(t_ast node, t_token *token)
	{
	size_t len;
	size_t copied;
	t_token *start;

	len = 0;
	start = *token;
	while (*token != NULL)
	{
	len += strlen((*token)->value);
	if ((token)->next == NULL \|\| (token)->next->type != WORD)
	break ;
	len += 1;
	(token) = (token)->next;
	}
	node->data.command = calloc(1, sizeof(t_command));
	node->data.command->value = calloc(1, len + 1);
	copied = 0;
	while (start != NULL)
	{
	strncpy((char *)node->data.command->value + copied, start->value, strlen(start->value));
	copied += strlen(start->value);
	start = start->next;
	if (start == (*token)->next)
	break ;
	strncpy((char *)node->data.command->value + copied, " ", 1);
	copied += 1;
	}
	}

	static t_ast_type operator_type(t_token *token)
	{
	if (strcmp(L_PIPE, token->value) == 0)
	return (PIPE);
	ASSERT(0, "Not implemented");
	return (AST_COUNT);
	}

	static t_ast build_ast(t_token head)
	{
	t_ast *ast;
	t_ast *node;
	t_token *token;
	t_ast_type type;

	ast = NULL;
	node = NULL;
	token = head;
	while (token != NULL)
	{
	if (token->type == WORD)
	type = COMMAND;
	else if (token->type == OPERATOR)
	type = operator_type(token);
	node = new_node(type, ast);
	if (node->type == COMMAND)
	{
	parse_command(node, &token);
	if (ast != NULL && ast->type == PIPE)
	ast->data.child.rhs = node;
	}
	if (ast == NULL \|\| node->type == PIPE)
	ast = node;
	token = token->next;
	}
	return (ast);
	}

	static const char token_type(t_token token)
	{
	if (token->type == WORD)
	return ("WORD");
	if (token->type == OPERATOR)
	return ("OPERATOR");
	ASSERT(0, "Unreachable");
	return (NULL);
	}

	static const char node_type(t_ast node)
	{
	if (node->type == COMMAND)
	return ("COMMAND");
	if (node->type == PIPE)
	return ("PIPE");
	ASSERT(0, "Unreachable");
	return (NULL);
	}

	static const char ast_value(t_ast node)
	{
	char *str;
	char *lhs;
	char *rhs;

	if (node == NULL)
	return ("NULL");
	if (node->type == COMMAND)
	asprintf(&str, "COMMAND '%s'", node->data.command->value);
	else if (node->type == PIPE)
	asprintf(&str, "PIPE '%s' TO '%s'", ast_value(node->data.child.lhs), ast_value(node->data.child.rhs));
	return (str);
	}

	int main()
	{
	t_token *token;
	t_ast *ast;

	token = tokenize(raw_input);
	ast = build_ast(token);
	printf("%s\n", ast_value(ast));
	return (0);
	}