NOTE(review): text before the first "diff --git" line is ignored by `git apply`
and `patch`. The header names of the `#include <...>` directives were lost when
this patch was extracted. In the new files they are restored from what each
file uses; in context lines of existing files they are left exactly as found
and must be restored from the original tree before the patch can be applied.

diff --git a/include/csquare/arena.h b/include/csquare/arena.h
new file mode 100644
index 0000000..d5192f1
--- /dev/null
+++ b/include/csquare/arena.h
@@ -0,0 +1,26 @@
+#ifndef ARENA_ALLOCATOR_H
+#define ARENA_ALLOCATOR_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+// Growable bump allocator backed by a single heap buffer.
+// A zero-initialised arena (arena a = {0};) is valid and empty.
+typedef struct {
+  uint8_t *mem;  // backing buffer, NULL while the arena is empty
+  size_t cap;    // size of mem in bytes
+  size_t offset; // bytes handed out so far
+} arena;
+
+// Reserve size bytes (rounded up to a multiple of 8) and return a pointer to
+// them. Aborts the process when memory cannot be obtained.
+//
+// Growing the arena goes through realloc(), which may move the buffer, so
+// pointers from earlier calls must not be kept across a later arena_alloc().
+void *arena_alloc(arena *a, size_t size);
+
+// Release the backing buffer and reset the arena to the empty state.
+void arena_free(arena *a);
+
+#endif
diff --git a/include/csquare/error.h b/include/csquare/error.h
new file mode 100644
index 0000000..c07cacd
--- /dev/null
+++ b/include/csquare/error.h
@@ -0,0 +1,53 @@
+#ifndef ERROR_H
+#define ERROR_H
+
+#include <stddef.h>
+
+// Kind of error; ERROR_NONE means "no error".
+typedef enum {
+  ERROR_NONE,
+  SYNERR_UNKNOWN_CHARACTER,
+  SYNERR_UNTERMINATED_STRING
+} error_type;
+
+// Severity; print_error() uses it to pick the label and its colour.
+typedef enum { ERROR_LEVEL_WARNING, ERROR_LEVEL_ERROR } error_level;
+
+// Line/column position inside a source file.
+typedef struct {
+  int line;
+  int col;
+} file_pos;
+
+// Slice of text given as pointer + length (not necessarily NUL-terminated).
+typedef struct {
+  const char *ptr;
+  size_t len;
+} text_span;
+
+// Everything print_error() needs to render one diagnostic.
+typedef struct {
+  const char *message; // copy made by new_error_info(); release with free()
+  error_type type;
+  error_level level;
+
+  const char *file; // file name shown in the location line
+  file_pos pos;
+
+  text_span line;      // the offending source line, without its '\n'
+  text_span highlight; // the part of line to underline
+} error_info;
+
+// Build an error_info. msg is duplicated with strdup(); file and line_str are
+// stored as given. line_str points at the start of the offending line, and
+// highlight_start / highlight_len select the span to underline, counted from
+// line_str.
+error_info new_error_info(const char *msg, error_type type, error_level level,
+                          const char *file, int line, int col,
+                          const char *line_str, int highlight_start,
+                          int highlight_len);
+
+// Print the diagnostic, its source line and a caret underline to stdout.
+void print_error(const error_info *err);
+
+#endif
diff --git a/include/csquare/lexer/lexer.h b/include/csquare/lexer/lexer.h
index 16a9c6a..339f47d 100644
--- a/include/csquare/lexer/lexer.h
+++ b/include/csquare/lexer/lexer.h
@@ -1,6 +1,7 @@
 #ifndef _LEXER_H
 #define _LEXER_H
 
+#include "csquare/error.h"
 #include
 #include
 #include
@@ -93,13 +94,20 @@ struct token {
   const char *start;
   int length;
   token_type type;
+
+  int line;           // line where the token starts, counting from 1
+  int col;            // column where the token starts, counting from 1
+  error_type errtype; // ERROR_NONE unless the token was made by error_token()
+  const char *errmsg; // diagnostic text; "" unless made by error_token()
 };
 
 typedef struct token token;
 
-token *new_token(const char *start, int length, token_type type);
+token *new_token(const char *start, int length, token_type type, int line,
+                 int col);
 void free_token(token *tk);
-token *error_token(const char *msg);
+token *error_token(const char *msg, const char *src, int len, int line, int col,
+                   error_type errtype);
 
 typedef struct {
   token **tokens;
@@ -115,7 +123,7 @@ void add_token(token_list *list, token *tk);
 #define isdigit(c) (c >= '0' && c <= '9')
 #define isalpha(c) ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
 
-#define LEX_FUNC_ARGS const char *p, int *len
+#define LEX_FUNC_ARGS const char *p, int *len, int *line, int *col
 token *lex_symbol(LEX_FUNC_ARGS);
 token *lex_digit(LEX_FUNC_ARGS);
 token *lex_string(LEX_FUNC_ARGS);
diff --git a/include/csquare/parser/rules/literals.h b/include/csquare/parser/rules/literals.h
new file mode 100644
index 0000000..caa186b
--- /dev/null
+++ b/include/csquare/parser/rules/literals.h
@@ -0,0 +1,8 @@
+#ifndef RULES_LITERALS_H
+#define RULES_LITERALS_H
+
+#include "csquare/parser/parser.h"
+
+void parse_literal(parser *p, node *curr);
+
+#endif
diff --git a/src/csquare/arena.c b/src/csquare/arena.c
new file mode 100644
index 0000000..905ca1f
--- /dev/null
+++ b/src/csquare/arena.c
@@ -0,0 +1,67 @@
+#include "csquare/arena.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+// Allocation sizes are rounded up to a multiple of this many bytes.
+#define ARENA_ALIGN ((size_t)8)
+// Capacity given to an empty arena on its first allocation.
+#define ARENA_INITIAL_CAP ((size_t)1024)
+
+// Reserve size bytes from the arena and return a pointer to them.
+//
+// The size is rounded up to a multiple of ARENA_ALIGN. When the buffer is too
+// small its capacity is doubled until the request fits, then the buffer is
+// resized with realloc().
+//
+// NOTE(review): realloc() may move the buffer, so pointers returned by earlier
+// calls can dangle once the arena grows. Keeping them valid needs a list of
+// fixed-size chunks, which would change the arena struct.
+//
+// Aborts the process when memory cannot be obtained or when the request would
+// overflow size_t.
+void *arena_alloc(arena *a, size_t size) {
+  // Neither the rounding nor the new offset may wrap around.
+  if (size > SIZE_MAX - (ARENA_ALIGN - 1)) {
+    abort();
+  }
+  size = (size + (ARENA_ALIGN - 1)) & ~(ARENA_ALIGN - 1);
+  if (size > SIZE_MAX - a->offset) {
+    abort();
+  }
+
+  size_t needed = a->offset + size;
+  if (needed > a->cap) {
+    size_t new_cap = a->cap ? a->cap : ARENA_INITIAL_CAP;
+
+    while (new_cap < needed) {
+      // Doubling would wrap: settle for exactly what is needed.
+      if (new_cap > SIZE_MAX / 2) {
+        new_cap = needed;
+        break;
+      }
+      new_cap *= 2;
+    }
+
+    uint8_t *new_mem = realloc(a->mem, new_cap);
+    if (!new_mem) {
+      abort();
+    }
+
+    a->mem = new_mem;
+    a->cap = new_cap;
+  }
+
+  void *ptr = a->mem + a->offset;
+  a->offset = needed;
+
+  return ptr;
+}
+
+// Release the backing buffer and reset the arena to the empty state.
+void arena_free(arena *a) {
+  free(a->mem);
+  a->mem = NULL;
+  a->cap = 0;
+  a->offset = 0;
+}
diff --git a/src/csquare/error.c b/src/csquare/error.c
new file mode 100644
index 0000000..a86a6cf
--- /dev/null
+++ b/src/csquare/error.c
@@ -0,0 +1,90 @@
+#include "csquare/error.h"
+
+#include <stdio.h>
+#include <string.h>
+
+// Build an error_info for a diagnostic at file:line:col.
+//
+// msg is copied with strdup(); the caller releases the copy with
+// free((void *)info.message). file and line_str are stored as given. line_str
+// points at the first character of the offending line, which ends at the first
+// '\n' or NUL. highlight_start and highlight_len select the span to underline,
+// counted from line_str; the span is clamped so that it never leaves the line.
+error_info new_error_info(const char *msg, error_type type, error_level level,
+                          const char *file, int line, int col,
+                          const char *line_str, int highlight_start,
+                          int highlight_len) {
+  if (!line_str)
+    line_str = "";
+
+  size_t line_len = 0;
+  while (line_str[line_len] && line_str[line_len] != '\n')
+    line_len++;
+
+  // A span may start outside the line or run past its end, so clamp it
+  // instead of trusting the caller.
+  size_t hl_start = highlight_start > 0 ? (size_t)highlight_start : 0;
+  if (hl_start > line_len)
+    hl_start = line_len;
+  size_t hl_len = highlight_len > 0 ? (size_t)highlight_len : 0;
+  if (hl_len > line_len - hl_start)
+    hl_len = line_len - hl_start;
+
+  error_info e;
+  e.message = strdup(msg ? msg : "");
+  e.type = type;
+  e.level = level;
+  e.file = file;
+  e.pos = (file_pos){line, col};
+  e.line = (text_span){line_str, line_len};
+  e.highlight = (text_span){line_str + hl_start, hl_len};
+  return e;
+}
+
+#define CRED "\x1b[31m"
+#define CYELLOW "\x1b[33m"
+#define CCYAN "\x1b[36m"
+#define CBLUE "\x1b[34m"
+#define CRESET "\x1b[0m"
+
+// Print the diagnostic to stdout: a coloured "error:" / "warning:" header, the
+// file:line:col location, the source line with the highlighted span in red,
+// and a row of '^' under that span.
+void print_error(const error_info *e) {
+  const int is_error = (e->level == ERROR_LEVEL_ERROR);
+  const char *text = e->line.ptr;
+
+  // Split the line into before / marked / after. Each length is clamped to the
+  // line so that a span reaching outside it can neither underflow a size_t nor
+  // make fwrite() read past the end of the line.
+  size_t before = (size_t)(e->highlight.ptr - text);
+  if (before > e->line.len)
+    before = e->line.len;
+  size_t marked = e->highlight.len;
+  if (marked > e->line.len - before)
+    marked = e->line.len - before;
+  size_t after = e->line.len - before - marked;
+
+  printf("%s%s:%s %s\n", is_error ? CRED : CYELLOW,
+         is_error ? "error" : "warning", CRESET,
+         e->message ? e->message : "");
+  printf(" --> %s:%d:%d\n", e->file, e->pos.line, e->pos.col);
+  printf("     |\n");
+
+  printf("%4d | ", e->pos.line);
+  fwrite(text, 1, before, stdout);
+  fputs(CRED, stdout);
+  fwrite(text + before, 1, marked, stdout);
+  fputs(CRESET, stdout);
+  fwrite(text + before + marked, 1, after, stdout);
+  putchar('\n');
+
+  // The gutter is as wide as "%4d | " so the carets sit under the span.
+  printf("     | ");
+  for (size_t i = 0; i < before; i++)
+    putchar(text[i] == '\t' ? '\t' : ' '); // keep tabs so columns still match
+  fputs(CRED, stdout);
+  for (size_t i = 0; i < marked; i++)
+    putchar('^');
+  fputs(CRESET "\n", stdout);
+}
NOTE(review): in the context lines of the patches below, the header names of
the `#include <...>` directives were lost in extraction and are left as found;
restore them from the original tree before applying.

diff --git a/src/lexer/lex_digit.c b/src/lexer/lex_digit.c
index 64c5d84..2d0b2ea 100644
--- a/src/lexer/lex_digit.c
+++ b/src/lexer/lex_digit.c
@@ -1,7 +1,7 @@
 #include "csquare/lexer/lexer.h"
 #include
 
-token *lex_digit(const char *p, int *len) {
+token *lex_digit(const char *p, int *len, int *line, int *col) {
   const char *start = p;
   token_type type = T_DECIMAL;
   bool has_dot = false;
@@ -42,5 +42,6 @@ token *lex_digit(const char *p, int *len) {
   }
 
   *len = (int)(p - start);
-  return new_token(start, *len, type);
+  *col = (*col) + (*len);
+  return new_token(start, *len, type, *line, *col - *len);
 }
diff --git a/src/lexer/lex_ident.c b/src/lexer/lex_ident.c
index 772678a..c4b837f 100644
--- a/src/lexer/lex_ident.c
+++ b/src/lexer/lex_ident.c
@@ -38,7 +38,7 @@ const struct {
 
 int keyword_count = sizeof(keyword_table) / sizeof(keyword_table[0]);
 
-token *lex_ident(const char *p, int *len) {
+token *lex_ident(const char *p, int *len, int *line, int *col) {
   char buf[64];
   int bufi = 0;
   token_type type = T_IDENTIFIER;
@@ -76,5 +76,6 @@ token *lex_ident(const char *p, int *len) {
   const char *start = p - bufi;
 
   *len = bufi;
-  return new_token(start, bufi, type);
+  *col = (*col) + (*len);
+  return new_token(start, bufi, type, *line, *col - *len);
 }
diff --git a/src/lexer/lex_string.c b/src/lexer/lex_string.c
index 561c037..ae4622a 100644
--- a/src/lexer/lex_string.c
+++ b/src/lexer/lex_string.c
@@ -1,6 +1,7 @@
+#include "csquare/error.h"
 #include "csquare/lexer/lexer.h"
 
-token *lex_string(const char *p, int *len) {
+token *lex_string(const char *p, int *len, int *line, int *col) {
   const char *start = p;
   char delim = *p;
   p++;
@@ -13,16 +14,21 @@ token *lex_string(const char *p, int *len) {
 
   if (*p != delim) {
     *len = (int)(p - start);
+    *col = (*col) + (*len);
 
     const char *prefix = "Unterminated string: ";
     char *msg = malloc(strlen(prefix) + *len + 1);
     sprintf(msg, "Unterminated string: %.*s", *len, start);
 
-    return error_token(msg);
+    // Anchor the token at the opening quote: p is where scanning stopped and
+    // *col has already been moved past the consumed text.
+    return error_token(msg, start, *len, *line, *col - *len,
+                       SYNERR_UNTERMINATED_STRING);
   }
 
   p++;
 
   *len = (int)(p - start);
-  return new_token(start, *len, T_STRING);
+  *col = (*col) + (*len);
+  return new_token(start, *len, T_STRING, *line, *col - *len);
 }
diff --git a/src/lexer/lex_symbol.c b/src/lexer/lex_symbol.c
index 23a04f0..14b503d 100644
--- a/src/lexer/lex_symbol.c
+++ b/src/lexer/lex_symbol.c
@@ -18,7 +18,7 @@ const struct {
 
 int symbol_count = sizeof(symbol_table) / sizeof(symbol_table[0]);
 
-token *lex_symbol(const char *p, int *len) {
+token *lex_symbol(const char *p, int *len, int *line, int *col) {
   int best_len = 0;
   token_type best_type = T_ERROR;
 
@@ -33,8 +33,12 @@ token *lex_symbol(const char *p, int *len) {
   if (best_len == 0) {
     best_len = 1;
     best_type = T_ERROR;
+    *len = best_len;
+    *col = (*col) + (*len);
+    return NULL;
   }
 
   *len = best_len;
-  return new_token(p, best_len, best_type);
+  *col = (*col) + (*len);
+  return new_token(p, best_len, best_type, *line, *col - *len);
 }
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
index 8ba596c..fd268ee 100644
--- a/src/lexer/lexer.c
+++ b/src/lexer/lexer.c
@@ -1,4 +1,5 @@
 #include "csquare/lexer/lexer.h"
+#include "csquare/error.h"
 #include
 #include
 #include
@@ -7,47 +8,48 @@
 
 const char *token_type_str[T__COUNT] = {TOKEN_TYPES};
 
-token *new_token(const char *start, int length, token_type type) {
+// Allocate a token for the length bytes at start. The text is not copied, so
+// the source buffer must outlive the token. Returns NULL if malloc() fails.
+token *new_token(const char *start, int length, token_type type, int line,
+                 int col) {
   token *tk = malloc(sizeof(token));
   if (!tk) {
     perror("malloc failed");
     return NULL;
   }
 
-  tk->start = strdup(start);
+  tk->start = start;
   tk->length = length;
   tk->type = type;
+  tk->line = line;
+  tk->col = col;
+  tk->errtype = ERROR_NONE;
+  tk->errmsg = "";
   return tk;
 }
 
 void free_token(token *tk) {
   if (!tk)
     return;
-
+  // Only error_token() sets errtype, and it owns a heap-allocated message.
+  if (tk->errtype != ERROR_NONE)
+    free((void *)tk->errmsg);
   free(tk);
 }
 
-token *emit(const char *src, int starti, int endi, token_type type) {
-  if (starti < 0 || endi < starti) {
-    return NULL;
-  }
-
-  int len = endi - starti;
-  char *start = malloc(len + 1);
-  if (!start) {
-    return NULL;
-  }
-
-  for (int i = 0; i < len; i++) {
-    start[i] = src[starti + i];
-  }
-  start[len] = '\0';
-
-  return new_token(start, len, type);
-}
-
-token *error_token(const char *msg) {
-  return new_token(msg, strlen(msg), T_ERROR);
+// Create a T_ERROR token covering len bytes at src. Takes ownership of msg,
+// which must be heap-allocated: free_token() releases it. Returns NULL, after
+// freeing msg, if the token cannot be allocated.
+token *error_token(const char *msg, const char *src, int len, int line, int col,
+                   error_type errtype) {
+  token *tk = new_token(src, len, T_ERROR, line, col);
+  if (!tk) {
+    free((void *)msg);
+    return NULL;
+  }
+  tk->errtype = errtype;
+  tk->errmsg = msg;
+  return tk;
 }
 
 void free_token_list(token_list *list) {
@@ -83,10 +85,19 @@ token_list *lex(const char *src) {
 
   char *p = (char *)src;
 
+  int line = 1;
+  int col = 1;
   while (*p) {
     char c = *p;
 
     if (isws(c)) {
+      // Whitespace is consumed here, so the position is advanced here too.
+      if (c == '\n' /* || c == '\r'*/) {
+        line++;
+        col = 1;
+      } else {
+        col++;
+      }
       p++;
       continue;
     }
@@ -109,30 +120,42 @@ token_list *lex(const char *src) {
     }
 
     int consumed = 0;
     token *tk = NULL;
 
     if (isdigit(c)) {
-      tk = lex_digit(p, &consumed);
+      tk = lex_digit(p, &consumed, &line, &col);
     } else if (isalpha(c) || c == '_') {
-      tk = lex_ident(p, &consumed);
+      tk = lex_ident(p, &consumed, &line, &col);
     } else if (c == '"' || c == '\'') {
-      tk = lex_string(p, &consumed);
+      tk = lex_string(p, &consumed, &line, &col);
     } else {
-      tk = lex_symbol(p, &consumed);
+      tk = lex_symbol(p, &consumed, &line, &col);
     }
+    if (!tk) {
+      // The sub-lexer produced no token (lex_symbol() does this for a character
+      // it cannot match). It has already advanced col by consumed, so step
+      // back to report the column where the character starts.
+      char buf[32];
+      snprintf(buf, sizeof buf, "unknown character \x1b[32m'%c'\x1b[0m", c);
+      tk = error_token(strdup(buf), p, 1, line, col - consumed,
+                       SYNERR_UNKNOWN_CHARACTER);
+    }
 
     p += consumed;
     add_token(list, tk);
   }
 
-  add_token(list, new_token(p, 0, T_EOF));
+  add_token(list, new_token(p, 0, T_EOF, line, col));
   return list;
 }
 
 void print_token(token *tk) {
   const char *type_color = "\x1b[32m";
-  if (tk->type == T_ERROR)
+  int print_errmsg = 0;
+  if (tk->type == T_ERROR) {
     type_color = "\x1b[31m";
+    print_errmsg = 1;
+  }
 
   printf("Text: \x1b[33m");
 
@@ -144,5 +167,9 @@ void print_token(token *tk) {
     putchar(c);
   }
 
-  printf("\x1b[0m, Type: %s%s\x1b[0m\n", type_color, token_type_str[tk->type]);
+  printf("\x1b[0m, Type: %s%s\x1b[0m", type_color, token_type_str[tk->type]);
+  if (print_errmsg) {
+    printf(", Error message: \x1b[31m%s\x1b[0m", tk->errmsg);
+  }
+  printf("\n");
 }
diff --git a/src/main.c b/src/main.c
index b86b711..cd4bc80 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,3 +1,4 @@
+#include "csquare/error.h"
 #include "csquare/lexer/lexer.h"
 #include "csquare/opt-common.h"
 #include
@@ -31,6 +32,33 @@ char *read_file(const char *filename) {
   return buffer;
 }
 
+// Return a pointer to the first character of line target_line (1-based) of
+// src and store its length, without the '\n', in *out_len when out_len is
+// non-NULL. An empty last line (text ending in '\n') counts as a line.
+// Returns NULL and stores 0 when src has fewer lines or target_line < 1.
+const char *get_line(const char *src, int target_line, size_t *out_len) {
+  if (out_len)
+    *out_len = 0;
+  if (!src || target_line < 1)
+    return NULL;
+
+  const char *p = src;
+  for (int line = 1; line < target_line; line++) {
+    while (*p && *p != '\n')
+      p++;
+    if (!*p)
+      return NULL; // the text ends before target_line starts
+    p++;           // step over the '\n'
+  }
+
+  size_t len = 0;
+  while (p[len] && p[len] != '\n')
+    len++;
+  if (out_len)
+    *out_len = len;
+  return p;
+}
+
 int main(int argc, char *argv[]) {
   csq_options *opts = options_parse(argc, argv);
   if (!opts)
@@ -41,14 +69,36 @@ int main(int argc, char *argv[]) {
     return EXIT_FAILURE;
   }
 
-  char *src = read_file(argv[1]);
+  const char *filename = argv[1];
+  char *src = read_file(filename);
   if (!src)
     return EXIT_FAILURE;
 
   token_list *lexed = lex(src);
 
   for (size_t i = 0; i < lexed->count; i++) {
-    print_token(lexed->tokens[i]);
+    token *tk = lexed->tokens[i];
+    if (!tk || tk->type != T_ERROR)
+      continue;
+
+    printf("\n");
+    const char *line = get_line(src, tk->line, NULL);
+
+    // With no source line there is nothing to underline, and subtracting
+    // pointers into different objects is undefined, so use an empty span.
+    int highlight_start = 0;
+    int highlight_len = 0;
+    if (line) {
+      highlight_start = (int)(tk->start - line);
+      highlight_len = tk->length;
+    } else {
+      line = "";
+    }
+
+    error_info e = new_error_info(tk->errmsg, tk->errtype, ERROR_LEVEL_ERROR,
+                                  filename, tk->line, tk->col, line,
+                                  highlight_start, highlight_len);
+    print_error(&e);
+    free((void *)e.message); // new_error_info() made a private copy
   }
 
   free_token_list(lexed);