diff options
| author | Joel Martin <github@martintribe.org> | 2014-03-24 16:32:24 -0500 |
|---|---|---|
| committer | Joel Martin <github@martintribe.org> | 2014-03-24 16:32:24 -0500 |
| commit | 3169070063b2cb877200117ebb384269d73bcb93 (patch) | |
| tree | 23de3db1ea5c37afd21a45b6ed7771f56a08c0c4 /c/reader.c | |
| download | mal-3169070063b2cb877200117ebb384269d73bcb93.tar.gz mal-3169070063b2cb877200117ebb384269d73bcb93.zip | |
Current state of mal for Clojure West lighting talk.
Diffstat (limited to 'c/reader.c')
| -rw-r--r-- | c/reader.c | 285 |
1 files changed, 285 insertions, 0 deletions
diff --git a/c/reader.c b/c/reader.c new file mode 100644 index 0000000..044bb84 --- /dev/null +++ b/c/reader.c @@ -0,0 +1,285 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <glib/gregex.h> +#include <glib-object.h> + +#include "types.h" +#include "reader.h" + +// Declare +MalVal *read_form(Reader *reader); + +Reader *reader_new() { + Reader *reader = (Reader*)malloc(sizeof(Reader)); + reader->array = g_array_sized_new(TRUE, FALSE, sizeof(char *), 8); + reader->position = 0; + return reader; +} + +int reader_append(Reader *reader, char* token) { + g_array_append_val(reader->array, token); + return TRUE; +} + +char *reader_peek(Reader *reader) { + return g_array_index(reader->array, char*, reader->position); +} + +char *reader_next(Reader *reader) { + if (reader->position >= reader->array->len) { + return NULL; + } else { + return g_array_index(reader->array, char*, reader->position++); + } +} + +void reader_free(Reader *reader) { + int i; + for(i=0; i < reader->array->len; i++) { + free(g_array_index(reader->array, char*, i)); + } + g_array_free(reader->array, TRUE); + free(reader); +} + +Reader *tokenize(char *line) { + GRegex *regex; + GMatchInfo *matchInfo; + GError *err = NULL; + + Reader *reader = reader_new(); + + regex = g_regex_new ("[\\s ,]*(~@|[\\[\\]{}()'`~@]|\"(?:[\\\\].|[^\\\\\"])*\"|;.*|[^\\s \\[\\]{}()'\"`~@,;]*)", 0, 0, &err); + g_regex_match (regex, line, 0, &matchInfo); + + if (err != NULL) { + fprintf(stderr, "Tokenize error: %s\n", err->message); + return NULL; + } + + while (g_match_info_matches(matchInfo)) { + gchar *result = g_match_info_fetch(matchInfo, 1); + if (result[0] != '\0' && result[0] != ';') { + reader_append(reader, result); + } + g_match_info_next(matchInfo, &err); + } + g_match_info_free(matchInfo); + g_regex_unref(regex); + if (reader->array->len == 0) { + reader_free(reader); + return NULL; + } else { + return reader; + } +} + + +// From http://creativeandcritical.net/str-replace-c/ - Laird Shaw +char *replace_str(const char *str, const char *old, const char *new) +{ + char *ret, *r; + const char *p, *q; + size_t oldlen = strlen(old); + size_t count, retlen, newlen = strlen(new); + + if (oldlen != newlen) { + for (count = 0, p = str; (q = strstr(p, old)) != NULL; p = q + oldlen) + count++; + /* this is undefined if p - str > PTRDIFF_MAX */ + retlen = p - str + strlen(p) + count * (newlen - oldlen); + } else + retlen = strlen(str); + + if ((ret = malloc(retlen + 1)) == NULL) + return NULL; + + for (r = ret, p = str; (q = strstr(p, old)) != NULL; p = q + oldlen) { + /* this is undefined if q - p > PTRDIFF_MAX */ + ptrdiff_t l = q - p; + memcpy(r, p, l); + r += l; + memcpy(r, new, newlen); + r += newlen; + } + strcpy(r, p); + + return ret; +} + + +MalVal *read_atom(Reader *reader) { + char *token; + GRegex *regex; + GMatchInfo *matchInfo; + GError *err = NULL; + gint pos; + MalVal *atom; + + token = reader_next(reader); + //g_print("read_atom token: %s\n", token); + + regex = g_regex_new ("(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^\"(.*)\"$|(^[^\"]*$)", 0, 0, &err); + g_regex_match (regex, token, 0, &matchInfo); + + if (g_match_info_fetch_pos(matchInfo, 1, &pos, NULL) && pos != -1) { + //g_print("read_atom integer\n"); + atom = malval_new_integer(g_ascii_strtoll(token, NULL, 10)); + } else if (g_match_info_fetch_pos(matchInfo, 2, &pos, NULL) && pos != -1) { + //g_print("read_atom float\n"); + atom = malval_new_float(g_ascii_strtod(token, NULL)); + } else if (g_match_info_fetch_pos(matchInfo, 3, &pos, NULL) && pos != -1) { + //g_print("read_atom nil\n"); + atom = &mal_nil; + } else if (g_match_info_fetch_pos(matchInfo, 4, &pos, NULL) && pos != -1) { + //g_print("read_atom true\n"); + atom = &mal_true; + } else if (g_match_info_fetch_pos(matchInfo, 5, &pos, NULL) && pos != -1) { + //g_print("read_atom false\n"); + atom = &mal_false; + } else if (g_match_info_fetch_pos(matchInfo, 6, &pos, NULL) && pos != -1) { + //g_print("read_atom string: %s\n", token); + char *str_tmp = replace_str(g_match_info_fetch(matchInfo, 6), "\\\"", "\""); + atom = malval_new_string(str_tmp); + } else if (g_match_info_fetch_pos(matchInfo, 7, &pos, NULL) && pos != -1) { + //g_print("read_atom symbol\n"); + atom = malval_new_symbol(g_match_info_fetch(matchInfo, 7)); + } else { + malval_free(atom); + atom = NULL; + } + return atom; +} + +MalVal *read_list(Reader *reader, MalType type, char start, char end) { + MalVal *ast, *form; + char *token = reader_next(reader); + //g_print("read_list start token: %s\n", token); + if (token[0] != start) { abort("expected '(' or '['"); } + + ast = malval_new_list(type, g_array_new(TRUE, TRUE, sizeof(MalVal*))); + + while ((token = reader_peek(reader)) && + token[0] != end) { + //g_print("read_list internal token %s\n", token); + form = read_form(reader); + if (!form) { + if (!mal_error) { abort("unknown read_list failure"); } + g_array_free(ast->val.array, TRUE); + malval_free(ast); + return NULL; + } + g_array_append_val(ast->val.array, form); + } + if (!token) { abort("expected ')' or ']', got EOF"); } + reader_next(reader); + //g_print("read_list end token: %s\n", token); + return ast; +} + +MalVal *read_hash_map(Reader *reader) { + MalVal *lst = read_list(reader, MAL_LIST, '{', '}'); + MalVal *hm = hash_map(lst); + malval_free(lst); + return hm; +} + + +MalVal *read_form(Reader *reader) { + char *token; + MalVal *form = NULL, *tmp; + +// while(token = reader_next(reader)) { +// printf("token: %s\n", token); +// } +// return NULL; + + token = reader_peek(reader); + + if (!token) { return NULL; } + //g_print("read_form token: %s\n", token); + + switch (token[0]) { + case ';': + abort("comments not yet implemented"); + break; + case '\'': + reader_next(reader); + form = _list(2, malval_new_symbol("quote"), + read_form(reader)); + break; + case '`': + reader_next(reader); + form = _list(2, malval_new_symbol("quasiquote"), + read_form(reader)); + break; + case '~': + reader_next(reader); + if (token[1] == '@') { + form = _list(2, malval_new_symbol("splice-unquote"), + read_form(reader)); + } else { + form = _list(2, malval_new_symbol("unquote"), + read_form(reader)); + }; + break; + case '^': + reader_next(reader); + MalVal *meta = read_form(reader); + form = _list(3, malval_new_symbol("with-meta"), + read_form(reader), meta); + break; + case '@': + reader_next(reader); + form = _list(2, malval_new_symbol("deref"), + read_form(reader)); + break; + + + // list + case ')': + abort("unexpected ')'"); + break; + case '(': + form = read_list(reader, MAL_LIST, '(', ')'); + break; + + // vector + case ']': + abort("unexpected ']'"); + break; + case '[': + form = read_list(reader, MAL_VECTOR, '[', ']'); + break; + + // hash-map + case '}': + abort("unexpected '}'"); + break; + case '{': + form = read_hash_map(reader); + break; + + default: + form = read_atom(reader); + break; + } + return form; + +} + +MalVal *read_str (char *str) { + Reader *reader; + char *token; + MalVal *ast = NULL; + + reader = tokenize(str); + if (reader) { + ast = read_form(reader); + reader_free(reader); + } + + return ast; +} |
