/* Copyright 2005-2006 Garrett Rooney. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include /* for abort */ #include #include "lexer.h" #include "etl_stream.h" #define BSIZE 8192 #define MAX_TEXT_BYTES 1024 struct etl_template_lexer_t { etl_stream_t *stream; /* the stream we're reading from */ /* bookkeeping data for the re2c lexer */ char *buffer; char *cursor; char *limit; char *marker; char *tstart; char *string_start; char *string_end; /* bookkeeping data for the stored up text */ char text_buffer[MAX_TEXT_BYTES]; int ntext_bytes; int saved_bytes; int pending_token; /* next token after a text node, saved to return later */ bool eof; /* have we hit the end of the input? */ int in_directive; /* are we inside a etl directive? */ int ret_print_next; /* should we return a print as our next token? */ int in_string; /* are we inside a string? */ int line; /* the current line in the input. */ }; etl_template_lexer_t * etl_template_lexer_create(etl_stream_t *stream) { etl_template_lexer_t *l = calloc(1, sizeof(*l)); l->stream = stream; l->buffer = calloc(1, BSIZE); /* Line numbers in source generally start at 1, so that's what we do. */ l->line = 1; return l; } void etl_template_lexer_destroy(etl_template_lexer_t *l) { if (! l) return; etl_stream_destroy(l->stream); free(l->buffer); free(l); } static etl_error_t * fill(etl_template_lexer_t *l) { if (l->cursor == l->limit) { size_t len = BSIZE; bool eof = 0; ETL_ERR(etl_stream_readline(l->stream, l->buffer, &len, &eof)); if (eof) { l->eof = true; } if (len == 0) { l->buffer[0] = '\n'; l->buffer[1] = '\0'; l->limit = l->buffer + 1; } else { l->limit = l->buffer + len; } l->cursor = l->marker = l->tstart = l->buffer; } return ETL_SUCCESS; } static char * lexer_text(etl_template_lexer_t *lexer) { if (lexer->string_start && lexer->string_end) { size_t len = lexer->string_end - lexer->string_start; int seen_backslash = 0; size_t i, idx = 0; char *str; if (lexer->string_end - lexer->string_start < 0) return NULL; str = calloc(1, len + 1); for (i = 0; i < len; ++i) { if (seen_backslash) { if (lexer->string_start[i] == '\\') str[idx] = '\\'; else str[idx] = lexer->string_start[i]; idx++; seen_backslash = 0; } else { if (lexer->string_start[i] == '\\') seen_backslash = 1; else { str[idx] = lexer->string_start[i]; idx++; } } } lexer->string_start = lexer->string_end = 0; return str; } else if (lexer->saved_bytes) { char *str = calloc(1, lexer->saved_bytes + 1); memcpy(str, lexer->text_buffer, lexer->saved_bytes); lexer->saved_bytes = 0; return str; } else { size_t len = lexer->cursor - lexer->tstart; char *str = calloc(1, len + 1); memcpy(str, lexer->tstart, len); return str; } } /* Defines needed for re2c. */ #define YYCTYPE char #define YYCURSOR l->cursor #define YYLIMIT l->limit #define YYMARKER l->marker #define YYFILL(n) { ETL_ERR(fill(l)); } /* A little shorthand... */ #define RET(i) do { \ if (l->ntext_bytes) \ { \ l->pending_token = i; \ (*t)->type = ETL_TMPL_TOK_TEXT; \ l->saved_bytes = l->ntext_bytes; \ l->ntext_bytes = 0; \ } \ else \ { \ (*t)->type = i; \ } \ (*t)->data = lexer_text(l); \ return ETL_SUCCESS; \ } while (0) etl_error_t * etl_template_lexer_scan(etl_token_t **t, etl_template_lexer_t *l) { *t = calloc(1, sizeof(**t)); (*t)->line = l->line; if (l->pending_token) { (*t)->type = l->pending_token; l->pending_token = 0; (*t)->data = lexer_text(l); return ETL_SUCCESS; } start: l->tstart = l->cursor; if (! l->in_directive) { /*!re2c identifier = [a-zA-Z_\200-\377]*; integer = [0-9]*; any = [\000-\377]; nl = "\r"? "\n"; ws = [ \t\r]; "\\\\" { l->tstart++; /* Skip one of them... */ goto text; } "\\[%" { /* I don't really understand why this is needed... */ YYCURSOR--; goto text; } "\\" { l->tstart++; } "[%=" { l->in_directive = 1; l->ret_print_next = 1; RET(ETL_TMPL_TOK_START_DIR); } ws* "[%-" { l->in_directive = 1; RET(ETL_TMPL_TOK_START_DIR); } "[%" { l->in_directive = 1; RET(ETL_TMPL_TOK_START_DIR); } nl { if (l->eof) RET(ETL_TMPL_TOK_EOI); else { l->line++; goto text; } } any { text: l->text_buffer[l->ntext_bytes] = *(YYCURSOR - 1); l->ntext_bytes++; if (l->ntext_bytes == MAX_TEXT_BYTES) { (*t)->type = ETL_TMPL_TOK_TEXT; l->saved_bytes = l->ntext_bytes; l->ntext_bytes = 0; (*t)->data = lexer_text(l); return ETL_SUCCESS; } else goto start; } */ } else { if (l->ret_print_next) { l->ret_print_next = 0; RET(ETL_TMPL_TOK_PRINT); } if (l->in_string) { /*!re2c "\\\"" { goto start; } "\"" { l->in_string = 0; l->string_end = l->cursor - 1; RET(ETL_TMPL_TOK_STRING); } nl { if (l->eof) RET(ETL_TMPL_TOK_EOI); else { l->line++; goto string; } } any { string: goto start; } */ } else { /*!re2c "\"" { l->in_string = 1; l->string_start = l->cursor; goto start; } "-%]" ws* { l->in_directive = 0; RET(ETL_TMPL_TOK_END_DIR); } "-%]" ws* nl { l->line++; l->in_directive = 0; RET(ETL_TMPL_TOK_END_DIR); } "%]" { l->in_directive = 0; RET(ETL_TMPL_TOK_END_DIR); } "," { RET(ETL_TMPL_TOK_COMMA); } "\." { RET(ETL_TMPL_TOK_DOT); } "if" { RET(ETL_TMPL_TOK_IF); } "in" { RET(ETL_TMPL_TOK_IN); } "to" { RET(ETL_TMPL_TOK_TO); } "unless" { RET(ETL_TMPL_TOK_UNLESS); } "end" { RET(ETL_TMPL_TOK_END); } "for" { RET(ETL_TMPL_TOK_FOR); } "else" { RET(ETL_TMPL_TOK_ELSE); } "print" { RET(ETL_TMPL_TOK_PRINT); } "macro" { RET(ETL_TMPL_TOK_MACRO); } "include" { RET(ETL_TMPL_TOK_INCLUDE); } "==" { RET(ETL_TMPL_TOK_EQ); } "!=" { RET(ETL_TMPL_TOK_NEQ); } "<" { RET(ETL_TMPL_TOK_LT); } ">" { RET(ETL_TMPL_TOK_GT); } "<=" { RET(ETL_TMPL_TOK_LTEQ); } ">=" { RET(ETL_TMPL_TOK_GTEQ); } "%" { RET(ETL_TMPL_TOK_MOD); } "+" { RET(ETL_TMPL_TOK_PLUS); } "-" { RET(ETL_TMPL_TOK_MINUS); } "*" { RET(ETL_TMPL_TOK_TIMES); } "/" { RET(ETL_TMPL_TOK_DIV); } "|" { RET(ETL_TMPL_TOK_PIPE); } "[" { RET(ETL_TMPL_TOK_OPEN_BRACKET); } "]" { RET(ETL_TMPL_TOK_CLOSE_BRACKET); } "(" { RET(ETL_TMPL_TOK_OPEN_PAREN); } ")" { RET(ETL_TMPL_TOK_CLOSE_PAREN); } ws { goto start; } integer { RET(ETL_TMPL_TOK_INTEGER); } identifier { RET(ETL_TMPL_TOK_VAR); } nl { if (l->eof) RET(ETL_TMPL_TOK_EOI); else { l->line++; goto start; } } any { /* We don't understand it, so... */ return etl_error_create(ETL_EINVAL, "Invalid input"); } */ } } abort(); } void etl_template_token_destroy(etl_token_t *tok) { if (! tok) return; free(tok->data); free(tok); }