/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Lexical analysis for genksyms.
 * Copyright 1996, 1997 Linux International.
 *
 * New implementation contributed by Richard Henderson <rth@tamu.edu>
 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
 *
 * Taken from Linux modutils 2.4.22.
 */

%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.  */
#define YY_DECL		static int yylex1(void)

%}

/* Identifiers; '$' is accepted as an identifier character.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal, decimal or hexadecimal, with an optional
   U, L, UL or LU suffix (either case).  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: a fraction with optional exponent, or digits with
   a mandatory exponent; either form may carry an F or L suffix.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, optionally prefixed with L.  A backslash
   consumes the character that follows it, so an escaped quote does not
   terminate the literal.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Two-character operators: one of ~ % ^ & * + = | < > / - followed by '=',
   plus && || -> << and >>.  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%

 /* Keep track of our location in the original source files.  A line of
    the form  # <integer> "<name>" ...  is handed to the second stage as
    FILENAME; any other '#' line and every bare newline just advance
    cur_line.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;

{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

"..."					return DOTS;

 /* All other tokens are single characters.  */
.					return yytext[0];

%%

/* Bring in the keyword recognizer.  */

#include "keywords.c"

/* Macros to append to our phrase collection list.  */

/*
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case, the only effect will be that the ABI checksums become
 * more volatile, which is acceptable. Also, such collisions are quite rare;
 * so far it was only observed in include/linux/telephony.h.
 */

/*
 * _APP(T, L) - append a copy of token text T, of length L, to the list.
 *
 * L + 1 bytes are copied, so T must have a terminating NUL at T[L].
 * The node currently held in next_node is filled in (it becomes cur_node),
 * and a fresh, still-empty node is allocated as the new next_node with its
 * ->next pointing back at the node just filled.  The macro relies on the
 * caller having cur_node and next_node in scope.
 */
#define _APP(T, L)	do {						\
		cur_node = next_node;					\
		next_node = xmalloc(sizeof(*next_node));		\
		next_node->next = cur_node;				\
		cur_node->string =					\
			memcpy(xmalloc((L) + 1), (T), (L) + 1);		\
		cur_node->tag =						\
			find_symbol(cur_node->string, SYM_ENUM_CONST, 1) ? \
			SYM_ENUM_CONST : SYM_NORMAL;			\
		cur_node->in_source_file = in_source_file;		\
	} while (0)

/* Append the token flex has just matched.  */
#define APP		_APP(yytext, yyleng)

/*
 * The second stage lexer.  Here we incorporate knowledge of the state
 * of the parser to tailor the tokens that are returned.
 *
 * Raw tokens come from yylex1().  Their text is appended to the node list
 * with APP, and, depending on lexstate, whole runs of raw tokens are
 * collapsed into a single *_PHRASE token.
 *
 * Returns 0 as soon as yylex1() returns 0 (yylval is left untouched in that
 * case).  Otherwise returns the token code, with yylval set to the address
 * of next_node->next, i.e. of the pointer to the most recently appended
 * node.
 */
int yylex(void)
{
	static enum {
		ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF,
		ST_TYPEOF_1, ST_BRACKET, ST_BRACE, ST_EXPRESSION,
		ST_STATIC_ASSERT,
	} lexstate = ST_NOTSTARTED;

	/*
	 * Both counters are set when struct/union/enum is seen and are
	 * decremented at "fini", i.e. once per token actually returned
	 * through that label (including the keyword itself).
	 */
	static int suppress_type_lookup, dont_want_brace_phrase;
	static struct string_list *next_node;
	/* Name from the first FILENAME marker seen; never replaced afterwards. */
	static char *source_file;

	/*
	 * count is the nesting depth inside a phrase.  It is an automatic
	 * variable, so it starts at 0 on every call and only survives
	 * across "goto repeat".
	 */
	int token, count = 0;
	struct string_list *cur_node;

	if (lexstate == ST_NOTSTARTED) {
		/* First call: create the initial empty tail node. */
		next_node = xmalloc(sizeof(*next_node));
		next_node->next = NULL;
		lexstate = ST_NORMAL;
	}

repeat:
	token = yylex1();

	if (token == 0)
		return 0;
	else if (token == FILENAME) {
		char *file, *e;

		/* Save the filename and line number for later error messages. */
		free(cur_filename);	/* free(NULL) is a no-op */

		/*
		 * The FILENAME pattern guarantees two double quotes in
		 * yytext, so neither strchr() can fail here.  The closing
		 * quote is overwritten in place to terminate the name.
		 */
		file = strchr(yytext, '\"') + 1;
		e = strchr(file, '\"');
		*e = '\0';
		cur_filename = memcpy(xmalloc(e - file + 1), file, e - file + 1);
		cur_line = atoi(yytext + 2);

		if (!source_file) {
			source_file = xstrdup(cur_filename);
			in_source_file = 1;
		} else {
			in_source_file = (strcmp(cur_filename, source_file) == 0);
		}

		/* Markers are never passed on to the parser. */
		goto repeat;
	}

	switch (lexstate) {
	case ST_NORMAL:
		switch (token) {
		case IDENT:
			APP;
			{
				int r = is_reserved_word(yytext, yyleng);
				if (r >= 0) {
					switch (token = r) {
					case ATTRIBUTE_KEYW:
						lexstate = ST_ATTRIBUTE;
						count = 0;
						goto repeat;
					case ASM_KEYW:
						lexstate = ST_ASM;
						count = 0;
						goto repeat;
					case TYPEOF_KEYW:
						lexstate = ST_TYPEOF;
						count = 0;
						goto repeat;
					case STRUCT_KEYW:
					case UNION_KEYW:
					case ENUM_KEYW:
						dont_want_brace_phrase = 3;
						suppress_type_lookup = 2;
						goto fini;
					case EXPORT_SYMBOL_KEYW:
						goto fini;
					case STATIC_ASSERT_KEYW:
						lexstate = ST_STATIC_ASSERT;
						count = 0;
						goto repeat;
					}
				}
				/*
				 * Reached for plain identifiers and for
				 * reserved words not handled above.
				 */
				if (!suppress_type_lookup) {
					if (find_symbol(yytext, SYM_TYPEDEF, 1))
						token = TYPE;
				}
			}
			break;

		case '[':
			APP;
			lexstate = ST_BRACKET;
			count = 1;
			goto repeat;

		case '{':
			APP;
			/*
			 * While the counter is non-zero, '{' is returned as
			 * a plain token instead of starting a BRACE_PHRASE.
			 */
			if (dont_want_brace_phrase)
				break;
			lexstate = ST_BRACE;
			count = 1;
			goto repeat;

		case '=':
		case ':':
			/*
			 * The '=' or ':' itself is returned now; the
			 * following calls gather the expression.
			 */
			APP;
			lexstate = ST_EXPRESSION;
			break;

		default:
			APP;
			break;
		}
		break;

	case ST_ATTRIBUTE:
		/* Swallow everything up to the matching ')'. */
		APP;
		switch (token) {
		case '(':
			++count;
			goto repeat;
		case ')':
			if (--count == 0) {
				lexstate = ST_NORMAL;
				token = ATTRIBUTE_PHRASE;
				break;
			}
			goto repeat;
		default:
			goto repeat;
		}
		break;

	case ST_ASM:
		/* Swallow everything up to the matching ')'. */
		APP;
		switch (token) {
		case '(':
			++count;
			goto repeat;
		case ')':
			if (--count == 0) {
				lexstate = ST_NORMAL;
				token = ASM_PHRASE;
				break;
			}
			goto repeat;
		default:
			goto repeat;
		}
		break;

	case ST_TYPEOF_1:
		/*
		 * First token after the outermost '(' of a typeof, which
		 * ST_TYPEOF did not append.  If it is a reserved word or a
		 * known typedef, push the token and a '(' back onto the
		 * input and return TYPEOF_KEYW in normal state.  Otherwise
		 * continue as ST_TYPEOF; the withheld "(" is appended only
		 * when this first token is an identifier.
		 */
		if (token == IDENT) {
			if (is_reserved_word(yytext, yyleng) >= 0 ||
			    find_symbol(yytext, SYM_TYPEDEF, 1)) {
				yyless(0);
				unput('(');
				lexstate = ST_NORMAL;
				token = TYPEOF_KEYW;
				break;
			}
			_APP("(", 1);
		}
		lexstate = ST_TYPEOF;
		/* FALLTHRU */

	case ST_TYPEOF:
		switch (token) {
		case '(':
			/* The outermost '(' is withheld, see ST_TYPEOF_1. */
			if (++count == 1)
				lexstate = ST_TYPEOF_1;
			else
				APP;
			goto repeat;
		case ')':
			APP;
			if (--count == 0) {
				lexstate = ST_NORMAL;
				token = TYPEOF_PHRASE;
				break;
			}
			goto repeat;
		default:
			APP;
			goto repeat;
		}
		break;

	case ST_BRACKET:
		/* Entered with count == 1; swallow up to the matching ']'. */
		APP;
		switch (token) {
		case '[':
			++count;
			goto repeat;
		case ']':
			if (--count == 0) {
				lexstate = ST_NORMAL;
				token = BRACKET_PHRASE;
				break;
			}
			goto repeat;
		default:
			goto repeat;
		}
		break;

	case ST_BRACE:
		/* Entered with count == 1; swallow up to the matching '}'. */
		APP;
		switch (token) {
		case '{':
			++count;
			goto repeat;
		case '}':
			if (--count == 0) {
				lexstate = ST_NORMAL;
				token = BRACE_PHRASE;
				break;
			}
			goto repeat;
		default:
			goto repeat;
		}
		break;

	case ST_EXPRESSION:
		/*
		 * Gather tokens until a ',' ';' or '}' is met at nesting
		 * depth 0.  That terminator is not appended.
		 */
		switch (token) {
		case '(':
		case '[':
		case '{':
			++count;
			APP;
			goto repeat;
		case '}':
			/* is this the last line of an enum declaration? */
			if (count == 0) {
				/* Put back the token we just read so's we can
				   find it again after registering the
				   expression.  */
				unput(token);

				lexstate = ST_NORMAL;
				token = EXPRESSION_PHRASE;
				break;
			}
			/* FALLTHRU */
		case ')':
		case ']':
			--count;
			APP;
			goto repeat;
		case ',':
		case ';':
			if (count == 0) {
				/* Put back the token we just read so's we can
				   find it again after registering the
				   expression.  */
				unput(token);

				lexstate = ST_NORMAL;
				token = EXPRESSION_PHRASE;
				break;
			}
			APP;
			goto repeat;
		default:
			APP;
			goto repeat;
		}
		break;

	case ST_STATIC_ASSERT:
		/* Swallow everything up to the matching ')'. */
		APP;
		switch (token) {
		case '(':
			++count;
			goto repeat;
		case ')':
			if (--count == 0) {
				lexstate = ST_NORMAL;
				token = STATIC_ASSERT_PHRASE;
				break;
			}
			goto repeat;
		default:
			goto repeat;
		}
		break;

	default:
		/* lexstate holds a value not handled above. */
		exit(1);
	}

fini:
	if (suppress_type_lookup > 0)
		--suppress_type_lookup;
	if (dont_want_brace_phrase > 0)
		--dont_want_brace_phrase;

	yylval = &next_node->next;

	return token;
}