Commit c5a04734 by Zack Weinberg

[multiple changes]

2000-04-25  Zack Weinberg  <zack@wolery.cumb.org>

	* cpplib.h (struct cpp_buffer): Add 'mapped' flag; fix
	commentary.

2000-04-25  Neil Booth  <NeilB@earthling.net>

	Restore previous patch, plus the following fixes:

	* cpphash.c (_cpp_create_definition): Test PREV_WHITESPACE in
	flags, not CPP_OPEN_PAREN.
	* cpplex.c (expand_token_space, init_token_list,
	cpp_free_token_list): Put the dummy token at list->tokens[-1].
	(_cpp_lex_line, _cpp_lex_file): token list is 0-based.

From-SVN: r33419
parent e0075d84
2000-04-25 Zack Weinberg <zack@wolery.cumb.org>
* cpplib.h (struct cpp_buffer): Add 'mapped' flag; fix
commentary.
2000-04-25 Neil Booth <NeilB@earthling.net>
Restore previous patch, plus the following fixes:
* cpphash.c (_cpp_create_definition): Test PREV_WHITESPACE in
flags, not CPP_OPEN_PAREN.
* cpplex.c (expand_token_space, init_token_list,
cpp_free_token_list): Put the dummy token at list->tokens[-1].
(_cpp_lex_line, _cpp_lex_file): token list is 0-based.
Tue Apr 25 14:06:40 2000 Alexandre Oliva <oliva@lsd.ic.unicamp.br> Tue Apr 25 14:06:40 2000 Alexandre Oliva <oliva@lsd.ic.unicamp.br>
* config/i386/freebsd.h (INT_ASM_OP): Define. * config/i386/freebsd.h (INT_ASM_OP): Define.
......
...@@ -459,7 +459,8 @@ collect_objlike_expansion (pfile, list) ...@@ -459,7 +459,8 @@ collect_objlike_expansion (pfile, list)
default:; default:;
} }
if (i > 1 && !last_was_paste && (list->tokens[i].flags & HSPACE_BEFORE)) if (i > 1 && !last_was_paste
&& (list->tokens[i].flags & PREV_WHITESPACE))
CPP_PUTC (pfile, ' '); CPP_PUTC (pfile, ' ');
CPP_PUTS (pfile, CPP_PUTS (pfile,
...@@ -571,10 +572,10 @@ collect_funlike_expansion (pfile, list, arglist, replacement) ...@@ -571,10 +572,10 @@ collect_funlike_expansion (pfile, list, arglist, replacement)
} }
if (last_token != PASTE && last_token != START if (last_token != PASTE && last_token != START
&& (list->tokens[i].flags & HSPACE_BEFORE)) && (list->tokens[i].flags & PREV_WHITESPACE))
CPP_PUTC (pfile, ' '); CPP_PUTC (pfile, ' ');
if (last_token == ARG && CPP_TRADITIONAL (pfile) if (last_token == ARG && CPP_TRADITIONAL (pfile)
&& !(list->tokens[i].flags & HSPACE_BEFORE)) && !(list->tokens[i].flags & PREV_WHITESPACE))
endpat->raw_after = 1; endpat->raw_after = 1;
switch (token) switch (token)
...@@ -616,7 +617,7 @@ collect_funlike_expansion (pfile, list, arglist, replacement) ...@@ -616,7 +617,7 @@ collect_funlike_expansion (pfile, list, arglist, replacement)
{ {
int raw_before = (last_token == PASTE int raw_before = (last_token == PASTE
|| (CPP_TRADITIONAL (pfile) || (CPP_TRADITIONAL (pfile)
&& !(list->tokens[i].flags & HSPACE_BEFORE))); && !(list->tokens[i].flags & PREV_WHITESPACE)));
add_pat (&pat, &endpat, add_pat (&pat, &endpat,
CPP_WRITTEN (pfile) - last /* nchars */, j /* argno */, CPP_WRITTEN (pfile) - last /* nchars */, j /* argno */,
...@@ -865,7 +866,7 @@ _cpp_create_definition (pfile, list, hp) ...@@ -865,7 +866,7 @@ _cpp_create_definition (pfile, list, hp)
/* The macro is function-like only if the next character, /* The macro is function-like only if the next character,
with no intervening whitespace, is '('. */ with no intervening whitespace, is '('. */
else if (list->tokens[1].type == CPP_OPEN_PAREN else if (list->tokens[1].type == CPP_OPEN_PAREN
&& ! (list->tokens[1].flags & HSPACE_BEFORE)) && ! (list->tokens[1].flags & PREV_WHITESPACE))
{ {
struct arglist args; struct arglist args;
int replacement; int replacement;
...@@ -884,7 +885,7 @@ _cpp_create_definition (pfile, list, hp) ...@@ -884,7 +885,7 @@ _cpp_create_definition (pfile, list, hp)
whitespace after the name (6.10.3 para 3). */ whitespace after the name (6.10.3 para 3). */
else else
{ {
if (! (list->tokens[1].flags & CPP_OPEN_PAREN)) if (! (list->tokens[1].flags & PREV_WHITESPACE))
cpp_pedwarn (pfile, cpp_pedwarn (pfile,
"The C standard requires whitespace after #define %s", "The C standard requires whitespace after #define %s",
hp->name); hp->name);
......
...@@ -317,5 +317,6 @@ extern void _cpp_scan_line PARAMS ((cpp_reader *, cpp_toklist *)); ...@@ -317,5 +317,6 @@ extern void _cpp_scan_line PARAMS ((cpp_reader *, cpp_toklist *));
/* In cpplib.c */ /* In cpplib.c */
extern int _cpp_handle_directive PARAMS ((cpp_reader *)); extern int _cpp_handle_directive PARAMS ((cpp_reader *));
extern void _cpp_handle_eof PARAMS ((cpp_reader *)); extern void _cpp_handle_eof PARAMS ((cpp_reader *));
extern void _cpp_check_directive PARAMS((cpp_toklist *, cpp_token *));
#endif #endif
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
Based on CCCP program by Paul Rubin, June 1986 Based on CCCP program by Paul Rubin, June 1986
Adapted to ANSI C, Richard Stallman, Jan 1987 Adapted to ANSI C, Richard Stallman, Jan 1987
Broken out to separate file, Zack Weinberg, Mar 2000 Broken out to separate file, Zack Weinberg, Mar 2000
Single-pass line tokenization by Neil Booth, April 2000
This program is free software; you can redistribute it and/or modify it This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the under the terms of the GNU General Public License as published by the
...@@ -54,12 +55,15 @@ static void output_line_command PARAMS ((cpp_reader *, cpp_printer *, ...@@ -54,12 +55,15 @@ static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
unsigned int)); unsigned int));
static void bump_column PARAMS ((cpp_printer *, unsigned int, static void bump_column PARAMS ((cpp_printer *, unsigned int,
unsigned int)); unsigned int));
static void expand_name_space PARAMS ((cpp_toklist *)); static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
static void expand_token_space PARAMS ((cpp_toklist *)); static void expand_token_space PARAMS ((cpp_toklist *));
static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int)); static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int));
static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *, static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
unsigned int)); unsigned int));
#define auto_expand_name_space(list) \
expand_name_space ((list), (list)->name_cap / 2)
/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */ /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
void void
...@@ -431,12 +435,12 @@ cpp_file_buffer (pfile) ...@@ -431,12 +435,12 @@ cpp_file_buffer (pfile)
/* Expand a token list's string space. */ /* Expand a token list's string space. */
static void static void
expand_name_space (list) expand_name_space (list, len)
cpp_toklist *list; cpp_toklist *list;
{ unsigned int len;
list->name_cap *= 2; {
list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap += len;
list->name_cap); list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
} }
/* Expand the number of tokens in a list. */ /* Expand the number of tokens in a list. */
...@@ -446,36 +450,42 @@ expand_token_space (list) ...@@ -446,36 +450,42 @@ expand_token_space (list)
{ {
list->tokens_cap *= 2; list->tokens_cap *= 2;
list->tokens = (cpp_token *) list->tokens = (cpp_token *)
xrealloc (list->tokens, list->tokens_cap * sizeof (cpp_token)); xrealloc (list->tokens - 1, (list->tokens_cap + 1) * sizeof (cpp_token));
list->tokens++; /* Skip the dummy. */
} }
/* Initialise a token list. */ /* Initialize a token list. We allocate an extra token in front of
the token list, as this allows us to always peek at the previous
token without worrying about underflowing the list. */
static void static void
init_token_list (pfile, list, recycle) init_token_list (pfile, list, recycle)
cpp_reader *pfile; cpp_reader *pfile;
cpp_toklist *list; cpp_toklist *list;
int recycle; int recycle;
{ {
/* Recycling a used list saves 2 free-malloc pairs. */ /* Recycling a used list saves 3 free-malloc pairs. */
if (recycle) if (!recycle)
{ {
list->tokens_used = 0; /* Initialize token space. Put a dummy token before the start
list->name_used = 0; that will fail matches. */
} list->tokens_cap = 256; /* 4K's worth. */
else
{
/* Initialise token space. */
list->tokens_cap = 256; /* 4K on Intel. */
list->tokens_used = 0;
list->tokens = (cpp_token *) list->tokens = (cpp_token *)
xmalloc (list->tokens_cap * sizeof (cpp_token)); xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
list->tokens[0].type = CPP_EOF;
list->tokens++;
/* Initialise name space. */ /* Initialize name space. */
list->name_cap = 1024; list->name_cap = 1024;
list->name_used = 0;
list->namebuf = (unsigned char *) xmalloc (list->name_cap); list->namebuf = (unsigned char *) xmalloc (list->name_cap);
/* Only create a comment space on demand. */
list->comments_cap = 0;
list->comments = 0;
} }
list->tokens_used = 0;
list->name_used = 0;
list->comments_used = 0;
if (pfile->buffer) if (pfile->buffer)
list->line = pfile->buffer->lineno; list->line = pfile->buffer->lineno;
list->dir_handler = 0; list->dir_handler = 0;
...@@ -522,7 +532,7 @@ _cpp_scan_line (pfile, list) ...@@ -522,7 +532,7 @@ _cpp_scan_line (pfile, list)
if (list->tokens_used >= list->tokens_cap) if (list->tokens_used >= list->tokens_cap)
expand_token_space (list); expand_token_space (list);
if (list->name_used + len >= list->name_cap) if (list->name_used + len >= list->name_cap)
expand_name_space (list); auto_expand_name_space (list);
if (type == CPP_MACRO) if (type == CPP_MACRO)
type = CPP_NAME; type = CPP_NAME;
...@@ -530,7 +540,7 @@ _cpp_scan_line (pfile, list) ...@@ -530,7 +540,7 @@ _cpp_scan_line (pfile, list)
list->tokens_used++; list->tokens_used++;
list->tokens[i].type = type; list->tokens[i].type = type;
list->tokens[i].col = col; list->tokens[i].col = col;
list->tokens[i].flags = space_before ? HSPACE_BEFORE : 0; list->tokens[i].flags = space_before ? PREV_WHITESPACE : 0;
if (type == CPP_VSPACE) if (type == CPP_VSPACE)
break; break;
...@@ -2037,3 +2047,1332 @@ _cpp_init_input_buffer (pfile) ...@@ -2037,3 +2047,1332 @@ _cpp_init_input_buffer (pfile)
pfile->input_buffer = tmp; pfile->input_buffer = tmp;
pfile->input_buffer_len = 8192; pfile->input_buffer_len = 8192;
} }
#if 0
/* Forward declarations for the routines of the line lexer that
   follows.  */

/* Storage management and trigraph/backslash helpers.  */
static void expand_comment_space PARAMS ((cpp_toklist *));
void init_trigraph_map PARAMS ((void));
static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
unsigned char *));
static const unsigned char *backslash_start PARAMS ((cpp_reader *,
const unsigned char *));
/* Comment and whitespace skippers; they work on pfile->buffer.  */
static int skip_block_comment PARAMS ((cpp_reader *));
static int skip_line_comment PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, int));
/* Token body parsers; each appends characters to the list's name
   buffer and records the result in the cpp_name.  */
static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
unsigned int));
static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
static void copy_comment PARAMS ((cpp_toklist *, const unsigned char *,
unsigned int, unsigned int, unsigned int));
/* Entry point of the lexer (not static).  */
void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
/* Token spelling routines.  They all share the signature captured by
   the `speller' typedef below.  */
unsigned int spell_char PARAMS ((unsigned char *, cpp_toklist *,
cpp_token *token));
unsigned int spell_string PARAMS ((unsigned char *, cpp_toklist *,
cpp_token *token));
unsigned int spell_comment PARAMS ((unsigned char *, cpp_toklist *,
cpp_token *token));
unsigned int spell_name PARAMS ((unsigned char *, cpp_toklist *,
cpp_token *token));
unsigned int spell_other PARAMS ((unsigned char *, cpp_toklist *,
cpp_token *token));
/* Pointer to a spelling routine.  */
typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
cpp_token *));
/* Macros on a cpp_name.  */

/* Start an empty name located at the current end of LIST's name
   buffer.  */
#define INIT_NAME(list, name) \
  do {(name).len = 0; (name).offset = (list)->name_used;} while (0)

/* Nonzero if the first token of LIST is CPP_HASH.  LIST is
   parenthesized so that an expression argument is parsed as a unit.  */
#define IS_DIRECTIVE(list) ((list)->tokens[0].type == CPP_HASH)

/* Distance of CUR from the base of the current line.  Relies on a
   variable named `buffer' being in scope at the point of use.  */
#define COLUMN(cur) ((cur) - buffer->line_base)

/* Maybe put these in the ISTABLE eventually.  */
#define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
#define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')

/* Handle LF, CR, CR-LF and LF-CR style newlines.  C is the newline
   character just consumed; '\r' + '\n' - (C) is the other newline
   character, which is skipped if it comes next.  Assumes next
   character, if any, is in buffer.  Relies on a variable named
   `pfile' being in scope at the point of use.  */
#define handle_newline(cur, limit, c) \
  do {\
    if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
      (cur)++; \
    CPP_BUMP_LINE_CUR (pfile, (cur)); \
  } while (0)

/* These two rely on a variable named `cur_token' being in scope.
   IMMED_TOKEN is nonzero when the current token has no
   PREV_WHITESPACE flag; PREV_TOKEN_TYPE is the type of the token slot
   immediately before it.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
#define PREV_TOKEN_TYPE (cur_token[-1].type)

/* Values for the `type' member of a token_spelling entry.  */
#define SPELL_TEXT     0
#define SPELL_HANDLER  1
#define SPELL_NONE     2
#define SPELL_EOL      3

/* Each of these expands one table entry to a {type, speller}
   initializer followed by a comma; the first argument is unused.  */
#define T(e, s) {SPELL_TEXT, s},
#define H(e, s) {SPELL_HANDLER, s},
#define N(e, s) {SPELL_NONE, s},
#define E(e, s) {SPELL_EOL, s},
/* Spelling table with N_TTYPES + 1 entries.  Each entry pairs one of
   the SPELL_* codes with an opaque `speller' value.  The initializer
   is produced by expanding TTYPE_TABLE through the T/H/N/E macros,
   and is closed by a {0, 0} entry.
   NOTE(review): TTYPE_TABLE and N_TTYPES are defined outside this
   view -- presumably one table entry per token type; confirm.  */
static const struct token_spelling
{
char type;
PTR speller;
} token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
/* The entry-building macros are only needed for the table above.  */
#undef T
#undef H
#undef N
#undef E
/* Spellings of the six digraphs.
   NOTE(review): the index order presumably matches the order of the
   corresponding token types -- confirm against the code that indexes
   this array.  */
static const unsigned char *digraph_spellings [] = {"%:", "%:%:", "<:",
":>", "<%", "%>"};
/* Grow LIST's comment array.  The first call allocates room for 10
   comment tokens; every later call doubles the capacity.
   comments_cap is a count of cpp_token elements, not bytes.  */
static void
expand_comment_space (list)
     cpp_toklist *list;
{
  if (list->comments_cap == 0)
    {
      list->comments_cap = 10;
      list->comments = (cpp_token *)
	xmalloc (list->comments_cap * sizeof (cpp_token));
    }
  else
    {
      list->comments_cap *= 2;
      /* The size passed to xrealloc is in bytes, so it must be scaled
	 by the element size just like the initial xmalloc above.  */
      list->comments = (cpp_token *)
	xrealloc (list->comments, list->comments_cap * sizeof (cpp_token));
    }
}
/* Release everything LIST owns, and then LIST itself.  The token
   array is released through `tokens - 1' because the stored pointer
   was advanced by one token past the start of the allocation.  The
   comment array is released only if it is non-null.  */
void
cpp_free_token_list (list)
     cpp_toklist *list;
{
  free (list->namebuf);
  free (list->tokens - 1);

  if (list->comments)
    free (list->comments);

  free (list);
}
/* Indexed by the third character of a "??x" sequence: the character
   the trigraph stands for, or 0 if "??x" is not a trigraph.  */
static char trigraph_map[256];

/* Fill in the nine non-zero entries of trigraph_map.  */
void
init_trigraph_map ()
{
  /* { third character of the trigraph, replacement character }.  */
  static const char pairs[][2] = {
    { '=',  '#'  },
    { '(',  '['  },
    { ')',  ']'  },
    { '/',  '\\' },
    { '\'', '^'  },
    { '<',  '{'  },
    { '>',  '}'  },
    { '!',  '|'  },
    { '-',  '~'  }
  };
  unsigned int i;

  for (i = 0; i < sizeof pairs / sizeof pairs[0]; i++)
    trigraph_map[(unsigned char) pairs[i][0]] = pairs[i][1];
}
/* Called when a trigraph has been found; END points at its third
   character in the input stream.  The return value is the setting of
   the `trigraphs' option, i.e. non-zero if the trigraph should be
   honoured.  If the `warn_trigraphs' option is set, a warning is also
   issued, positioned one character before END on the buffer's current
   line, saying whether the trigraph was converted or ignored.  */
static int
trigraph_ok (pfile, end)
     cpp_reader *pfile;
     const unsigned char *end;
{
  const int honour = CPP_OPTION (pfile, trigraphs);
  unsigned int col;

  if (! CPP_OPTION (pfile, warn_trigraphs))
    return honour;

  col = end - 1 - pfile->buffer->line_base;

  if (honour)
    cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
			   "trigraph ??%c converted to %c",
			   (int) *end, (int) trigraph_map[*end]);
  else
    cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
			   "trigraph ??%c ignored", (int) *end);

  return honour;
}
/* Scan a string for trigraphs, warning or replacing them inline as
appropriate. When parsing a string, we must call this routine
before processing a newline character (if trigraphs are enabled),
since the newline might be escaped by a preceding backslash
trigraph sequence. Returns a pointer to the end of the name after
replacement.

SRC is the first character to scan and LIMIT is one past the last.
If no trigraph is found, the text is left untouched and LIMIT itself
is returned. */
static unsigned char*
trigraph_replace (pfile, src, limit)
cpp_reader *pfile;
unsigned char *src;
unsigned char* limit;
{
unsigned char *dest;
/* Starting with src[1], find two consecutive '?'. The case of no
trigraphs is streamlined. */
/* The scan steps two characters at a time: any pair of adjacent '?'
must have one of its characters at a visited position.
NOTE(review): the loop tests src[0], not src[1], on its first pass and
then reads src[-1]; this needs a readable byte before SRC -- confirm
that every caller guarantees one. */
for (; src + 1 < limit; src += 2)
{
if (src[0] != '?')
continue;
/* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
if (src[-1] == '?')
src--;
else if (src + 2 == limit || src[1] != '?')
continue;
/* Check if it really is a trigraph. */
if (trigraph_map[src[2]] == 0)
continue;
/* First trigraph found: from here on text is copied from src down
to dest. Jump into the body of the copy loop below. */
dest = src;
goto trigraph_found;
}
return limit;
/* Now we have a trigraph, we need to scan the remaining buffer, and
copy-shifting its contents left if replacement is enabled. */
for (; src + 2 < limit; dest++, src++)
if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
{
trigraph_found:
/* Move src to the third character of the trigraph. The position
passed to trigraph_ok is that many characters back from
pfile->buffer->cur as src is from limit.
NOTE(review): when trigraph_ok returns zero, src has still advanced
past the trigraph while dest advances by one, so only the first '?'
appears to survive in the output -- confirm whether an ignored
trigraph is meant to be copied through unchanged. */
src += 2;
if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
*dest = trigraph_map[*src];
}
/* Copy remaining (at most 2) characters. */
while (src < limit)
*dest++ = *src++;
return dest;
}
/* Return a pointer to the start of the backslash that ends at CUR:
   CUR itself if it is a literal backslash, or CUR - 2 if CUR is the
   '/' of a "??/" trigraph that trigraph_ok says to honour.  Return
   NULL otherwise.  Looks back up to two characters before CUR; this
   is safe because the start of the comment is in the buffer before
   CUR.  */
static const unsigned char *
backslash_start (pfile, cur)
     cpp_reader *pfile;
     const unsigned char *cur;
{
  if (cur[0] == '\\')
    return cur;

  /* Anything other than "??/" ending at CUR is not a backslash.  */
  if (cur[0] != '/' || cur[-1] != '?' || cur[-2] != '?')
    return 0;

  return trigraph_ok (pfile, cur) ? cur - 2 : 0;
}
/* Skip a C-style block comment. This is probably the trickiest
handler. We find the end of the comment by seeing if an asterisk
is before every '/' we encounter. The nasty complication is that a
previous asterisk may be separated by one or more escaped newlines.
Returns non-zero if comment terminated by EOF, zero otherwise.

Scanning starts at pfile->buffer->cur and, on return, that pointer
has been moved to where scanning stopped: just past the closing '/',
or the buffer's rlimit if the end of the buffer was reached. */
static int
skip_block_comment (pfile)
cpp_reader *pfile;
{
cpp_buffer *buffer = pfile->buffer;
/* When non-null: the position just after a run of escaped newlines
that was itself preceded by an asterisk. A '/' found exactly there
closes the comment. */
const unsigned char *char_after_star = 0;
register const unsigned char *cur = buffer->cur;
int seen_eof = 0;
/* Inner loop would think the comment has ended if the first comment
character is a '/'. Avoid this and keep the inner loop clean by
skipping such a character. */
if (cur < buffer->rlimit && cur[0] == '/')
cur++;
for (; cur < buffer->rlimit; )
{
unsigned char c = *cur++;
/* People like decorating comments with '*', so check for
'/' instead for efficiency. */
if (c == '/')
{
/* cur has already moved past the '/', so cur[-2] is the character
before it. */
if (cur[-2] == '*' || cur - 1 == char_after_star)
goto out;
/* Warn about potential nested comments, but not when
the final character inside the comment is a '/'.
Don't bother to get it right across escaped newlines. */
if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
&& cur[0] == '*' && cur[1] != '/')
{
/* Publish the position before issuing the warning. */
buffer->cur = cur;
cpp_warning (pfile, "'/*' within comment");
}
}
else if (IS_NEWLINE(c))
{
/* cur - 2 is the character before the newline; bslash is non-null
only if a (possibly trigraphed) backslash ends there. */
const unsigned char* bslash = backslash_start (pfile, cur - 2);
handle_newline (cur, buffer->rlimit, c);
/* Work correctly if there is an asterisk before an
arbitrarily long sequence of escaped newlines. */
if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
char_after_star = cur;
else
char_after_star = 0;
}
}
/* Fell out of the loop: the buffer ended inside the comment. */
seen_eof = 1;
out:
buffer->cur = cur;
return seen_eof;
}
/* Skip a C++ or Chill line comment, starting at pfile->buffer->cur.
   A newline preceded by a (possibly trigraphed) backslash is escaped
   and does not end the comment.  On return the buffer's cur points at
   the terminating newline, which is left for the caller, or at rlimit
   if the buffer ended first.  Returns non-zero if the comment spanned
   more than one line.  */
static int
skip_line_comment (pfile)
     cpp_reader *pfile;
{
  cpp_buffer *buffer = pfile->buffer;
  register const unsigned char *cur = buffer->cur;
  int multiline = 0;

  while (cur < buffer->rlimit)
    {
      unsigned char c = *cur++;

      if (! IS_NEWLINE (c))
	continue;

      /* An unescaped newline ends the comment; back up onto it.  */
      if (! backslash_start (pfile, cur - 2))
	{
	  buffer->cur = cur - 1;
	  return multiline;
	}

      multiline = 1;
      handle_newline (cur, buffer->rlimit, c);
    }

  /* Ran off the end of the buffer.  */
  buffer->cur = cur;
  return multiline;
}
/* Advance pfile->buffer->cur over whitespace, leaving it on the first
   character that is not whitespace, or on a newline, or at rlimit if
   the buffer ends first.  Null characters are skipped and reported
   with a single warning at the end.  If IN_DIRECTIVE is non-zero and
   we are pedantic, each other non-horizontal whitespace character
   draws a pedwarn.  */
static void
skip_whitespace (pfile, in_directive)
     cpp_reader *pfile;
     int in_directive;
{
  cpp_buffer *buffer = pfile->buffer;
  register const unsigned char *cur = buffer->cur;
  unsigned short nulls = 0;
  int stopped = 0;

  while (cur < buffer->rlimit)
    {
      unsigned char c = *cur++;

      if (IS_HSPACE (c))	/* FIXME: Fix ISTABLE.  */
	continue;

      /* Main loop handles newlines.  */
      if (!is_space (c) || IS_NEWLINE (c))
	{
	  stopped = 1;
	  break;
	}

      if (c == '\0')
	nulls++;
      /* Must be '\f' or '\v'.  */
      else if (in_directive && CPP_PEDANTIC (pfile))
	cpp_pedwarn (pfile, "%s in preprocessing directive",
		     c == '\f' ? "formfeed" : "vertical tab");
    }

  /* If we stopped on a character, back up so that it is read again.  */
  buffer->cur = stopped ? cur - 1 : cur;

  if (nulls)
    cpp_warning (pfile, nulls > 1 ? "embedded null characters ignored"
		 : "embedded null character ignored");
}
/* Append the identifier characters at pfile->buffer->cur to the end
   of LIST's name buffer, growing the buffer as required.  On return
   the buffer's cur is on the first non-identifier character (or at
   rlimit), NAME->len is the distance from NAME->offset to the new end
   of the name buffer, and LIST->name_used is updated.  When pedantic,
   each '$' draws a pedwarn.  */
static void
parse_name (pfile, list, name)
     cpp_reader *pfile;
     cpp_toklist *list;
     cpp_name *name;
{
  cpp_buffer *buffer = pfile->buffer;
  register const unsigned char *cur = buffer->cur;
  unsigned char *dest;

  for (;;)
    {
      const unsigned char *dest_limit = list->namebuf + list->name_cap;

      dest = list->namebuf + list->name_used;

      while (cur < buffer->rlimit && dest < dest_limit)
	{
	  /* Copy first, test afterwards; a rejected character is
	     simply not counted.  */
	  unsigned char c = *cur;

	  *dest = c;
	  if (! is_idchar (c))
	    goto done;

	  dest++;
	  cur++;

	  if (c == '$' && CPP_PEDANTIC (pfile))
	    {
	      buffer->cur = cur;
	      cpp_pedwarn (pfile, "'$' character in identifier");
	    }
	}

      /* Input exhausted: finished.  Otherwise we ran out of name
	 space, so enlarge it and carry on where we left off.  */
      if (cur >= buffer->rlimit)
	break;

      list->name_used = dest - list->namebuf;
      auto_expand_name_space (list);
    }

 done:
  buffer->cur = cur;
  name->len = dest - (list->namebuf + name->offset);
  list->name_used = dest - list->namebuf;
}
/* Non-zero if C is a sign that may continue a number whose previous
   character was PREVC: a '+' or '-' directly after 'e' or 'E', or
   after 'p' or 'P' when the c89 option is off.  Relies on a variable
   named `pfile' being in scope.  */
#define VALID_SIGN(c, prevc) \
  (((c) == '+' || (c) == '-') && \
   ((prevc) == 'e' || (prevc) == 'E' \
    || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))

/* Append the number at pfile->buffer->cur to the end of LIST's name
   buffer, growing the buffer as required.  A number continues while
   characters satisfy is_numchar, are '.', or are a VALID_SIGN.  On
   return the buffer's cur is on the first rejected character (or at
   rlimit), NAME->len is the distance from NAME->offset to the new end
   of the name buffer, and LIST->name_used is updated.  */
static void
parse_number (pfile, list, name)
     cpp_reader *pfile;
     cpp_toklist *list;
     cpp_name *name;
{
  cpp_buffer *buffer = pfile->buffer;
  register const unsigned char *cur = buffer->cur;
  unsigned char *dest;

  for (;;)
    {
      const unsigned char *dest_limit = list->namebuf + list->name_cap;

      dest = list->namebuf + list->name_used;

      while (cur < buffer->rlimit && dest < dest_limit)
	{
	  /* Copy first, test afterwards.  */
	  unsigned char c = *cur;

	  *dest = c;

	  /* Perhaps we should accept '$' here if we accept it for
	     identifiers.  We know dest[-1] is safe, because for c to
	     be a sign we must have pushed at least one character.  */
	  if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, dest[-1]))
	    goto done;

	  dest++;
	  cur++;
	}

      /* Input exhausted: finished.  Otherwise we ran out of name
	 space, so enlarge it and carry on where we left off.  */
      if (cur >= buffer->rlimit)
	break;

      list->name_used = dest - list->namebuf;
      auto_expand_name_space (list);
    }

 done:
  buffer->cur = cur;
  name->len = dest - (list->namebuf + name->offset);
  list->name_used = dest - list->namebuf;
}
/* Places a string terminated by an unescaped TERMINATOR into a
cpp_name, which should be expandable and thus at the top of the
list's stack. Handles embedded trigraphs, if necessary, and
escaped newlines.
Can be used for character constants (terminator = '\''), string
constants ('"'), angled headers ('>') and assertions (')').

Characters are copied from pfile->buffer->cur to the end of LIST's
name buffer. The terminator itself is consumed from the input but is
not stored in the name. On return the buffer's cur, NAME->len and
LIST->name_used have been updated. An error is issued if the
terminator is missing; a warning is issued if null characters were
copied. */
static void
parse_string (pfile, list, name, terminator)
cpp_reader *pfile;
cpp_toklist *list;
cpp_name *name;
unsigned int terminator;
{
cpp_buffer *buffer = pfile->buffer;
register const unsigned char *cur = buffer->cur;
const unsigned char *name_limit;
unsigned char *namebuf;
unsigned int null_count = 0;
/* Offset from the start of the name at which the next trigraph scan
begins. */
int trigraphed_len = 0;
/* Re-entered after the name buffer is enlarged; the pointers into it
are recomputed here. */
expanded:
name_limit = list->namebuf + list->name_cap;
namebuf = list->namebuf + list->name_used;
for (; cur < buffer->rlimit && namebuf < name_limit; )
{
unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
if (c == '\0')
null_count++;
else if (c == terminator || IS_NEWLINE (c))
{
unsigned char* name_start = list->namebuf + name->offset;
/* Needed for trigraph_replace and multiline string warning. */
buffer->cur = cur;
/* Scan for trigraphs before checking if backslash-escaped. */
if (CPP_OPTION (pfile, trigraphs)
|| CPP_OPTION (pfile, warn_trigraphs))
{
namebuf = trigraph_replace (pfile, name_start + trigraphed_len,
namebuf);
/* Restart the next scan two characters before the new end of the
name, clamped to zero.
NOTE(review): the right-hand side subtracts the old trigraphed_len,
so the result is relative to the previous scan start rather than to
name_start -- confirm this is intended. */
trigraphed_len = namebuf - 2 - (name_start + trigraphed_len);
if (trigraphed_len < 0)
trigraphed_len = 0;
}
namebuf--; /* Drop the newline / terminator from the name. */
if (IS_NEWLINE (c))
{
/* Drop a backslash newline, and continue. */
if (namebuf[-1] == '\\')
{
handle_newline (cur, buffer->rlimit, c);
namebuf--;
continue;
}
/* Unescaped newline: step back onto it while deciding what to do. */
cur--;
/* In Fortran and assembly language, silently terminate
strings of either variety at end of line. This is a
kludge around not knowing where comments are in these
languages. */
if (CPP_OPTION (pfile, lang_fortran)
|| CPP_OPTION (pfile, lang_asm))
goto out;
/* Character constants, headers and asserts may not
extend over multiple lines. In Standard C, neither
may strings. We accept multiline strings as an
extension, but not in directives. */
if (terminator != '"' || IS_DIRECTIVE (list))
goto unterminated;
cur++; /* Move forwards again. */
/* Remember the line on which the first multi-line string started. */
if (pfile->multiline_string_line == 0)
{
pfile->multiline_string_line = list->line;
if (CPP_PEDANTIC (pfile))
cpp_pedwarn (pfile, "multi-line string constant");
}
/* Whatever newline style was read, a single '\n' is stored. */
*namebuf++ = '\n';
handle_newline (cur, buffer->rlimit, c);
}
else
{
unsigned char *temp;
/* An odd number of consecutive backslashes represents
an escaped terminator. */
/* namebuf points at the slot of the dropped terminator; walk temp
back over the backslashes just before it. */
temp = namebuf - 1;
while (temp >= name_start && *temp == '\\')
temp--;
/* namebuf - temp is the backslash count plus one, so an odd value
means an even count: the terminator is real. */
if ((namebuf - temp) & 1)
goto out;
/* Escaped terminator: keep it as part of the name. */
namebuf++;
}
}
}
/* Run out of name space? */
if (cur < buffer->rlimit)
{
list->name_used = namebuf - list->namebuf;
auto_expand_name_space (list);
goto expanded;
}
/* We may not have trigraph-replaced the input for this code path,
but as the input is in error by being unterminated we don't
bother. Prevent warnings about no newlines at EOF. */
if (IS_NEWLINE(cur[-1]))
cur--;
unterminated:
cpp_error (pfile, "missing terminating %c character", (int) terminator);
/* For a string, also point at the line recorded as the start of a
multi-line string, if there is one and it is not this line. */
if (terminator == '\"' && pfile->multiline_string_line != list->line
&& pfile->multiline_string_line != 0)
{
cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
"possible start of unterminated string literal");
pfile->multiline_string_line = 0;
}
out:
buffer->cur = cur;
name->len = namebuf - (list->namebuf + name->offset);
list->name_used = namebuf - list->namebuf;
if (null_count > 0)
cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
: "null character preserved"));
}
/* Record a comment in LIST.  The LEN characters at FROM are appended
   to the list's name buffer, and a new entry in the list's comment
   array describes them: its type is TYPE, its aux field is TOK_NO and
   its name gives the offset and length of the copied text.  Both
   arrays are grown first if necessary.

   TYPE distinguishes the kinds of comment: '*' = C style, '-' =
   Chill-style and '/' = C++ style.  For code simplicity, the stored
   comment includes any C-style comment terminator.  */
static void
copy_comment (list, from, len, tok_no, type)
     cpp_toklist *list;
     const unsigned char *from;
     unsigned int len;
     unsigned int tok_no;
     unsigned int type;
{
  cpp_token *slot;

  /* Make room for one more comment entry and LEN more characters.  */
  if (list->comments_used == list->comments_cap)
    expand_comment_space (list);
  if (list->name_used + len > list->name_cap)
    expand_name_space (list, len);

  /* Claim the next comment slot and describe the text.  */
  slot = &list->comments[list->comments_used];
  list->comments_used++;

  slot->val.name.offset = list->name_used;
  slot->val.name.len = len;
  slot->aux = tok_no;
  slot->type = type;

  /* Store the text itself.  */
  memcpy (list->namebuf + list->name_used, from, len);
  list->name_used += len;
}
/*
* The tokenizer's main loop. Returns a token list, representing a
* logical line in the input file, terminated with a CPP_VSPACE
* token. On EOF, a token list containing the single CPP_EOF token
* is returned.
*
* Implementation relies almost entirely on lookback, rather than
* looking forwards. This means that tokenization requires just
* a single pass of the file, even in the presence of trigraphs and
* escaped newlines, providing significant performance benefits.
* Trigraph overhead is negligible if they are disabled, and low
* even when enabled.
*/
/* Token-slot helpers.  They all operate on a variable named
   `cur_token', which must be in scope at the point of use.
   PUSH_TOKEN stores TTYPE in the current slot and then advances
   cur_token past it.
   REVISE_TOKEN overwrites the type of the slot just before cur_token.
   BACKUP_TOKEN first moves cur_token back one slot, then stores TTYPE
   in that slot.
   BACKUP_DIGRAPH does the same as BACKUP_TOKEN and also sets the
   DIGRAPH flag on that slot.
   Only BACKUP_DIGRAPH is wrapped in do-while; the other three expand
   to a bare assignment expression.  */
#define PUSH_TOKEN(ttype) cur_token++->type = ttype
#define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
#define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
#define BACKUP_DIGRAPH(ttype) do { \
BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
void
_cpp_lex_line (pfile, list)
cpp_reader *pfile;
cpp_toklist *list;
{
cpp_token *cur_token, *token_limit;
cpp_buffer *buffer = pfile->buffer;
register const unsigned char *cur = buffer->cur;
unsigned char flags = 0;
expanded:
token_limit = list->tokens + list->tokens_cap;
cur_token = list->tokens + list->tokens_used;
for (; cur < buffer->rlimit && cur_token < token_limit;)
{
unsigned char c = *cur++;
/* Optimize whitespace skipping, in particular the case of a
single whitespace character, as every other token is probably
whitespace. (' ' '\t' '\v' '\f' '\0'). */
if (is_hspace ((unsigned int) c))
{
if (c == '\0' || (cur < buffer->rlimit && is_hspace (*cur)))
{
buffer->cur = cur - (c == '\0'); /* Get the null warning. */
skip_whitespace (pfile, IS_DIRECTIVE (list));
cur = buffer->cur;
}
flags = PREV_WHITESPACE;
if (cur == buffer->rlimit)
break;
c = *cur++;
}
/* Initialize current token. Its type is set in the switch. */
cur_token->col = COLUMN (cur);
cur_token->flags = flags;
flags = 0;
switch (c)
{
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
/* Prepend an immediately previous CPP_DOT token. */
if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
{
cur_token--;
if (list->name_cap == list->name_used)
auto_expand_name_space (list);
cur_token->val.name.len = 1;
cur_token->val.name.offset = list->name_used;
list->namebuf[list->name_used++] = '.';
}
else
INIT_NAME (list, cur_token->val.name);
cur--; /* Backup character. */
continue_number:
buffer->cur = cur;
parse_number (pfile, list, &cur_token->val.name);
cur = buffer->cur;
PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted. */
break;
letter:
case '_':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
INIT_NAME (list, cur_token->val.name);
cur--; /* Backup character. */
cur_token->type = CPP_NAME; /* Identifier, macro etc. */
continue_name:
buffer->cur = cur;
parse_name (pfile, list, &cur_token->val.name);
cur = buffer->cur;
/* Find handler for newly created / extended directive. */
if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
_cpp_check_directive (list, cur_token);
cur_token++;
break;
case '\'':
/* Fall through. */
case '\"':
cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
/* Do we have a wide string? */
if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
&& cur_token[-1].val.name.len == 1
&& TOK_NAME (list, cur_token - 1)[0] == 'L'
&& !CPP_TRADITIONAL (pfile))
{
/* No need for 'L' any more. */
list->name_used--;
(--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
}
do_parse_string:
/* Here c is one of ' " > or ). */
INIT_NAME (list, cur_token->val.name);
buffer->cur = cur;
parse_string (pfile, list, &cur_token->val.name, c);
cur = buffer->cur;
cur_token++;
break;
case '/':
cur_token->type = CPP_DIV;
if (IMMED_TOKEN ())
{
if (PREV_TOKEN_TYPE == CPP_DIV)
{
/* We silently allow C++ comments in system headers,
irrespective of conformance mode, because lots of
broken systems do that and trying to clean it up
in fixincludes is a nightmare. */
if (buffer->system_header_p)
goto do_line_comment;
else if (CPP_OPTION (pfile, cplusplus_comments))
{
if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
&& ! buffer->warned_cplusplus_comments)
{
buffer->cur = cur;
cpp_pedwarn (pfile,
"C++ style comments are not allowed in ISO C89");
cpp_pedwarn (pfile,
"(this will be reported only once per input file)");
buffer->warned_cplusplus_comments = 1;
}
do_line_comment:
buffer->cur = cur;
if (cur[-2] != c)
cpp_warning (pfile,
"comment start split across lines");
if (skip_line_comment (pfile))
cpp_error_with_line (pfile, list->line,
cur_token[-1].col,
"multi-line comment");
if (!CPP_OPTION (pfile, discard_comments))
copy_comment (list, cur, buffer->cur - cur,
cur_token - 1 - list->tokens, c == '/'
? CPP_CPP_COMMENT: CPP_CHILL_COMMENT);
cur = buffer->cur;
/* Back-up to first '-' or '/'. */
cur_token -= 2;
if (!CPP_OPTION (pfile, traditional))
flags = PREV_WHITESPACE;
}
}
}
cur_token++;
break;
case '*':
cur_token->type = CPP_MULT;
if (IMMED_TOKEN ())
{
if (PREV_TOKEN_TYPE == CPP_DIV)
{
buffer->cur = cur;
if (cur[-2] != '/')
cpp_warning (pfile,
"comment start '/*' split across lines");
if (skip_block_comment (pfile))
cpp_error_with_line (pfile, list->line, cur_token[-1].col,
"unterminated comment");
else if (buffer->cur[-2] != '*')
cpp_warning (pfile,
"comment end '*/' split across lines");
if (!CPP_OPTION (pfile, discard_comments))
copy_comment (list, cur, buffer->cur - cur,
cur_token - 1 - list->tokens, CPP_C_COMMENT);
cur = buffer->cur;
cur_token -= 2;
if (!CPP_OPTION (pfile, traditional))
flags = PREV_WHITESPACE;
}
else if (CPP_OPTION (pfile, cplusplus))
{
/* In C++, there are .* and ->* operators. */
if (PREV_TOKEN_TYPE == CPP_DEREF)
BACKUP_TOKEN (CPP_DEREF_STAR);
else if (PREV_TOKEN_TYPE == CPP_DOT)
BACKUP_TOKEN (CPP_DOT_STAR);
}
}
cur_token++;
break;
case '\n':
case '\r':
handle_newline (cur, buffer->rlimit, c);
if (PREV_TOKEN_TYPE != CPP_BACKSLASH || !IMMED_TOKEN ())
{
if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
{
buffer->cur = cur;
cpp_warning (pfile,
"backslash and newline separated by space");
}
PUSH_TOKEN (CPP_VSPACE);
goto out;
}
/* Remove the escaped newline. Then continue to process
any interrupted name or number. */
cur_token--;
if (IMMED_TOKEN ())
{
cur_token--;
if (cur_token->type == CPP_NAME)
goto continue_name;
else if (cur_token->type == CPP_NUMBER)
goto continue_number;
cur_token++;
}
break;
case '-':
if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
{
if (CPP_OPTION (pfile, chill))
goto do_line_comment;
REVISE_TOKEN (CPP_MINUS_MINUS);
}
else
PUSH_TOKEN (CPP_MINUS);
break;
/* The digraph flag checking ensures that ## and %:%:
are interpreted as CPP_PASTE, but #%: and %:# are not. */
make_hash:
case '#':
if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
&& ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
REVISE_TOKEN (CPP_PASTE);
else
PUSH_TOKEN (CPP_HASH);
break;
case ':':
cur_token->type = CPP_COLON;
if (IMMED_TOKEN ())
{
if (PREV_TOKEN_TYPE == CPP_COLON
&& CPP_OPTION (pfile, cplusplus))
BACKUP_TOKEN (CPP_SCOPE);
/* Digraph: "<:" is a '[' */
else if (PREV_TOKEN_TYPE == CPP_LESS)
BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
/* Digraph: "%:" is a '#' */
else if (PREV_TOKEN_TYPE == CPP_MOD)
{
(--cur_token)->flags |= DIGRAPH;
goto make_hash;
}
}
cur_token++;
break;
case '&':
if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
REVISE_TOKEN (CPP_AND_AND);
else
PUSH_TOKEN (CPP_AND);
break;
make_or:
case '|':
if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
REVISE_TOKEN (CPP_OR_OR);
else
PUSH_TOKEN (CPP_OR);
break;
case '+':
if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
REVISE_TOKEN (CPP_PLUS_PLUS);
else
PUSH_TOKEN (CPP_PLUS);
break;
case '=':
/* This relies on equidistance of "?=" and "?" tokens. */
if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
else
PUSH_TOKEN (CPP_EQ);
break;
case '>':
cur_token->type = CPP_GREATER;
if (IMMED_TOKEN ())
{
if (PREV_TOKEN_TYPE == CPP_GREATER)
BACKUP_TOKEN (CPP_RSHIFT);
else if (PREV_TOKEN_TYPE == CPP_MINUS)
BACKUP_TOKEN (CPP_DEREF);
/* Digraph: ":>" is a ']' */
else if (PREV_TOKEN_TYPE == CPP_COLON)
BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
/* Digraph: "%>" is a '}' */
else if (PREV_TOKEN_TYPE == CPP_MOD)
BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
}
cur_token++;
break;
case '<':
if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
{
REVISE_TOKEN (CPP_LSHIFT);
break;
}
/* Is this the beginning of a header name? */
if (list->dir_flags & SYNTAX_INCLUDE)
{
c = '>'; /* Terminator. */
cur_token->type = CPP_HEADER_NAME;
goto do_parse_string;
}
PUSH_TOKEN (CPP_LESS);
break;
case '%':
/* Digraph: "<%" is a '{' */
cur_token->type = CPP_MOD;
if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
BACKUP_DIGRAPH (CPP_OPEN_BRACE);
cur_token++;
break;
case ')':
PUSH_TOKEN (CPP_CLOSE_PAREN);
break;
case '(':
/* Is this the beginning of an assertion string? */
if (list->dir_flags & SYNTAX_ASSERT)
{
c = ')'; /* Terminator. */
cur_token->type = CPP_ASSERTION;
goto do_parse_string;
}
PUSH_TOKEN (CPP_OPEN_PAREN);
break;
make_complement:
case '~':
PUSH_TOKEN (CPP_COMPL);
break;
case '?':
if (cur + 1 < buffer->rlimit && *cur == '?'
&& trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
{
/* Handle trigraph. */
cur++;
switch (*cur++)
{
case '(': goto make_open_square;
case ')': goto make_close_square;
case '<': goto make_open_brace;
case '>': goto make_close_brace;
case '=': goto make_hash;
case '!': goto make_or;
case '-': goto make_complement;
case '/': goto make_backslash;
case '\'': goto make_xor;
}
}
if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
{
/* GNU C++ defines <? and >? operators. */
if (PREV_TOKEN_TYPE == CPP_LESS)
{
REVISE_TOKEN (CPP_MIN);
break;
}
else if (PREV_TOKEN_TYPE == CPP_GREATER)
{
REVISE_TOKEN (CPP_MAX);
break;
}
}
PUSH_TOKEN (CPP_QUERY);
break;
case '.':
if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
&& IMMED_TOKEN ()
&& !(cur_token[-1].flags & PREV_WHITESPACE))
{
cur_token -= 2;
PUSH_TOKEN (CPP_ELLIPSIS);
}
else
PUSH_TOKEN (CPP_DOT);
break;
make_xor:
case '^': PUSH_TOKEN (CPP_XOR); break;
make_open_brace:
case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
make_close_brace:
case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
make_open_square:
case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
make_close_square:
case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
make_backslash:
case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
case '!': PUSH_TOKEN (CPP_NOT); break;
case ',': PUSH_TOKEN (CPP_COMMA); break;
case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
case '$':
if (CPP_OPTION (pfile, dollars_in_ident))
goto letter;
/* Fall through */
default:
cur_token->aux = c;
PUSH_TOKEN (CPP_OTHER);
break;
}
}
/* Run out of token space? */
if (cur_token == token_limit)
{
list->tokens_used = cur_token - list->tokens;
expand_token_space (list);
goto expanded;
}
cur_token->type = CPP_EOF;
cur_token->flags = flags;
if (cur_token != &list->tokens[0])
{
/* Next call back will get just a CPP_EOF. */
buffer->cur = cur;
cpp_warning (pfile, "no newline at end of file");
PUSH_TOKEN (CPP_VSPACE);
}
out:
buffer->cur = cur;
list->tokens_used = cur_token - list->tokens;
/* FIXME: take this check out and put it in the caller.
list->directive == 0 indicates an unknown directive (but null
directive is OK). This is the first time we can be sure the
directive is invalid, and thus warn about it, because it might
have been split by escaped newlines. Also, don't complain about
invalid directives in assembly source, we don't know where the
comments are, and # may introduce assembler pseudo-ops. */
if (IS_DIRECTIVE (list) && list->dir_handler == 0
&& list->tokens[1].type != CPP_VSPACE
&& !CPP_OPTION (pfile, lang_asm))
cpp_error_with_line (pfile, list->line, list->tokens[1].col,
"invalid preprocessing directive");
}
/* Token spelling functions. Used for output of a preprocessed file,
stringizing and token pasting. They all assume sufficient buffer
is allocated, and return exactly how much they used. */
/* Spell the character constant TOKEN into BUFFER: an 'L' prefix when
   the token is a CPP_WCHAR, then the text stored for it in LIST
   between single quotes.  Returns the number of bytes written.
   Needs buffer of 3 + len.  */
unsigned int
spell_char (buffer, list, token)
     unsigned char *buffer;
     cpp_toklist *list;
     cpp_token *token;
{
  const size_t text_len = token->val.name.len;
  unsigned char *out = buffer;

  if (token->type == CPP_WCHAR)
    *out++ = 'L';

  *out++ = '\'';
  memcpy (out, TOK_NAME (list, token), text_len);
  out += text_len;
  *out++ = '\'';

  return out - buffer;
}
/* Spell the string literal TOKEN into BUFFER: an 'L' prefix when the
   token is a CPP_WSTRING, then the text stored for it in LIST between
   double quotes.  Returns the number of bytes written.
   Needs buffer of 3 + len.  */
unsigned int
spell_string (buffer, list, token)
     unsigned char *buffer;
     cpp_toklist *list;
     cpp_token *token;
{
  unsigned char *cursor = buffer;
  size_t body_len;

  if (token->type == CPP_WSTRING)
    *cursor++ = 'L';
  *cursor++ = '"';

  body_len = token->val.name.len;
  memcpy (cursor, TOK_NAME (list, token), body_len);

  /* Closing quote goes directly after the copied text.  */
  cursor[body_len] = '"';

  return (cursor + body_len + 1) - buffer;
}
/* Spell the comment TOKEN into BUFFER: a two-character opener chosen
   by the token type (slash-star for CPP_C_COMMENT, two slashes for
   CPP_CPP_COMMENT, two dashes for anything else), followed by the
   text stored for it in LIST.  No closing delimiter is written here.
   Returns the number of bytes written.  Needs buffer of len + 2.  */
unsigned int
spell_comment (buffer, list, token)
     unsigned char *buffer;
     cpp_toklist *list;
     cpp_token *token;
{
  size_t text_len;

  switch (token->type)
    {
    case CPP_C_COMMENT:
      buffer[0] = '/';
      buffer[1] = '*';
      break;

    case CPP_CPP_COMMENT:
      buffer[0] = '/';
      buffer[1] = '/';
      break;

    default:
      buffer[0] = '-';
      buffer[1] = '-';
      break;
    }

  text_len = token->val.name.len;
  memcpy (buffer + 2, TOK_NAME (list, token), text_len);

  return text_len + 2;
}
/* Spell TOKEN into BUFFER by copying the text stored for it in LIST
   verbatim.  Returns the number of bytes written.
   Needs buffer of len.  */
unsigned int
spell_name (buffer, list, token)
     unsigned char *buffer;
     cpp_toklist *list;
     cpp_token *token;
{
  const size_t text_len = token->val.name.len;

  memcpy (buffer, TOK_NAME (list, token), text_len);
  return text_len;
}
/* Spell a CPP_OTHER token: write the single character kept in
   TOKEN's aux field to BUFFER.  LIST is not used.  Always returns 1.
   Needs buffer of 1.  */
unsigned int
spell_other (buffer, list, token)
     unsigned char *buffer;
     cpp_toklist *list ATTRIBUTE_UNUSED;
     cpp_token *token;
{
  buffer[0] = token->aux;
  return 1;
}
/* Lex PFILE's input one logical line at a time until a line whose
   first token is CPP_EOF is produced.  A line for which a directive
   handler was found is passed to that handler; every other line is
   written out with _cpp_output_list.

   The same token list is reused from line to line.  When a directive
   handler returns nonzero, a fresh list is allocated and the old one
   is no longer touched here.
   NOTE(review): presumably the handler has kept the old list in that
   case - confirm.  Also, the list that is live when CPP_EOF is seen
   is not released in this function.  */
void
_cpp_lex_file (pfile)
     cpp_reader *pfile;
{
  cpp_toklist *list;
  int recycle = 0;

  init_trigraph_map ();
  list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));

  while (1)
    {
      /* RECYCLE is zero only for a list straight from xmalloc.  */
      init_token_list (pfile, list, recycle);
      recycle = 1;

      _cpp_lex_line (pfile, list);
      if (list->tokens[0].type == CPP_EOF)
	break;

      if (!list->dir_handler)
	{
	  _cpp_output_list (pfile, list);
	  continue;
	}

      if (list->dir_handler (pfile))
	{
	  list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
	  recycle = 0;
	}
    }
}
static void
_cpp_output_list (pfile, list)
cpp_reader *pfile;
cpp_toklist *list;
{
unsigned int comment_no = 0;
cpp_token *token, *comment_token = 0;
if (list->comments_used > 0)
comment_token = list->tokens + list->comments[0].aux;
CPP_RESERVE (pfile, 2); /* Always have room for " \n". */
for (token = &list->tokens[0];; token++)
{
if (token->flags & PREV_WHITESPACE)
{
/* Output comments if -C. Otherwise a space will do. */
if (token == comment_token)
{
cpp_token *comment = &list->comments[comment_no];
do
{
/* Longest wrapper is 4. */
CPP_RESERVE (pfile, 4 + 2 + comment->val.name.len);
pfile->limit += spell_comment (pfile->limit, list, comment);
comment_no++, comment++;
if (comment_no == list->comments_used)
break;
comment_token = comment->aux + list->tokens;
}
while (comment_token == token);
}
else
CPP_PUTC_Q (pfile, ' ');
}
switch (token_spellings[token->type].type)
{
case SPELL_TEXT:
{
const unsigned char *spelling;
unsigned char c;
CPP_RESERVE (pfile, 4 + 2); /* Longest is 4. */
if (token->flags & DIGRAPH)
spelling = digraph_spellings [token->type - CPP_FIRST_DIGRAPH];
else
spelling = token_spellings[token->type].speller;
while ((c = *spelling++) != '\0')
CPP_PUTC_Q (pfile, c);
}
break;
case SPELL_HANDLER:
{
speller s;
s = (speller) token_spellings[token->type].speller;
/* Longest wrapper is 4. */
CPP_RESERVE (pfile, 4 + 2 + token->val.name.len);
pfile->limit += s (pfile->limit, list, token);
}
break;
case SPELL_EOL:
CPP_PUTC_Q (pfile, '\n');
return;
case SPELL_NONE:
cpp_error (pfile, "Unwriteable token");
break;
}
}
}
#endif
...@@ -150,6 +150,29 @@ DIRECTIVE_TABLE ...@@ -150,6 +150,29 @@ DIRECTIVE_TABLE
#undef D #undef D
#undef DIRECTIVE_TABLE #undef DIRECTIVE_TABLE
/* Look up the name of TOKEN in the directive table.  LIST's
   dir_handler and dir_flags are first cleared, then set from the
   first table entry whose name has the same length and the same
   characters as the token's spelling; they stay zero when nothing
   matches.  Put in this file to save exporting dtable and other
   unneeded information.  */
void
_cpp_check_directive (list, token)
     cpp_toklist *list;
     cpp_token *token;
{
  const size_t spelled_len = token->val.name.len;
  const char *spelled = list->namebuf + token->val.name.offset;
  unsigned int idx = 0;

  list->dir_handler = 0;
  list->dir_flags = 0;

  while (idx < N_DIRECTIVES)
    {
      if (dtable[idx].length == spelled_len
	  && strncmp (dtable[idx].name, spelled, spelled_len) == 0)
	{
	  list->dir_handler = dtable[idx].func;
	  list->dir_flags = dtable[idx].flags;
	  return;
	}
      idx++;
    }
}
/* Handle a possible # directive. /* Handle a possible # directive.
'#' has already been read. */ '#' has already been read. */
......
...@@ -34,13 +34,26 @@ typedef struct cpp_options cpp_options; ...@@ -34,13 +34,26 @@ typedef struct cpp_options cpp_options;
typedef struct cpp_printer cpp_printer; typedef struct cpp_printer cpp_printer;
typedef struct cpp_token cpp_token; typedef struct cpp_token cpp_token;
typedef struct cpp_toklist cpp_toklist; typedef struct cpp_toklist cpp_toklist;
typedef struct cpp_name cpp_name;
/* The first two groups, apart from '=', can appear in preprocessor
expressions. This allows a lookup table to be implemented in
_cpp_parse_expr.
The first group, to CPP_LAST_EQ, can be immediately followed by an
'='. The lexer needs operators ending in '=', like ">>=", to be in
the same order as their counterparts without the '=', like ">>". */
/* Positions in the table. */
#define CPP_LAST_EQ CPP_LSHIFT
#define CPP_FIRST_DIGRAPH CPP_HASH
/* Put operators that can appear in a preprocessor expression first.
This allows a lookup table to be implemented in _cpp_parse_expr.
Ordering within this group is currently not significant, apart
from those ending in '=' being at the end. */
#define TTYPE_TABLE \ #define TTYPE_TABLE \
T(CPP_PLUS = 0, "+") /* math */ \ T(CPP_EQ = 0, "=") \
T(CPP_NOT, "!") \
T(CPP_GREATER, ">") /* compare */ \
T(CPP_LESS, "<") \
T(CPP_PLUS, "+") /* math */ \
T(CPP_MINUS, "-") \ T(CPP_MINUS, "-") \
T(CPP_MULT, "*") \ T(CPP_MULT, "*") \
T(CPP_DIV, "/") \ T(CPP_DIV, "/") \
...@@ -51,22 +64,19 @@ typedef struct cpp_toklist cpp_toklist; ...@@ -51,22 +64,19 @@ typedef struct cpp_toklist cpp_toklist;
T(CPP_COMPL, "~") \ T(CPP_COMPL, "~") \
T(CPP_RSHIFT, ">>") \ T(CPP_RSHIFT, ">>") \
T(CPP_LSHIFT, "<<") \ T(CPP_LSHIFT, "<<") \
T(CPP_NOT, "!") /* logicals */ \ \
T(CPP_AND_AND, "&&") \ T(CPP_AND_AND, "&&") /* logical */ \
T(CPP_OR_OR, "||") \ T(CPP_OR_OR, "||") \
T(CPP_QUERY, "?") \ T(CPP_QUERY, "?") \
T(CPP_COLON, ":") \ T(CPP_COLON, ":") \
T(CPP_COMMA, ",") /* grouping */ \ T(CPP_COMMA, ",") /* grouping */ \
T(CPP_OPEN_PAREN, "(") \ T(CPP_OPEN_PAREN, "(") \
T(CPP_CLOSE_PAREN, ")") \ T(CPP_CLOSE_PAREN, ")") \
T(CPP_GREATER, ">") /* compare */ \ T(CPP_EQ_EQ, "==") /* compare */ \
T(CPP_LESS, "<") \
T(CPP_EQ_EQ, "==") \
T(CPP_NOT_EQ, "!=") \ T(CPP_NOT_EQ, "!=") \
T(CPP_GREATER_EQ, ">=") \ T(CPP_GREATER_EQ, ">=") \
T(CPP_LESS_EQ, "<=") \ T(CPP_LESS_EQ, "<=") \
\ \
/* The remainder of the punctuation. Order is not significant. */ \
T(CPP_PLUS_EQ, "+=") /* math */ \ T(CPP_PLUS_EQ, "+=") /* math */ \
T(CPP_MINUS_EQ, "-=") \ T(CPP_MINUS_EQ, "-=") \
T(CPP_MULT_EQ, "*=") \ T(CPP_MULT_EQ, "*=") \
...@@ -78,55 +88,67 @@ typedef struct cpp_toklist cpp_toklist; ...@@ -78,55 +88,67 @@ typedef struct cpp_toklist cpp_toklist;
T(CPP_COMPL_EQ, "~=") \ T(CPP_COMPL_EQ, "~=") \
T(CPP_RSHIFT_EQ, ">>=") \ T(CPP_RSHIFT_EQ, ">>=") \
T(CPP_LSHIFT_EQ, "<<=") \ T(CPP_LSHIFT_EQ, "<<=") \
T(CPP_EQ, "=") /* assign */ \ /* Digraphs together, beginning with CPP_FIRST_DIGRAPH. */ \
T(CPP_HASH, "#") /* digraphs */ \
T(CPP_PASTE, "##") \
T(CPP_OPEN_SQUARE, "[") \
T(CPP_CLOSE_SQUARE, "]") \
T(CPP_OPEN_BRACE, "{") \
T(CPP_CLOSE_BRACE, "}") \
/* The remainder of the punctuation. Order is not significant. */ \
T(CPP_SEMICOLON, ";") /* structure */ \
T(CPP_ELLIPSIS, "...") \
T(CPP_BACKSLASH, "\\") \
T(CPP_PLUS_PLUS, "++") /* increment */ \ T(CPP_PLUS_PLUS, "++") /* increment */ \
T(CPP_MINUS_MINUS, "--") \ T(CPP_MINUS_MINUS, "--") \
T(CPP_DEREF, "->") /* accessors */ \ T(CPP_DEREF, "->") /* accessors */ \
T(CPP_DOT, ".") \ T(CPP_DOT, ".") \
T(CPP_OPEN_SQUARE, "[") \
T(CPP_CLOSE_SQUARE, "]") \
T(CPP_SCOPE, "::") \ T(CPP_SCOPE, "::") \
T(CPP_DEREF_STAR, "->*") \ T(CPP_DEREF_STAR, "->*") \
T(CPP_DOT_STAR, ".*") \ T(CPP_DOT_STAR, ".*") \
T(CPP_OPEN_BRACE, "{") /* structure */ \
T(CPP_CLOSE_BRACE, "}") \
T(CPP_SEMICOLON, ";") \
T(CPP_ELLIPSIS, "...") \
T(CPP_HASH, "#") \
T(CPP_PASTE, "##") \
T(CPP_BACKSLASH, "\\") \
T(CPP_MIN, "<?") /* extension */ \ T(CPP_MIN, "<?") /* extension */ \
T(CPP_MAX, ">?") \ T(CPP_MAX, ">?") \
T(CPP_OTHER, spell_other) /* stray punctuation */ \ H(CPP_OTHER, spell_other) /* stray punctuation */ \
\ \
T(CPP_NAME, spell_name) /* word */ \ H(CPP_NAME, spell_name) /* word */ \
T(CPP_INT, 0) /* 23 */ \ N(CPP_INT, 0) /* 23 */ \
T(CPP_FLOAT, 0) /* 3.14159 */ \ N(CPP_FLOAT, 0) /* 3.14159 */ \
T(CPP_NUMBER, spell_name) /* 34_be+ta */ \ H(CPP_NUMBER, spell_name) /* 34_be+ta */ \
T(CPP_CHAR, spell_char) /* 'char' */ \ H(CPP_CHAR, spell_char) /* 'char' */ \
T(CPP_WCHAR, spell_char) /* L'char' */ \ H(CPP_WCHAR, spell_char) /* L'char' */ \
T(CPP_STRING, spell_string) /* "string" */ \ H(CPP_STRING, spell_string) /* "string" */ \
T(CPP_WSTRING, spell_string) /* L"string" */ \ H(CPP_WSTRING, spell_string) /* L"string" */ \
\ \
T(CPP_COMMENT, spell_comment) /* Only if output comments. */ \ H(CPP_C_COMMENT, spell_comment) /* Only if output comments. */ \
T(CPP_VSPACE, "\n") /* End of line. */ \ H(CPP_CPP_COMMENT, spell_comment) /* Only if output comments. */ \
T(CPP_EOF, 0) /* End of file. */ \ H(CPP_CHILL_COMMENT, spell_comment) /* Only if output comments. */ \
T(CPP_HEADER_NAME, 0) /* <stdio.h> in #include */ \ N(CPP_MACRO_ARG, 0) /* Macro argument. */ \
T(CPP_ASSERTION, 0) /* (...) in #assert */ \ N(CPP_SUBLIST, 0) /* Sublist. */ \
E(CPP_VSPACE, "\n") /* End of line. */ \
N(CPP_EOF, 0) /* End of file. */ \
N(CPP_HEADER_NAME, 0) /* <stdio.h> in #include */ \
N(CPP_ASSERTION, 0) /* (...) in #assert */ \
\ \
/* Obsolete - will be removed when no code uses them still. */ \ /* Obsolete - will be removed when no code uses them still. */ \
T(CPP_HSPACE, 0) /* Horizontal white space. */ \ H(CPP_COMMENT, 0) /* Only if output comments. */ \
T(CPP_POP, 0) /* End of buffer. */ \ N(CPP_HSPACE, 0) /* Horizontal white space. */ \
T(CPP_DIRECTIVE, 0) /* #define and the like */ \ N(CPP_POP, 0) /* End of buffer. */ \
T(CPP_MACRO, 0) /* Like a NAME, but expanded. */ N(CPP_DIRECTIVE, 0) /* #define and the like */ \
N(CPP_MACRO, 0) /* Like a NAME, but expanded. */
#define T(e, s) e, #define T(e, s) e,
#define H(e, s) e,
#define N(e, s) e,
#define E(e, s) e,
enum cpp_ttype enum cpp_ttype
{ {
TTYPE_TABLE TTYPE_TABLE
N_TTYPES N_TTYPES
}; };
#undef T #undef T
#undef H
#undef N
#undef E
/* Payload of a NAME, NUMBER, FLOAT, STRING, or COMMENT token. */ /* Payload of a NAME, NUMBER, FLOAT, STRING, or COMMENT token. */
struct cpp_name struct cpp_name
...@@ -135,8 +157,12 @@ struct cpp_name ...@@ -135,8 +157,12 @@ struct cpp_name
unsigned int offset; /* from list->namebuf */ unsigned int offset; /* from list->namebuf */
}; };
/* Per token flags. */ #define TOK_NAME(list, token) ((list)->namebuf + (token)->val.name.offset)
#define HSPACE_BEFORE (1 << 0) /* token preceded by hspace */
/* Flags for the cpp_token structure. */
#define PREV_WHITESPACE 1 /* If whitespace before this token. */
#define DIGRAPH 2 /* If it was a digraph. */
#define UNSIGNED_INT 4 /* If int preprocessing token unsigned. */
/* A preprocessing token. /* A preprocessing token.
This has been carefully packed and should occupy 16 bytes on This has been carefully packed and should occupy 16 bytes on
...@@ -150,8 +176,9 @@ struct cpp_token ...@@ -150,8 +176,9 @@ struct cpp_token
unsigned char type; unsigned char type;
#endif #endif
unsigned char flags; /* flags - see above */ unsigned char flags; /* flags - see above */
unsigned int aux; /* hash of a NAME, or something - unsigned int aux; /* CPP_OTHER character. Hash of a
see uses in the code */ NAME, or something - see uses
in the code */
union union
{ {
struct cpp_name name; /* a string */ struct cpp_name name; /* a string */
...@@ -168,7 +195,7 @@ typedef int (*parse_cleanup_t) PARAMS ((cpp_buffer *, cpp_reader *)); ...@@ -168,7 +195,7 @@ typedef int (*parse_cleanup_t) PARAMS ((cpp_buffer *, cpp_reader *));
struct cpp_toklist struct cpp_toklist
{ {
struct cpp_token *tokens; /* actual tokens as an array */ cpp_token *tokens; /* actual tokens as an array */
unsigned int tokens_used; /* tokens used */ unsigned int tokens_used; /* tokens used */
unsigned int tokens_cap; /* tokens allocated */ unsigned int tokens_cap; /* tokens allocated */
...@@ -178,6 +205,11 @@ struct cpp_toklist ...@@ -178,6 +205,11 @@ struct cpp_toklist
unsigned int line; /* starting line number */ unsigned int line; /* starting line number */
/* Comment copying. */
cpp_token *comments; /* comment tokens. */
unsigned int comments_used; /* comment tokens used. */
unsigned int comments_cap; /* comment token capacity. */
/* Only used if tokens[0].type == CPP_DIRECTIVE. This is the /* Only used if tokens[0].type == CPP_DIRECTIVE. This is the
handler to call after lexing the rest of this line. The flags handler to call after lexing the rest of this line. The flags
indicate whether the rest of the line gets special treatment indicate whether the rest of the line gets special treatment
...@@ -244,8 +276,12 @@ struct cpp_buffer ...@@ -244,8 +276,12 @@ struct cpp_buffer
/* True if we have already warned about C++ comments in this file. /* True if we have already warned about C++ comments in this file.
The warning happens only for C89 extended mode with -pedantic on, The warning happens only for C89 extended mode with -pedantic on,
and only once per file (otherwise it would be far too noisy). */ or for -Wtraditional, and only once per file (otherwise it would
be far too noisy). */
char warned_cplusplus_comments; char warned_cplusplus_comments;
/* True if this buffer's data is mmapped. */
char mapped;
}; };
struct file_name_map_list; struct file_name_map_list;
...@@ -561,6 +597,7 @@ struct cpp_printer ...@@ -561,6 +597,7 @@ struct cpp_printer
/* Name under which this program was invoked. */ /* Name under which this program was invoked. */
extern const char *progname; extern const char *progname;
extern void _cpp_lex_file PARAMS((cpp_reader *));
extern int cpp_handle_options PARAMS ((cpp_reader *, int, char **)); extern int cpp_handle_options PARAMS ((cpp_reader *, int, char **));
extern enum cpp_ttype cpp_get_token PARAMS ((cpp_reader *)); extern enum cpp_ttype cpp_get_token PARAMS ((cpp_reader *));
extern enum cpp_ttype cpp_get_non_space_token PARAMS ((cpp_reader *)); extern enum cpp_ttype cpp_get_non_space_token PARAMS ((cpp_reader *));
...@@ -580,6 +617,8 @@ extern void cpp_assert PARAMS ((cpp_reader *, const char *)); ...@@ -580,6 +617,8 @@ extern void cpp_assert PARAMS ((cpp_reader *, const char *));
extern void cpp_undef PARAMS ((cpp_reader *, const char *)); extern void cpp_undef PARAMS ((cpp_reader *, const char *));
extern void cpp_unassert PARAMS ((cpp_reader *, const char *)); extern void cpp_unassert PARAMS ((cpp_reader *, const char *));
extern void cpp_free_token_list PARAMS ((cpp_toklist *));
/* N.B. The error-message-printer prototypes have not been nicely /* N.B. The error-message-printer prototypes have not been nicely
formatted because exgettext needs to see 'msgid' on the same line formatted because exgettext needs to see 'msgid' on the same line
as the name of the function in order to work properly. Only the as the name of the function in order to work properly. Only the
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment