Commit 0d3ba739, authored and committed by Dave Brolley

lex.c (mbchar.h): #include it.

1998-07-20  Dave Brolley  <brolley@cygnus.com>
	* lex.c (mbchar.h): #include it.
	(GET_ENVIRONMENT): New macro.
	(init_parse): Set character set based on LANG environment variable.
	(real_yylex): Handle multibyte characters in character literals.
	(real_yylex): Handle multibyte characters in string literals.

From-SVN: r21304
parent 56f48ce9
1998-07-20 Dave Brolley <brolley@cygnus.com>
* lex.c (mbchar.h): #include it.
(GET_ENVIRONMENT): New macro.
(init_parse): Set character set based on LANG environment variable.
(real_yylex): Handle multibyte characters in character literals.
(real_yylex): Handle multibyte characters in string literals.
1998-07-19 Jason Merrill <jason@yorick.cygnus.com> 1998-07-19 Jason Merrill <jason@yorick.cygnus.com>
* lex.c (do_identifier): Look for class value even if we don't * lex.c (do_identifier): Look for class value even if we don't
......
...@@ -244,7 +244,7 @@ spew.o : spew.c $(CONFIG_H) $(CXX_TREE_H) \ ...@@ -244,7 +244,7 @@ spew.o : spew.c $(CONFIG_H) $(CXX_TREE_H) \
lex.o : lex.c $(CONFIG_H) $(CXX_TREE_H) \ lex.o : lex.c $(CONFIG_H) $(CXX_TREE_H) \
$(PARSE_H) input.c $(srcdir)/../flags.h hash.h lex.h \ $(PARSE_H) input.c $(srcdir)/../flags.h hash.h lex.h \
$(srcdir)/../c-pragma.h $(srcdir)/../system.h $(srcdir)/../toplev.h \ $(srcdir)/../c-pragma.h $(srcdir)/../system.h $(srcdir)/../toplev.h \
$(srcdir)/../output.h $(srcdir)/../output.h $(srcdir)/../mbchar.h
decl.o : decl.c $(CONFIG_H) $(CXX_TREE_H) $(srcdir)/../flags.h \ decl.o : decl.c $(CONFIG_H) $(CXX_TREE_H) $(srcdir)/../flags.h \
lex.h decl.h $(srcdir)/../stack.h $(srcdir)/../output.h \ lex.h decl.h $(srcdir)/../stack.h $(srcdir)/../output.h \
$(srcdir)/../except.h $(srcdir)/../system.h $(srcdir)/../toplev.h $(srcdir)/../except.h $(srcdir)/../system.h $(srcdir)/../toplev.h
......
...@@ -39,15 +39,13 @@ Boston, MA 02111-1307, USA. */ ...@@ -39,15 +39,13 @@ Boston, MA 02111-1307, USA. */
#include "toplev.h" #include "toplev.h"
#include "output.h" #include "output.h"
/* MULTIBYTE_CHARS support only works for native compilers.
??? Ideally what we want is to model widechar support after
the current floating point support. */
#ifdef CROSS_COMPILE
#undef MULTIBYTE_CHARS
#endif
#ifdef MULTIBYTE_CHARS #ifdef MULTIBYTE_CHARS
#include "mbchar.h"
#include <locale.h> #include <locale.h>
#ifndef GET_ENVIRONMENT
#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
#endif
#endif #endif
#define obstack_chunk_alloc xmalloc #define obstack_chunk_alloc xmalloc
...@@ -474,6 +472,12 @@ init_parse (filename) ...@@ -474,6 +472,12 @@ init_parse (filename)
int i; int i;
#ifdef MULTIBYTE_CHARS
/* Change to the native locale for multibyte conversions. */
setlocale (LC_CTYPE, "");
GET_ENVIRONMENT (literal_codeset, "LANG");
#endif
#if USE_CPPLIB #if USE_CPPLIB
yy_cur = "\n"; yy_cur = "\n";
yy_lim = yy_cur + 1; yy_lim = yy_cur + 1;
...@@ -3922,30 +3926,27 @@ real_yylex () ...@@ -3922,30 +3926,27 @@ real_yylex ()
{ {
register int result = 0; register int result = 0;
register int num_chars = 0; register int num_chars = 0;
int chars_seen = 0;
unsigned width = TYPE_PRECISION (char_type_node); unsigned width = TYPE_PRECISION (char_type_node);
int max_chars; int max_chars;
if (wide_flag)
{
width = WCHAR_TYPE_SIZE;
#ifdef MULTIBYTE_CHARS #ifdef MULTIBYTE_CHARS
max_chars = MB_CUR_MAX; int longest_char = local_mb_cur_max ();
#else (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
max_chars = 1;
#endif #endif
}
else max_chars = TYPE_PRECISION (integer_type_node) / width;
max_chars = TYPE_PRECISION (integer_type_node) / width; if (wide_flag)
width = WCHAR_TYPE_SIZE;
while (1) while (1)
{ {
tryagain: tryagain:
c = getch (); c = getch ();
if (c == '\'' || c == EOF) if (c == '\'' || c == EOF)
break; break;
++chars_seen;
if (c == '\\') if (c == '\\')
{ {
int ignore = 0; int ignore = 0;
...@@ -3954,7 +3955,7 @@ real_yylex () ...@@ -3954,7 +3955,7 @@ real_yylex ()
goto tryagain; goto tryagain;
if (width < HOST_BITS_PER_INT if (width < HOST_BITS_PER_INT
&& (unsigned) c >= (1 << width)) && (unsigned) c >= (1 << width))
warning ("escape sequence out of range for character"); pedwarn ("escape sequence out of range for character");
#ifdef MAP_CHARACTER #ifdef MAP_CHARACTER
if (ISPRINT (c)) if (ISPRINT (c))
c = MAP_CHARACTER (c); c = MAP_CHARACTER (c);
...@@ -3963,21 +3964,79 @@ real_yylex () ...@@ -3963,21 +3964,79 @@ real_yylex ()
else if (c == '\n') else if (c == '\n')
{ {
if (pedantic) if (pedantic)
pedwarn ("ANSI C++ forbids newline in character constant"); pedwarn ("ANSI C forbids newline in character constant");
lineno++; lineno++;
} }
#ifdef MAP_CHARACTER
else else
c = MAP_CHARACTER (c); {
#ifdef MULTIBYTE_CHARS
wchar_t wc;
int i;
int char_len = -1;
for (i = 1; i <= longest_char; ++i)
{
if (i > maxtoken - 4)
extend_token_buffer (token_buffer);
token_buffer[i] = c;
char_len = local_mbtowc (& wc,
token_buffer + 1,
i);
if (char_len != -1)
break;
c = getch ();
}
if (char_len > 1)
{
/* mbtowc sometimes needs an extra char before accepting */
if (char_len < i)
put_back (c);
if (! wide_flag)
{
/* Merge character into result; ignore excess chars. */
for (i = 1; i <= char_len; ++i)
{
if (i > max_chars)
break;
if (width < HOST_BITS_PER_INT)
result = (result << width)
| (token_buffer[i]
& ((1 << width) - 1));
else
result = token_buffer[i];
}
num_chars += char_len;
goto tryagain;
}
c = wc;
}
else
{
if (char_len == -1)
warning ("Ignoring invalid multibyte character");
if (wide_flag)
c = wc;
#ifdef MAP_CHARACTER
else
c = MAP_CHARACTER (c);
#endif #endif
}
#else /* ! MULTIBYTE_CHARS */
#ifdef MAP_CHARACTER
c = MAP_CHARACTER (c);
#endif
#endif /* ! MULTIBYTE_CHARS */
}
num_chars++; if (wide_flag)
if (num_chars > maxtoken - 4) {
extend_token_buffer (token_buffer); if (chars_seen == 1) /* only keep the first one */
result = c;
token_buffer[num_chars] = c; goto tryagain;
}
/* Merge character into result; ignore excess chars. */ /* Merge character into result; ignore excess chars. */
num_chars++;
if (num_chars < max_chars + 1) if (num_chars < max_chars + 1)
{ {
if (width < HOST_BITS_PER_INT) if (width < HOST_BITS_PER_INT)
...@@ -3987,19 +4046,16 @@ real_yylex () ...@@ -3987,19 +4046,16 @@ real_yylex ()
} }
} }
token_buffer[num_chars + 1] = '\'';
token_buffer[num_chars + 2] = 0;
if (c != '\'') if (c != '\'')
error ("malformatted character constant"); error ("malformatted character constant");
else if (num_chars == 0) else if (chars_seen == 0)
error ("empty character constant"); error ("empty character constant");
else if (num_chars > max_chars) else if (num_chars > max_chars)
{ {
num_chars = max_chars; num_chars = max_chars;
error ("character constant too long"); error ("character constant too long");
} }
else if (num_chars != 1 && warn_multichar) else if (chars_seen != 1 && warn_multichar)
warning ("multi-character character constant"); warning ("multi-character character constant");
/* If char type is signed, sign-extend the constant. */ /* If char type is signed, sign-extend the constant. */
...@@ -4012,37 +4068,21 @@ real_yylex () ...@@ -4012,37 +4068,21 @@ real_yylex ()
else if (TREE_UNSIGNED (char_type_node) else if (TREE_UNSIGNED (char_type_node)
|| ((result >> (num_bits - 1)) & 1) == 0) || ((result >> (num_bits - 1)) & 1) == 0)
yylval.ttype yylval.ttype
= build_int_2 (result & ((unsigned HOST_WIDE_INT) ~0 = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
>> (HOST_BITS_PER_WIDE_INT - num_bits)), >> (HOST_BITS_PER_WIDE_INT - num_bits)),
0); 0);
else else
yylval.ttype yylval.ttype
= build_int_2 (result | ~((unsigned HOST_WIDE_INT) ~0 = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
>> (HOST_BITS_PER_WIDE_INT - num_bits)), >> (HOST_BITS_PER_WIDE_INT - num_bits)),
-1); -1);
if (num_chars<=1) if (chars_seen <= 1)
TREE_TYPE (yylval.ttype) = char_type_node; TREE_TYPE (yylval.ttype) = char_type_node;
else else
TREE_TYPE (yylval.ttype) = integer_type_node; TREE_TYPE (yylval.ttype) = integer_type_node;
} }
else else
{ {
#ifdef MULTIBYTE_CHARS
/* Set the initial shift state and convert the next sequence. */
result = 0;
/* In all locales L'\0' is zero and mbtowc will return zero,
so don't use it. */
if (num_chars > 1
|| (num_chars == 1 && token_buffer[1] != '\0'))
{
wchar_t wc;
(void) mbtowc (NULL, NULL, 0);
if (mbtowc (& wc, token_buffer + 1, num_chars) == num_chars)
result = wc;
else
warning ("Ignoring invalid multibyte character");
}
#endif
yylval.ttype = build_int_2 (result, 0); yylval.ttype = build_int_2 (result, 0);
TREE_TYPE (yylval.ttype) = wchar_type_node; TREE_TYPE (yylval.ttype) = wchar_type_node;
} }
...@@ -4055,6 +4095,12 @@ real_yylex () ...@@ -4055,6 +4095,12 @@ real_yylex ()
string_constant: string_constant:
{ {
register char *p; register char *p;
unsigned width = wide_flag ? WCHAR_TYPE_SIZE
: TYPE_PRECISION (char_type_node);
#ifdef MULTIBYTE_CHARS
int longest_char = local_mb_cur_max ();
(void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
#endif
c = getch (); c = getch ();
p = token_buffer + 1; p = token_buffer + 1;
...@@ -4068,9 +4114,8 @@ real_yylex () ...@@ -4068,9 +4114,8 @@ real_yylex ()
c = readescape (&ignore); c = readescape (&ignore);
if (ignore) if (ignore)
goto skipnewline; goto skipnewline;
if (!wide_flag if (width < HOST_BITS_PER_INT
&& TYPE_PRECISION (char_type_node) < HOST_BITS_PER_INT && (unsigned) c >= (1 << width))
&& c >= ((unsigned) 1 << TYPE_PRECISION (char_type_node)))
warning ("escape sequence out of range for character"); warning ("escape sequence out of range for character");
} }
else if (c == '\n') else if (c == '\n')
...@@ -4079,10 +4124,74 @@ real_yylex () ...@@ -4079,10 +4124,74 @@ real_yylex ()
pedwarn ("ANSI C++ forbids newline in string constant"); pedwarn ("ANSI C++ forbids newline in string constant");
lineno++; lineno++;
} }
else
{
#ifdef MULTIBYTE_CHARS
wchar_t wc;
int i;
int char_len = -1;
for (i = 0; i < longest_char; ++i)
{
if (p + i == token_buffer + maxtoken)
p = extend_token_buffer (p);
p[i] = c;
char_len = local_mbtowc (& wc, p, i + 1);
if (char_len != -1)
break;
c = getch ();
}
if (char_len == -1)
warning ("Ignoring invalid multibyte character");
else
{
/* mbtowc sometimes needs an extra char before accepting */
if (char_len <= i)
put_back (c);
if (wide_flag)
{
*(wchar_t *)p = wc;
p += sizeof (wc);
}
else
p += (i + 1);
c = getch ();
continue;
}
#endif /* MULTIBYTE_CHARS */
}
if (p == token_buffer + maxtoken) /* Add this single character into the buffer either as a wchar_t
p = extend_token_buffer (p); or as a single byte. */
*p++ = c; if (wide_flag)
{
unsigned width = TYPE_PRECISION (char_type_node);
unsigned bytemask = (1 << width) - 1;
int byte;
if (p + WCHAR_BYTES >= token_buffer + maxtoken)
p = extend_token_buffer (p);
for (byte = 0; byte < WCHAR_BYTES; ++byte)
{
int value;
if (byte >= sizeof (c))
value = 0;
else
value = (c >> (byte * width)) & bytemask;
if (BYTES_BIG_ENDIAN)
p[WCHAR_BYTES - byte - 1] = value;
else
p[byte] = value;
}
p += WCHAR_BYTES;
}
else
{
if (p == token_buffer + maxtoken)
p = extend_token_buffer (p);
*p++ = c;
}
skipnewline: skipnewline:
c = getch (); c = getch ();
...@@ -4091,56 +4200,36 @@ real_yylex () ...@@ -4091,56 +4200,36 @@ real_yylex ()
break; break;
} }
} }
*p = 0;
/* We have read the entire constant.
Construct a STRING_CST for the result. */
/* Terminate the string value, either with a single byte zero
or with a wide zero. */
if (wide_flag) if (wide_flag)
{ {
/* If this is a L"..." wide-string, convert the multibyte string if (p + WCHAR_BYTES >= token_buffer + maxtoken)
to a wide character string. */ p = extend_token_buffer (p);
char *widep = (char *) alloca ((p - token_buffer) * WCHAR_BYTES); bzero (p, WCHAR_BYTES);
int len; p += WCHAR_BYTES;
#ifdef MULTIBYTE_CHARS
len = mbstowcs ((wchar_t *) widep, token_buffer + 1, p - token_buffer);
if (len < 0 || len >= (p - token_buffer))
{
warning ("Ignoring invalid multibyte string");
len = 0;
}
bzero (widep + (len * WCHAR_BYTES), WCHAR_BYTES);
#else
{
char *wp, *cp;
wp = widep + (BYTES_BIG_ENDIAN ? WCHAR_BYTES - 1 : 0);
bzero (widep, (p - token_buffer) * WCHAR_BYTES);
for (cp = token_buffer + 1; cp < p; cp++)
*wp = *cp, wp += WCHAR_BYTES;
len = p - token_buffer - 1;
}
#endif
if (processing_template_decl)
push_obstacks (&permanent_obstack, &permanent_obstack);
yylval.ttype = build_string ((len + 1) * WCHAR_BYTES, widep);
if (processing_template_decl)
pop_obstacks ();
TREE_TYPE (yylval.ttype) = wchar_array_type_node;
} }
else else
{ {
if (processing_template_decl) if (p == token_buffer + maxtoken)
push_obstacks (&permanent_obstack, &permanent_obstack); p = extend_token_buffer (p);
yylval.ttype = build_string (p - token_buffer, token_buffer + 1); *p++ = 0;
if (processing_template_decl)
pop_obstacks ();
TREE_TYPE (yylval.ttype) = char_array_type_node;
} }
*p++ = '"'; /* We have read the entire constant.
*p = 0; Construct a STRING_CST for the result. */
if (processing_template_decl)
push_obstacks (&permanent_obstack, &permanent_obstack);
yylval.ttype = build_string (p - (token_buffer + 1), token_buffer + 1);
if (processing_template_decl)
pop_obstacks ();
if (wide_flag)
TREE_TYPE (yylval.ttype) = wchar_array_type_node;
else
TREE_TYPE (yylval.ttype) = char_array_type_node;
value = STRING; break; value = STRING; break;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment