Commit b917101e by Robert Dewar Committed by Arnaud Charlet

a-textio.adb, [...]: Extensive changes to private part for wide character encoding

2007-12-06  Robert Dewar  <dewar@adacore.com>

	* a-textio.adb, a-textio.ads: 
	Extensive changes to private part for wide character encoding

	* a-witeio.ads, a-witeio.adb, a-ztexio.ads, a-ztexio.adb
	(Look_Ahead): Fix mishandling of encoded sequences
	Move declaration of Wch_Con to private part (should not be visible)

	* ali.adb (Scan_ALI): Set default encoding method to brackets instead of
	UTF-8. Probably this is never used, but if it is, brackets is
	clearly correct.

	* bindgen.adb (Get_WC_Encoding): New procedure to properly handle
	setting wide character encoding for no main program case and when
	encoding is specified using -W?
	Initialize stack limit of environment task if stack limit method of
	stack checking is enabled.
	(Gen_Adainit_Ada): Use Get_WC_Encoding to output encoding method
	(Gen_Adainit_C): Use Get_WC_Encoding to output encoding method
	(Get_Main_Unit_Name): New function.
	(Gen_Adainit_Ada): Add call to main program for .NET when needed.
	(Gen_Output_File): Set Bind_Main_Program to True for .NET

	* bindusg.adb: Add line for -Wx switch

	* s-wchcon.adb, s-wchcon.ads: (Is_Start_Of_Encoding): New function
	Add comments
	Add new useful constant WC_Longest_Sequences

	* switch-b.adb: Clean up handling of -Wx switch
	For -gnatWx, set Wide_Character_Encoding_Method_Specified

	* switch-c.adb: -gnatg activates warning on assertion errors
	For -gnatWx, set Wide_Character_Encoding_Method_Specified

	* s-wchcon.adb: (Is_Start_Of_Encoding): New function

From-SVN: r130817
parent a6e8413c
...@@ -45,6 +45,7 @@ with Ada.IO_Exceptions; ...@@ -45,6 +45,7 @@ with Ada.IO_Exceptions;
with Ada.Streams; with Ada.Streams;
with System; with System;
with System.File_Control_Block; with System.File_Control_Block;
with System.WCh_Con;
package Ada.Text_IO is package Ada.Text_IO is
pragma Elaborate_Body; pragma Elaborate_Body;
...@@ -334,6 +335,11 @@ private ...@@ -334,6 +335,11 @@ private
-- Text_IO File Control Block -- -- Text_IO File Control Block --
-------------------------------- --------------------------------
Default_WCEM : System.WCh_Con.WC_Encoding_Method :=
System.WCh_Con.WCEM_UTF8;
-- This gets modified during initialization (see body) using
-- the default value established in the call to Set_Globals.
package FCB renames System.File_Control_Block; package FCB renames System.File_Control_Block;
type Text_AFCB; type Text_AFCB;
...@@ -366,6 +372,31 @@ private ...@@ -366,6 +372,31 @@ private
-- after a LM-PM sequence when logically we are before the LM-PM. This -- after a LM-PM sequence when logically we are before the LM-PM. This
-- flag can only be set if Before_LM is also set. -- flag can only be set if Before_LM is also set.
WC_Method : System.WCh_Con.WC_Encoding_Method := Default_WCEM;
-- Encoding method to be used for this file. Text_IO does not deal with
-- wide characters, but it does deal with upper half characters in the
-- range 16#80#-16#FF# which may need encoding, e.g. in UTF-8 mode.
Before_Upper_Half_Character : Boolean := False;
-- This flag is set to indicate that an encoded upper half character has
-- been read by Text_IO.Look_Ahead. If it is set to True, then it means
-- that the stream is logically positioned before the character but is
-- physically positioned after it. The character involved must be in
-- the range 16#80#-16#FF#, i.e. if the flag is set, then we know the
-- next character has a code greater than 16#7F#, and the value of this
-- character is saved in Saved_Upper_Half_Character.
Saved_Upper_Half_Character : Character;
-- This field is valid only if Before_Upper_Half_Character is set. It
-- contains an upper-half character read by Look_Ahead. If Look_Ahead
-- reads a character in the range 16#00# to 16#7F#, then it can use
-- ungetc to put it back, but ungetc cannot be called more than once,
-- so for characters above this range, we don't try to back up the
-- file. Instead we save the character in this field and set the flag
-- Before_Upper_Half_Character to True to indicate that we are logically
-- positioned before this character even though the stream is physically
-- positioned after it.
end record; end record;
function AFCB_Allocate (Control_Block : Text_AFCB) return FCB.AFCB_Ptr; function AFCB_Allocate (Control_Block : Text_AFCB) return FCB.AFCB_Ptr;
......
...@@ -31,7 +31,6 @@ ...@@ -31,7 +31,6 @@
-- -- -- --
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
with Ada.Exceptions; use Ada.Exceptions;
with Ada.Streams; use Ada.Streams; with Ada.Streams; use Ada.Streams;
with Interfaces.C_Streams; use Interfaces.C_Streams; with Interfaces.C_Streams; use Interfaces.C_Streams;
...@@ -76,9 +75,9 @@ package body Ada.Wide_Text_IO is ...@@ -76,9 +75,9 @@ package body Ada.Wide_Text_IO is
-- done in Get_Immediate mode (i.e. without waiting for a line return). -- done in Get_Immediate mode (i.e. without waiting for a line return).
procedure Set_WCEM (File : in out File_Type); procedure Set_WCEM (File : in out File_Type);
-- Called by Open and Create to set the wide character encoding method -- Called by Open and Create to set the wide character encoding method for
-- for the file, processing a WCEM form parameter if one is present. -- the file, processing a WCEM form parameter if one is present. File is
-- File is IN OUT because it may be closed in case of an error. -- IN OUT because it may be closed in case of an error.
------------------- -------------------
-- AFCB_Allocate -- -- AFCB_Allocate --
...@@ -249,7 +248,6 @@ package body Ada.Wide_Text_IO is ...@@ -249,7 +248,6 @@ package body Ada.Wide_Text_IO is
return False; return False;
elsif File.Before_LM then elsif File.Before_LM then
if File.Before_LM_PM then if File.Before_LM_PM then
return Nextc (File) = EOF; return Nextc (File) = EOF;
end if; end if;
...@@ -420,6 +418,8 @@ package body Ada.Wide_Text_IO is ...@@ -420,6 +418,8 @@ package body Ada.Wide_Text_IO is
File.Before_Wide_Character := False; File.Before_Wide_Character := False;
Item := File.Saved_Wide_Character; Item := File.Saved_Wide_Character;
-- Ada.Text_IO checks Before_LM_PM here, shouldn't we do the same???
else else
Get_Character (File, C); Get_Character (File, C);
Item := Get_Wide_Char (C, File); Item := Get_Wide_Char (C, File);
...@@ -555,6 +555,8 @@ package body Ada.Wide_Text_IO is ...@@ -555,6 +555,8 @@ package body Ada.Wide_Text_IO is
Item := Wide_Character'Val (LM); Item := Wide_Character'Val (LM);
else else
-- Shouldn't we use getc_immediate_nowait here, like Text_IO???
ch := Getc_Immed (File); ch := Getc_Immed (File);
if ch = EOF then if ch = EOF then
...@@ -749,7 +751,7 @@ package body Ada.Wide_Text_IO is ...@@ -749,7 +751,7 @@ package body Ada.Wide_Text_IO is
end if; end if;
end In_Char; end In_Char;
-- Start of processing for In_Char -- Start of processing for Get_Wide_Char
begin begin
return WC_In (C, File.WC_Method); return WC_In (C, File.WC_Method);
...@@ -904,7 +906,7 @@ package body Ada.Wide_Text_IO is ...@@ -904,7 +906,7 @@ package body Ada.Wide_Text_IO is
End_Of_Line := True; End_Of_Line := True;
Item := Wide_Character'Val (0); Item := Wide_Character'Val (0);
-- If we are before a wide character, just return it (this happens -- If we are before a wide character, just return it (this can happen
-- if there are two calls to Look_Ahead in a row). -- if there are two calls to Look_Ahead in a row).
elsif File.Before_Wide_Character then elsif File.Before_Wide_Character then
...@@ -924,19 +926,21 @@ package body Ada.Wide_Text_IO is ...@@ -924,19 +926,21 @@ package body Ada.Wide_Text_IO is
Ungetc (ch, File); Ungetc (ch, File);
Item := Wide_Character'Val (0); Item := Wide_Character'Val (0);
-- If the character is in the range 16#0000# to 16#007F# it stands -- Case where character obtained does not represent the start of an
-- for itself and occupies a single byte, so we can unget it with -- encoded sequence so it stands for itself and we can unget it with
-- no difficulty. -- no difficulty.
elsif ch <= 16#0080# then elsif not Is_Start_Of_Encoding
(Character'Val (ch), File.WC_Method)
then
End_Of_Line := False; End_Of_Line := False;
Ungetc (ch, File); Ungetc (ch, File);
Item := Wide_Character'Val (ch); Item := Wide_Character'Val (ch);
-- For a character above this range, we read the character, using -- For the start of an encoding, we read the character using the
-- the Get_Wide_Char routine. It may well occupy more than one byte -- Get_Wide_Char routine. It will occupy more than one byte so we
-- so we can't put it back with ungetc. Instead we save it in the -- can't put it back with ungetc. Instead we save it in the control
-- control block, setting a flag that everyone interested in reading -- block, setting a flag that everyone interested in reading
-- characters must test before reading the stream. -- characters must test before reading the stream.
else else
...@@ -1552,7 +1556,7 @@ package body Ada.Wide_Text_IO is ...@@ -1552,7 +1556,7 @@ package body Ada.Wide_Text_IO is
end if; end if;
Close (File); Close (File);
Raise_Exception (Use_Error'Identity, "invalid WCEM form parameter"); raise Use_Error with "invalid WCEM form parameter";
end if; end if;
end Set_WCEM; end Set_WCEM;
...@@ -1638,7 +1642,6 @@ package body Ada.Wide_Text_IO is ...@@ -1638,7 +1642,6 @@ package body Ada.Wide_Text_IO is
Ungetc (ch, File); Ungetc (ch, File);
end if; end if;
end if; end if;
end loop; end loop;
File.Before_Wide_Character := False; File.Before_Wide_Character := False;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
-- -- -- --
-- S p e c -- -- S p e c --
-- -- -- --
-- Copyright (C) 1992-2006, Free Software Foundation, Inc. -- -- Copyright (C) 1992-2007, Free Software Foundation, Inc. --
-- -- -- --
-- This specification is derived from the Ada Reference Manual for use with -- -- This specification is derived from the Ada Reference Manual for use with --
-- GNAT. The copyright notice above, and the license provisions that follow -- -- GNAT. The copyright notice above, and the license provisions that follow --
...@@ -50,8 +50,6 @@ with System.WCh_Con; ...@@ -50,8 +50,6 @@ with System.WCh_Con;
package Ada.Wide_Text_IO is package Ada.Wide_Text_IO is
package WCh_Con renames System.WCh_Con;
type File_Type is limited private; type File_Type is limited private;
type File_Mode is (In_File, Out_File, Append_File); type File_Mode is (In_File, Out_File, Append_File);
...@@ -303,6 +301,8 @@ package Ada.Wide_Text_IO is ...@@ -303,6 +301,8 @@ package Ada.Wide_Text_IO is
Layout_Error : exception renames IO_Exceptions.Layout_Error; Layout_Error : exception renames IO_Exceptions.Layout_Error;
private private
package WCh_Con renames System.WCh_Con;
----------------------------------- -----------------------------------
-- Handling of Format Characters -- -- Handling of Format Characters --
----------------------------------- -----------------------------------
......
...@@ -31,7 +31,6 @@ ...@@ -31,7 +31,6 @@
-- -- -- --
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
with Ada.Exceptions; use Ada.Exceptions;
with Ada.Streams; use Ada.Streams; with Ada.Streams; use Ada.Streams;
with Interfaces.C_Streams; use Interfaces.C_Streams; with Interfaces.C_Streams; use Interfaces.C_Streams;
...@@ -76,9 +75,9 @@ package body Ada.Wide_Wide_Text_IO is ...@@ -76,9 +75,9 @@ package body Ada.Wide_Wide_Text_IO is
-- are done in Get_Immediate mode (i.e. without waiting for a line return). -- are done in Get_Immediate mode (i.e. without waiting for a line return).
procedure Set_WCEM (File : in out File_Type); procedure Set_WCEM (File : in out File_Type);
-- Called by Open and Create to set the wide character encoding method -- Called by Open and Create to set the wide character encoding method for
-- for the file, processing a WCEM form parameter if one is present. -- the file, processing a WCEM form parameter if one is present. File is
-- File is IN OUT because it may be closed in case of an error. -- IN OUT because it may be closed in case of an error.
------------------- -------------------
-- AFCB_Allocate -- -- AFCB_Allocate --
...@@ -249,7 +248,6 @@ package body Ada.Wide_Wide_Text_IO is ...@@ -249,7 +248,6 @@ package body Ada.Wide_Wide_Text_IO is
return False; return False;
elsif File.Before_LM then elsif File.Before_LM then
if File.Before_LM_PM then if File.Before_LM_PM then
return Nextc (File) = EOF; return Nextc (File) = EOF;
end if; end if;
...@@ -420,6 +418,8 @@ package body Ada.Wide_Wide_Text_IO is ...@@ -420,6 +418,8 @@ package body Ada.Wide_Wide_Text_IO is
File.Before_Wide_Wide_Character := False; File.Before_Wide_Wide_Character := False;
Item := File.Saved_Wide_Wide_Character; Item := File.Saved_Wide_Wide_Character;
-- Ada.Text_IO checks Before_LM_PM here, shouldn't we do the same???
else else
Get_Character (File, C); Get_Character (File, C);
Item := Get_Wide_Wide_Char (C, File); Item := Get_Wide_Wide_Char (C, File);
...@@ -555,6 +555,8 @@ package body Ada.Wide_Wide_Text_IO is ...@@ -555,6 +555,8 @@ package body Ada.Wide_Wide_Text_IO is
Item := Wide_Wide_Character'Val (LM); Item := Wide_Wide_Character'Val (LM);
else else
-- Shouldn't we use getc_immediate_nowait here, like Text_IO???
ch := Getc_Immed (File); ch := Getc_Immed (File);
if ch = EOF then if ch = EOF then
...@@ -904,7 +906,7 @@ package body Ada.Wide_Wide_Text_IO is ...@@ -904,7 +906,7 @@ package body Ada.Wide_Wide_Text_IO is
End_Of_Line := True; End_Of_Line := True;
Item := Wide_Wide_Character'Val (0); Item := Wide_Wide_Character'Val (0);
-- If we are before a wide character, just return it (this happens -- If we are before a wide character, just return it (this can happen
-- if there are two calls to Look_Ahead in a row). -- if there are two calls to Look_Ahead in a row).
elsif File.Before_Wide_Wide_Character then elsif File.Before_Wide_Wide_Character then
...@@ -924,20 +926,22 @@ package body Ada.Wide_Wide_Text_IO is ...@@ -924,20 +926,22 @@ package body Ada.Wide_Wide_Text_IO is
Ungetc (ch, File); Ungetc (ch, File);
Item := Wide_Wide_Character'Val (0); Item := Wide_Wide_Character'Val (0);
-- If the character is in the range 16#0000# to 16#007F# it stands -- Case where character obtained does not represent the start of an
-- for itself and occupies a single byte, so we can unget it with -- encoded sequence so it stands for itself and we can unget it with
-- no difficulty. -- no difficulty.
elsif ch <= 16#0080# then elsif not Is_Start_Of_Encoding
(Character'Val (ch), File.WC_Method)
then
End_Of_Line := False; End_Of_Line := False;
Ungetc (ch, File); Ungetc (ch, File);
Item := Wide_Wide_Character'Val (ch); Item := Wide_Wide_Character'Val (ch);
-- For a character above this range, we read the character, using -- For the start of an encoding, we read the character using the
-- the Get_Wide_Wide_Char routine. It may well occupy more than one -- Get_Wide_Wide_Char routine. It will occupy more than one byte so
-- byte so we can't put it back with ungetc. Instead we save it in -- we can't put it back with ungetc. Instead we save it in the
-- the control block, setting a flag that everyone interested in -- control block, setting a flag that everyone interested in reading
-- reading characters must test before reading the stream. -- characters must test before reading the stream.
else else
Item := Get_Wide_Wide_Char (Character'Val (ch), File); Item := Get_Wide_Wide_Char (Character'Val (ch), File);
...@@ -1552,7 +1556,7 @@ package body Ada.Wide_Wide_Text_IO is ...@@ -1552,7 +1556,7 @@ package body Ada.Wide_Wide_Text_IO is
end if; end if;
Close (File); Close (File);
Raise_Exception (Use_Error'Identity, "invalid WCEM form parameter"); raise Use_Error with "invalid WCEM form parameter";
end if; end if;
end Set_WCEM; end Set_WCEM;
...@@ -1638,7 +1642,6 @@ package body Ada.Wide_Wide_Text_IO is ...@@ -1638,7 +1642,6 @@ package body Ada.Wide_Wide_Text_IO is
Ungetc (ch, File); Ungetc (ch, File);
end if; end if;
end if; end if;
end loop; end loop;
File.Before_Wide_Wide_Character := False; File.Before_Wide_Wide_Character := False;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
-- -- -- --
-- S p e c -- -- S p e c --
-- -- -- --
-- Copyright (C) 1992-2006, Free Software Foundation, Inc. -- -- Copyright (C) 1992-2007, Free Software Foundation, Inc. --
-- -- -- --
-- This specification is derived from the Ada Reference Manual for use with -- -- This specification is derived from the Ada Reference Manual for use with --
-- GNAT. The copyright notice above, and the license provisions that follow -- -- GNAT. The copyright notice above, and the license provisions that follow --
...@@ -50,8 +50,6 @@ with System.WCh_Con; ...@@ -50,8 +50,6 @@ with System.WCh_Con;
package Ada.Wide_Wide_Text_IO is package Ada.Wide_Wide_Text_IO is
package WCh_Con renames System.WCh_Con;
type File_Type is limited private; type File_Type is limited private;
type File_Mode is (In_File, Out_File, Append_File); type File_Mode is (In_File, Out_File, Append_File);
...@@ -303,6 +301,8 @@ package Ada.Wide_Wide_Text_IO is ...@@ -303,6 +301,8 @@ package Ada.Wide_Wide_Text_IO is
Layout_Error : exception renames IO_Exceptions.Layout_Error; Layout_Error : exception renames IO_Exceptions.Layout_Error;
private private
package WCh_Con renames System.WCh_Con;
----------------------------------- -----------------------------------
-- Handling of Format Characters -- -- Handling of Format Characters --
----------------------------------- -----------------------------------
......
...@@ -824,7 +824,7 @@ package body ALI is ...@@ -824,7 +824,7 @@ package body ALI is
Sfile => No_File, Sfile => No_File,
Task_Dispatching_Policy => ' ', Task_Dispatching_Policy => ' ',
Time_Slice_Value => -1, Time_Slice_Value => -1,
WC_Encoding => '8', WC_Encoding => 'b',
Unit_Exception_Table => False, Unit_Exception_Table => False,
Ver => (others => ' '), Ver => (others => ' '),
Ver_Len => 0, Ver_Len => 0,
...@@ -930,13 +930,23 @@ package body ALI is ...@@ -930,13 +930,23 @@ package body ALI is
else else
Checkc (' '); Checkc (' ');
Name_Len := 0;
-- Scan out argument
Name_Len := 0;
while not At_Eol loop while not At_Eol loop
Name_Len := Name_Len + 1; Name_Len := Name_Len + 1;
Name_Buffer (Name_Len) := Getc; Name_Buffer (Name_Len) := Getc;
end loop; end loop;
-- If -fstack-check, record that it occurred
if Name_Buffer (1 .. Name_Len) = "-fstack-check" then
Stack_Check_Switch_Set := True;
end if;
-- Store the argument
Args.Increment_Last; Args.Increment_Last;
Args.Table (Args.Last) := new String'(Name_Buffer (1 .. Name_Len)); Args.Table (Args.Last) := new String'(Name_Buffer (1 .. Name_Len));
......
...@@ -26,6 +26,8 @@ ...@@ -26,6 +26,8 @@
with Osint; use Osint; with Osint; use Osint;
with Output; use Output; with Output; use Output;
with System.WCh_Con; use System.WCh_Con;
package body Bindusg is package body Bindusg is
Already_Displayed : Boolean := False; Already_Displayed : Boolean := False;
...@@ -222,11 +224,27 @@ package body Bindusg is ...@@ -222,11 +224,27 @@ package body Bindusg is
Write_Line (" -v Verbose mode. Error messages, " & Write_Line (" -v Verbose mode. Error messages, " &
"header, summary output to stdout"); "header, summary output to stdout");
-- Lines for -w switch -- Line for -w switch
Write_Line (" -wx Warning mode. (x=s/e for " & Write_Line (" -wx Warning mode. (x=s/e for " &
"suppress/treat as error)"); "suppress/treat as error)");
-- Line for -W switch
Write_Str (" -W? Wide character encoding method (");
for J in WC_Encoding_Method loop
Write_Char (WC_Encoding_Letters (J));
if J = WC_Encoding_Method'Last then
Write_Char (')');
else
Write_Char ('/');
end if;
end loop;
Write_Eol;
-- Line for -x switch -- Line for -x switch
Write_Line (" -x Exclude source files (check object " & Write_Line (" -x Exclude source files (check object " &
......
...@@ -71,4 +71,18 @@ package body System.WCh_Con is ...@@ -71,4 +71,18 @@ package body System.WCh_Con is
end if; end if;
end Get_WC_Encoding_Method; end Get_WC_Encoding_Method;
--------------------------
-- Is_Start_Of_Encoding --
--------------------------
function Is_Start_Of_Encoding
(C : Character;
EM : WC_Encoding_Method) return Boolean
is
begin
return (EM in WC_Upper_Half_Encoding_Method
and then Character'Pos (C) >= 16#80#)
or else (EM in WC_ESC_Encoding_Method and then C = ASCII.ESC);
end Is_Start_Of_Encoding;
end System.WCh_Con; end System.WCh_Con;
...@@ -106,8 +106,8 @@ package System.WCh_Con is ...@@ -106,8 +106,8 @@ package System.WCh_Con is
-- sequence ESC a b c d (five characters, where abcd are ASCII hex -- sequence ESC a b c d (five characters, where abcd are ASCII hex
-- characters, using upper case for letters). This method is easy -- characters, using upper case for letters). This method is easy
-- to deal with in external environments that do not support wide -- to deal with in external environments that do not support wide
-- characters, and covers the whole BMP. This is the default encoding -- characters, and covers the whole 16-bit BMP. Codes larger than
-- method. -- 16#FFFF# are not representable using this encoding method.
WCEM_Upper : constant WC_Encoding_Method := 2; WCEM_Upper : constant WC_Encoding_Method := 2;
-- The wide character with encoding 16#abcd#, where the upper bit is on -- The wide character with encoding 16#abcd#, where the upper bit is on
...@@ -115,7 +115,8 @@ package System.WCh_Con is ...@@ -115,7 +115,8 @@ package System.WCh_Con is
-- 16#cd#. The second byte may never be a format control character, but -- 16#cd#. The second byte may never be a format control character, but
-- is not required to be in the upper half. This method can be also used -- is not required to be in the upper half. This method can be also used
-- for shift-JIS or EUC where the internal coding matches the external -- for shift-JIS or EUC where the internal coding matches the external
-- coding. -- coding. Codes larger than 16#FFFF# are not representable using this
-- encoding method.
WCEM_Shift_JIS : constant WC_Encoding_Method := 3; WCEM_Shift_JIS : constant WC_Encoding_Method := 3;
-- A wide character is represented by a two character sequence 16#ab# -- A wide character is represented by a two character sequence 16#ab#
...@@ -123,19 +124,21 @@ package System.WCh_Con is ...@@ -123,19 +124,21 @@ package System.WCh_Con is
-- as described above. The internal character code is the corresponding -- as described above. The internal character code is the corresponding
-- JIS character according to the standard algorithm for Shift-JIS -- JIS character according to the standard algorithm for Shift-JIS
-- conversion. See the body of package System.JIS_Conversions for -- conversion. See the body of package System.JIS_Conversions for
-- further details. -- further details. Codes larger than 16#FFFF are not representable
-- using this encoding method.
WCEM_EUC : constant WC_Encoding_Method := 4; WCEM_EUC : constant WC_Encoding_Method := 4;
-- A wide character is represented by a two character sequence 16#ab# and -- A wide character is represented by a two character sequence 16#ab# and
-- 16#cd#, with both characters being in the upper half set. The internal -- 16#cd#, with both characters being in the upper half set. The internal
-- character code is the corresponding JIS character according to the EUC -- character code is the corresponding JIS character according to the EUC
-- encoding algorithm. See the body of package System.JIS_Conversions for -- encoding algorithm. See the body of package System.JIS_Conversions for
-- further details. -- further details. Codes larger than 16#FFFF# are not representable using
-- this encoding method.
WCEM_UTF8 : constant WC_Encoding_Method := 5; WCEM_UTF8 : constant WC_Encoding_Method := 5;
-- An ISO 10646-1 BMP/Unicode wide character is represented in -- An ISO 10646-1 BMP/Unicode wide character is represented in UCS
-- UCS Transformation Format 8 (UTF-8) as defined in Annex R of ISO -- Transformation Format 8 (UTF-8), as defined in Annex R of ISO
-- 10646-1/Am.2. Depending on the character value, a Unicode character -- 10646-1/Am.2. Depending on the character value, a Unicode character
-- is represented as the one to six byte sequence. -- is represented as the one to six byte sequence.
-- --
-- 16#0000_0000#-16#0000_007f#: 2#0xxxxxxx# -- 16#0000_0000#-16#0000_007f#: 2#0xxxxxxx#
...@@ -151,7 +154,8 @@ package System.WCh_Con is ...@@ -151,7 +154,8 @@ package System.WCh_Con is
-- where the xxx bits correspond to the left-padded bits of the -- where the xxx bits correspond to the left-padded bits of the
-- 16-bit character value. Note that all lower half ASCII characters -- 16-bit character value. Note that all lower half ASCII characters
-- are represented as ASCII bytes and all upper half characters and -- are represented as ASCII bytes and all upper half characters and
-- other wide characters are represented as sequences of upper-half. -- other wide characters are represented as sequences of upper-half. This
-- encoding method can represent the entire range of Wide_Wide_Character.
WCEM_Brackets : constant WC_Encoding_Method := 6; WCEM_Brackets : constant WC_Encoding_Method := 6;
-- A wide character is represented using one of the following sequences: -- A wide character is represented using one of the following sequences:
...@@ -161,7 +165,10 @@ package System.WCh_Con is ...@@ -161,7 +165,10 @@ package System.WCh_Con is
-- ["xxxxxx"] -- ["xxxxxx"]
-- ["xxxxxxxx"] -- ["xxxxxxxx"]
-- --
-- where xx are hexadecimal digits representing the character code. -- where xx are hexadecimal digits representing the character code. This
-- encoding method can represent the entire range of Wide_Wide_Character
-- but in the general case results in ambiguous representations (there is
-- no ambiguity in Ada sources, since the above sequences are illegal Ada).
WC_Encoding_Letters : constant array (WC_Encoding_Method) of Character := WC_Encoding_Letters : constant array (WC_Encoding_Method) of Character :=
(WCEM_Hex => 'h', (WCEM_Hex => 'h',
...@@ -183,10 +190,20 @@ package System.WCh_Con is ...@@ -183,10 +190,20 @@ package System.WCh_Con is
-- Encoding methods using an upper half character (16#80#..16#FF) at -- Encoding methods using an upper half character (16#80#..16#FF) at
-- the start of the sequence. -- the start of the sequence.
WC_Longest_Sequence : constant := 10; WC_Longest_Sequence : constant := 12;
-- The longest number of characters that can be used for a wide character -- The longest number of characters that can be used for a wide character
-- or wide wide character sequence for any of the active encoding methods. -- or wide wide character sequence for any of the active encoding methods.
WC_Longest_Sequences : constant array (WC_Encoding_Method) of Natural :=
(WCEM_Hex => 5,
WCEM_Upper => 2,
WCEM_Shift_JIS => 2,
WCEM_EUC => 2,
WCEM_UTF8 => 6,
WCEM_Brackets => 12);
-- The longest number of characters that can be used for a wide character
-- or wide wide character sequence using the given encoding method.
function Get_WC_Encoding_Method (C : Character) return WC_Encoding_Method; function Get_WC_Encoding_Method (C : Character) return WC_Encoding_Method;
-- Given a character C, returns corresponding encoding method (see array -- Given a character C, returns corresponding encoding method (see array
-- WC_Encoding_Letters above). Raises Constraint_Error if not in list. -- WC_Encoding_Letters above). Raises Constraint_Error if not in list.
...@@ -196,4 +213,12 @@ package System.WCh_Con is ...@@ -196,4 +213,12 @@ package System.WCh_Con is
-- utf8, brackets, return the corresponding encoding method. Raises -- utf8, brackets, return the corresponding encoding method. Raises
-- Constraint_Error if not in list. -- Constraint_Error if not in list.
function Is_Start_Of_Encoding
(C : Character;
EM : WC_Encoding_Method) return Boolean;
pragma Inline (Is_Start_Of_Encoding);
-- Returns True if the Character C is the start of a multi-character
-- encoding sequence for the given encoding method EM. If EM is set to
-- WCEM_Brackets, this function always returns False.
end System.WCh_Con; end System.WCh_Con;
...@@ -417,21 +417,21 @@ package body Switch.B is ...@@ -417,21 +417,21 @@ package body Switch.B is
-- Processing for W switch -- Processing for W switch
when 'W' => when 'W' =>
if Ptr = Max then
Bad_Switch (Switch_Chars);
end if;
Ptr := Ptr + 1; Ptr := Ptr + 1;
for J in WC_Encoding_Method loop if Ptr > Max then
if Switch_Chars (Ptr) = WC_Encoding_Letters (J) then Bad_Switch (Switch_Chars);
Wide_Character_Encoding_Method := J; end if;
exit;
elsif J = WC_Encoding_Method'Last then begin
Wide_Character_Encoding_Method :=
Get_WC_Encoding_Method (Switch_Chars (Ptr));
exception
when Constraint_Error =>
Bad_Switch (Switch_Chars); Bad_Switch (Switch_Chars);
end if; end;
end loop;
Wide_Character_Encoding_Method_Specified := True;
Upper_Half_Encoding := Upper_Half_Encoding :=
Wide_Character_Encoding_Method in Wide_Character_Encoding_Method in
......
...@@ -479,6 +479,7 @@ package body Switch.C is ...@@ -479,6 +479,7 @@ package body Switch.C is
Constant_Condition_Warnings := True; Constant_Condition_Warnings := True;
Implementation_Unit_Warnings := True; Implementation_Unit_Warnings := True;
Ineffective_Inline_Warnings := True; Ineffective_Inline_Warnings := True;
Warn_On_Assertion_Failure := True;
Warn_On_Assumed_Low_Bound := True; Warn_On_Assumed_Low_Bound := True;
Warn_On_Bad_Fixed_Value := True; Warn_On_Bad_Fixed_Value := True;
Warn_On_Constant := True; Warn_On_Constant := True;
...@@ -833,9 +834,11 @@ package body Switch.C is ...@@ -833,9 +834,11 @@ package body Switch.C is
Bad_Switch ("-gnatW" & Switch_Chars (Ptr .. Max)); Bad_Switch ("-gnatW" & Switch_Chars (Ptr .. Max));
end; end;
Wide_Character_Encoding_Method_Specified := True;
Upper_Half_Encoding := Upper_Half_Encoding :=
Wide_Character_Encoding_Method in Wide_Character_Encoding_Method in
WC_Upper_Half_Encoding_Method; WC_Upper_Half_Encoding_Method;
Ptr := Ptr + 1; Ptr := Ptr + 1;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment