Commit b917101e by Robert Dewar Committed by Arnaud Charlet

a-textio.adb, [...]: Extensive changes to private part for wide character encoding

2007-12-06  Robert Dewar  <dewar@adacore.com>

	* a-textio.adb, a-textio.ads: 
	Extensive changes to private part for wide character encoding

	* a-witeio.ads, a-witeio.adb, a-ztexio.ads, a-ztexio.adb
	(Look_Ahead): Fix mishandling of encoded sequences
	Move declaration of Wch_Con to private part (should not be visible)

	* ali.adb (Scan_ALI): Set default encoding method to brackets instead of
	UTF-8. Probably this is never used, but if it is, brackets is
	clearly correct.

	* bindgen.adb (Get_WC_Encoding): New procedure to properly handle
	setting wide character encoding for no main program case and when
	encoding is specified using -W?
	Initialize stack limit of environment task if stack limit method of
	stack checking is enabled.
	(Gen_Adainit_Ada): Use Get_WC_Encoding to output encoding method
	(Gen_Adainit_C): Use Get_WC_Encoding to output encoding method
	(Get_Main_Unit_Name): New function.
	(Gen_Adainit_Ada): Add call to main program for .NET when needed.
	(Gen_Output_File): Set Bind_Main_Program to True for .NET

	* bindusg.adb: Add line for -Wx switch

	* s-wchcon.adb, s-wchcon.ads: (Is_Start_Of_Encoding): New function
	Add comments
	Add new useful constant WC_Longest_Sequences

	* switch-b.adb: Clean up handling of -Wx switch
	For -gnatWx, set Wide_Character_Encoding_Method_Specified

	* switch-c.adb: -gnatg activates warning on assertion errors
	For -gnatWx, set Wide_Character_Encoding_Method_Specified

	* s-wchcon.adb: (Is_Start_Of_Encoding): New function

From-SVN: r130817
parent a6e8413c
......@@ -36,6 +36,8 @@ with Interfaces.C_Streams; use Interfaces.C_Streams;
with System.File_IO;
with System.CRTL;
with System.WCh_Cnv; use System.WCh_Cnv;
with System.WCh_Con; use System.WCh_Con;
with Ada.Unchecked_Conversion;
with Ada.Unchecked_Deallocation;
......@@ -55,6 +57,45 @@ package body Ada.Text_IO is
use type System.CRTL.size_t;
WC_Encoding : Character;
pragma Import (C, WC_Encoding, "__gl_wc_encoding");
-----------------------
-- Local Subprograms --
-----------------------
function Getc_Immed (File : File_Type) return int;
-- This routine is identical to Getc, except that the read is done in
-- Get_Immediate mode (i.e. without waiting for a line return).
function Get_Upper_Half_Char
(C : Character;
File : File_Type) return Character;
-- This function is shared by Get and Get_Immediate to extract an encoded
-- upper half character value from the given File. The first byte has
-- already been read and is passed in C. The character value is returned as
-- the result, and the file pointer is bumped past the character.
-- Constraint_Error is raised if the encoded value is outside the bounds of
-- type Character.
function Get_Upper_Half_Char_Immed
(C : Character;
File : File_Type) return Character;
-- This routine is identical to Get_Upper_Half_Char, except that the reads
-- are done in Get_Immediate mode (i.e. without waiting for a line return).
function Has_Upper_Half_Character (Item : String) return Boolean;
-- Returns True if any of the characters is in the range 16#80#-16#FF#
procedure Put_Encoded (File : File_Type; Char : Character);
-- Called to output a character Char to the given File, when the encoding
-- method for the file is other than brackets, and Char is upper half.
procedure Set_WCEM (File : in out File_Type);
-- Called by Open and Create to set the wide character encoding method for
-- the file, processing a WCEM form parameter if one is present. File is
-- IN OUT because it may be closed in case of an error.
-------------------
-- AFCB_Allocate --
-------------------
......@@ -155,6 +196,7 @@ package body Ada.Text_IO is
Text => True);
File.Self := File;
Set_WCEM (File);
end Create;
-------------------
......@@ -218,8 +260,10 @@ package body Ada.Text_IO is
begin
FIO.Check_Read_Status (AP (File));
if File.Before_LM then
if File.Before_Upper_Half_Character then
return False;
elsif File.Before_LM then
if File.Before_LM_PM then
return Nextc (File) = EOF;
end if;
......@@ -276,7 +320,10 @@ package body Ada.Text_IO is
begin
FIO.Check_Read_Status (AP (File));
if File.Before_LM then
if File.Before_Upper_Half_Character then
return False;
elsif File.Before_LM then
return True;
else
......@@ -310,6 +357,9 @@ package body Ada.Text_IO is
if not File.Is_Regular_File then
return False;
elsif File.Before_Upper_Half_Character then
return False;
elsif File.Before_LM then
if File.Before_LM_PM then
return True;
......@@ -389,7 +439,11 @@ package body Ada.Text_IO is
begin
FIO.Check_Read_Status (AP (File));
if File.Before_LM then
if File.Before_Upper_Half_Character then
File.Before_Upper_Half_Character := False;
Item := File.Saved_Upper_Half_Character;
elsif File.Before_LM then
File.Before_LM := False;
File.Col := 1;
......@@ -486,40 +540,39 @@ package body Ada.Text_IO is
-- Get_Immediate --
-------------------
-- More work required here ???
procedure Get_Immediate
(File : File_Type;
Item : out Character)
is
ch : int;
end_of_file : int;
procedure getc_immediate
(stream : FILEs;
ch : out int;
end_of_file : out int);
pragma Import (C, getc_immediate, "getc_immediate");
begin
FIO.Check_Read_Status (AP (File));
if File.Before_LM then
if File.Before_Upper_Half_Character then
File.Before_Upper_Half_Character := False;
Item := File.Saved_Upper_Half_Character;
elsif File.Before_LM then
File.Before_LM := False;
File.Before_LM_PM := False;
ch := LM;
Item := Character'Val (LM);
else
getc_immediate (File.Stream, ch, end_of_file);
ch := Getc_Immed (File);
if ferror (File.Stream) /= 0 then
raise Device_Error;
elsif end_of_file /= 0 then
if ch = EOF then
raise End_Error;
else
if not Is_Start_Of_Encoding
(Character'Val (ch), File.WC_Method)
then
Item := Character'Val (ch);
else
Item := Get_Upper_Half_Char_Immed (Character'Val (ch), File);
end if;
end if;
end if;
Item := Character'Val (ch);
end Get_Immediate;
procedure Get_Immediate
......@@ -547,19 +600,17 @@ package body Ada.Text_IO is
begin
FIO.Check_Read_Status (AP (File));
Available := True;
-- If we are logically before an end of line, but physically after it,
-- then we just return the end of line character, no I/O is necessary.
if File.Before_Upper_Half_Character then
File.Before_Upper_Half_Character := False;
Item := File.Saved_Upper_Half_Character;
if File.Before_LM then
elsif File.Before_LM then
File.Before_LM := False;
File.Before_LM_PM := False;
Available := True;
Item := Character'Val (LM);
-- Normal case where a read operation is required
else
getc_immediate_nowait (File.Stream, ch, end_of_file, avail);
......@@ -575,7 +626,14 @@ package body Ada.Text_IO is
else
Available := True;
if Is_Start_Of_Encoding
(Character'Val (ch), File.WC_Method)
then
Item := Character'Val (ch);
else
Item := Get_Upper_Half_Char_Immed (Character'Val (ch), File);
end if;
end if;
end if;
......@@ -764,6 +822,92 @@ package body Ada.Text_IO is
return Get_Line (Current_In);
end Get_Line;
-------------------------
-- Get_Upper_Half_Char --
-------------------------
function Get_Upper_Half_Char
(C : Character;
File : File_Type) return Character
is
Result : Wide_Character;
function In_Char return Character;
-- Function used to obtain additional characters it the wide character
-- sequence is more than one character long.
function WC_In is new Char_Sequence_To_Wide_Char (In_Char);
-------------
-- In_Char --
-------------
function In_Char return Character is
ch : constant Integer := Getc (File);
begin
if ch = EOF then
raise End_Error;
else
return Character'Val (ch);
end if;
end In_Char;
-- Start of processing for Get_Upper_Half_Char
begin
Result := WC_In (C, File.WC_Method);
if Wide_Character'Pos (Result) > 16#FF# then
raise Constraint_Error
with "invalid wide character in Text_'I'O input";
else
return Character'Val (Wide_Character'Pos (Result));
end if;
end Get_Upper_Half_Char;
-------------------------------
-- Get_Upper_Half_Char_Immed --
-------------------------------
function Get_Upper_Half_Char_Immed
(C : Character;
File : File_Type) return Character
is
Result : Wide_Character;
function In_Char return Character;
-- Function used to obtain additional characters it the wide character
-- sequence is more than one character long.
function WC_In is new Char_Sequence_To_Wide_Char (In_Char);
-------------
-- In_Char --
-------------
function In_Char return Character is
ch : constant Integer := Getc_Immed (File);
begin
if ch = EOF then
raise End_Error;
else
return Character'Val (ch);
end if;
end In_Char;
-- Start of processing for Get_Upper_Half_Char_Immed
begin
Result := WC_In (C, File.WC_Method);
if Wide_Character'Pos (Result) > 16#FF# then
raise Constraint_Error
with "invalid wide character in Text_'I'O input";
else
return Character'Val (Wide_Character'Pos (Result));
end if;
end Get_Upper_Half_Char_Immed;
----------
-- Getc --
----------
......@@ -781,6 +925,54 @@ package body Ada.Text_IO is
end if;
end Getc;
----------------
-- Getc_Immed --
----------------
function Getc_Immed (File : File_Type) return int is
ch : int;
end_of_file : int;
procedure getc_immediate
(stream : FILEs; ch : out int; end_of_file : out int);
pragma Import (C, getc_immediate, "getc_immediate");
begin
FIO.Check_Read_Status (AP (File));
if File.Before_LM then
File.Before_LM := False;
File.Before_LM_PM := False;
ch := LM;
else
getc_immediate (File.Stream, ch, end_of_file);
if ferror (File.Stream) /= 0 then
raise Device_Error;
elsif end_of_file /= 0 then
return EOF;
end if;
end if;
return ch;
end Getc_Immed;
------------------------------
-- Has_Upper_Half_Character --
------------------------------
function Has_Upper_Half_Character (Item : String) return Boolean is
begin
for J in Item'Range loop
if Character'Pos (Item (J)) >= 16#80# then
return True;
end if;
end loop;
return False;
end Has_Upper_Half_Character;
-------------
-- Is_Open --
-------------
......@@ -838,22 +1030,54 @@ package body Ada.Text_IO is
begin
FIO.Check_Read_Status (AP (File));
-- If we are logically before a line mark, we can return immediately
if File.Before_LM then
End_Of_Line := True;
Item := ASCII.NUL;
-- If we are before an upper half character just return it (this can
-- happen if there are two calls to Look_Ahead in a row.
elsif File.Before_Upper_Half_Character then
End_Of_Line := False;
Item := File.Saved_Upper_Half_Character;
-- Otherwise we must read a character from the input stream
else
ch := Nextc (File);
ch := Getc (File);
if ch = LM
or else ch = EOF
or else (ch = PM and then File.Is_Regular_File)
then
End_Of_Line := True;
Ungetc (ch, File);
Item := ASCII.NUL;
else
-- Case where character obtained does not represent the start of an
-- encoded sequence so it stands for itself and we can unget it with
-- no difficulty.
elsif not Is_Start_Of_Encoding
(Character'Val (ch), File.WC_Method)
then
End_Of_Line := False;
Ungetc (ch, File);
Item := Character'Val (ch);
-- For the start of an encoding, we read the character using the
-- Get_Upper_Half_Char routine. It will occupy more than one byte
-- so we can't put it back with ungetc. Instead we save it in the
-- control block, setting a flag that everyone interested in reading
-- characters must test before reading the stream.
else
Item := Get_Upper_Half_Char (Character'Val (ch), File);
End_Of_Line := False;
File.Saved_Upper_Half_Character := Item;
File.Before_Upper_Half_Character := True;
end if;
end if;
end Look_Ahead;
......@@ -997,6 +1221,7 @@ package body Ada.Text_IO is
Text => True);
File.Self := File;
Set_WCEM (File);
end Open;
----------
......@@ -1048,10 +1273,21 @@ package body Ada.Text_IO is
New_Line (File);
end if;
-- If lower half character, or brackets encoding, output directly
if Character'Pos (Item) < 16#80#
or else File.WC_Method = WCEM_Brackets
then
if fputc (Character'Pos (Item), File.Stream) = EOF then
raise Device_Error;
end if;
-- Case of upper half character with non-brackets encoding
else
Put_Encoded (File, Item);
end if;
File.Col := File.Col + 1;
end Put;
......@@ -1065,10 +1301,21 @@ package body Ada.Text_IO is
New_Line (Current_Out);
end if;
-- If lower half character, or brackets encoding, output directly
if Character'Pos (Item) < 16#80#
or else Default_WCEM = WCEM_Brackets
then
if fputc (Character'Pos (Item), Current_Out.Stream) = EOF then
raise Device_Error;
end if;
-- Case of upper half character with non-brackets encoding
else
Put_Encoded (Current_Out, Item);
end if;
Current_Out.Col := Current_Out.Col + 1;
end Put;
......@@ -1083,12 +1330,18 @@ package body Ada.Text_IO is
begin
FIO.Check_Write_Status (AP (File));
-- Only have something to do if string is non-null
if Item'Length > 0 then
-- If we have bounded lines, then do things character by
-- character (this seems a rare case anyway!)
-- If we have bounded lines, or if the file encoding is other than
-- Brackets and the string has at least one upper half character,
-- then output the string character by character.
if File.Line_Length /= 0 then
if File.Line_Length /= 0
or else (File.WC_Method /= WCEM_Brackets
and then Has_Upper_Half_Character (Item))
then
for J in Item'Range loop
Put (File, Item (J));
end loop;
......@@ -1109,6 +1362,31 @@ package body Ada.Text_IO is
Put (Current_Out, Item);
end Put;
-----------------
-- Put_Encoded --
-----------------
procedure Put_Encoded (File : File_Type; Char : Character) is
procedure Out_Char (C : Character);
-- Procedure to output one character of an upper half encoded sequence
procedure WC_Out is new Wide_Char_To_Char_Sequence (Out_Char);
--------------
-- Out_Char --
--------------
procedure Out_Char (C : Character) is
begin
Putc (Character'Pos (C), File);
end Out_Char;
-- Start of processing for Put_Encoded
begin
WC_Out (Wide_Character'Val (Character'Pos (Char)), File.WC_Method);
end Put_Encoded;
--------------
-- Put_Line --
--------------
......@@ -1123,16 +1401,24 @@ package body Ada.Text_IO is
begin
FIO.Check_Write_Status (AP (File));
-- If we have bounded lines, then just do a put and a new line. In
-- this case we will end up doing things character by character in
-- any case, and it is a rare situation.
-- If we have bounded lines, or if the file encoding is other than
-- Brackets and the string has at least one upper half character, then
-- output the string character by character.
if File.Line_Length /= 0
or else (File.WC_Method /= WCEM_Brackets
and then Has_Upper_Half_Character (Item))
then
for J in Item'Range loop
Put (File, Item (J));
end loop;
if File.Line_Length /= 0 then
Put (File, Item);
New_Line (File);
return;
end if;
-- Normal case where we do not need to output character by character
-- We setup a single string that has the necessary terminators and
-- then write it with a single call. The reason for doing this is
-- that it gives better behavior for the use of Put_Line in multi-
......@@ -1211,6 +1497,8 @@ package body Ada.Text_IO is
pragma Warnings (Off, Discard_ch);
begin
-- Need to deal with Before_Upper_Half_Character ???
if File.Mode /= FCB.In_File then
raise Mode_Error;
end if;
......@@ -1553,6 +1841,36 @@ package body Ada.Text_IO is
Set_Page_Length (Current_Out, To);
end Set_Page_Length;
--------------
-- Set_WCEM --
--------------
procedure Set_WCEM (File : in out File_Type) is
Start : Natural;
Stop : Natural;
begin
File.WC_Method := WCEM_Brackets;
FIO.Form_Parameter (File.Form.all, "wcem", Start, Stop);
if Start = 0 then
File.WC_Method := WCEM_Brackets;
elsif Start /= 0 then
if Stop = Start then
for J in WC_Encoding_Letters'Range loop
if File.Form (Start) = WC_Encoding_Letters (J) then
File.WC_Method := J;
return;
end if;
end loop;
end if;
Close (File);
raise Use_Error with "invalid WCEM form parameter";
end if;
end Set_WCEM;
---------------
-- Skip_Line --
---------------
......@@ -1640,8 +1958,9 @@ package body Ada.Text_IO is
Ungetc (ch, File);
end if;
end if;
end loop;
File.Before_Upper_Half_Character := False;
end Skip_Line;
procedure Skip_Line (Spacing : Positive_Count := 1) is
......@@ -1702,6 +2021,7 @@ package body Ada.Text_IO is
File.Page := File.Page + 1;
File.Line := 1;
File.Col := 1;
File.Before_Upper_Half_Character := False;
end Skip_Page;
procedure Skip_Page is
......@@ -1901,6 +2221,12 @@ begin
-- Initialize Standard Files --
-------------------------------
for J in WC_Encoding_Method loop
if WC_Encoding = WC_Encoding_Letters (J) then
Default_WCEM := J;
end if;
end loop;
-- Note: the names in these files are bogus, and probably it would be
-- better for these files to have no names, but the ACVC test insist!
-- We use names that are bound to fail in open etc.
......@@ -1915,6 +2241,7 @@ begin
Standard_Err.Is_Text_File := True;
Standard_Err.Access_Method := 'T';
Standard_Err.Self := Standard_Err;
Standard_Err.WC_Method := Default_WCEM;
Standard_In.Stream := stdin;
Standard_In.Name := In_Name'Access;
......@@ -1926,6 +2253,7 @@ begin
Standard_In.Is_Text_File := True;
Standard_In.Access_Method := 'T';
Standard_In.Self := Standard_In;
Standard_In.WC_Method := Default_WCEM;
Standard_Out.Stream := stdout;
Standard_Out.Name := Out_Name'Access;
......@@ -1937,6 +2265,7 @@ begin
Standard_Out.Is_Text_File := True;
Standard_Out.Access_Method := 'T';
Standard_Out.Self := Standard_Out;
Standard_Out.WC_Method := Default_WCEM;
FIO.Chain_File (AP (Standard_In));
FIO.Chain_File (AP (Standard_Out));
......
......@@ -45,6 +45,7 @@ with Ada.IO_Exceptions;
with Ada.Streams;
with System;
with System.File_Control_Block;
with System.WCh_Con;
package Ada.Text_IO is
pragma Elaborate_Body;
......@@ -334,6 +335,11 @@ private
-- Text_IO File Control Block --
--------------------------------
Default_WCEM : System.WCh_Con.WC_Encoding_Method :=
System.WCh_Con.WCEM_UTF8;
-- This gets modified during initialization (see body) using
-- the default value established in the call to Set_Globals.
package FCB renames System.File_Control_Block;
type Text_AFCB;
......@@ -366,6 +372,31 @@ private
-- after a LM-PM sequence when logically we are before the LM-PM. This
-- flag can only be set if Before_LM is also set.
WC_Method : System.WCh_Con.WC_Encoding_Method := Default_WCEM;
-- Encoding method to be used for this file. Text_IO does not deal with
-- wide characters, but it does deal with upper half characters in the
-- range 16#80#-16#FF# which may need encoding, e.g. in UTF-8 mode.
Before_Upper_Half_Character : Boolean := False;
-- This flag is set to indicate that an encoded upper half character has
-- been read by Text_IO.Look_Ahead. If it is set to True, then it means
-- that the stream is logically positioned before the character but is
-- physically positioned after it. The character involved must be in
-- the range 16#80#-16#FF#, i.e. if the flag is set, then we know the
-- next character has a code greater than 16#7F#, and the value of this
-- character is saved in Saved_Upper_Half_Character.
Saved_Upper_Half_Character : Character;
-- This field is valid only if Before_Upper_Half_Character is set. It
-- contains an upper-half character read by Look_Ahead. If Look_Ahead
-- reads a character in the range 16#00# to 16#7F#, then it can use
-- ungetc to put it back, but ungetc cannot be called more than once,
-- so for characters above this range, we don't try to back up the
-- file. Instead we save the character in this field and set the flag
-- Before_Upper_Half_Character to True to indicate that we are logically
-- positioned before this character even though the stream is physically
-- positioned after it.
end record;
function AFCB_Allocate (Control_Block : Text_AFCB) return FCB.AFCB_Ptr;
......
......@@ -31,7 +31,6 @@
-- --
------------------------------------------------------------------------------
with Ada.Exceptions; use Ada.Exceptions;
with Ada.Streams; use Ada.Streams;
with Interfaces.C_Streams; use Interfaces.C_Streams;
......@@ -76,9 +75,9 @@ package body Ada.Wide_Text_IO is
-- done in Get_Immediate mode (i.e. without waiting for a line return).
procedure Set_WCEM (File : in out File_Type);
-- Called by Open and Create to set the wide character encoding method
-- for the file, processing a WCEM form parameter if one is present.
-- File is IN OUT because it may be closed in case of an error.
-- Called by Open and Create to set the wide character encoding method for
-- the file, processing a WCEM form parameter if one is present. File is
-- IN OUT because it may be closed in case of an error.
-------------------
-- AFCB_Allocate --
......@@ -249,7 +248,6 @@ package body Ada.Wide_Text_IO is
return False;
elsif File.Before_LM then
if File.Before_LM_PM then
return Nextc (File) = EOF;
end if;
......@@ -420,6 +418,8 @@ package body Ada.Wide_Text_IO is
File.Before_Wide_Character := False;
Item := File.Saved_Wide_Character;
-- Ada.Text_IO checks Before_LM_PM here, shouldn't we do the same???
else
Get_Character (File, C);
Item := Get_Wide_Char (C, File);
......@@ -555,6 +555,8 @@ package body Ada.Wide_Text_IO is
Item := Wide_Character'Val (LM);
else
-- Shouldn't we use getc_immediate_nowait here, like Text_IO???
ch := Getc_Immed (File);
if ch = EOF then
......@@ -749,7 +751,7 @@ package body Ada.Wide_Text_IO is
end if;
end In_Char;
-- Start of processing for In_Char
-- Start of processing for Get_Wide_Char
begin
return WC_In (C, File.WC_Method);
......@@ -904,7 +906,7 @@ package body Ada.Wide_Text_IO is
End_Of_Line := True;
Item := Wide_Character'Val (0);
-- If we are before a wide character, just return it (this happens
-- If we are before a wide character, just return it (this can happen
-- if there are two calls to Look_Ahead in a row).
elsif File.Before_Wide_Character then
......@@ -924,19 +926,21 @@ package body Ada.Wide_Text_IO is
Ungetc (ch, File);
Item := Wide_Character'Val (0);
-- If the character is in the range 16#0000# to 16#007F# it stands
-- for itself and occupies a single byte, so we can unget it with
-- Case where character obtained does not represent the start of an
-- encoded sequence so it stands for itself and we can unget it with
-- no difficulty.
elsif ch <= 16#0080# then
elsif not Is_Start_Of_Encoding
(Character'Val (ch), File.WC_Method)
then
End_Of_Line := False;
Ungetc (ch, File);
Item := Wide_Character'Val (ch);
-- For a character above this range, we read the character, using
-- the Get_Wide_Char routine. It may well occupy more than one byte
-- so we can't put it back with ungetc. Instead we save it in the
-- control block, setting a flag that everyone interested in reading
-- For the start of an encoding, we read the character using the
-- Get_Wide_Char routine. It will occupy more than one byte so we
-- can't put it back with ungetc. Instead we save it in the control
-- block, setting a flag that everyone interested in reading
-- characters must test before reading the stream.
else
......@@ -1552,7 +1556,7 @@ package body Ada.Wide_Text_IO is
end if;
Close (File);
Raise_Exception (Use_Error'Identity, "invalid WCEM form parameter");
raise Use_Error with "invalid WCEM form parameter";
end if;
end Set_WCEM;
......@@ -1638,7 +1642,6 @@ package body Ada.Wide_Text_IO is
Ungetc (ch, File);
end if;
end if;
end loop;
File.Before_Wide_Character := False;
......
......@@ -6,7 +6,7 @@
-- --
-- S p e c --
-- --
-- Copyright (C) 1992-2006, Free Software Foundation, Inc. --
-- Copyright (C) 1992-2007, Free Software Foundation, Inc. --
-- --
-- This specification is derived from the Ada Reference Manual for use with --
-- GNAT. The copyright notice above, and the license provisions that follow --
......@@ -50,8 +50,6 @@ with System.WCh_Con;
package Ada.Wide_Text_IO is
package WCh_Con renames System.WCh_Con;
type File_Type is limited private;
type File_Mode is (In_File, Out_File, Append_File);
......@@ -303,6 +301,8 @@ package Ada.Wide_Text_IO is
Layout_Error : exception renames IO_Exceptions.Layout_Error;
private
package WCh_Con renames System.WCh_Con;
-----------------------------------
-- Handling of Format Characters --
-----------------------------------
......
......@@ -31,7 +31,6 @@
-- --
------------------------------------------------------------------------------
with Ada.Exceptions; use Ada.Exceptions;
with Ada.Streams; use Ada.Streams;
with Interfaces.C_Streams; use Interfaces.C_Streams;
......@@ -76,9 +75,9 @@ package body Ada.Wide_Wide_Text_IO is
-- are done in Get_Immediate mode (i.e. without waiting for a line return).
procedure Set_WCEM (File : in out File_Type);
-- Called by Open and Create to set the wide character encoding method
-- for the file, processing a WCEM form parameter if one is present.
-- File is IN OUT because it may be closed in case of an error.
-- Called by Open and Create to set the wide character encoding method for
-- the file, processing a WCEM form parameter if one is present. File is
-- IN OUT because it may be closed in case of an error.
-------------------
-- AFCB_Allocate --
......@@ -249,7 +248,6 @@ package body Ada.Wide_Wide_Text_IO is
return False;
elsif File.Before_LM then
if File.Before_LM_PM then
return Nextc (File) = EOF;
end if;
......@@ -420,6 +418,8 @@ package body Ada.Wide_Wide_Text_IO is
File.Before_Wide_Wide_Character := False;
Item := File.Saved_Wide_Wide_Character;
-- Ada.Text_IO checks Before_LM_PM here, shouldn't we do the same???
else
Get_Character (File, C);
Item := Get_Wide_Wide_Char (C, File);
......@@ -555,6 +555,8 @@ package body Ada.Wide_Wide_Text_IO is
Item := Wide_Wide_Character'Val (LM);
else
-- Shouldn't we use getc_immediate_nowait here, like Text_IO???
ch := Getc_Immed (File);
if ch = EOF then
......@@ -904,7 +906,7 @@ package body Ada.Wide_Wide_Text_IO is
End_Of_Line := True;
Item := Wide_Wide_Character'Val (0);
-- If we are before a wide character, just return it (this happens
-- If we are before a wide character, just return it (this can happen
-- if there are two calls to Look_Ahead in a row).
elsif File.Before_Wide_Wide_Character then
......@@ -924,20 +926,22 @@ package body Ada.Wide_Wide_Text_IO is
Ungetc (ch, File);
Item := Wide_Wide_Character'Val (0);
-- If the character is in the range 16#0000# to 16#007F# it stands
-- for itself and occupies a single byte, so we can unget it with
-- Case where character obtained does not represent the start of an
-- encoded sequence so it stands for itself and we can unget it with
-- no difficulty.
elsif ch <= 16#0080# then
elsif not Is_Start_Of_Encoding
(Character'Val (ch), File.WC_Method)
then
End_Of_Line := False;
Ungetc (ch, File);
Item := Wide_Wide_Character'Val (ch);
-- For a character above this range, we read the character, using
-- the Get_Wide_Wide_Char routine. It may well occupy more than one
-- byte so we can't put it back with ungetc. Instead we save it in
-- the control block, setting a flag that everyone interested in
-- reading characters must test before reading the stream.
-- For the start of an encoding, we read the character using the
-- Get_Wide_Wide_Char routine. It will occupy more than one byte so
-- we can't put it back with ungetc. Instead we save it in the
-- control block, setting a flag that everyone interested in reading
-- characters must test before reading the stream.
else
Item := Get_Wide_Wide_Char (Character'Val (ch), File);
......@@ -1552,7 +1556,7 @@ package body Ada.Wide_Wide_Text_IO is
end if;
Close (File);
Raise_Exception (Use_Error'Identity, "invalid WCEM form parameter");
raise Use_Error with "invalid WCEM form parameter";
end if;
end Set_WCEM;
......@@ -1638,7 +1642,6 @@ package body Ada.Wide_Wide_Text_IO is
Ungetc (ch, File);
end if;
end if;
end loop;
File.Before_Wide_Wide_Character := False;
......
......@@ -6,7 +6,7 @@
-- --
-- S p e c --
-- --
-- Copyright (C) 1992-2006, Free Software Foundation, Inc. --
-- Copyright (C) 1992-2007, Free Software Foundation, Inc. --
-- --
-- This specification is derived from the Ada Reference Manual for use with --
-- GNAT. The copyright notice above, and the license provisions that follow --
......@@ -50,8 +50,6 @@ with System.WCh_Con;
package Ada.Wide_Wide_Text_IO is
package WCh_Con renames System.WCh_Con;
type File_Type is limited private;
type File_Mode is (In_File, Out_File, Append_File);
......@@ -303,6 +301,8 @@ package Ada.Wide_Wide_Text_IO is
Layout_Error : exception renames IO_Exceptions.Layout_Error;
private
package WCh_Con renames System.WCh_Con;
-----------------------------------
-- Handling of Format Characters --
-----------------------------------
......
......@@ -824,7 +824,7 @@ package body ALI is
Sfile => No_File,
Task_Dispatching_Policy => ' ',
Time_Slice_Value => -1,
WC_Encoding => '8',
WC_Encoding => 'b',
Unit_Exception_Table => False,
Ver => (others => ' '),
Ver_Len => 0,
......@@ -930,13 +930,23 @@ package body ALI is
else
Checkc (' ');
Name_Len := 0;
-- Scan out argument
Name_Len := 0;
while not At_Eol loop
Name_Len := Name_Len + 1;
Name_Buffer (Name_Len) := Getc;
end loop;
-- If -fstack-check, record that it occurred
if Name_Buffer (1 .. Name_Len) = "-fstack-check" then
Stack_Check_Switch_Set := True;
end if;
-- Store the argument
Args.Increment_Last;
Args.Table (Args.Last) := new String'(Name_Buffer (1 .. Name_Len));
......
......@@ -40,6 +40,7 @@ with Targparm; use Targparm;
with Types; use Types;
with System.OS_Lib; use System.OS_Lib;
with System.WCh_Con; use System.WCh_Con;
with GNAT.Heap_Sort_A; use GNAT.Heap_Sort_A;
......@@ -286,6 +287,9 @@ package body Bindgen is
-- This function tries Ada_Main first, and if there is such a clash, then
-- it tries Ada_Name_01, Ada_Name_02 ... Ada_Name_99 in sequence.
function Get_Main_Unit_Name (S : String) return String;
-- Return the main unit name corresponding to S by replacing '.' with '_'
function Get_Main_Name return String;
-- This function is used in the Ada main output case to compute the
-- correct external main program. It is "main" by default, unless the
......@@ -293,6 +297,12 @@ package body Bindgen is
-- is the name of the Ada main name without the "_ada". This default
-- can be overridden explicitly using the -Mname binder switch.
function Get_WC_Encoding return Character;
-- Return wide character encoding method to set as WC_Encoding in output.
-- If -W has been used, returns the specified encoding, otherwise returns
-- the encoding method used for the main program source. If there is no
-- main program source (-z switch used), returns brackets ('b').
function Lt_Linker_Option (Op1, Op2 : Natural) return Boolean;
-- Compare linker options, when sorting, first according to
-- Is_Internal_File (internal files come later) and then by
......@@ -595,6 +605,40 @@ package body Bindgen is
WBI (" Handler_Installed : Integer;");
WBI (" pragma Import (C, Handler_Installed, " &
"""__gnat_handler_installed"");");
-- Initialize stack limit variable of the environment task if the
-- stack check method is stack limit and if stack check is enabled.
if Stack_Check_Limits_On_Target
and then (Stack_Check_Default_On_Target or Stack_Check_Switch_Set)
then
WBI ("");
WBI (" procedure Initialize_Stack_Limit;");
WBI (" pragma Import (C, Initialize_Stack_Limit, " &
"""__gnat_initialize_stack_limit"");");
end if;
if VM_Target = CLI_Target
and then not No_Main_Subprogram
then
WBI ("");
if ALIs.Table (ALIs.First).Main_Program = Func then
WBI (" Result : Integer;");
WBI ("");
WBI (" function Ada_Main_Program return Integer;");
else
WBI (" procedure Ada_Main_Program;");
end if;
Get_Name_String (Units.Table (First_Unit_Entry).Uname);
Name_Len := Name_Len - 2;
WBI (" pragma Import (CIL, Ada_Main_Program, """
& Name_Buffer (1 .. Name_Len) & "."
& Get_Main_Unit_Name (Name_Buffer (1 .. Name_Len)) & """);");
end if;
WBI (" begin");
Set_String (" Main_Priority := ");
......@@ -616,7 +660,8 @@ package body Bindgen is
Write_Statement_Buffer;
Set_String (" WC_Encoding := '");
Set_Char (ALIs.Table (ALIs.First).WC_Encoding);
Set_Char (Get_WC_Encoding);
Set_String ("';");
Write_Statement_Buffer;
......@@ -736,11 +781,31 @@ package body Bindgen is
Write_Statement_Buffer;
end if;
-- Initialize stack limit variable of the environment task if the
-- stack check method is stack limit and if stack check is enabled.
if Stack_Check_Limits_On_Target
and then (Stack_Check_Default_On_Target or Stack_Check_Switch_Set)
then
WBI ("");
WBI (" Initialize_Stack_Limit;");
end if;
-- Generate elaboration calls
WBI ("");
Gen_Elab_Calls_Ada;
if VM_Target = CLI_Target
and then not No_Main_Subprogram
then
if ALIs.Table (ALIs.First).Main_Program = Func then
WBI (" Result := Ada_Main_Program;");
else
WBI (" Ada_Main_Program;");
end if;
end if;
WBI (" end " & Ada_Init_Name.all & ";");
end Gen_Adainit_Ada;
......@@ -866,7 +931,8 @@ package body Bindgen is
WBI (" extern char __gl_wc_encoding;");
Set_String (" __gl_wc_encoding = '");
Set_Char (ALIs.Table (ALIs.First).WC_Encoding);
Set_Char (Get_WC_Encoding);
Set_String ("';");
Write_Statement_Buffer;
......@@ -966,6 +1032,16 @@ package body Bindgen is
WBI (" }");
end if;
-- Initialize stack limit for the environment task if the stack
-- check method is stack limit and if stack check is enabled.
if Stack_Check_Limits_On_Target
and then (Stack_Check_Default_On_Target or Stack_Check_Switch_Set)
then
WBI ("");
WBI (" __gnat_initialize_stack_limit ();");
end if;
-- Generate call to set Initialize_Scalar values if needed
if Initialize_Scalars_Used then
......@@ -2018,8 +2094,11 @@ package body Bindgen is
if VM_Target /= No_VM then
Ada_Bind_File := True;
if VM_Target = JVM_Target then
Bind_Main_Program := False;
end if;
end if;
-- Override time slice value if -T switch is set
......@@ -2113,12 +2192,13 @@ package body Bindgen is
Resolve_Binder_Options;
if VM_Target /= No_VM then
if not Suppress_Standard_Library_On_Target then
-- Usually, adafinal is called using a pragma Import C. Since
-- Import C doesn't have the same semantics for JGNAT, we use
-- standard Ada.
if VM_Target /= No_VM then
WBI ("with System.Standard_Library;");
end if;
end if;
......@@ -2129,6 +2209,7 @@ package body Bindgen is
-- Main program case
if Bind_Main_Program then
if VM_Target = No_VM then
-- Generate argc/argv stuff unless suppressed
......@@ -2140,8 +2221,9 @@ package body Bindgen is
WBI (" gnat_argv : System.Address;");
WBI (" gnat_envp : System.Address;");
-- If the standard library is not suppressed, these variables are
-- in the runtime data area for easy access from the runtime
-- If the standard library is not suppressed, these variables
-- are in the runtime data area for easy access from the
-- runtime
if not Suppress_Standard_Library_On_Target then
WBI ("");
......@@ -2162,18 +2244,18 @@ package body Bindgen is
if Exit_Status_Supported_On_Target then
WBI (" gnat_exit_status : Integer := 0;");
end if;
else
WBI (" gnat_exit_status : Integer;");
WBI (" pragma Import (C, gnat_exit_status);");
end if;
end if;
-- Generate the GNAT_Version and Ada_Main_Program_Name info only for
-- the main program. Otherwise, it can lead under some circumstances
-- to a symbol duplication during the link (for instance when a
-- C program uses 2 Ada libraries)
-- Generate the GNAT_Version and Ada_Main_Program_Name info only
-- for the main program. Otherwise, it can lead under some
-- circumstances to a symbol duplication during the link (for
-- instance when a C program uses 2 Ada libraries)
end if;
if Bind_Main_Program then
WBI ("");
WBI (" GNAT_Version : constant String :=");
WBI (" ""GNAT Version: " &
......@@ -2183,8 +2265,14 @@ package body Bindgen is
WBI ("");
Set_String (" Ada_Main_Program_Name : constant String := """);
Get_Name_String (Units.Table (First_Unit_Entry).Uname);
if VM_Target = No_VM then
Set_Main_Program_Name;
Set_String (""" & Ascii.NUL;");
else
Set_String (Name_Buffer (1 .. Name_Len - 2) & """;");
end if;
Write_Statement_Buffer;
WBI
......@@ -2212,7 +2300,7 @@ package body Bindgen is
WBI (" pragma Linker_Constructor (" & Ada_Init_Name.all & ");");
end if;
if Bind_Main_Program then
if Bind_Main_Program and then VM_Target = No_VM then
-- If we have the standard library, then Break_Start is defined
-- there, but when the standard library is suppressed, Break_Start
......@@ -2369,7 +2457,7 @@ package body Bindgen is
Gen_Adafinal_Ada;
if Bind_Main_Program then
if Bind_Main_Program and then VM_Target = No_VM then
-- When suppressing the standard library then generate dummy body
-- for Break_Start
......@@ -2477,6 +2565,16 @@ package body Bindgen is
WBI ("extern void __gnat_stack_usage_initialize (int size);");
end if;
-- Initialize stack limit for the environment task if the stack
-- check method is stack limit and if stack check is enabled.
if Stack_Check_Limits_On_Target
and then (Stack_Check_Default_On_Target or Stack_Check_Switch_Set)
then
WBI ("");
WBI ("extern void __gnat_initialize_stack_limit (void);");
end if;
WBI ("");
Gen_Elab_Defs_C;
......@@ -2944,6 +3042,23 @@ package body Bindgen is
end Gen_Versions_C;
------------------------
-- Get_Main_Unit_Name --
------------------------
function Get_Main_Unit_Name (S : String) return String is
Result : String := S;
begin
for J in S'Range loop
if Result (J) = '.' then
Result (J) := '_';
end if;
end loop;
return Result;
end Get_Main_Unit_Name;
-----------------------
-- Get_Ada_Main_Name --
-----------------------
......@@ -2959,14 +3074,8 @@ package body Bindgen is
-- ada_<main procedure>.
if VM_Target /= No_VM then
-- Get main program name
Get_Name_String (Units.Table (First_Unit_Entry).Uname);
-- Remove the %b
return "ada_" & Name_Buffer (1 .. Name_Len - 2);
return "ada_" & Get_Main_Unit_Name (Name_Buffer (1 .. Name_Len - 2));
end if;
-- This loop tries the following possibilities in order
......@@ -3051,6 +3160,38 @@ package body Bindgen is
end if;
end Get_Main_Name;
---------------------
-- Get_WC_Encoding --
---------------------
function Get_WC_Encoding return Character is
begin
-- If encoding method specified by -W switch, then return it
if Wide_Character_Encoding_Method_Specified then
return WC_Encoding_Letters (Wide_Character_Encoding_Method);
-- If no main program, and not specified, set brackets, we really have
-- no better choice. If some other encoding is required when there is
-- no main, it must be set explicitly using -Wx.
-- Note: if the ALI file always passed the wide character encoding
-- of every file, then we could use the encoding of the initial
-- specified file, but this information is passed only for potential
-- main programs. We could fix this sometime, but it is a very minor
-- point (wide character default encoding for [Wide_[Wide_]Text_IO
-- when there is no main program).
elsif No_Main_Subprogram then
return 'b';
-- Otherwise if there is a main program, take encoding from it
else
return ALIs.Table (ALIs.First).WC_Encoding;
end if;
end Get_WC_Encoding;
----------------------
-- Lt_Linker_Option --
----------------------
......
......@@ -26,6 +26,8 @@
with Osint; use Osint;
with Output; use Output;
with System.WCh_Con; use System.WCh_Con;
package body Bindusg is
Already_Displayed : Boolean := False;
......@@ -222,11 +224,27 @@ package body Bindusg is
Write_Line (" -v Verbose mode. Error messages, " &
"header, summary output to stdout");
-- Lines for -w switch
-- Line for -w switch
Write_Line (" -wx Warning mode. (x=s/e for " &
"suppress/treat as error)");
-- Line for -W switch
Write_Str (" -W? Wide character encoding method (");
for J in WC_Encoding_Method loop
Write_Char (WC_Encoding_Letters (J));
if J = WC_Encoding_Method'Last then
Write_Char (')');
else
Write_Char ('/');
end if;
end loop;
Write_Eol;
-- Line for -x switch
Write_Line (" -x Exclude source files (check object " &
......
......@@ -71,4 +71,18 @@ package body System.WCh_Con is
end if;
end Get_WC_Encoding_Method;
--------------------------
-- Is_Start_Of_Encoding --
--------------------------
function Is_Start_Of_Encoding
(C : Character;
EM : WC_Encoding_Method) return Boolean
is
begin
return (EM in WC_Upper_Half_Encoding_Method
and then Character'Pos (C) >= 16#80#)
or else (EM in WC_ESC_Encoding_Method and then C = ASCII.ESC);
end Is_Start_Of_Encoding;
end System.WCh_Con;
......@@ -106,8 +106,8 @@ package System.WCh_Con is
-- sequence ESC a b c d (five characters, where abcd are ASCII hex
-- characters, using upper case for letters). This method is easy
-- to deal with in external environments that do not support wide
-- characters, and covers the whole BMP. This is the default encoding
-- method.
-- characters, and covers the whole 16-bit BMP. Codes larger than
-- 16#FFFF# are not representable using this encoding method.
WCEM_Upper : constant WC_Encoding_Method := 2;
-- The wide character with encoding 16#abcd#, where the upper bit is on
......@@ -115,7 +115,8 @@ package System.WCh_Con is
-- 16#cd#. The second byte may never be a format control character, but
-- is not required to be in the upper half. This method can be also used
-- for shift-JIS or EUC where the internal coding matches the external
-- coding.
-- coding. Codes larger than 16#FFFF# are not representable using this
-- encoding method.
WCEM_Shift_JIS : constant WC_Encoding_Method := 3;
-- A wide character is represented by a two character sequence 16#ab#
......@@ -123,18 +124,20 @@ package System.WCh_Con is
-- as described above. The internal character code is the corresponding
-- JIS character according to the standard algorithm for Shift-JIS
-- conversion. See the body of package System.JIS_Conversions for
-- further details.
-- further details. Codes larger than 16#FFFF are not representable
-- using this encoding method.
WCEM_EUC : constant WC_Encoding_Method := 4;
-- A wide character is represented by a two character sequence 16#ab# and
-- 16#cd#, with both characters being in the upper half set. The internal
-- character code is the corresponding JIS character according to the EUC
-- encoding algorithm. See the body of package System.JIS_Conversions for
-- further details.
-- further details. Codes larger than 16#FFFF# are not representable using
-- this encoding method.
WCEM_UTF8 : constant WC_Encoding_Method := 5;
-- An ISO 10646-1 BMP/Unicode wide character is represented in
-- UCS Transformation Format 8 (UTF-8) as defined in Annex R of ISO
-- An ISO 10646-1 BMP/Unicode wide character is represented in UCS
-- Transformation Format 8 (UTF-8), as defined in Annex R of ISO
-- 10646-1/Am.2. Depending on the character value, a Unicode character
-- is represented as the one to six byte sequence.
--
......@@ -151,7 +154,8 @@ package System.WCh_Con is
-- where the xxx bits correspond to the left-padded bits of the
-- 16-bit character value. Note that all lower half ASCII characters
-- are represented as ASCII bytes and all upper half characters and
-- other wide characters are represented as sequences of upper-half.
-- other wide characters are represented as sequences of upper-half. This
-- encoding method can represent the entire range of Wide_Wide_Character.
WCEM_Brackets : constant WC_Encoding_Method := 6;
-- A wide character is represented using one of the following sequences:
......@@ -161,7 +165,10 @@ package System.WCh_Con is
-- ["xxxxxx"]
-- ["xxxxxxxx"]
--
-- where xx are hexadecimal digits representing the character code.
-- where xx are hexadecimal digits representing the character code. This
-- encoding method can represent the entire range of Wide_Wide_Character
-- but in the general case results in ambiguous representations (there is
-- no ambiguity in Ada sources, since the above sequences are illegal Ada).
WC_Encoding_Letters : constant array (WC_Encoding_Method) of Character :=
(WCEM_Hex => 'h',
......@@ -183,10 +190,20 @@ package System.WCh_Con is
-- Encoding methods using an upper half character (16#80#..16#FF) at
-- the start of the sequence.
WC_Longest_Sequence : constant := 10;
WC_Longest_Sequence : constant := 12;
-- The longest number of characters that can be used for a wide character
-- or wide wide character sequence for any of the active encoding methods.
WC_Longest_Sequences : constant array (WC_Encoding_Method) of Natural :=
(WCEM_Hex => 5,
WCEM_Upper => 2,
WCEM_Shift_JIS => 2,
WCEM_EUC => 2,
WCEM_UTF8 => 6,
WCEM_Brackets => 12);
-- The longest number of characters that can be used for a wide character
-- or wide wide character sequence using the given encoding method.
function Get_WC_Encoding_Method (C : Character) return WC_Encoding_Method;
-- Given a character C, returns corresponding encoding method (see array
-- WC_Encoding_Letters above). Raises Constraint_Error if not in list.
......@@ -196,4 +213,12 @@ package System.WCh_Con is
-- utf8, brackets, return the corresponding encoding method. Raises
-- Constraint_Error if not in list.
function Is_Start_Of_Encoding
(C : Character;
EM : WC_Encoding_Method) return Boolean;
pragma Inline (Is_Start_Of_Encoding);
-- Returns True if the Character C is the start of a multi-character
-- encoding sequence for the given encoding method EM. If EM is set to
-- WCEM_Brackets, this function always returns False.
end System.WCh_Con;
......@@ -417,21 +417,21 @@ package body Switch.B is
-- Processing for W switch
when 'W' =>
if Ptr = Max then
Bad_Switch (Switch_Chars);
end if;
Ptr := Ptr + 1;
for J in WC_Encoding_Method loop
if Switch_Chars (Ptr) = WC_Encoding_Letters (J) then
Wide_Character_Encoding_Method := J;
exit;
elsif J = WC_Encoding_Method'Last then
if Ptr > Max then
Bad_Switch (Switch_Chars);
end if;
end loop;
begin
Wide_Character_Encoding_Method :=
Get_WC_Encoding_Method (Switch_Chars (Ptr));
exception
when Constraint_Error =>
Bad_Switch (Switch_Chars);
end;
Wide_Character_Encoding_Method_Specified := True;
Upper_Half_Encoding :=
Wide_Character_Encoding_Method in
......
......@@ -479,6 +479,7 @@ package body Switch.C is
Constant_Condition_Warnings := True;
Implementation_Unit_Warnings := True;
Ineffective_Inline_Warnings := True;
Warn_On_Assertion_Failure := True;
Warn_On_Assumed_Low_Bound := True;
Warn_On_Bad_Fixed_Value := True;
Warn_On_Constant := True;
......@@ -833,6 +834,8 @@ package body Switch.C is
Bad_Switch ("-gnatW" & Switch_Chars (Ptr .. Max));
end;
Wide_Character_Encoding_Method_Specified := True;
Upper_Half_Encoding :=
Wide_Character_Encoding_Method in
WC_Upper_Half_Encoding_Method;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment