DataInputStream.java 28.6 KB
Newer Older
1
/* DataInputStream.java -- FilteredInputStream that implements DataInput
2
   Copyright (C) 1998, 1999, 2000, 2001, 2003  Free Software Foundation
Tom Tromey committed
3

4
This file is part of GNU Classpath.
Tom Tromey committed
5

6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
 
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.

21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */
Tom Tromey committed
37 38 39 40 41 42 43 44 45
 
package java.io;

/* Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3
 * "The Java Language Specification", ISBN 0-201-63451-1
 * plus online API docs for JDK 1.2 beta from http://www.javasoft.com.
 * Status:  Believed complete and correct.
 */
 
46 47 48 49 50 51 52 53
/**
 * This subclass of <code>FilterInputStream</code> implements the
 * <code>DataInput</code> interface that provides methods for reading primitive
 * Java data types from a stream.
 *
 * @see DataInput
 *
 * @author Warren Levy <warrenl@cygnus.com>
54
 * @author Aaron M. Renn <arenn@urbanophile.com>
55 56
 * @date October 20, 1998.  
 */
Tom Tromey committed
57 58
public class DataInputStream extends FilterInputStream implements DataInput
{
59 60 61 62
  // readLine() hack to ensure that an '\r' not followed by an '\n' is
  // handled correctly. If set, readLine() will ignore the first char it sees
  // if that char is a '\n'
  boolean ignoreInitialNewline = false;
63 64

  // Byte buffer, used to make primitive read calls more efficient.
65
  byte[] buf = new byte [8];
66
  
67 68 69 70 71 72
  /**
   * This constructor initializes a new <code>DataInputStream</code>
   * to read from the specified subordinate stream.
   *
   * @param in The subordinate <code>InputStream</code> to read from
   */
73
  public DataInputStream (InputStream in)
  {
    // The superclass stores the subordinate stream; every read method in
    // this class goes through that inherited 'in' reference.
    super (in);
  }

78 79 80 81 82 83 84 85 86 87 88 89 90
  /**
   * This method reads bytes from the underlying stream into the specified
   * byte array buffer.  It will attempt to fill the buffer completely, but
   * may return a short count if there is insufficient data remaining to be
   * read to fill the buffer.
   *
   * @param b The buffer into which bytes will be read.
   * 
   * @return The actual number of bytes read, or -1 if end of stream reached 
   * before reading any bytes.
   *
   * @exception IOException If an error occurs.
   */
91
  public final int read (byte[] b) throws IOException
Tom Tromey committed
92
  {
93
    return in.read (b, 0, b.length);
Tom Tromey committed
94 95
  }

96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
  /**
   * This method reads bytes from the underlying stream into the specified
   * byte array buffer.  It will attempt to read <code>len</code> bytes and
   * will start storing them at position <code>off</code> into the buffer.
   * This method can return a short count if there is insufficient data
   * remaining to be read to complete the desired read length.
   *
   * @param b The buffer into which bytes will be read.
   * @param off The offset into the buffer to start storing bytes.
   * @param len The requested number of bytes to read.
   *
   * @return The actual number of bytes read, or -1 if end of stream reached
   * before reading any bytes.
   *
   * @exception IOException If an error occurs.
   */
112
  public final int read (byte[] b, int off, int len) throws IOException
Tom Tromey committed
113
  {
114
    return in.read (b, off, len);
Tom Tromey committed
115 116
  }

117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
  /**
   * This method reads a Java boolean value from an input stream.  It does
   * so by reading a single byte of data.  If that byte is zero, then the
   * value returned is <code>false</code>.  If the byte is non-zero, then
   * the value returned is <code>true</code>.
   * <p>
   * This method can read a <code>boolean</code> written by an object
   * implementing the <code>writeBoolean()</code> method in the
   * <code>DataOutput</code> interface. 
   *
   * @return The <code>boolean</code> value read
   *
   * @exception EOFException If end of file is reached before reading
   * the boolean
   * @exception IOException If any other error occurs
132 133
   *
   * @see DataOutput#writeBoolean
134
   */
135
  public final boolean readBoolean () throws IOException
Tom Tromey committed
136
  {
137
    return convertToBoolean (in.read ());
Tom Tromey committed
138 139
  }

140 141 142 143 144 145 146 147 148 149 150 151 152
  /**
   * This method reads a Java byte value from an input stream.  The value
   * is in the range of -128 to 127.
   * <p>
   * This method can read a <code>byte</code> written by an object
   * implementing the <code>writeByte()</code> method in the
   * <code>DataOutput</code> interface.
   *
   * @return The <code>byte</code> value read
   *
   * @exception EOFException If end of file is reached before reading the byte
   * @exception IOException If any other error occurs
   *
153
   * @see DataOutput#writeByte
154
   */
155
  public final byte readByte () throws IOException
Tom Tromey committed
156
  {
157
    return convertToByte (in.read ());
Tom Tromey committed
158 159
  }

160 161 162 163 164 165 166 167 168 169 170 171
  /**
   * This method reads a Java <code>char</code> value from an input stream.  
   * It operates by reading two bytes from the stream and converting them to 
   * a single 16-bit Java <code>char</code>.  The two bytes are stored most
   * significant byte first (i.e., "big endian") regardless of the native
   * host byte ordering. 
   * <p>
   * As an example, if <code>byte1</code> and <code>byte2</code>
   * represent the first and second byte read from the stream
   * respectively, they will be transformed to a <code>char</code> in
   * the following manner: 
   * <p>
172
   * <code>(char)(((byte1 &amp; 0xFF) &lt;&lt; 8) | (byte2 &amp; 0xFF))</code>
173 174 175 176 177 178 179 180 181 182
   * <p>
   * This method can read a <code>char</code> written by an object
   * implementing the <code>writeChar()</code> method in the
   * <code>DataOutput</code> interface. 
   *
   * @return The <code>char</code> value read 
   *
   * @exception EOFException If end of file is reached before reading the char
   * @exception IOException If any other error occurs
   *
183
   * @see DataOutput#writeChar
184
   */
185
  public final char readChar () throws IOException
Tom Tromey committed
186
  {
187
    readFully (buf, 0, 2);
188
    return convertToChar (buf);
Tom Tromey committed
189 190
  }

191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
  /**
   * This method reads a Java double value from an input stream.  It operates
   * by first reading a <code>long</code> value from the stream by calling the
   * <code>readLong()</code> method in this interface, then converts
   * that <code>long</code> to a <code>double</code> using the
   * <code>longBitsToDouble</code> method in the class
   * <code>java.lang.Double</code> 
   * <p>
   * This method can read a <code>double</code> written by an object
   * implementing the <code>writeDouble()</code> method in the
   * <code>DataOutput</code> interface.
   *
   * @return The <code>double</code> value read
   *
   * @exception EOFException If end of file is reached before reading
   * the double
   * @exception IOException If any other error occurs
   *
209 210
   * @see DataOutput#writeDouble
   * @see java.lang.Double#longBitsToDouble
211
   */
212
  public final double readDouble () throws IOException
Tom Tromey committed
213
  {
214
    return Double.longBitsToDouble (readLong ());
Tom Tromey committed
215 216
  }

217 218 219 220 221 222 223 224 225
  /**
   * This method reads a Java float value from an input stream.  It
   * operates by first reading an <code>int</code> value from the
   * stream by calling the <code>readInt()</code> method in this
   * interface, then converts that <code>int</code> to a
   * <code>float</code> using the <code>intBitsToFloat</code> method
   * in the class <code>java.lang.Float</code>
   * <p>
   * This method can read a <code>float</code> written by an object
226
   * implementing the <code>writeFloat()</code> method in the
227 228 229 230 231 232 233
   * <code>DataOutput</code> interface.
   *
   * @return The <code>float</code> value read
   *
   * @exception EOFException If end of file is reached before reading the float
   * @exception IOException If any other error occurs
   *
234 235 236
   * @see DataOutput#writeFloat 
   * @see java.lang.Float#intBitsToFloat
   */
237
  public final float readFloat () throws IOException
Tom Tromey committed
238
  {
239
    return Float.intBitsToFloat (readInt ());
Tom Tromey committed
240 241
  }

242 243 244 245
  /**
   * This method reads raw bytes into the passed array until the array is
   * full.  Note that this method blocks until the data is available and
   * throws an exception if there is not enough data left in the stream to
246 247 248
   * fill the buffer.  Note also that zero length buffers are permitted.
   * In this case, the method will return immediately without reading any
   * bytes from the stream.
249 250 251
   *
   * @param b The buffer into which to read the data
   *
252 253 254 255
   * @exception EOFException If end of file is reached before filling the
   * buffer
   * @exception IOException If any other error occurs
   */
256
  public final void readFully (byte[] b) throws IOException
Tom Tromey committed
257
  {
258
    readFully (b, 0, b.length);
Tom Tromey committed
259 260
  }

261
  /**
262 263 264 265 266 267 268 269 270
   * This method reads raw bytes into the passed array <code>buf</code>
   * starting
   * <code>offset</code> bytes into the buffer.  The number of bytes read
   * will be
   * exactly <code>len</code>.  Note that this method blocks until the data is
   * available and throws an exception if there is not enough data left in
   * the stream to read <code>len</code> bytes.  Note also that zero length
   * buffers are permitted.  In this case, the method will return immediately
   * without reading any bytes from the stream.
271 272 273 274 275
   *
   * @param buf The buffer into which to read the data
   * @param offset The offset into the buffer to start storing data
   * @param len The number of bytes to read into the buffer
   *
276 277
   * @exception EOFException If end of file is reached before filling the
   * buffer
278 279
   * @exception IOException If any other error occurs
   */
280
  public final void readFully (byte[] buf, int offset, int len) throws IOException
Tom Tromey committed
281
  {
282 283 284
    if (len < 0)
      throw new IndexOutOfBoundsException("Negative length: " + len);
    
Tom Tromey committed
285 286
    while (len > 0)
      {
287
	// in.read will block until some data is available.
288
	int numread = in.read (buf, offset, len);
Tom Tromey committed
289
	if (numread < 0)
290
	  throw new EOFException ();
Tom Tromey committed
291
	len -= numread;
292
	offset += numread;
Tom Tromey committed
293 294 295
      }
  }

296
  /**
297 298 299 300 301 302 303 304
   * This method reads a Java <code>int</code> value from an input stream
   * It operates by reading four bytes from the stream and converting them to
   * a single Java <code>int</code>.  The bytes are stored most
   * significant byte first (i.e., "big endian") regardless of the native
   * host byte ordering.
   * <p>
   * As an example, if <code>byte1</code> through <code>byte4</code> represent
   * the first four bytes read from the stream, they will be
305 306
   * transformed to an <code>int</code> in the following manner:
   * <p>
307 308
   * <code>(int)(((byte1 &amp; 0xFF) &lt;&lt; 24) + ((byte2 &amp; 0xFF) &lt;&lt; 16) +
   * ((byte3 &amp; 0xFF)&lt;&lt; 8) + (byte4 &amp; 0xFF)))</code>
309
   * <p>
310
   * The value returned is in the range of -2147483648 to 2147483647.
311 312 313 314 315 316 317 318 319 320
   * <p>
   * This method can read an <code>int</code> written by an object
   * implementing the <code>writeInt()</code> method in the
   * <code>DataOutput</code> interface.
   *
   * @return The <code>int</code> value read
   *
   * @exception EOFException If end of file is reached before reading the int
   * @exception IOException If any other error occurs
   *
321
   * @see DataOutput#writeInt
322
   */
323
  public final int readInt () throws IOException
Tom Tromey committed
324
  {
325
    readFully (buf, 0, 4);
326
    return convertToInt (buf);
Tom Tromey committed
327 328
  }

329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
  /**
   * This method reads the next line of text data from an input
   * stream.  It operates by reading bytes and converting those bytes
   * to <code>char</code> values by treating the byte read as the low
   * eight bits of the <code>char</code> and using 0 as the high eight
   * bits.  Because of this, it does not support the full 16-bit
   * Unicode character set.
   * <p>
   * The reading of bytes ends when either the end of file or a line
   * terminator is encountered.  The bytes read are then returned as a
   * <code>String</code> A line terminator is a byte sequence
   * consisting of either <code>\r</code>, <code>\n</code> or
   * <code>\r\n</code>.  These termination characters are discarded and
   * are not returned as part of the string.
   * <p>
   * This method can read data that was written by an object implementing the
   * <code>writeLine()</code> method in <code>DataOutput</code>.
   *
   * @return The line read as a <code>String</code>
   *
   * @exception IOException If an error occurs
   *
   * @see DataOutput
   *
   * @deprecated
   */
355
  public final String readLine () throws IOException
Tom Tromey committed
356
  {
357
    StringBuffer strb = new StringBuffer ();
Tom Tromey committed
358

359
    readloop: while (true)
Tom Tromey committed
360
      {
361 362 363 364 365 366
        int c = 0;
        char ch = ' ';
        boolean getnext = true;
        while (getnext)
          {
	    getnext = false;
367
	    c = in.read();
368
	    if (c < 0)	// got an EOF
369
	      return strb.length () > 0 ? strb.toString () : null;
370 371 372 373 374 375 376 377 378 379 380 381
	    ch = (char) c;
	    if ((ch &= 0xFF) == '\n')
	      // hack to correctly handle '\r\n' sequences
	      if (ignoreInitialNewline)
		{
		  ignoreInitialNewline = false;
		  getnext = true;
		}
	      else
		break readloop;
	  }

Tom Tromey committed
382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404
	if (ch == '\r')
	  {
	    // FIXME: The following code tries to adjust the stream back one
	    // character if the next char read is '\n'.  As a last resort,
	    // it tries to mark the position before reading but the bottom
	    // line is that it is possible that this method will not properly
	    // deal with a '\r' '\n' combination thus not fulfilling the
	    // DataInput contract for readLine.  It's not a particularly
	    // safe approach threadwise since it is unsynchronized and
	    // since it might mark an input stream behind the users back.
	    // Along the same vein it could try the same thing for
	    // ByteArrayInputStream and PushbackInputStream, but that is
	    // probably overkill since this is deprecated & BufferedInputStream
	    // is the most likely type of input stream.
	    //
	    // The alternative is to somehow push back the next byte if it
	    // isn't a '\n' or to have the reading methods of this class
	    // keep track of whether the last byte read was '\r' by readLine
	    // and then skip the very next byte if it is '\n'.  Either way,
	    // this would increase the complexity of the non-deprecated methods
	    // and since it is undesirable to make non-deprecated methods
	    // less efficient, the following seems like the most reasonable
	    // approach.
405 406 407
	    int next_c = 0;
            char next_ch = ' ';
	    if (in instanceof BufferedInputStream)
Tom Tromey committed
408
	      {
409
	        next_c = in.read();
410 411 412 413 414 415 416
	        next_ch = (char) (next_c & 0xFF);
		if ((next_ch != '\n') && (next_c >= 0)) 
		  {
	            BufferedInputStream bin = (BufferedInputStream) in;
		    if (bin.pos > 0)
                      bin.pos--;
		  }
Tom Tromey committed
417 418 419
	      }
	    else if (markSupported())
	      {
420
	        next_c = in.read();
421 422 423 424
	        next_ch = (char) (next_c & 0xFF);
		if ((next_ch != '\n') && (next_c >= 0)) 
		  {
		    mark(1);
425
		    if ((in.read() & 0xFF) != '\n')
426 427 428 429 430 431 432 433
		      reset();
		  }
	      } 
	    // In order to catch cases where 'in' isn't a BufferedInputStream
	    // and doesn't support mark() (such as reading from a Socket), set 
	    // a flag that instructs readLine() to ignore the first character 
	    // it sees _if_ that character is a '\n'.
	    else ignoreInitialNewline = true;
Tom Tromey committed
434 435 436 437 438
	    break;
	  }
	strb.append(ch);
      }

439
    return strb.length() > 0 ? strb.toString() : "";
Tom Tromey committed
440 441
  }

442
  /**
443 444 445
   * This method reads a Java <code>long</code> value from an input stream
   * It operates by reading eight bytes from the stream and converting them to
   * a single Java <code>long</code>.  The bytes are stored most
446
   * significant byte first (i.e., "big endian") regardless of the native
447
   * host byte ordering.
448
   * <p>
449 450 451
   * As an example, if <code>byte1</code> through <code>byte8</code> represent
   * the first eight bytes read from the stream, they will be
   * transformed to an <code>long</code> in the following manner:
452
   * <p>
453 454 455 456
   * <code>(long)(((byte1 &amp; 0xFF) &lt;&lt; 56) + ((byte2 &amp; 0xFF) &lt;&lt; 48) +
   * ((byte3 &amp; 0xFF) &lt;&lt; 40) + ((byte4 &amp; 0xFF) &lt;&lt; 32) +
   * ((byte5 &amp; 0xFF) &lt;&lt; 24) + ((byte6 &amp; 0xFF) &lt;&lt; 16) +
   * ((byte7 &amp; 0xFF) &lt;&lt; 8) + (byte8 &amp; 0xFF)))
457
   * </code>
458
   * <p>
459 460
   * The value returned is in the range of -9223372036854775808 to
   * 9223372036854775807.
461 462 463 464 465 466 467 468 469 470
   * <p>
   * This method can read an <code>long</code> written by an object
   * implementing the <code>writeLong()</code> method in the
   * <code>DataOutput</code> interface.
   *
   * @return The <code>long</code> value read
   *
   * @exception EOFException If end of file is reached before reading the long
   * @exception IOException If any other error occurs
   *
471
   * @see DataOutput#writeLong
472
   */
473
  public final long readLong () throws IOException
Tom Tromey committed
474
  {
475
    readFully (buf, 0, 8);
476
    return convertToLong (buf);
Tom Tromey committed
477 478
  }

479 480 481 482 483 484 485 486 487 488 489 490
  /**
   * This method reads a signed 16-bit value into a Java <code>short</code> from the
   * stream.  It operates by reading two bytes from the stream and
   * converting them to a single 16-bit Java <code>short</code>.  The
   * two bytes are stored most significant byte first (i.e., "big
   * endian") regardless of the native host byte ordering.
   * <p>
   * As an example, if <code>byte1</code> and <code>byte2</code>
   * represent the first and second byte read from the stream
   * respectively, they will be transformed to a <code>short</code> in
   * the following manner:
   * <p>
491
   * <code>(short)(((byte1 &amp; 0xFF) &lt;&lt; 8) | (byte2 &amp; 0xFF))</code>
492 493 494 495 496 497 498 499 500 501 502 503
   * <p>
   * The value returned is in the range of -32768 to 32767.
   * <p>
   * This method can read a <code>short</code> written by an object
   * implementing the <code>writeShort()</code> method in the
   * <code>DataOutput</code> interface.
   *
   * @return The <code>short</code> value read
   *
   * @exception EOFException If end of file is reached before reading the value
   * @exception IOException If any other error occurs
   *
504
   * @see DataOutput#writeShort
505
   */
506
  public final short readShort () throws IOException
Tom Tromey committed
507
  {
508
    readFully (buf, 0, 2);
509
    return convertToShort (buf);
Tom Tromey committed
510
  }
511
  
512 513 514 515 516 517 518 519 520 521 522 523 524 525
  /**
   * This method reads 8 unsigned bits into a Java <code>int</code>
   * value from the stream. The value returned is in the range of 0 to
   * 255.
   * <p>
   * This method can read an unsigned byte written by an object
   * implementing the <code>writeUnsignedByte()</code> method in the
   * <code>DataOutput</code> interface.
   *
   * @return The unsigned bytes value read as a Java <code>int</code>.
   *
   * @exception EOFException If end of file is reached before reading the value
   * @exception IOException If any other error occurs
   *
526
   * @see DataOutput#writeByte
527
   */
528
  public final int readUnsignedByte () throws IOException
Tom Tromey committed
529
  {
530
    return convertToUnsignedByte (in.read ());
Tom Tromey committed
531 532
  }

533 534 535 536 537 538 539
  /**
   * This method reads 16 unsigned bits into a Java int value from the stream.
   * It operates by reading two bytes from the stream and converting them to 
   * a single Java <code>int</code>  The two bytes are stored most
   * significant byte first (i.e., "big endian") regardless of the native
   * host byte ordering. 
   * <p>
Anthony Green committed
540
   * As an example, if <code>byte1</code> and <code>byte2</code>
541 542 543 544
   * represent the first and second byte read from the stream
   * respectively, they will be transformed to an <code>int</code> in
   * the following manner:
   * <p>
545
   * <code>(int)(((byte1 &amp; 0xFF) &lt;&lt; 8) + (byte2 &amp; 0xFF))</code>
546 547 548 549 550 551 552 553 554 555 556
   * <p>
   * The value returned is in the range of 0 to 65535.
   * <p>
   * This method can read an unsigned short written by an object
   * implementing the <code>writeUnsignedShort()</code> method in the
   * <code>DataOutput</code> interface.
   *
   * @return The unsigned short value read as a Java <code>int</code>
   *
   * @exception EOFException If end of file is reached before reading the value
   * @exception IOException If any other error occurs
557 558
   *
   * @see DataOutput#writeShort
559
   */
560
  public final int readUnsignedShort () throws IOException
Tom Tromey committed
561
  {
562
    readFully (buf, 0, 2);
563
    return convertToUnsignedShort (buf);
Tom Tromey committed
564 565
  }

566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614
  /**
   * This method reads a <code>String</code> from an input stream that
   * is encoded in a modified UTF-8 format.  This format has a leading
   * two byte sequence that contains the remaining number of bytes to
   * read.  This two byte sequence is read using the
   * <code>readUnsignedShort()</code> method of this interface.
   * <p>
   * After the number of remaining bytes have been determined, these
   * bytes are read and transformed into <code>char</code> values.
   * These <code>char</code> values are encoded in the stream using
   * either a one, two, or three byte format.  The particular format
   * in use can be determined by examining the first byte read.
   * <p>
   * If the first byte has a high order bit of 0, then that character
   * consists of only one byte.  This character value consists of
   * seven bits that are at positions 0 through 6 of the byte.  As an
   * example, if <code>byte1</code> is the byte read from the stream,
   * it would be converted to a <code>char</code> like so:
   * <p>
   * <code>(char)byte1</code>
   * <p>
   * If the first byte has 110 as its high order bits, then the 
   * character consists of two bytes.  The bits that make up the character
   * value are in positions 0 through 4 of the first byte and bit positions
   * 0 through 5 of the second byte.  (The second byte should have 
   * 10 as its high order bits).  These values are in most significant
   * byte first (i.e., "big endian") order.
   * <p>
   * As an example, if <code>byte1</code> and <code>byte2</code> are
   * the first two bytes read respectively, and the high order bits of
   * them match the patterns which indicate a two byte character
   * encoding, then they would be converted to a Java
   * <code>char</code> like so:
   * <p>
   * <code>(char)(((byte1 &amp; 0x1F) &lt;&lt; 6) | (byte2 &amp; 0x3F))</code>
   * <p>
   * If the first byte has a 1110 as its high order bits, then the
   * character consists of three bytes.  The bits that make up the character
   * value are in positions 0 through 3 of the first byte and bit positions
   * 0 through 5 of the other two bytes.  (The second and third bytes should
   * have 10 as their high order bits).  These values are in most
   * significant byte first (i.e., "big endian") order.
   * <p>
   * As an example, if <code>byte1</code> <code>byte2</code> and
   * <code>byte3</code> are the three bytes read, and the high order
   * bits of them match the patterns which indicate a three byte
   * character encoding, then they would be converted to a Java
   * <code>char</code> like so:
   * <p>
615 616
   * <code>(char)(((byte1 &amp; 0x0F) &lt;&lt; 12) | ((byte2 &amp; 0x3F) &lt;&lt; 6) | 
   * (byte3 &amp; 0x3F))</code>
617 618 619 620 621 622 623 624 625 626 627
   * <p>
   * Note that all characters are encoded in the method that requires
   * the fewest number of bytes with the exception of the character
   * with the value of <code>&#92;u0000</code> which is encoded as two
   * bytes.  This is a modification of the UTF standard used to
   * prevent C language style <code>NUL</code> values from appearing
   * in the byte stream.
   * <p>
   * This method can read data that was written by an object implementing the
   * <code>writeUTF()</code> method in <code>DataOutput</code>
   * 
628
   * @return The <code>String</code> read
629 630 631 632 633 634
   *
   * @exception EOFException If end of file is reached before reading
   * the String
   * @exception UTFDataFormatException If the data is not in UTF-8 format
   * @exception IOException If any other error occurs
   *
635
   * @see DataOutput#writeUTF
636
   */
637
  public final String readUTF () throws IOException
Tom Tromey committed
638
  {
639
    return readUTF (this);
Tom Tromey committed
640 641
  }

642 643 644 645 646 647 648 649 650
  /**
   * This method reads a String encoded in UTF-8 format from the 
   * specified <code>DataInput</code> source.
   *
   * @param in The <code>DataInput</code> source to read from
   *
   * @return The String read from the source
   *
   * @exception IOException If an error occurs
651 652
   *
   * @see DataInput#readUTF
653
   */
654
  public static final String readUTF(DataInput in) throws IOException
Tom Tromey committed
655
  {
656 657
    final int UTFlen = in.readUnsignedShort ();
    byte[] buf = new byte [UTFlen];
Tom Tromey committed
658 659 660 661 662

    // This blocks until the entire string is available rather than
    // doing partial processing on the bytes that are available and then
    // blocking.  An advantage of the latter is that Exceptions
    // could be thrown earlier.  The former is a bit cleaner.
663
    in.readFully (buf, 0, UTFlen);
Tom Tromey committed
664

665
    return convertFromUTF (buf);
Tom Tromey committed
666 667
  }

668 669 670
  /**
   * This method attempts to skip and discard the specified number of bytes 
   * in the input stream.  It may actually skip fewer bytes than requested. 
671 672
   * This method will not skip any bytes if passed a negative number of bytes 
   * to skip. 
673 674
   *
   * @param n The requested number of bytes to skip.
675
   *
676
   * @return The requested number of bytes to skip.
677
   *
678
   * @exception IOException If an error occurs.
679 680 681 682
   * @specnote The JDK docs claim that this returns the number of bytes 
   *  actually skipped. The JCL claims that this method can throw an 
   *  EOFException. Neither of these appear to be true in the JDK 1.3's
   *  implementation. This tries to implement the actual JDK behaviour.
683
   */
684
  public final int skipBytes (int n) throws IOException
Tom Tromey committed
685
  {
686 687 688
    if (n <= 0)
      return 0;    
    try
Tom Tromey committed
689
      {
690
        return (int) in.skip (n);
Tom Tromey committed
691
      }
692 693 694 695 696
    catch (EOFException x)
      {
        // do nothing.
      }         
    return n;
Tom Tromey committed
697
  }
698
  
699
  static boolean convertToBoolean (int b) throws EOFException
700 701
  {
    if (b < 0)
702 703
      throw new EOFException ();
    
704 705 706
    return (b != 0);
  }

707
  static byte convertToByte (int i) throws EOFException
708 709
  {
    if (i < 0)
710 711
      throw new EOFException ();
    
712 713 714
    return (byte) i;
  }

715
  static int convertToUnsignedByte (int i) throws EOFException
716 717
  {
    if (i < 0)
718 719
      throw new EOFException ();
    
720 721 722
    return (i & 0xFF);
  }

723
  static char convertToChar (byte[] buf)
724
  {
725 726
    return (char) ((buf [0] << 8)
		    | (buf [1] & 0xff));  
727 728
  }  

729
  static short convertToShort (byte[] buf)
730
  {
731 732
    return (short) ((buf [0] << 8)
		    | (buf [1] & 0xff));  
733 734
  }  

735
  static int convertToUnsignedShort (byte[] buf)
736
  {
737 738
    return (((buf [0] & 0xff) << 8)
	    | (buf [1] & 0xff));  
739 740
  }

741
  static int convertToInt (byte[] buf)
742
  {
743 744 745 746
    return (((buf [0] & 0xff) << 24)
	    | ((buf [1] & 0xff) << 16)
	    | ((buf [2] & 0xff) << 8)
	    | (buf [3] & 0xff));  
747 748
  }

749
  static long convertToLong (byte[] buf)
750
  {
751 752 753 754 755 756 757 758
    return (((long)(buf [0] & 0xff) << 56) |
	    ((long)(buf [1] & 0xff) << 48) |
	    ((long)(buf [2] & 0xff) << 40) |
	    ((long)(buf [3] & 0xff) << 32) |
	    ((long)(buf [4] & 0xff) << 24) |
	    ((long)(buf [5] & 0xff) << 16) |
	    ((long)(buf [6] & 0xff) <<  8) |
	    ((long)(buf [7] & 0xff)));  
759 760
  }

761 762 763 764
  // FIXME: This method should be re-thought.  I suspect we have multiple
  // UTF-8 decoders floating around.  We should use the standard charset
  // converters, maybe and adding a direct call into one of the new
  // NIO converters for a super-fast UTF8 decode.
765
  static String convertFromUTF (byte[] buf) 
766 767
    throws EOFException, UTFDataFormatException
  {
Mark Wielaard committed
768 769
    // Give StringBuffer an initial estimated size to avoid 
    // enlarge buffer frequently
770
    StringBuffer strbuf = new StringBuffer (buf.length / 2 + 2);
771 772 773

    for (int i = 0; i < buf.length; )
      {
774 775 776
	if ((buf [i] & 0x80) == 0)		// bit pattern 0xxxxxxx
	  strbuf.append ((char) (buf [i++] & 0xFF));
	else if ((buf [i] & 0xE0) == 0xC0)	// bit pattern 110xxxxx
777
	  {
778 779 780
	    if (i + 1 >= buf.length
		|| (buf [i + 1] & 0xC0) != 0x80)
	      throw new UTFDataFormatException ();
781

782 783
	    strbuf.append((char) (((buf [i++] & 0x1F) << 6)
				  | (buf [i++] & 0x3F)));
784
	  }
785
	else if ((buf [i] & 0xF0) == 0xE0)	// bit pattern 1110xxxx
786
	  {
787 788 789 790 791 792 793 794
	    if (i + 2 >= buf.length
		|| (buf [i + 1] & 0xC0) != 0x80
		|| (buf [i + 2] & 0xC0) != 0x80)
	      throw new UTFDataFormatException ();

	    strbuf.append ((char) (((buf [i++] & 0x0F) << 12)
				   | ((buf [i++] & 0x3F) << 6)
				   | (buf [i++] & 0x3F)));
795
	  }
796 797
	else // must be ((buf [i] & 0xF0) == 0xF0 || (buf [i] & 0xC0) == 0x80)
	  throw new UTFDataFormatException ();	// bit patterns 1111xxxx or
798 799 800
						// 		10xxxxxx
      }

801
    return strbuf.toString ();
802
  }
Tom Tromey committed
803
}