01 /*
02 * TestDocumentStaxUtils.java
03 *
04 * Copyright (c) 1995-2010, The University of Sheffield. See the file
05 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
06 *
07 * This file is part of GATE (see http://gate.ac.uk/), and is free
08 * software, licenced under the GNU Library General Public License,
09 * Version 2, June 1991 (in the distribution as file licence.html,
10 * and also available at http://gate.ac.uk/gate/licence.html).
11 *
12 * Ian Roberts, 1/Sep/2008
13 *
14 * $Id: TestDocumentStaxUtils.java 12006 2009-12-01 17:24:28Z thomas_heitz $
15 */
16
17 package gate.corpora;
18
19 import junit.framework.*;
20
21 public class TestDocumentStaxUtils extends TestCase {
22
23 public TestDocumentStaxUtils(String name) {
24 super(name);
25 }
26
27 public void setUp() {
28
29 }
30
31 public void testIllegalXMLCharacters() throws Exception {
32 char[] chars = new char[] {
33 '\u0000', // null
34 '\n', // LF (this is OK)
35 '\uD801', '\uDC01', // surrogate pair, this is OK
36 ' ', // space (this is OK)
37 '\uDC03' // unpaired low surrogate
38 };
39
40 DocumentStaxUtils.replaceXMLIllegalCharacters(chars);
41 assertEquals("Null character should have been replaced by space",
42 ' ', chars[0]);
43 assertEquals("Line feed character should not have been replaced",
44 '\n', chars[1]);
45 assertEquals("High surrogate of a valid pair should not have been replaced",
46 '\uD801', chars[2]);
47 assertEquals("Low surrogate of a valid pair should not have been replaced",
48 '\uDC01', chars[3]);
49 assertEquals("Space character should not have been replaced",
50 ' ', chars[4]);
51 assertEquals("Unpaired low surrogate should have been replaced",
52 ' ', chars[5]);
53 }
54
55 /** Test suite routine for the test runner */
56 public static Test suite() {
57 return new TestSuite(TestDocumentStaxUtils.class);
58 } // suite
59
60 }
|