// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package unicode_test

import (
	"testing"
	. "unicode"
)

// T is one table-driven test case: a code point paired with the name of the
// table it is checked against. The script field is used as the lookup key
// into Scripts, Categories or Properties, depending on which test consumes
// the entry.
type T struct {
	rune   rune   // code point under test
	script string // key into Scripts, Categories or Properties
}

17 18
// Hand-chosen tests from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0, 7.0.0 and 8.0.0
// mostly to discover when new scripts and categories arise.
19
var inTest = []T{
20
	{0x11711, "Ahom"},
21
	{0x1e900, "Adlam"},
22
	{0x14646, "Anatolian_Hieroglyphs"},
23 24 25
	{0x06e2, "Arabic"},
	{0x0567, "Armenian"},
	{0x10b20, "Avestan"},
26
	{0x11c00, "Bhaiksuki"},
27 28
	{0x1b37, "Balinese"},
	{0xa6af, "Bamum"},
29
	{0x16ada, "Bassa_Vah"},
30
	{0x1be1, "Batak"},
31 32 33 34 35
	{0x09c2, "Bengali"},
	{0x3115, "Bopomofo"},
	{0x282d, "Braille"},
	{0x1a1a, "Buginese"},
	{0x1747, "Buhid"},
36
	{0x11011, "Brahmi"},
37 38
	{0x156d, "Canadian_Aboriginal"},
	{0x102a9, "Carian"},
39
	{0x10563, "Caucasian_Albanian"},
40
	{0x11111, "Chakma"},
41 42 43 44 45 46 47 48 49 50
	{0xaa4d, "Cham"},
	{0x13c2, "Cherokee"},
	{0x0020, "Common"},
	{0x1d4a5, "Common"},
	{0x2cfc, "Coptic"},
	{0x12420, "Cuneiform"},
	{0x1080c, "Cypriot"},
	{0xa663, "Cyrillic"},
	{0x10430, "Deseret"},
	{0x094a, "Devanagari"},
51
	{0x1BC00, "Duployan"},
52
	{0x13001, "Egyptian_Hieroglyphs"},
53
	{0x10500, "Elbasan"},
54 55 56 57
	{0x1271, "Ethiopic"},
	{0x10fc, "Georgian"},
	{0x2c40, "Glagolitic"},
	{0x10347, "Gothic"},
58
	{0x11303, "Grantha"},
59 60 61 62 63 64
	{0x03ae, "Greek"},
	{0x0abf, "Gujarati"},
	{0x0a24, "Gurmukhi"},
	{0x3028, "Han"},
	{0x11b8, "Hangul"},
	{0x1727, "Hanunoo"},
65
	{0x108FF, "Hatran"},
66 67 68 69 70 71 72 73 74 75 76 77 78
	{0x05a0, "Hebrew"},
	{0x3058, "Hiragana"},
	{0x10841, "Imperial_Aramaic"},
	{0x20e6, "Inherited"},
	{0x10b70, "Inscriptional_Pahlavi"},
	{0x10b5a, "Inscriptional_Parthian"},
	{0xa9d0, "Javanese"},
	{0x1109f, "Kaithi"},
	{0x0cbd, "Kannada"},
	{0x30a6, "Katakana"},
	{0xa928, "Kayah_Li"},
	{0x10a11, "Kharoshthi"},
	{0x17c6, "Khmer"},
79 80
	{0x11211, "Khojki"},
	{0x112df, "Khudawadi"},
81 82 83 84
	{0x0eaa, "Lao"},
	{0x1d79, "Latin"},
	{0x1c10, "Lepcha"},
	{0x1930, "Limbu"},
85
	{0x10755, "Linear_A"},
86 87 88 89
	{0x1003c, "Linear_B"},
	{0xa4e1, "Lisu"},
	{0x10290, "Lycian"},
	{0x10930, "Lydian"},
90
	{0x11173, "Mahajani"},
91
	{0x0d42, "Malayalam"},
92
	{0x0843, "Mandaic"},
93
	{0x10ac8, "Manichaean"},
94
	{0x11cB6, "Marchen"},
95
	{0xabd0, "Meetei_Mayek"},
96
	{0x1e800, "Mende_Kikakui"},
97 98 99
	{0x1099f, "Meroitic_Hieroglyphs"},
	{0x109a0, "Meroitic_Cursive"},
	{0x16f00, "Miao"},
100
	{0x11611, "Modi"},
101
	{0x1822, "Mongolian"},
102
	{0x16a60, "Mro"},
103
	{0x11293, "Multani"},
104
	{0x104c, "Myanmar"},
105
	{0x10880, "Nabataean"},
106
	{0x11400, "Newa"},
107 108 109 110
	{0x19c3, "New_Tai_Lue"},
	{0x07f8, "Nko"},
	{0x169b, "Ogham"},
	{0x1c6a, "Ol_Chiki"},
111
	{0x10C80, "Old_Hungarian"},
112
	{0x10310, "Old_Italic"},
113 114
	{0x10a80, "Old_North_Arabian"},
	{0x10350, "Old_Permic"},
115 116 117 118
	{0x103c9, "Old_Persian"},
	{0x10a6f, "Old_South_Arabian"},
	{0x10c20, "Old_Turkic"},
	{0x0b3e, "Oriya"},
119
	{0x104d9, "Osage"},
120
	{0x10491, "Osmanya"},
121 122 123
	{0x16b2b, "Pahawh_Hmong"},
	{0x10876, "Palmyrene"},
	{0x11ACE, "Pau_Cin_Hau"},
124 125
	{0xa860, "Phags_Pa"},
	{0x10918, "Phoenician"},
126
	{0x10baf, "Psalter_Pahlavi"},
127 128 129 130
	{0xa949, "Rejang"},
	{0x16c0, "Runic"},
	{0x081d, "Samaritan"},
	{0xa892, "Saurashtra"},
131
	{0x111a0, "Sharada"},
132
	{0x10463, "Shavian"},
133
	{0x115c1, "Siddham"},
134
	{0x1D920, "SignWriting"},
135
	{0x0dbd, "Sinhala"},
136
	{0x110d0, "Sora_Sompeng"},
137 138 139 140 141 142 143 144
	{0x1ba3, "Sundanese"},
	{0xa803, "Syloti_Nagri"},
	{0x070f, "Syriac"},
	{0x170f, "Tagalog"},
	{0x176f, "Tagbanwa"},
	{0x1972, "Tai_Le"},
	{0x1a62, "Tai_Tham"},
	{0xaadc, "Tai_Viet"},
145
	{0x116c9, "Takri"},
146
	{0x0bbf, "Tamil"},
147
	{0x17000, "Tangut"},
148 149 150 151 152
	{0x0c55, "Telugu"},
	{0x07a7, "Thaana"},
	{0x0e46, "Thai"},
	{0x0f36, "Tibetan"},
	{0x2d55, "Tifinagh"},
153
	{0x114d9, "Tirhuta"},
154 155
	{0x10388, "Ugaritic"},
	{0xa60e, "Vai"},
156
	{0x118ff, "Warang_Citi"},
157 158 159 160 161 162 163 164 165
	{0xa216, "Yi"},
}

// outTest holds negative cases: each rune must NOT be reported as a member
// of the named script. Not really worth being thorough here.
var outTest = []T{
	{rune: 0x0020, script: "Telugu"},
}

var inCategoryTest = []T{
	{0x0081, "Cc"},
166
	{0x200B, "Cf"},
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
	{0xf0000, "Co"},
	{0xdb80, "Cs"},
	{0x0236, "Ll"},
	{0x1d9d, "Lm"},
	{0x07cf, "Lo"},
	{0x1f8a, "Lt"},
	{0x03ff, "Lu"},
	{0x0bc1, "Mc"},
	{0x20df, "Me"},
	{0x07f0, "Mn"},
	{0x1bb2, "Nd"},
	{0x10147, "Nl"},
	{0x2478, "No"},
	{0xfe33, "Pc"},
	{0x2011, "Pd"},
	{0x301e, "Pe"},
	{0x2e03, "Pf"},
	{0x2e02, "Pi"},
	{0x0022, "Po"},
	{0x2770, "Ps"},
	{0x00a4, "Sc"},
	{0xa711, "Sk"},
	{0x25f9, "Sm"},
	{0x2108, "So"},
	{0x2028, "Zl"},
	{0x2029, "Zp"},
	{0x202f, "Zs"},
194 195 196 197 198 199 200 201
	// Unifieds.
	{0x04aa, "L"},
	{0x0009, "C"},
	{0x1712, "M"},
	{0x0031, "N"},
	{0x00bb, "P"},
	{0x00a2, "S"},
	{0x00a0, "Z"},
202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
}

var inPropTest = []T{
	{0x0046, "ASCII_Hex_Digit"},
	{0x200F, "Bidi_Control"},
	{0x2212, "Dash"},
	{0xE0001, "Deprecated"},
	{0x00B7, "Diacritic"},
	{0x30FE, "Extender"},
	{0xFF46, "Hex_Digit"},
	{0x2E17, "Hyphen"},
	{0x2FFB, "IDS_Binary_Operator"},
	{0x2FF3, "IDS_Trinary_Operator"},
	{0xFA6A, "Ideographic"},
	{0x200D, "Join_Control"},
	{0x0EC4, "Logical_Order_Exception"},
	{0x2FFFF, "Noncharacter_Code_Point"},
	{0x065E, "Other_Alphabetic"},
220
	{0x2065, "Other_Default_Ignorable_Code_Point"},
221 222 223 224 225 226 227 228
	{0x0BD7, "Other_Grapheme_Extend"},
	{0x0387, "Other_ID_Continue"},
	{0x212E, "Other_ID_Start"},
	{0x2094, "Other_Lowercase"},
	{0x2040, "Other_Math"},
	{0x216F, "Other_Uppercase"},
	{0x0027, "Pattern_Syntax"},
	{0x0020, "Pattern_White_Space"},
229
	{0x06DD, "Prepended_Concatenation_Mark"},
230 231
	{0x300D, "Quotation_Mark"},
	{0x2EF3, "Radical"},
232 233
	{0x061F, "STerm"}, // Deprecated alias of Sentence_Terminal
	{0x061F, "Sentence_Terminal"},
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
	{0x2071, "Soft_Dotted"},
	{0x003A, "Terminal_Punctuation"},
	{0x9FC3, "Unified_Ideograph"},
	{0xFE0F, "Variation_Selector"},
	{0x0020, "White_Space"},
}

func TestScripts(t *testing.T) {
	notTested := make(map[string]bool)
	for k := range Scripts {
		notTested[k] = true
	}
	for _, test := range inTest {
		if _, ok := Scripts[test.script]; !ok {
			t.Fatal(test.script, "not a known script")
		}
		if !Is(Scripts[test.script], test.rune) {
251
			t.Errorf("IsScript(%U, %s) = false, want true", test.rune, test.script)
252
		}
253
		delete(notTested, test.script)
254 255 256
	}
	for _, test := range outTest {
		if Is(Scripts[test.script], test.rune) {
257
			t.Errorf("IsScript(%U, %s) = true, want false", test.rune, test.script)
258 259 260
		}
	}
	for k := range notTested {
261
		t.Error("script not tested:", k)
262 263 264 265 266 267 268 269 270 271 272 273 274
	}
}

func TestCategories(t *testing.T) {
	notTested := make(map[string]bool)
	for k := range Categories {
		notTested[k] = true
	}
	for _, test := range inCategoryTest {
		if _, ok := Categories[test.script]; !ok {
			t.Fatal(test.script, "not a known category")
		}
		if !Is(Categories[test.script], test.rune) {
275
			t.Errorf("IsCategory(%U, %s) = false, want true", test.rune, test.script)
276
		}
277
		delete(notTested, test.script)
278 279
	}
	for k := range notTested {
280
		t.Error("category not tested:", k)
281 282 283 284 285 286 287 288 289 290 291 292 293
	}
}

func TestProperties(t *testing.T) {
	notTested := make(map[string]bool)
	for k := range Properties {
		notTested[k] = true
	}
	for _, test := range inPropTest {
		if _, ok := Properties[test.script]; !ok {
			t.Fatal(test.script, "not a known prop")
		}
		if !Is(Properties[test.script], test.rune) {
294
			t.Errorf("IsCategory(%U, %s) = false, want true", test.rune, test.script)
295
		}
296
		delete(notTested, test.script)
297 298
	}
	for k := range notTested {
299
		t.Error("property not tested:", k)
300 301
	}
}