[1/6] generated character data for libc/ctype

Message ID ca759852-fc43-bc42-7a3a-9d61505eac95@towo.net
State New
Headers show
Series
  • [1/6] generated character data for libc/ctype
Related show

Commit Message

Thomas Wolff March 7, 2018, 11:18 p.m.
Makefile add-ons for both patch series (libc/string and libc/ctype) will 
be sent separately.
From 4cd871bea1c6cf677d57587a7e844bb9cb3b19be Mon Sep 17 00:00:00 2001
From: Thomas Wolff <towo@towo.net>
Date: Sun, 25 Feb 2018 16:29:33 +0100
Subject: [PATCH 1/6] generated case conversion data, Unicode 10.0

Table caseconv.t provides case conversion data for the tow* functions, 
especially towupper and towlower.
These data are generated from Unicode data.
---
 newlib/libc/ctype/caseconv.t | 304 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 304 insertions(+)
 create mode 100644 newlib/libc/ctype/caseconv.t

Comments

Thomas Wolff March 7, 2018, 11:20 p.m. | #1
From d38f23695b7678032d0afed05ff66f30fbc3399b Mon Sep 17 00:00:00 2001
From: Thomas Wolff <towo@towo.net>
Date: Sun, 25 Feb 2018 16:30:27 +0100
Subject: [PATCH 2/6] generated character category data, Unicode 10.0

Table categories.t and tag enumeration categories.cat provide 
character class data for most of the isw* functions.
These data are generated from Unicode data.
---
 newlib/libc/ctype/categories.cat |   29 +
 newlib/libc/ctype/categories.t   | 2086 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 2115 insertions(+)
 create mode 100644 newlib/libc/ctype/categories.cat
 create mode 100644 newlib/libc/ctype/categories.t

diff --git a/newlib/libc/ctype/categories.cat b/newlib/libc/ctype/categories.cat
new file mode 100644
index 0000000..cade60a
--- /dev/null
+++ b/newlib/libc/ctype/categories.cat
@@ -0,0 +1,29 @@
+  CAT_Cc,
+  CAT_Cf,
+  CAT_Cs,
+  CAT_LC,
+  CAT_Ll,
+  CAT_Lm,
+  CAT_Lo,
+  CAT_Lt,
+  CAT_Lu,
+  CAT_Mc,
+  CAT_Me,
+  CAT_Mn,
+  CAT_Nd,
+  CAT_Nl,
+  CAT_No,
+  CAT_Pc,
+  CAT_Pd,
+  CAT_Pe,
+  CAT_Pf,
+  CAT_Pi,
+  CAT_Po,
+  CAT_Ps,
+  CAT_Sc,
+  CAT_Sk,
+  CAT_Sm,
+  CAT_So,
+  CAT_Zl,
+  CAT_Zp,
+  CAT_Zs,
diff --git a/newlib/libc/ctype/categories.t b/newlib/libc/ctype/categories.t
new file mode 100644
index 0000000..dd5cf7d
--- /dev/null
+++ b/newlib/libc/ctype/categories.t
@@ -0,0 +1,2086 @@
+    {CAT_Cc, 0x0000, 31},
+    {CAT_Zs, 0x0020, 0},
+    {CAT_Po, 0x0021, 2},
+    {CAT_Sc, 0x0024, 0},
+    {CAT_Po, 0x0025, 2},
+    {CAT_Ps, 0x0028, 0},
+    {CAT_Pe, 0x0029, 0},
+    {CAT_Po, 0x002A, 0},
+    {CAT_Sm, 0x002B, 0},
+    {CAT_Po, 0x002C, 0},
+    {CAT_Pd, 0x002D, 0},
+    {CAT_Po, 0x002E, 1},
+    {CAT_Nd, 0x0030, 9},
+    {CAT_Po, 0x003A, 1},
+    {CAT_Sm, 0x003C, 2},
+    {CAT_Po, 0x003F, 1},
+    {CAT_LC, 0x0041, 25},
+    {CAT_Ps, 0x005B, 0},
+    {CAT_Po, 0x005C, 0},
+    {CAT_Pe, 0x005D, 0},
+    {CAT_Sk, 0x005E, 0},
+    {CAT_Pc, 0x005F, 0},
+    {CAT_Sk, 0x0060, 0},
+    {CAT_LC, 0x0061, 25},
+    {CAT_Ps, 0x007B, 0},
+    {CAT_Sm, 0x007C, 0},
+    {CAT_Pe, 0x007D, 0},
+    {CAT_Sm, 0x007E, 0},
+    {CAT_Cc, 0x007F, 32},
+    {CAT_Zs, 0x00A0, 0},
+    {CAT_Po, 0x00A1, 0},
+    {CAT_Sc, 0x00A2, 3},
+    {CAT_So, 0x00A6, 0},
+    {CAT_Po, 0x00A7, 0},
+    {CAT_Sk, 0x00A8, 0},
+    {CAT_So, 0x00A9, 0},
+    {CAT_Lo, 0x00AA, 0},
+    {CAT_Pi, 0x00AB, 0},
+    {CAT_Sm, 0x00AC, 0},
+    {CAT_Cf, 0x00AD, 0},
+    {CAT_So, 0x00AE, 0},
+    {CAT_Sk, 0x00AF, 0},
+    {CAT_So, 0x00B0, 0},
+    {CAT_Sm, 0x00B1, 0},
+    {CAT_No, 0x00B2, 1},
+    {CAT_Sk, 0x00B4, 0},
+    {CAT_LC, 0x00B5, 0},
+    {CAT_Po, 0x00B6, 1},
+    {CAT_Sk, 0x00B8, 0},
+    {CAT_No, 0x00B9, 0},
+    {CAT_Lo, 0x00BA, 0},
+    {CAT_Pf, 0x00BB, 0},
+    {CAT_No, 0x00BC, 2},
+    {CAT_Po, 0x00BF, 0},
+    {CAT_LC, 0x00C0, 22},
+    {CAT_Sm, 0x00D7, 0},
+    {CAT_LC, 0x00D8, 6},
+    {CAT_Ll, 0x00DF, 0},
+    {CAT_LC, 0x00E0, 22},
+    {CAT_Sm, 0x00F7, 0},
+    {CAT_LC, 0x00F8, 63},
+    {CAT_Ll, 0x0138, 0},
+    {CAT_LC, 0x0139, 15},
+    {CAT_Ll, 0x0149, 0},
+    {CAT_LC, 0x014A, 66},
+    {CAT_Ll, 0x018D, 0},
+    {CAT_LC, 0x018E, 12},
+    {CAT_Ll, 0x019B, 0},
+    {CAT_LC, 0x019C, 13},
+    {CAT_Ll, 0x01AA, 1},
+    {CAT_LC, 0x01AC, 13},
+    {CAT_Ll, 0x01BA, 0},
+    {CAT_Lo, 0x01BB, 0},
+    {CAT_LC, 0x01BC, 1},
+    {CAT_Ll, 0x01BE, 0},
+    {CAT_LC, 0x01BF, 0},
+    {CAT_Lo, 0x01C0, 3},
+    {CAT_LC, 0x01C4, 0},
+    {CAT_Lt, 0x01C5, 0},
+    {CAT_LC, 0x01C6, 1},
+    {CAT_Lt, 0x01C8, 0},
+    {CAT_LC, 0x01C9, 1},
+    {CAT_Lt, 0x01CB, 0},
+    {CAT_LC, 0x01CC, 35},
+    {CAT_Ll, 0x01F0, 0},
+    {CAT_LC, 0x01F1, 0},
+    {CAT_Lt, 0x01F2, 0},
+    {CAT_LC, 0x01F3, 45},
+    {CAT_Ll, 0x0221, 0},
+    {CAT_LC, 0x0222, 17},
+    {CAT_Ll, 0x0234, 5},
+    {CAT_LC, 0x023A, 26},
+    {CAT_Ll, 0x0255, 0},
+    {CAT_LC, 0x0256, 1},
+    {CAT_Ll, 0x0258, 0},
+    {CAT_LC, 0x0259, 0},
+    {CAT_Ll, 0x025A, 0},
+    {CAT_LC, 0x025B, 1},
+    {CAT_Ll, 0x025D, 2},
+    {CAT_LC, 0x0260, 1},
+    {CAT_Ll, 0x0262, 0},
+    {CAT_LC, 0x0263, 0},
+    {CAT_Ll, 0x0264, 0},
+    {CAT_LC, 0x0265, 1},
+    {CAT_Ll, 0x0267, 0},
+    {CAT_LC, 0x0268, 4},
+    {CAT_Ll, 0x026D, 1},
+    {CAT_LC, 0x026F, 0},
+    {CAT_Ll, 0x0270, 0},
+    {CAT_LC, 0x0271, 1},
+    {CAT_Ll, 0x0273, 1},
+    {CAT_LC, 0x0275, 0},
+    {CAT_Ll, 0x0276, 6},
+    {CAT_LC, 0x027D, 0},
+    {CAT_Ll, 0x027E, 1},
+    {CAT_LC, 0x0280, 0},
+    {CAT_Ll, 0x0281, 1},
+    {CAT_LC, 0x0283, 0},
+    {CAT_Ll, 0x0284, 2},
+    {CAT_LC, 0x0287, 5},
+    {CAT_Ll, 0x028D, 4},
+    {CAT_LC, 0x0292, 0},
+    {CAT_Ll, 0x0293, 0},
+    {CAT_Lo, 0x0294, 0},
+    {CAT_Ll, 0x0295, 7},
+    {CAT_LC, 0x029D, 1},
+    {CAT_Ll, 0x029F, 16},
+    {CAT_Lm, 0x02B0, 17},
+    {CAT_Sk, 0x02C2, 3},
+    {CAT_Lm, 0x02C6, 11},
+    {CAT_Sk, 0x02D2, 13},
+    {CAT_Lm, 0x02E0, 4},
+    {CAT_Sk, 0x02E5, 6},
+    {CAT_Lm, 0x02EC, 0},
+    {CAT_Sk, 0x02ED, 0},
+    {CAT_Lm, 0x02EE, 0},
+    {CAT_Sk, 0x02EF, 16},
+    {CAT_Mn, 0x0300, 111},
+    {CAT_LC, 0x0370, 3},
+    {CAT_Lm, 0x0374, 0},
+    {CAT_Sk, 0x0375, 0},
+    {CAT_LC, 0x0376, 1},
+    {CAT_Lm, 0x037A, 0},
+    {CAT_LC, 0x037B, 2},
+    {CAT_Po, 0x037E, 0},
+    {CAT_LC, 0x037F, 0},
+    {CAT_Sk, 0x0384, 1},
+    {CAT_LC, 0x0386, 0},
+    {CAT_Po, 0x0387, 0},
+    {CAT_LC, 0x0388, 2},
+    {CAT_LC, 0x038C, 0},
+    {CAT_LC, 0x038E, 1},
+    {CAT_Ll, 0x0390, 0},
+    {CAT_LC, 0x0391, 16},
+    {CAT_LC, 0x03A3, 12},
+    {CAT_Ll, 0x03B0, 0},
+    {CAT_LC, 0x03B1, 32},
+    {CAT_Lu, 0x03D2, 2},
+    {CAT_LC, 0x03D5, 32},
+    {CAT_Sm, 0x03F6, 0},
+    {CAT_LC, 0x03F7, 4},
+    {CAT_Ll, 0x03FC, 0},
+    {CAT_LC, 0x03FD, 132},
+    {CAT_So, 0x0482, 0},
+    {CAT_Mn, 0x0483, 4},
+    {CAT_Me, 0x0488, 1},
+    {CAT_LC, 0x048A, 165},
+    {CAT_LC, 0x0531, 37},
+    {CAT_Lm, 0x0559, 0},
+    {CAT_Po, 0x055A, 5},
+    {CAT_LC, 0x0561, 37},
+    {CAT_Ll, 0x0587, 0},
+    {CAT_Po, 0x0589, 0},
+    {CAT_Pd, 0x058A, 0},
+    {CAT_So, 0x058D, 1},
+    {CAT_Sc, 0x058F, 0},
+    {CAT_Mn, 0x0591, 44},
+    {CAT_Pd, 0x05BE, 0},
+    {CAT_Mn, 0x05BF, 0},
+    {CAT_Po, 0x05C0, 0},
+    {CAT_Mn, 0x05C1, 1},
+    {CAT_Po, 0x05C3, 0},
+    {CAT_Mn, 0x05C4, 1},
+    {CAT_Po, 0x05C6, 0},
+    {CAT_Mn, 0x05C7, 0},
+    {CAT_Lo, 0x05D0, 26},
+    {CAT_Lo, 0x05F0, 2},
+    {CAT_Po, 0x05F3, 1},
+    {CAT_Cf, 0x0600, 5},
+    {CAT_Sm, 0x0606, 2},
+    {CAT_Po, 0x0609, 1},
+    {CAT_Sc, 0x060B, 0},
+    {CAT_Po, 0x060C, 1},
+    {CAT_So, 0x060E, 1},
+    {CAT_Mn, 0x0610, 10},
+    {CAT_Po, 0x061B, 0},
+    {CAT_Cf, 0x061C, 0},
+    {CAT_Po, 0x061E, 1},
+    {CAT_Lo, 0x0620, 31},
+    {CAT_Lm, 0x0640, 0},
+    {CAT_Lo, 0x0641, 9},
+    {CAT_Mn, 0x064B, 20},
+    {CAT_Nd, 0x0660, 9},
+    {CAT_Po, 0x066A, 3},
+    {CAT_Lo, 0x066E, 1},
+    {CAT_Mn, 0x0670, 0},
+    {CAT_Lo, 0x0671, 98},
+    {CAT_Po, 0x06D4, 0},
+    {CAT_Lo, 0x06D5, 0},
+    {CAT_Mn, 0x06D6, 6},
+    {CAT_Cf, 0x06DD, 0},
+    {CAT_So, 0x06DE, 0},
+    {CAT_Mn, 0x06DF, 5},
+    {CAT_Lm, 0x06E5, 1},
+    {CAT_Mn, 0x06E7, 1},
+    {CAT_So, 0x06E9, 0},
+    {CAT_Mn, 0x06EA, 3},
+    {CAT_Lo, 0x06EE, 1},
+    {CAT_Nd, 0x06F0, 9},
+    {CAT_Lo, 0x06FA, 2},
+    {CAT_So, 0x06FD, 1},
+    {CAT_Lo, 0x06FF, 0},
+    {CAT_Po, 0x0700, 13},
+    {CAT_Cf, 0x070F, 0},
+    {CAT_Lo, 0x0710, 0},
+    {CAT_Mn, 0x0711, 0},
+    {CAT_Lo, 0x0712, 29},
+    {CAT_Mn, 0x0730, 26},
+    {CAT_Lo, 0x074D, 88},
+    {CAT_Mn, 0x07A6, 10},
+    {CAT_Lo, 0x07B1, 0},
+    {CAT_Nd, 0x07C0, 9},
+    {CAT_Lo, 0x07CA, 32},
+    {CAT_Mn, 0x07EB, 8},
+    {CAT_Lm, 0x07F4, 1},
+    {CAT_So, 0x07F6, 0},
+    {CAT_Po, 0x07F7, 2},
+    {CAT_Lm, 0x07FA, 0},
+    {CAT_Lo, 0x0800, 21},
+    {CAT_Mn, 0x0816, 3},
+    {CAT_Lm, 0x081A, 0},
+    {CAT_Mn, 0x081B, 8},
+    {CAT_Lm, 0x0824, 0},
+    {CAT_Mn, 0x0825, 2},
+    {CAT_Lm, 0x0828, 0},
+    {CAT_Mn, 0x0829, 4},
+    {CAT_Po, 0x0830, 14},
+    {CAT_Lo, 0x0840, 24},
+    {CAT_Mn, 0x0859, 2},
+    {CAT_Po, 0x085E, 0},
+    {CAT_Lo, 0x0860, 10},
+    {CAT_Lo, 0x08A0, 20},
+    {CAT_Lo, 0x08B6, 7},
+    {CAT_Mn, 0x08D4, 13},
+    {CAT_Cf, 0x08E2, 0},
+    {CAT_Mn, 0x08E3, 31},
+    {CAT_Mc, 0x0903, 0},
+    {CAT_Lo, 0x0904, 53},
+    {CAT_Mn, 0x093A, 0},
+    {CAT_Mc, 0x093B, 0},
+    {CAT_Mn, 0x093C, 0},
+    {CAT_Lo, 0x093D, 0},
+    {CAT_Mc, 0x093E, 2},
+    {CAT_Mn, 0x0941, 7},
+    {CAT_Mc, 0x0949, 3},
+    {CAT_Mn, 0x094D, 0},
+    {CAT_Mc, 0x094E, 1},
+    {CAT_Lo, 0x0950, 0},
+    {CAT_Mn, 0x0951, 6},
+    {CAT_Lo, 0x0958, 9},
+    {CAT_Mn, 0x0962, 1},
+    {CAT_Po, 0x0964, 1},
+    {CAT_Nd, 0x0966, 9},
+    {CAT_Po, 0x0970, 0},
+    {CAT_Lm, 0x0971, 0},
+    {CAT_Lo, 0x0972, 14},
+    {CAT_Mn, 0x0981, 0},
+    {CAT_Mc, 0x0982, 1},
+    {CAT_Lo, 0x0985, 7},
+    {CAT_Lo, 0x098F, 1},
+    {CAT_Lo, 0x0993, 21},
+    {CAT_Lo, 0x09AA, 6},
+    {CAT_Lo, 0x09B2, 0},
+    {CAT_Lo, 0x09B6, 3},
+    {CAT_Mn, 0x09BC, 0},
+    {CAT_Lo, 0x09BD, 0},
+    {CAT_Mc, 0x09BE, 2},
+    {CAT_Mn, 0x09C1, 3},
+    {CAT_Mc, 0x09C7, 1},
+    {CAT_Mc, 0x09CB, 1},
+    {CAT_Mn, 0x09CD, 0},
+    {CAT_Lo, 0x09CE, 0},
+    {CAT_Mc, 0x09D7, 0},
+    {CAT_Lo, 0x09DC, 1},
+    {CAT_Lo, 0x09DF, 2},
+    {CAT_Mn, 0x09E2, 1},
+    {CAT_Nd, 0x09E6, 9},
+    {CAT_Lo, 0x09F0, 1},
+    {CAT_Sc, 0x09F2, 1},
+    {CAT_No, 0x09F4, 5},
+    {CAT_So, 0x09FA, 0},
+    {CAT_Sc, 0x09FB, 0},
+    {CAT_Lo, 0x09FC, 0},
+    {CAT_Po, 0x09FD, 0},
+    {CAT_Mn, 0x0A01, 1},
+    {CAT_Mc, 0x0A03, 0},
+    {CAT_Lo, 0x0A05, 5},
+    {CAT_Lo, 0x0A0F, 1},
+    {CAT_Lo, 0x0A13, 21},
+    {CAT_Lo, 0x0A2A, 6},
+    {CAT_Lo, 0x0A32, 1},
+    {CAT_Lo, 0x0A35, 1},
+    {CAT_Lo, 0x0A38, 1},
+    {CAT_Mn, 0x0A3C, 0},
+    {CAT_Mc, 0x0A3E, 2},
+    {CAT_Mn, 0x0A41, 1},
+    {CAT_Mn, 0x0A47, 1},
+    {CAT_Mn, 0x0A4B, 2},
+    {CAT_Mn, 0x0A51, 0},
+    {CAT_Lo, 0x0A59, 3},
+    {CAT_Lo, 0x0A5E, 0},
+    {CAT_Nd, 0x0A66, 9},
+    {CAT_Mn, 0x0A70, 1},
+    {CAT_Lo, 0x0A72, 2},
+    {CAT_Mn, 0x0A75, 0},
+    {CAT_Mn, 0x0A81, 1},
+    {CAT_Mc, 0x0A83, 0},
+    {CAT_Lo, 0x0A85, 8},
+    {CAT_Lo, 0x0A8F, 2},
+    {CAT_Lo, 0x0A93, 21},
+    {CAT_Lo, 0x0AAA, 6},
+    {CAT_Lo, 0x0AB2, 1},
+    {CAT_Lo, 0x0AB5, 4},
+    {CAT_Mn, 0x0ABC, 0},
+    {CAT_Lo, 0x0ABD, 0},
+    {CAT_Mc, 0x0ABE, 2},
+    {CAT_Mn, 0x0AC1, 4},
+    {CAT_Mn, 0x0AC7, 1},
+    {CAT_Mc, 0x0AC9, 0},
+    {CAT_Mc, 0x0ACB, 1},
+    {CAT_Mn, 0x0ACD, 0},
+    {CAT_Lo, 0x0AD0, 0},
+    {CAT_Lo, 0x0AE0, 1},
+    {CAT_Mn, 0x0AE2, 1},
+    {CAT_Nd, 0x0AE6, 9},
+    {CAT_Po, 0x0AF0, 0},
+    {CAT_Sc, 0x0AF1, 0},
+    {CAT_Lo, 0x0AF9, 0},
+    {CAT_Mn, 0x0AFA, 5},
+    {CAT_Mn, 0x0B01, 0},
+    {CAT_Mc, 0x0B02, 1},
+    {CAT_Lo, 0x0B05, 7},
+    {CAT_Lo, 0x0B0F, 1},
+    {CAT_Lo, 0x0B13, 21},
+    {CAT_Lo, 0x0B2A, 6},
+    {CAT_Lo, 0x0B32, 1},
+    {CAT_Lo, 0x0B35, 4},
+    {CAT_Mn, 0x0B3C, 0},
+    {CAT_Lo, 0x0B3D, 0},
+    {CAT_Mc, 0x0B3E, 0},
+    {CAT_Mn, 0x0B3F, 0},
+    {CAT_Mc, 0x0B40, 0},
+    {CAT_Mn, 0x0B41, 3},
+    {CAT_Mc, 0x0B47, 1},
+    {CAT_Mc, 0x0B4B, 1},
+    {CAT_Mn, 0x0B4D, 0},
+    {CAT_Mn, 0x0B56, 0},
+    {CAT_Mc, 0x0B57, 0},
+    {CAT_Lo, 0x0B5C, 1},
+    {CAT_Lo, 0x0B5F, 2},
+    {CAT_Mn, 0x0B62, 1},
+    {CAT_Nd, 0x0B66, 9},
+    {CAT_So, 0x0B70, 0},
+    {CAT_Lo, 0x0B71, 0},
+    {CAT_No, 0x0B72, 5},
+    {CAT_Mn, 0x0B82, 0},
+    {CAT_Lo, 0x0B83, 0},
+    {CAT_Lo, 0x0B85, 5},
+    {CAT_Lo, 0x0B8E, 2},
+    {CAT_Lo, 0x0B92, 3},
+    {CAT_Lo, 0x0B99, 1},
+    {CAT_Lo, 0x0B9C, 0},
+    {CAT_Lo, 0x0B9E, 1},
+    {CAT_Lo, 0x0BA3, 1},
+    {CAT_Lo, 0x0BA8, 2},
+    {CAT_Lo, 0x0BAE, 11},
+    {CAT_Mc, 0x0BBE, 1},
+    {CAT_Mn, 0x0BC0, 0},
+    {CAT_Mc, 0x0BC1, 1},
+    {CAT_Mc, 0x0BC6, 2},
+    {CAT_Mc, 0x0BCA, 2},
+    {CAT_Mn, 0x0BCD, 0},
+    {CAT_Lo, 0x0BD0, 0},
+    {CAT_Mc, 0x0BD7, 0},
+    {CAT_Nd, 0x0BE6, 9},
+    {CAT_No, 0x0BF0, 2},
+    {CAT_So, 0x0BF3, 5},
+    {CAT_Sc, 0x0BF9, 0},
+    {CAT_So, 0x0BFA, 0},
+    {CAT_Mn, 0x0C00, 0},
+    {CAT_Mc, 0x0C01, 2},
+    {CAT_Lo, 0x0C05, 7},
+    {CAT_Lo, 0x0C0E, 2},
+    {CAT_Lo, 0x0C12, 22},
+    {CAT_Lo, 0x0C2A, 15},
+    {CAT_Lo, 0x0C3D, 0},
+    {CAT_Mn, 0x0C3E, 2},
+    {CAT_Mc, 0x0C41, 3},
+    {CAT_Mn, 0x0C46, 2},
+    {CAT_Mn, 0x0C4A, 3},
+    {CAT_Mn, 0x0C55, 1},
+    {CAT_Lo, 0x0C58, 2},
+    {CAT_Lo, 0x0C60, 1},
+    {CAT_Mn, 0x0C62, 1},
+    {CAT_Nd, 0x0C66, 9},
+    {CAT_No, 0x0C78, 6},
+    {CAT_So, 0x0C7F, 0},
+    {CAT_Lo, 0x0C80, 0},
+    {CAT_Mn, 0x0C81, 0},
+    {CAT_Mc, 0x0C82, 1},
+    {CAT_Lo, 0x0C85, 7},
+    {CAT_Lo, 0x0C8E, 2},
+    {CAT_Lo, 0x0C92, 22},
+    {CAT_Lo, 0x0CAA, 9},
+    {CAT_Lo, 0x0CB5, 4},
+    {CAT_Mn, 0x0CBC, 0},
+    {CAT_Lo, 0x0CBD, 0},
+    {CAT_Mc, 0x0CBE, 0},
+    {CAT_Mn, 0x0CBF, 0},
+    {CAT_Mc, 0x0CC0, 4},
+    {CAT_Mn, 0x0CC6, 0},
+    {CAT_Mc, 0x0CC7, 1},
+    {CAT_Mc, 0x0CCA, 1},
+    {CAT_Mn, 0x0CCC, 1},
+    {CAT_Mc, 0x0CD5, 1},
+    {CAT_Lo, 0x0CDE, 0},
+    {CAT_Lo, 0x0CE0, 1},
+    {CAT_Mn, 0x0CE2, 1},
+    {CAT_Nd, 0x0CE6, 9},
+    {CAT_Lo, 0x0CF1, 1},
+    {CAT_Mn, 0x0D00, 1},
+    {CAT_Mc, 0x0D02, 1},
+    {CAT_Lo, 0x0D05, 7},
+    {CAT_Lo, 0x0D0E, 2},
+    {CAT_Lo, 0x0D12, 40},
+    {CAT_Mn, 0x0D3B, 1},
+    {CAT_Lo, 0x0D3D, 0},
+    {CAT_Mc, 0x0D3E, 2},
+    {CAT_Mn, 0x0D41, 3},
+    {CAT_Mc, 0x0D46, 2},
+    {CAT_Mc, 0x0D4A, 2},
+    {CAT_Mn, 0x0D4D, 0},
+    {CAT_Lo, 0x0D4E, 0},
+    {CAT_So, 0x0D4F, 0},
+    {CAT_Lo, 0x0D54, 2},
+    {CAT_Mc, 0x0D57, 0},
+    {CAT_No, 0x0D58, 6},
+    {CAT_Lo, 0x0D5F, 2},
+    {CAT_Mn, 0x0D62, 1},
+    {CAT_Nd, 0x0D66, 9},
+    {CAT_No, 0x0D70, 8},
+    {CAT_So, 0x0D79, 0},
+    {CAT_Lo, 0x0D7A, 5},
+    {CAT_Mc, 0x0D82, 1},
+    {CAT_Lo, 0x0D85, 17},
+    {CAT_Lo, 0x0D9A, 23},
+    {CAT_Lo, 0x0DB3, 8},
+    {CAT_Lo, 0x0DBD, 0},
+    {CAT_Lo, 0x0DC0, 6},
+    {CAT_Mn, 0x0DCA, 0},
+    {CAT_Mc, 0x0DCF, 2},
+    {CAT_Mn, 0x0DD2, 2},
+    {CAT_Mn, 0x0DD6, 0},
+    {CAT_Mc, 0x0DD8, 7},
+    {CAT_Nd, 0x0DE6, 9},
+    {CAT_Mc, 0x0DF2, 1},
+    {CAT_Po, 0x0DF4, 0},
+    {CAT_Lo, 0x0E01, 47},
+    {CAT_Mn, 0x0E31, 0},
+    {CAT_Lo, 0x0E32, 1},
+    {CAT_Mn, 0x0E34, 6},
+    {CAT_Sc, 0x0E3F, 0},
+    {CAT_Lo, 0x0E40, 5},
+    {CAT_Lm, 0x0E46, 0},
+    {CAT_Mn, 0x0E47, 7},
+    {CAT_Po, 0x0E4F, 0},
+    {CAT_Nd, 0x0E50, 9},
+    {CAT_Po, 0x0E5A, 1},
+    {CAT_Lo, 0x0E81, 1},
+    {CAT_Lo, 0x0E84, 0},
+    {CAT_Lo, 0x0E87, 1},
+    {CAT_Lo, 0x0E8A, 0},
+    {CAT_Lo, 0x0E8D, 0},
+    {CAT_Lo, 0x0E94, 3},
+    {CAT_Lo, 0x0E99, 6},
+    {CAT_Lo, 0x0EA1, 2},
+    {CAT_Lo, 0x0EA5, 0},
+    {CAT_Lo, 0x0EA7, 0},
+    {CAT_Lo, 0x0EAA, 1},
+    {CAT_Lo, 0x0EAD, 3},
+    {CAT_Mn, 0x0EB1, 0},
+    {CAT_Lo, 0x0EB2, 1},
+    {CAT_Mn, 0x0EB4, 5},
+    {CAT_Mn, 0x0EBB, 1},
+    {CAT_Lo, 0x0EBD, 0},
+    {CAT_Lo, 0x0EC0, 4},
+    {CAT_Lm, 0x0EC6, 0},
+    {CAT_Mn, 0x0EC8, 5},
+    {CAT_Nd, 0x0ED0, 9},
+    {CAT_Lo, 0x0EDC, 3},
+    {CAT_Lo, 0x0F00, 0},
+    {CAT_So, 0x0F01, 2},
+    {CAT_Po, 0x0F04, 14},
+    {CAT_So, 0x0F13, 0},
+    {CAT_Po, 0x0F14, 0},
+    {CAT_So, 0x0F15, 2},
+    {CAT_Mn, 0x0F18, 1},
+    {CAT_So, 0x0F1A, 5},
+    {CAT_Nd, 0x0F20, 9},
+    {CAT_No, 0x0F2A, 9},
+    {CAT_So, 0x0F34, 0},
+    {CAT_Mn, 0x0F35, 0},
+    {CAT_So, 0x0F36, 0},
+    {CAT_Mn, 0x0F37, 0},
+    {CAT_So, 0x0F38, 0},
+    {CAT_Mn, 0x0F39, 0},
+    {CAT_Ps, 0x0F3A, 0},
+    {CAT_Pe, 0x0F3B, 0},
+    {CAT_Ps, 0x0F3C, 0},
+    {CAT_Pe, 0x0F3D, 0},
+    {CAT_Mc, 0x0F3E, 1},
+    {CAT_Lo, 0x0F40, 7},
+    {CAT_Lo, 0x0F49, 35},
+    {CAT_Mn, 0x0F71, 13},
+    {CAT_Mc, 0x0F7F, 0},
+    {CAT_Mn, 0x0F80, 4},
+    {CAT_Po, 0x0F85, 0},
+    {CAT_Mn, 0x0F86, 1},
+    {CAT_Lo, 0x0F88, 4},
+    {CAT_Mn, 0x0F8D, 10},
+    {CAT_Mn, 0x0F99, 35},
+    {CAT_So, 0x0FBE, 7},
+    {CAT_Mn, 0x0FC6, 0},
+    {CAT_So, 0x0FC7, 5},
+    {CAT_So, 0x0FCE, 1},
+    {CAT_Po, 0x0FD0, 4},
+    {CAT_So, 0x0FD5, 3},
+    {CAT_Po, 0x0FD9, 1},
+    {CAT_Lo, 0x1000, 42},
+    {CAT_Mc, 0x102B, 1},
+    {CAT_Mn, 0x102D, 3},
+    {CAT_Mc, 0x1031, 0},
+    {CAT_Mn, 0x1032, 5},
+    {CAT_Mc, 0x1038, 0},
+    {CAT_Mn, 0x1039, 1},
+    {CAT_Mc, 0x103B, 1},
+    {CAT_Mn, 0x103D, 1},
+    {CAT_Lo, 0x103F, 0},
+    {CAT_Nd, 0x1040, 9},
+    {CAT_Po, 0x104A, 5},
+    {CAT_Lo, 0x1050, 5},
+    {CAT_Mc, 0x1056, 1},
+    {CAT_Mn, 0x1058, 1},
+    {CAT_Lo, 0x105A, 3},
+    {CAT_Mn, 0x105E, 2},
+    {CAT_Lo, 0x1061, 0},
+    {CAT_Mc, 0x1062, 2},
+    {CAT_Lo, 0x1065, 1},
+    {CAT_Mc, 0x1067, 6},
+    {CAT_Lo, 0x106E, 2},
+    {CAT_Mn, 0x1071, 3},
+    {CAT_Lo, 0x1075, 12},
+    {CAT_Mn, 0x1082, 0},
+    {CAT_Mc, 0x1083, 1},
+    {CAT_Mn, 0x1085, 1},
+    {CAT_Mc, 0x1087, 5},
+    {CAT_Mn, 0x108D, 0},
+    {CAT_Lo, 0x108E, 0},
+    {CAT_Mc, 0x108F, 0},
+    {CAT_Nd, 0x1090, 9},
+    {CAT_Mc, 0x109A, 2},
+    {CAT_Mn, 0x109D, 0},
+    {CAT_So, 0x109E, 1},
+    {CAT_LC, 0x10A0, 37},
+    {CAT_LC, 0x10C7, 0},
+    {CAT_LC, 0x10CD, 0},
+    {CAT_Lo, 0x10D0, 42},
+    {CAT_Po, 0x10FB, 0},
+    {CAT_Lm, 0x10FC, 0},
+    {CAT_Lo, 0x10FD, 331},
+    {CAT_Lo, 0x124A, 3},
+    {CAT_Lo, 0x1250, 6},
+    {CAT_Lo, 0x1258, 0},
+    {CAT_Lo, 0x125A, 3},
+    {CAT_Lo, 0x1260, 40},
+    {CAT_Lo, 0x128A, 3},
+    {CAT_Lo, 0x1290, 32},
+    {CAT_Lo, 0x12B2, 3},
+    {CAT_Lo, 0x12B8, 6},
+    {CAT_Lo, 0x12C0, 0},
+    {CAT_Lo, 0x12C2, 3},
+    {CAT_Lo, 0x12C8, 14},
+    {CAT_Lo, 0x12D8, 56},
+    {CAT_Lo, 0x1312, 3},
+    {CAT_Lo, 0x1318, 66},
+    {CAT_Mn, 0x135D, 2},
+    {CAT_Po, 0x1360, 8},
+    {CAT_No, 0x1369, 19},
+    {CAT_Lo, 0x1380, 15},
+    {CAT_So, 0x1390, 9},
+    {CAT_LC, 0x13A0, 85},
+    {CAT_LC, 0x13F8, 5},
+    {CAT_Pd, 0x1400, 0},
+    {CAT_Lo, 0x1401, 619},
+    {CAT_Po, 0x166D, 1},
+    {CAT_Lo, 0x166F, 16},
+    {CAT_Zs, 0x1680, 0},
+    {CAT_Lo, 0x1681, 25},
+    {CAT_Ps, 0x169B, 0},
+    {CAT_Pe, 0x169C, 0},
+    {CAT_Lo, 0x16A0, 74},
+    {CAT_Po, 0x16EB, 2},
+    {CAT_Nl, 0x16EE, 2},
+    {CAT_Lo, 0x16F1, 7},
+    {CAT_Lo, 0x1700, 12},
+    {CAT_Lo, 0x170E, 3},
+    {CAT_Mn, 0x1712, 2},
+    {CAT_Lo, 0x1720, 17},
+    {CAT_Mn, 0x1732, 2},
+    {CAT_Po, 0x1735, 1},
+    {CAT_Lo, 0x1740, 17},
+    {CAT_Mn, 0x1752, 1},
+    {CAT_Lo, 0x1760, 12},
+    {CAT_Lo, 0x176E, 2},
+    {CAT_Mn, 0x1772, 1},
+    {CAT_Lo, 0x1780, 51},
+    {CAT_Mn, 0x17B4, 1},
+    {CAT_Mc, 0x17B6, 0},
+    {CAT_Mn, 0x17B7, 6},
+    {CAT_Mc, 0x17BE, 7},
+    {CAT_Mn, 0x17C6, 0},
+    {CAT_Mc, 0x17C7, 1},
+    {CAT_Mn, 0x17C9, 10},
+    {CAT_Po, 0x17D4, 2},
+    {CAT_Lm, 0x17D7, 0},
+    {CAT_Po, 0x17D8, 2},
+    {CAT_Sc, 0x17DB, 0},
+    {CAT_Lo, 0x17DC, 0},
+    {CAT_Mn, 0x17DD, 0},
+    {CAT_Nd, 0x17E0, 9},
+    {CAT_No, 0x17F0, 9},
+    {CAT_Po, 0x1800, 5},
+    {CAT_Pd, 0x1806, 0},
+    {CAT_Po, 0x1807, 3},
+    {CAT_Mn, 0x180B, 2},
+    {CAT_Cf, 0x180E, 0},
+    {CAT_Nd, 0x1810, 9},
+    {CAT_Lo, 0x1820, 34},
+    {CAT_Lm, 0x1843, 0},
+    {CAT_Lo, 0x1844, 51},
+    {CAT_Lo, 0x1880, 4},
+    {CAT_Mn, 0x1885, 1},
+    {CAT_Lo, 0x1887, 33},
+    {CAT_Mn, 0x18A9, 0},
+    {CAT_Lo, 0x18AA, 0},
+    {CAT_Lo, 0x18B0, 69},
+    {CAT_Lo, 0x1900, 30},
+    {CAT_Mn, 0x1920, 2},
+    {CAT_Mc, 0x1923, 3},
+    {CAT_Mn, 0x1927, 1},
+    {CAT_Mc, 0x1929, 2},
+    {CAT_Mc, 0x1930, 1},
+    {CAT_Mn, 0x1932, 0},
+    {CAT_Mc, 0x1933, 5},
+    {CAT_Mn, 0x1939, 2},
+    {CAT_So, 0x1940, 0},
+    {CAT_Po, 0x1944, 1},
+    {CAT_Nd, 0x1946, 9},
+    {CAT_Lo, 0x1950, 29},
+    {CAT_Lo, 0x1970, 4},
+    {CAT_Lo, 0x1980, 43},
+    {CAT_Lo, 0x19B0, 25},
+    {CAT_Nd, 0x19D0, 9},
+    {CAT_No, 0x19DA, 0},
+    {CAT_So, 0x19DE, 33},
+    {CAT_Lo, 0x1A00, 22},
+    {CAT_Mn, 0x1A17, 1},
+    {CAT_Mc, 0x1A19, 1},
+    {CAT_Mn, 0x1A1B, 0},
+    {CAT_Po, 0x1A1E, 1},
+    {CAT_Lo, 0x1A20, 52},
+    {CAT_Mc, 0x1A55, 0},
+    {CAT_Mn, 0x1A56, 0},
+    {CAT_Mc, 0x1A57, 0},
+    {CAT_Mn, 0x1A58, 6},
+    {CAT_Mn, 0x1A60, 0},
+    {CAT_Mc, 0x1A61, 0},
+    {CAT_Mn, 0x1A62, 0},
+    {CAT_Mc, 0x1A63, 1},
+    {CAT_Mn, 0x1A65, 7},
+    {CAT_Mc, 0x1A6D, 5},
+    {CAT_Mn, 0x1A73, 9},
+    {CAT_Mn, 0x1A7F, 0},
+    {CAT_Nd, 0x1A80, 9},
+    {CAT_Nd, 0x1A90, 9},
+    {CAT_Po, 0x1AA0, 6},
+    {CAT_Lm, 0x1AA7, 0},
+    {CAT_Po, 0x1AA8, 5},
+    {CAT_Mn, 0x1AB0, 13},
+    {CAT_Me, 0x1ABE, 0},
+    {CAT_Mn, 0x1B00, 3},
+    {CAT_Mc, 0x1B04, 0},
+    {CAT_Lo, 0x1B05, 46},
+    {CAT_Mn, 0x1B34, 0},
+    {CAT_Mc, 0x1B35, 0},
+    {CAT_Mn, 0x1B36, 4},
+    {CAT_Mc, 0x1B3B, 0},
+    {CAT_Mn, 0x1B3C, 0},
+    {CAT_Mc, 0x1B3D, 4},
+    {CAT_Mn, 0x1B42, 0},
+    {CAT_Mc, 0x1B43, 1},
+    {CAT_Lo, 0x1B45, 6},
+    {CAT_Nd, 0x1B50, 9},
+    {CAT_Po, 0x1B5A, 6},
+    {CAT_So, 0x1B61, 9},
+    {CAT_Mn, 0x1B6B, 8},
+    {CAT_So, 0x1B74, 8},
+    {CAT_Mn, 0x1B80, 1},
+    {CAT_Mc, 0x1B82, 0},
+    {CAT_Lo, 0x1B83, 29},
+    {CAT_Mc, 0x1BA1, 0},
+    {CAT_Mn, 0x1BA2, 3},
+    {CAT_Mc, 0x1BA6, 1},
+    {CAT_Mn, 0x1BA8, 1},
+    {CAT_Mc, 0x1BAA, 0},
+    {CAT_Mn, 0x1BAB, 2},
+    {CAT_Lo, 0x1BAE, 1},
+    {CAT_Nd, 0x1BB0, 9},
+    {CAT_Lo, 0x1BBA, 43},
+    {CAT_Mn, 0x1BE6, 0},
+    {CAT_Mc, 0x1BE7, 0},
+    {CAT_Mn, 0x1BE8, 1},
+    {CAT_Mc, 0x1BEA, 2},
+    {CAT_Mn, 0x1BED, 0},
+    {CAT_Mc, 0x1BEE, 0},
+    {CAT_Mn, 0x1BEF, 2},
+    {CAT_Mc, 0x1BF2, 1},
+    {CAT_Po, 0x1BFC, 3},
+    {CAT_Lo, 0x1C00, 35},
+    {CAT_Mc, 0x1C24, 7},
+    {CAT_Mn, 0x1C2C, 7},
+    {CAT_Mc, 0x1C34, 1},
+    {CAT_Mn, 0x1C36, 1},
+    {CAT_Po, 0x1C3B, 4},
+    {CAT_Nd, 0x1C40, 9},
+    {CAT_Lo, 0x1C4D, 2},
+    {CAT_Nd, 0x1C50, 9},
+    {CAT_Lo, 0x1C5A, 29},
+    {CAT_Lm, 0x1C78, 5},
+    {CAT_Po, 0x1C7E, 1},
+    {CAT_LC, 0x1C80, 8},
+    {CAT_Po, 0x1CC0, 7},
+    {CAT_Mn, 0x1CD0, 2},
+    {CAT_Po, 0x1CD3, 0},
+    {CAT_Mn, 0x1CD4, 12},
+    {CAT_Mc, 0x1CE1, 0},
+    {CAT_Mn, 0x1CE2, 6},
+    {CAT_Lo, 0x1CE9, 3},
+    {CAT_Mn, 0x1CED, 0},
+    {CAT_Lo, 0x1CEE, 3},
+    {CAT_Mc, 0x1CF2, 1},
+    {CAT_Mn, 0x1CF4, 0},
+    {CAT_Lo, 0x1CF5, 1},
+    {CAT_Mc, 0x1CF7, 0},
+    {CAT_Mn, 0x1CF8, 1},
+    {CAT_Ll, 0x1D00, 43},
+    {CAT_Lm, 0x1D2C, 62},
+    {CAT_Ll, 0x1D6B, 12},
+    {CAT_Lm, 0x1D78, 0},
+    {CAT_LC, 0x1D79, 0},
+    {CAT_Ll, 0x1D7A, 2},
+    {CAT_LC, 0x1D7D, 0},
+    {CAT_Ll, 0x1D7E, 28},
+    {CAT_Lm, 0x1D9B, 36},
+    {CAT_Mn, 0x1DC0, 57},
+    {CAT_Mn, 0x1DFB, 4},
+    {CAT_LC, 0x1E00, 149},
+    {CAT_Ll, 0x1E96, 4},
+    {CAT_LC, 0x1E9B, 0},
+    {CAT_Ll, 0x1E9C, 1},
+    {CAT_LC, 0x1E9E, 0},
+    {CAT_Ll, 0x1E9F, 0},
+    {CAT_LC, 0x1EA0, 117},
+    {CAT_LC, 0x1F18, 5},
+    {CAT_LC, 0x1F20, 37},
+    {CAT_LC, 0x1F48, 5},
+    {CAT_Ll, 0x1F50, 0},
+    {CAT_LC, 0x1F51, 0},
+    {CAT_Ll, 0x1F52, 0},
+    {CAT_LC, 0x1F53, 0},
+    {CAT_Ll, 0x1F54, 0},
+    {CAT_LC, 0x1F55, 0},
+    {CAT_Ll, 0x1F56, 0},
+    {CAT_LC, 0x1F57, 0},
+    {CAT_LC, 0x1F59, 0},
+    {CAT_LC, 0x1F5B, 0},
+    {CAT_LC, 0x1F5D, 0},
+    {CAT_LC, 0x1F5F, 30},
+    {CAT_LC, 0x1F80, 7},
+    {CAT_Lt, 0x1F88, 7},
+    {CAT_LC, 0x1F90, 7},
+    {CAT_Lt, 0x1F98, 7},
+    {CAT_LC, 0x1FA0, 7},
+    {CAT_Lt, 0x1FA8, 7},
+    {CAT_LC, 0x1FB0, 1},
+    {CAT_Ll, 0x1FB2, 0},
+    {CAT_LC, 0x1FB3, 0},
+    {CAT_Ll, 0x1FB4, 0},
+    {CAT_Ll, 0x1FB6, 1},
+    {CAT_LC, 0x1FB8, 3},
+    {CAT_Lt, 0x1FBC, 0},
+    {CAT_Sk, 0x1FBD, 0},
+    {CAT_LC, 0x1FBE, 0},
+    {CAT_Sk, 0x1FBF, 2},
+    {CAT_Ll, 0x1FC2, 0},
+    {CAT_LC, 0x1FC3, 0},
+    {CAT_Ll, 0x1FC4, 0},
+    {CAT_Ll, 0x1FC6, 1},
+    {CAT_LC, 0x1FC8, 3},
+    {CAT_Lt, 0x1FCC, 0},
+    {CAT_Sk, 0x1FCD, 2},
+    {CAT_LC, 0x1FD0, 1},
+    {CAT_Ll, 0x1FD2, 1},
+    {CAT_Ll, 0x1FD6, 1},
+    {CAT_LC, 0x1FD8, 3},
+    {CAT_Sk, 0x1FDD, 2},
+    {CAT_LC, 0x1FE0, 1},
+    {CAT_Ll, 0x1FE2, 2},
+    {CAT_LC, 0x1FE5, 0},
+    {CAT_Ll, 0x1FE6, 1},
+    {CAT_LC, 0x1FE8, 4},
+    {CAT_Sk, 0x1FED, 2},
+    {CAT_Ll, 0x1FF2, 0},
+    {CAT_LC, 0x1FF3, 0},
+    {CAT_Ll, 0x1FF4, 0},
+    {CAT_Ll, 0x1FF6, 1},
+    {CAT_LC, 0x1FF8, 3},
+    {CAT_Lt, 0x1FFC, 0},
+    {CAT_Sk, 0x1FFD, 1},
+    {CAT_Zs, 0x2000, 10},
+    {CAT_Cf, 0x200B, 4},
+    {CAT_Pd, 0x2010, 5},
+    {CAT_Po, 0x2016, 1},
+    {CAT_Pi, 0x2018, 0},
+    {CAT_Pf, 0x2019, 0},
+    {CAT_Ps, 0x201A, 0},
+    {CAT_Pi, 0x201B, 1},
+    {CAT_Pf, 0x201D, 0},
+    {CAT_Ps, 0x201E, 0},
+    {CAT_Pi, 0x201F, 0},
+    {CAT_Po, 0x2020, 7},
+    {CAT_Zl, 0x2028, 0},
+    {CAT_Zp, 0x2029, 0},
+    {CAT_Cf, 0x202A, 4},
+    {CAT_Zs, 0x202F, 0},
+    {CAT_Po, 0x2030, 8},
+    {CAT_Pi, 0x2039, 0},
+    {CAT_Pf, 0x203A, 0},
+    {CAT_Po, 0x203B, 3},
+    {CAT_Pc, 0x203F, 1},
+    {CAT_Po, 0x2041, 2},
+    {CAT_Sm, 0x2044, 0},
+    {CAT_Ps, 0x2045, 0},
+    {CAT_Pe, 0x2046, 0},
+    {CAT_Po, 0x2047, 10},
+    {CAT_Sm, 0x2052, 0},
+    {CAT_Po, 0x2053, 0},
+    {CAT_Pc, 0x2054, 0},
+    {CAT_Po, 0x2055, 9},
+    {CAT_Zs, 0x205F, 0},
+    {CAT_Cf, 0x2060, 4},
+    {CAT_Cf, 0x2066, 9},
+    {CAT_No, 0x2070, 0},
+    {CAT_Lm, 0x2071, 0},
+    {CAT_No, 0x2074, 5},
+    {CAT_Sm, 0x207A, 2},
+    {CAT_Ps, 0x207D, 0},
+    {CAT_Pe, 0x207E, 0},
+    {CAT_Lm, 0x207F, 0},
+    {CAT_No, 0x2080, 9},
+    {CAT_Sm, 0x208A, 2},
+    {CAT_Ps, 0x208D, 0},
+    {CAT_Pe, 0x208E, 0},
+    {CAT_Lm, 0x2090, 12},
+    {CAT_Sc, 0x20A0, 31},
+    {CAT_Mn, 0x20D0, 12},
+    {CAT_Me, 0x20DD, 3},
+    {CAT_Mn, 0x20E1, 0},
+    {CAT_Me, 0x20E2, 2},
+    {CAT_Mn, 0x20E5, 11},
+    {CAT_So, 0x2100, 1},
+    {CAT_Lu, 0x2102, 0},
+    {CAT_So, 0x2103, 3},
+    {CAT_Lu, 0x2107, 0},
+    {CAT_So, 0x2108, 1},
+    {CAT_Ll, 0x210A, 0},
+    {CAT_Lu, 0x210B, 2},
+    {CAT_Ll, 0x210E, 1},
+    {CAT_Lu, 0x2110, 2},
+    {CAT_Ll, 0x2113, 0},
+    {CAT_So, 0x2114, 0},
+    {CAT_Lu, 0x2115, 0},
+    {CAT_So, 0x2116, 1},
+    {CAT_Sm, 0x2118, 0},
+    {CAT_Lu, 0x2119, 4},
+    {CAT_So, 0x211E, 5},
+    {CAT_Lu, 0x2124, 0},
+    {CAT_So, 0x2125, 0},
+    {CAT_LC, 0x2126, 0},
+    {CAT_So, 0x2127, 0},
+    {CAT_Lu, 0x2128, 0},
+    {CAT_So, 0x2129, 0},
+    {CAT_LC, 0x212A, 1},
+    {CAT_Lu, 0x212C, 1},
+    {CAT_So, 0x212E, 0},
+    {CAT_Ll, 0x212F, 0},
+    {CAT_Lu, 0x2130, 1},
+    {CAT_LC, 0x2132, 0},
+    {CAT_Lu, 0x2133, 0},
+    {CAT_Ll, 0x2134, 0},
+    {CAT_Lo, 0x2135, 3},
+    {CAT_Ll, 0x2139, 0},
+    {CAT_So, 0x213A, 1},
+    {CAT_Ll, 0x213C, 1},
+    {CAT_Lu, 0x213E, 1},
+    {CAT_Sm, 0x2140, 4},
+    {CAT_Lu, 0x2145, 0},
+    {CAT_Ll, 0x2146, 3},
+    {CAT_So, 0x214A, 0},
+    {CAT_Sm, 0x214B, 0},
+    {CAT_So, 0x214C, 1},
+    {CAT_LC, 0x214E, 0},
+    {CAT_So, 0x214F, 0},
+    {CAT_No, 0x2150, 15},
+    {CAT_Nl, 0x2160, 34},
+    {CAT_LC, 0x2183, 1},
+    {CAT_Nl, 0x2185, 3},
+    {CAT_No, 0x2189, 0},
+    {CAT_So, 0x218A, 1},
+    {CAT_Sm, 0x2190, 4},
+    {CAT_So, 0x2195, 4},
+    {CAT_Sm, 0x219A, 1},
+    {CAT_So, 0x219C, 3},
+    {CAT_Sm, 0x21A0, 0},
+    {CAT_So, 0x21A1, 1},
+    {CAT_Sm, 0x21A3, 0},
+    {CAT_So, 0x21A4, 1},
+    {CAT_Sm, 0x21A6, 0},
+    {CAT_So, 0x21A7, 6},
+    {CAT_Sm, 0x21AE, 0},
+    {CAT_So, 0x21AF, 30},
+    {CAT_Sm, 0x21CE, 1},
+    {CAT_So, 0x21D0, 1},
+    {CAT_Sm, 0x21D2, 0},
+    {CAT_So, 0x21D3, 0},
+    {CAT_Sm, 0x21D4, 0},
+    {CAT_So, 0x21D5, 30},
+    {CAT_Sm, 0x21F4, 267},
+    {CAT_So, 0x2300, 7},
+    {CAT_Ps, 0x2308, 0},
+    {CAT_Pe, 0x2309, 0},
+    {CAT_Ps, 0x230A, 0},
+    {CAT_Pe, 0x230B, 0},
+    {CAT_So, 0x230C, 19},
+    {CAT_Sm, 0x2320, 1},
+    {CAT_So, 0x2322, 6},
+    {CAT_Ps, 0x2329, 0},
+    {CAT_Pe, 0x232A, 0},
+    {CAT_So, 0x232B, 80},
+    {CAT_Sm, 0x237C, 0},
+    {CAT_So, 0x237D, 29},
+    {CAT_Sm, 0x239B, 24},
+    {CAT_So, 0x23B4, 39},
+    {CAT_Sm, 0x23DC, 5},
+    {CAT_So, 0x23E2, 68},
+    {CAT_So, 0x2440, 10},
+    {CAT_No, 0x2460, 59},
+    {CAT_So, 0x249C, 77},
+    {CAT_No, 0x24EA, 21},
+    {CAT_So, 0x2500, 182},
+    {CAT_Sm, 0x25B7, 0},
+    {CAT_So, 0x25B8, 8},
+    {CAT_Sm, 0x25C1, 0},
+    {CAT_So, 0x25C2, 53},
+    {CAT_Sm, 0x25F8, 7},
+    {CAT_So, 0x2600, 110},
+    {CAT_Sm, 0x266F, 0},
+    {CAT_So, 0x2670, 247},
+    {CAT_Ps, 0x2768, 0},
+    {CAT_Pe, 0x2769, 0},
+    {CAT_Ps, 0x276A, 0},
+    {CAT_Pe, 0x276B, 0},
+    {CAT_Ps, 0x276C, 0},
+    {CAT_Pe, 0x276D, 0},
+    {CAT_Ps, 0x276E, 0},
+    {CAT_Pe, 0x276F, 0},
+    {CAT_Ps, 0x2770, 0},
+    {CAT_Pe, 0x2771, 0},
+    {CAT_Ps, 0x2772, 0},
+    {CAT_Pe, 0x2773, 0},
+    {CAT_Ps, 0x2774, 0},
+    {CAT_Pe, 0x2775, 0},
+    {CAT_No, 0x2776, 29},
+    {CAT_So, 0x2794, 43},
+    {CAT_Sm, 0x27C0, 4},
+    {CAT_Ps, 0x27C5, 0},
+    {CAT_Pe, 0x27C6, 0},
+    {CAT_Sm, 0x27C7, 30},
+    {CAT_Ps, 0x27E6, 0},
+    {CAT_Pe, 0x27E7, 0},
+    {CAT_Ps, 0x27E8, 0},
+    {CAT_Pe, 0x27E9, 0},
+    {CAT_Ps, 0x27EA, 0},
+    {CAT_Pe, 0x27EB, 0},
+    {CAT_Ps, 0x27EC, 0},
+    {CAT_Pe, 0x27ED, 0},
+    {CAT_Ps, 0x27EE, 0},
+    {CAT_Pe, 0x27EF, 0},
+    {CAT_Sm, 0x27F0, 15},
+    {CAT_So, 0x2800, 255},
+    {CAT_Sm, 0x2900, 130},
+    {CAT_Ps, 0x2983, 0},
+    {CAT_Pe, 0x2984, 0},
+    {CAT_Ps, 0x2985, 0},
+    {CAT_Pe, 0x2986, 0},
+    {CAT_Ps, 0x2987, 0},
+    {CAT_Pe, 0x2988, 0},
+    {CAT_Ps, 0x2989, 0},
+    {CAT_Pe, 0x298A, 0},
+    {CAT_Ps, 0x298B, 0},
+    {CAT_Pe, 0x298C, 0},
+    {CAT_Ps, 0x298D, 0},
+    {CAT_Pe, 0x298E, 0},
+    {CAT_Ps, 0x298F, 0},
+    {CAT_Pe, 0x2990, 0},
+    {CAT_Ps, 0x2991, 0},
+    {CAT_Pe, 0x2992, 0},
+    {CAT_Ps, 0x2993, 0},
+    {CAT_Pe, 0x2994, 0},
+    {CAT_Ps, 0x2995, 0},
+    {CAT_Pe, 0x2996, 0},
+    {CAT_Ps, 0x2997, 0},
+    {CAT_Pe, 0x2998, 0},
+    {CAT_Sm, 0x2999, 62},
+    {CAT_Ps, 0x29D8, 0},
+    {CAT_Pe, 0x29D9, 0},
+    {CAT_Ps, 0x29DA, 0},
+    {CAT_Pe, 0x29DB, 0},
+    {CAT_Sm, 0x29DC, 31},
+    {CAT_Ps, 0x29FC, 0},
+    {CAT_Pe, 0x29FD, 0},
+    {CAT_Sm, 0x29FE, 257},
+    {CAT_So, 0x2B00, 47},
+    {CAT_Sm, 0x2B30, 20},
+    {CAT_So, 0x2B45, 1},
+    {CAT_Sm, 0x2B47, 5},
+    {CAT_So, 0x2B4D, 38},
+    {CAT_So, 0x2B76, 31},
+    {CAT_So, 0x2B98, 33},
+    {CAT_So, 0x2BBD, 11},
+    {CAT_So, 0x2BCA, 8},
+    {CAT_So, 0x2BEC, 3},
+    {CAT_LC, 0x2C00, 46},
+    {CAT_LC, 0x2C30, 46},
+    {CAT_LC, 0x2C60, 16},
+    {CAT_Ll, 0x2C71, 0},
+    {CAT_LC, 0x2C72, 1},
+    {CAT_Ll, 0x2C74, 0},
+    {CAT_LC, 0x2C75, 1},
+    {CAT_Ll, 0x2C77, 4},
+    {CAT_Lm, 0x2C7C, 1},
+    {CAT_LC, 0x2C7E, 101},
+    {CAT_Ll, 0x2CE4, 0},
+    {CAT_So, 0x2CE5, 5},
+    {CAT_LC, 0x2CEB, 3},
+    {CAT_Mn, 0x2CEF, 2},
+    {CAT_LC, 0x2CF2, 1},
+    {CAT_Po, 0x2CF9, 3},
+    {CAT_No, 0x2CFD, 0},
+    {CAT_Po, 0x2CFE, 1},
+    {CAT_LC, 0x2D00, 37},
+    {CAT_LC, 0x2D27, 0},
+    {CAT_LC, 0x2D2D, 0},
+    {CAT_Lo, 0x2D30, 55},
+    {CAT_Lm, 0x2D6F, 0},
+    {CAT_Po, 0x2D70, 0},
+    {CAT_Mn, 0x2D7F, 0},
+    {CAT_Lo, 0x2D80, 22},
+    {CAT_Lo, 0x2DA0, 6},
+    {CAT_Lo, 0x2DA8, 6},
+    {CAT_Lo, 0x2DB0, 6},
+    {CAT_Lo, 0x2DB8, 6},
+    {CAT_Lo, 0x2DC0, 6},
+    {CAT_Lo, 0x2DC8, 6},
+    {CAT_Lo, 0x2DD0, 6},
+    {CAT_Lo, 0x2DD8, 6},
+    {CAT_Mn, 0x2DE0, 31},
+    {CAT_Po, 0x2E00, 1},
+    {CAT_Pi, 0x2E02, 0},
+    {CAT_Pf, 0x2E03, 0},
+    {CAT_Pi, 0x2E04, 0},
+    {CAT_Pf, 0x2E05, 0},
+    {CAT_Po, 0x2E06, 2},
+    {CAT_Pi, 0x2E09, 0},
+    {CAT_Pf, 0x2E0A, 0},
+    {CAT_Po, 0x2E0B, 0},
+    {CAT_Pi, 0x2E0C, 0},
+    {CAT_Pf, 0x2E0D, 0},
+    {CAT_Po, 0x2E0E, 8},
+    {CAT_Pd, 0x2E17, 0},
+    {CAT_Po, 0x2E18, 1},
+    {CAT_Pd, 0x2E1A, 0},
+    {CAT_Po, 0x2E1B, 0},
+    {CAT_Pi, 0x2E1C, 0},
+    {CAT_Pf, 0x2E1D, 0},
+    {CAT_Po, 0x2E1E, 1},
+    {CAT_Pi, 0x2E20, 0},
+    {CAT_Pf, 0x2E21, 0},
+    {CAT_Ps, 0x2E22, 0},
+    {CAT_Pe, 0x2E23, 0},
+    {CAT_Ps, 0x2E24, 0},
+    {CAT_Pe, 0x2E25, 0},
+    {CAT_Ps, 0x2E26, 0},
+    {CAT_Pe, 0x2E27, 0},
+    {CAT_Ps, 0x2E28, 0},
+    {CAT_Pe, 0x2E29, 0},
+    {CAT_Po, 0x2E2A, 4},
+    {CAT_Lm, 0x2E2F, 0},
+    {CAT_Po, 0x2E30, 9},
+    {CAT_Pd, 0x2E3A, 1},
+    {CAT_Po, 0x2E3C, 3},
+    {CAT_Pd, 0x2E40, 0},
+    {CAT_Po, 0x2E41, 0},
+    {CAT_Ps, 0x2E42, 0},
+    {CAT_Po, 0x2E43, 6},
+    {CAT_So, 0x2E80, 25},
+    {CAT_So, 0x2E9B, 88},
+    {CAT_So, 0x2F00, 213},
+    {CAT_So, 0x2FF0, 11},
+    {CAT_Zs, 0x3000, 0},
+    {CAT_Po, 0x3001, 2},
+    {CAT_So, 0x3004, 0},
+    {CAT_Lm, 0x3005, 0},
+    {CAT_Lo, 0x3006, 0},
+    {CAT_Nl, 0x3007, 0},
+    {CAT_Ps, 0x3008, 0},
+    {CAT_Pe, 0x3009, 0},
+    {CAT_Ps, 0x300A, 0},
+    {CAT_Pe, 0x300B, 0},
+    {CAT_Ps, 0x300C, 0},
+    {CAT_Pe, 0x300D, 0},
+    {CAT_Ps, 0x300E, 0},
+    {CAT_Pe, 0x300F, 0},
+    {CAT_Ps, 0x3010, 0},
+    {CAT_Pe, 0x3011, 0},
+    {CAT_So, 0x3012, 1},
+    {CAT_Ps, 0x3014, 0},
+    {CAT_Pe, 0x3015, 0},
+    {CAT_Ps, 0x3016, 0},
+    {CAT_Pe, 0x3017, 0},
+    {CAT_Ps, 0x3018, 0},
+    {CAT_Pe, 0x3019, 0},
+    {CAT_Ps, 0x301A, 0},
+    {CAT_Pe, 0x301B, 0},
+    {CAT_Pd, 0x301C, 0},
+    {CAT_Ps, 0x301D, 0},
+    {CAT_Pe, 0x301E, 1},
+    {CAT_So, 0x3020, 0},
+    {CAT_Nl, 0x3021, 8},
+    {CAT_Mn, 0x302A, 3},
+    {CAT_Mc, 0x302E, 1},
+    {CAT_Pd, 0x3030, 0},
+    {CAT_Lm, 0x3031, 4},
+    {CAT_So, 0x3036, 1},
+    {CAT_Nl, 0x3038, 2},
+    {CAT_Lm, 0x303B, 0},
+    {CAT_Lo, 0x303C, 0},
+    {CAT_Po, 0x303D, 0},
+    {CAT_So, 0x303E, 1},
+    {CAT_Lo, 0x3041, 85},
+    {CAT_Mn, 0x3099, 1},
+    {CAT_Sk, 0x309B, 1},
+    {CAT_Lm, 0x309D, 1},
+    {CAT_Lo, 0x309F, 0},
+    {CAT_Pd, 0x30A0, 0},
+    {CAT_Lo, 0x30A1, 89},
+    {CAT_Po, 0x30FB, 0},
+    {CAT_Lm, 0x30FC, 2},
+    {CAT_Lo, 0x30FF, 0},
+    {CAT_Lo, 0x3105, 41},
+    {CAT_Lo, 0x3131, 93},
+    {CAT_So, 0x3190, 1},
+    {CAT_No, 0x3192, 3},
+    {CAT_So, 0x3196, 9},
+    {CAT_Lo, 0x31A0, 26},
+    {CAT_So, 0x31C0, 35},
+    {CAT_Lo, 0x31F0, 15},
+    {CAT_So, 0x3200, 30},
+    {CAT_No, 0x3220, 9},
+    {CAT_So, 0x322A, 29},
+    {CAT_No, 0x3248, 7},
+    {CAT_So, 0x3250, 0},
+    {CAT_No, 0x3251, 14},
+    {CAT_So, 0x3260, 31},
+    {CAT_No, 0x3280, 9},
+    {CAT_So, 0x328A, 38},
+    {CAT_No, 0x32B1, 14},
+    {CAT_So, 0x32C0, 62},
+    {CAT_So, 0x3300, 255},
+    {CAT_Lo, 0x3400, 0},
+    {CAT_Lo, 0x4DB5, 0},
+    {CAT_So, 0x4DC0, 63},
+    {CAT_Lo, 0x4E00, 0},
+    {CAT_Lo, 0x9FEA, 0},
+    {CAT_Lo, 0xA000, 20},
+    {CAT_Lm, 0xA015, 0},
+    {CAT_Lo, 0xA016, 1142},
+    {CAT_So, 0xA490, 54},
+    {CAT_Lo, 0xA4D0, 39},
+    {CAT_Lm, 0xA4F8, 5},
+    {CAT_Po, 0xA4FE, 1},
+    {CAT_Lo, 0xA500, 267},
+    {CAT_Lm, 0xA60C, 0},
+    {CAT_Po, 0xA60D, 2},
+    {CAT_Lo, 0xA610, 15},
+    {CAT_Nd, 0xA620, 9},
+    {CAT_Lo, 0xA62A, 1},
+    {CAT_LC, 0xA640, 45},
+    {CAT_Lo, 0xA66E, 0},
+    {CAT_Mn, 0xA66F, 0},
+    {CAT_Me, 0xA670, 2},
+    {CAT_Po, 0xA673, 0},
+    {CAT_Mn, 0xA674, 9},
+    {CAT_Po, 0xA67E, 0},
+    {CAT_Lm, 0xA67F, 0},
+    {CAT_LC, 0xA680, 27},
+    {CAT_Lm, 0xA69C, 1},
+    {CAT_Mn, 0xA69E, 1},
+    {CAT_Lo, 0xA6A0, 69},
+    {CAT_Nl, 0xA6E6, 9},
+    {CAT_Mn, 0xA6F0, 1},
+    {CAT_Po, 0xA6F2, 5},
+    {CAT_Sk, 0xA700, 22},
+    {CAT_Lm, 0xA717, 8},
+    {CAT_Sk, 0xA720, 1},
+    {CAT_LC, 0xA722, 13},
+    {CAT_Ll, 0xA730, 1},
+    {CAT_LC, 0xA732, 61},
+    {CAT_Lm, 0xA770, 0},
+    {CAT_Ll, 0xA771, 7},
+    {CAT_LC, 0xA779, 14},
+    {CAT_Lm, 0xA788, 0},
+    {CAT_Sk, 0xA789, 1},
+    {CAT_LC, 0xA78B, 2},
+    {CAT_Ll, 0xA78E, 0},
+    {CAT_Lo, 0xA78F, 0},
+    {CAT_LC, 0xA790, 3},
+    {CAT_Ll, 0xA794, 1},
+    {CAT_LC, 0xA796, 24},
+    {CAT_LC, 0xA7B0, 7},
+    {CAT_Lo, 0xA7F7, 0},
+    {CAT_Lm, 0xA7F8, 1},
+    {CAT_Ll, 0xA7FA, 0},
+    {CAT_Lo, 0xA7FB, 6},
+    {CAT_Mn, 0xA802, 0},
+    {CAT_Lo, 0xA803, 2},
+    {CAT_Mn, 0xA806, 0},
+    {CAT_Lo, 0xA807, 3},
+    {CAT_Mn, 0xA80B, 0},
+    {CAT_Lo, 0xA80C, 22},
+    {CAT_Mc, 0xA823, 1},
+    {CAT_Mn, 0xA825, 1},
+    {CAT_Mc, 0xA827, 0},
+    {CAT_So, 0xA828, 3},
+    {CAT_No, 0xA830, 5},
+    {CAT_So, 0xA836, 1},
+    {CAT_Sc, 0xA838, 0},
+    {CAT_So, 0xA839, 0},
+    {CAT_Lo, 0xA840, 51},
+    {CAT_Po, 0xA874, 3},
+    {CAT_Mc, 0xA880, 1},
+    {CAT_Lo, 0xA882, 49},
+    {CAT_Mc, 0xA8B4, 15},
+    {CAT_Mn, 0xA8C4, 1},
+    {CAT_Po, 0xA8CE, 1},
+    {CAT_Nd, 0xA8D0, 9},
+    {CAT_Mn, 0xA8E0, 17},
+    {CAT_Lo, 0xA8F2, 5},
+    {CAT_Po, 0xA8F8, 2},
+    {CAT_Lo, 0xA8FB, 0},
+    {CAT_Po, 0xA8FC, 0},
+    {CAT_Lo, 0xA8FD, 0},
+    {CAT_Nd, 0xA900, 9},
+    {CAT_Lo, 0xA90A, 27},
+    {CAT_Mn, 0xA926, 7},
+    {CAT_Po, 0xA92E, 1},
+    {CAT_Lo, 0xA930, 22},
+    {CAT_Mn, 0xA947, 10},
+    {CAT_Mc, 0xA952, 1},
+    {CAT_Po, 0xA95F, 0},
+    {CAT_Lo, 0xA960, 28},
+    {CAT_Mn, 0xA980, 2},
+    {CAT_Mc, 0xA983, 0},
+    {CAT_Lo, 0xA984, 46},
+    {CAT_Mn, 0xA9B3, 0},
+    {CAT_Mc, 0xA9B4, 1},
+    {CAT_Mn, 0xA9B6, 3},
+    {CAT_Mc, 0xA9BA, 1},
+    {CAT_Mn, 0xA9BC, 0},
+    {CAT_Mc, 0xA9BD, 3},
+    {CAT_Po, 0xA9C1, 12},
+    {CAT_Lm, 0xA9CF, 0},
+    {CAT_Nd, 0xA9D0, 9},
+    {CAT_Po, 0xA9DE, 1},
+    {CAT_Lo, 0xA9E0, 4},
+    {CAT_Mn, 0xA9E5, 0},
+    {CAT_Lm, 0xA9E6, 0},
+    {CAT_Lo, 0xA9E7, 8},
+    {CAT_Nd, 0xA9F0, 9},
+    {CAT_Lo, 0xA9FA, 4},
+    {CAT_Lo, 0xAA00, 40},
+    {CAT_Mn, 0xAA29, 5},
+    {CAT_Mc, 0xAA2F, 1},
+    {CAT_Mn, 0xAA31, 1},
+    {CAT_Mc, 0xAA33, 1},
+    {CAT_Mn, 0xAA35, 1},
+    {CAT_Lo, 0xAA40, 2},
+    {CAT_Mn, 0xAA43, 0},
+    {CAT_Lo, 0xAA44, 7},
+    {CAT_Mn, 0xAA4C, 0},
+    {CAT_Mc, 0xAA4D, 0},
+    {CAT_Nd, 0xAA50, 9},
+    {CAT_Po, 0xAA5C, 3},
+    {CAT_Lo, 0xAA60, 15},
+    {CAT_Lm, 0xAA70, 0},
+    {CAT_Lo, 0xAA71, 5},
+    {CAT_So, 0xAA77, 2},
+    {CAT_Lo, 0xAA7A, 0},
+    {CAT_Mc, 0xAA7B, 0},
+    {CAT_Mn, 0xAA7C, 0},
+    {CAT_Mc, 0xAA7D, 0},
+    {CAT_Lo, 0xAA7E, 49},
+    {CAT_Mn, 0xAAB0, 0},
+    {CAT_Lo, 0xAAB1, 0},
+    {CAT_Mn, 0xAAB2, 2},
+    {CAT_Lo, 0xAAB5, 1},
+    {CAT_Mn, 0xAAB7, 1},
+    {CAT_Lo, 0xAAB9, 4},
+    {CAT_Mn, 0xAABE, 1},
+    {CAT_Lo, 0xAAC0, 0},
+    {CAT_Mn, 0xAAC1, 0},
+    {CAT_Lo, 0xAAC2, 0},
+    {CAT_Lo, 0xAADB, 1},
+    {CAT_Lm, 0xAADD, 0},
+    {CAT_Po, 0xAADE, 1},
+    {CAT_Lo, 0xAAE0, 10},
+    {CAT_Mc, 0xAAEB, 0},
+    {CAT_Mn, 0xAAEC, 1},
+    {CAT_Mc, 0xAAEE, 1},
+    {CAT_Po, 0xAAF0, 1},
+    {CAT_Lo, 0xAAF2, 0},
+    {CAT_Lm, 0xAAF3, 1},
+    {CAT_Mc, 0xAAF5, 0},
+    {CAT_Mn, 0xAAF6, 0},
+    {CAT_Lo, 0xAB01, 5},
+    {CAT_Lo, 0xAB09, 5},
+    {CAT_Lo, 0xAB11, 5},
+    {CAT_Lo, 0xAB20, 6},
+    {CAT_Lo, 0xAB28, 6},
+    {CAT_Ll, 0xAB30, 34},
+    {CAT_LC, 0xAB53, 0},
+    {CAT_Ll, 0xAB54, 6},
+    {CAT_Sk, 0xAB5B, 0},
+    {CAT_Lm, 0xAB5C, 3},
+    {CAT_Ll, 0xAB60, 5},
+    {CAT_LC, 0xAB70, 79},
+    {CAT_Lo, 0xABC0, 34},
+    {CAT_Mc, 0xABE3, 1},
+    {CAT_Mn, 0xABE5, 0},
+    {CAT_Mc, 0xABE6, 1},
+    {CAT_Mn, 0xABE8, 0},
+    {CAT_Mc, 0xABE9, 1},
+    {CAT_Po, 0xABEB, 0},
+    {CAT_Mc, 0xABEC, 0},
+    {CAT_Mn, 0xABED, 0},
+    {CAT_Nd, 0xABF0, 9},
+    {CAT_Lo, 0xAC00, 0},
+    {CAT_Lo, 0xD7A3, 0},
+    {CAT_Lo, 0xD7B0, 22},
+    {CAT_Lo, 0xD7CB, 48},
+    {CAT_Cs, 0xD800, 0},
+    {CAT_Cs, 0xDB7F, 1},
+    {CAT_Cs, 0xDBFF, 1},
+    {CAT_Cs, 0xDFFF, 0},
+    {CAT_Lo, 0xF900, 365},
+    {CAT_Lo, 0xFA70, 105},
+    {CAT_Ll, 0xFB00, 6},
+    {CAT_Ll, 0xFB13, 4},
+    {CAT_Lo, 0xFB1D, 0},
+    {CAT_Mn, 0xFB1E, 0},
+    {CAT_Lo, 0xFB1F, 9},
+    {CAT_Sm, 0xFB29, 0},
+    {CAT_Lo, 0xFB2A, 12},
+    {CAT_Lo, 0xFB38, 4},
+    {CAT_Lo, 0xFB3E, 0},
+    {CAT_Lo, 0xFB40, 1},
+    {CAT_Lo, 0xFB43, 1},
+    {CAT_Lo, 0xFB46, 107},
+    {CAT_Sk, 0xFBB2, 15},
+    {CAT_Lo, 0xFBD3, 362},
+    {CAT_Pe, 0xFD3E, 0},
+    {CAT_Ps, 0xFD3F, 0},
+    {CAT_Lo, 0xFD50, 63},
+    {CAT_Lo, 0xFD92, 53},
+    {CAT_Lo, 0xFDF0, 11},
+    {CAT_Sc, 0xFDFC, 0},
+    {CAT_So, 0xFDFD, 0},
+    {CAT_Mn, 0xFE00, 15},
+    {CAT_Po, 0xFE10, 6},
+    {CAT_Ps, 0xFE17, 0},
+    {CAT_Pe, 0xFE18, 0},
+    {CAT_Po, 0xFE19, 0},
+    {CAT_Mn, 0xFE20, 15},
+    {CAT_Po, 0xFE30, 0},
+    {CAT_Pd, 0xFE31, 1},
+    {CAT_Pc, 0xFE33, 1},
+    {CAT_Ps, 0xFE35, 0},
+    {CAT_Pe, 0xFE36, 0},
+    {CAT_Ps, 0xFE37, 0},
+    {CAT_Pe, 0xFE38, 0},
+    {CAT_Ps, 0xFE39, 0},
+    {CAT_Pe, 0xFE3A, 0},
+    {CAT_Ps, 0xFE3B, 0},
+    {CAT_Pe, 0xFE3C, 0},
+    {CAT_Ps, 0xFE3D, 0},
+    {CAT_Pe, 0xFE3E, 0},
+    {CAT_Ps, 0xFE3F, 0},
+    {CAT_Pe, 0xFE40, 0},
+    {CAT_Ps, 0xFE41, 0},
+    {CAT_Pe, 0xFE42, 0},
+    {CAT_Ps, 0xFE43, 0},
+    {CAT_Pe, 0xFE44, 0},
+    {CAT_Po, 0xFE45, 1},
+    {CAT_Ps, 0xFE47, 0},
+    {CAT_Pe, 0xFE48, 0},
+    {CAT_Po, 0xFE49, 3},
+    {CAT_Pc, 0xFE4D, 2},
+    {CAT_Po, 0xFE50, 2},
+    {CAT_Po, 0xFE54, 3},
+    {CAT_Pd, 0xFE58, 0},
+    {CAT_Ps, 0xFE59, 0},
+    {CAT_Pe, 0xFE5A, 0},
+    {CAT_Ps, 0xFE5B, 0},
+    {CAT_Pe, 0xFE5C, 0},
+    {CAT_Ps, 0xFE5D, 0},
+    {CAT_Pe, 0xFE5E, 0},
+    {CAT_Po, 0xFE5F, 2},
+    {CAT_Sm, 0xFE62, 0},
+    {CAT_Pd, 0xFE63, 0},
+    {CAT_Sm, 0xFE64, 2},
+    {CAT_Po, 0xFE68, 0},
+    {CAT_Sc, 0xFE69, 0},
+    {CAT_Po, 0xFE6A, 1},
+    {CAT_Lo, 0xFE70, 4},
+    {CAT_Lo, 0xFE76, 134},
+    {CAT_Cf, 0xFEFF, 0},
+    {CAT_Po, 0xFF01, 2},
+    {CAT_Sc, 0xFF04, 0},
+    {CAT_Po, 0xFF05, 2},
+    {CAT_Ps, 0xFF08, 0},
+    {CAT_Pe, 0xFF09, 0},
+    {CAT_Po, 0xFF0A, 0},
+    {CAT_Sm, 0xFF0B, 0},
+    {CAT_Po, 0xFF0C, 0},
+    {CAT_Pd, 0xFF0D, 0},
+    {CAT_Po, 0xFF0E, 1},
+    {CAT_Nd, 0xFF10, 9},
+    {CAT_Po, 0xFF1A, 1},
+    {CAT_Sm, 0xFF1C, 2},
+    {CAT_Po, 0xFF1F, 1},
+    {CAT_LC, 0xFF21, 25},
+    {CAT_Ps, 0xFF3B, 0},
+    {CAT_Po, 0xFF3C, 0},
+    {CAT_Pe, 0xFF3D, 0},
+    {CAT_Sk, 0xFF3E, 0},
+    {CAT_Pc, 0xFF3F, 0},
+    {CAT_Sk, 0xFF40, 0},
+    {CAT_LC, 0xFF41, 25},
+    {CAT_Ps, 0xFF5B, 0},
+    {CAT_Sm, 0xFF5C, 0},
+    {CAT_Pe, 0xFF5D, 0},
+    {CAT_Sm, 0xFF5E, 0},
+    {CAT_Ps, 0xFF5F, 0},
+    {CAT_Pe, 0xFF60, 0},
+    {CAT_Po, 0xFF61, 0},
+    {CAT_Ps, 0xFF62, 0},
+    {CAT_Pe, 0xFF63, 0},
+    {CAT_Po, 0xFF64, 1},
+    {CAT_Lo, 0xFF66, 9},
+    {CAT_Lm, 0xFF70, 0},
+    {CAT_Lo, 0xFF71, 44},
+    {CAT_Lm, 0xFF9E, 1},
+    {CAT_Lo, 0xFFA0, 30},
+    {CAT_Lo, 0xFFC2, 5},
+    {CAT_Lo, 0xFFCA, 5},
+    {CAT_Lo, 0xFFD2, 5},
+    {CAT_Lo, 0xFFDA, 2},
+    {CAT_Sc, 0xFFE0, 1},
+    {CAT_Sm, 0xFFE2, 0},
+    {CAT_Sk, 0xFFE3, 0},
+    {CAT_So, 0xFFE4, 0},
+    {CAT_Sc, 0xFFE5, 1},
+    {CAT_So, 0xFFE8, 0},
+    {CAT_Sm, 0xFFE9, 3},
+    {CAT_So, 0xFFED, 1},
+    {CAT_Cf, 0xFFF9, 2},
+    {CAT_So, 0xFFFC, 1},
+    {CAT_Lo, 0x10000, 11},
+    {CAT_Lo, 0x1000D, 25},
+    {CAT_Lo, 0x10028, 18},
+    {CAT_Lo, 0x1003C, 1},
+    {CAT_Lo, 0x1003F, 14},
+    {CAT_Lo, 0x10050, 13},
+    {CAT_Lo, 0x10080, 122},
+    {CAT_Po, 0x10100, 2},
+    {CAT_No, 0x10107, 44},
+    {CAT_So, 0x10137, 8},
+    {CAT_Nl, 0x10140, 52},
+    {CAT_No, 0x10175, 3},
+    {CAT_So, 0x10179, 16},
+    {CAT_No, 0x1018A, 1},
+    {CAT_So, 0x1018C, 2},
+    {CAT_So, 0x10190, 11},
+    {CAT_So, 0x101A0, 0},
+    {CAT_So, 0x101D0, 44},
+    {CAT_Mn, 0x101FD, 0},
+    {CAT_Lo, 0x10280, 28},
+    {CAT_Lo, 0x102A0, 48},
+    {CAT_Mn, 0x102E0, 0},
+    {CAT_No, 0x102E1, 26},
+    {CAT_Lo, 0x10300, 31},
+    {CAT_No, 0x10320, 3},
+    {CAT_Lo, 0x1032D, 19},
+    {CAT_Nl, 0x10341, 0},
+    {CAT_Lo, 0x10342, 7},
+    {CAT_Nl, 0x1034A, 0},
+    {CAT_Lo, 0x10350, 37},
+    {CAT_Mn, 0x10376, 4},
+    {CAT_Lo, 0x10380, 29},
+    {CAT_Po, 0x1039F, 0},
+    {CAT_Lo, 0x103A0, 35},
+    {CAT_Lo, 0x103C8, 7},
+    {CAT_Po, 0x103D0, 0},
+    {CAT_Nl, 0x103D1, 4},
+    {CAT_LC, 0x10400, 79},
+    {CAT_Lo, 0x10450, 77},
+    {CAT_Nd, 0x104A0, 9},
+    {CAT_LC, 0x104B0, 35},
+    {CAT_LC, 0x104D8, 35},
+    {CAT_Lo, 0x10500, 39},
+    {CAT_Lo, 0x10530, 51},
+    {CAT_Po, 0x1056F, 0},
+    {CAT_Lo, 0x10600, 310},
+    {CAT_Lo, 0x10740, 21},
+    {CAT_Lo, 0x10760, 7},
+    {CAT_Lo, 0x10800, 5},
+    {CAT_Lo, 0x10808, 0},
+    {CAT_Lo, 0x1080A, 43},
+    {CAT_Lo, 0x10837, 1},
+    {CAT_Lo, 0x1083C, 0},
+    {CAT_Lo, 0x1083F, 22},
+    {CAT_Po, 0x10857, 0},
+    {CAT_No, 0x10858, 7},
+    {CAT_Lo, 0x10860, 22},
+    {CAT_So, 0x10877, 1},
+    {CAT_No, 0x10879, 6},
+    {CAT_Lo, 0x10880, 30},
+    {CAT_No, 0x108A7, 8},
+    {CAT_Lo, 0x108E0, 18},
+    {CAT_Lo, 0x108F4, 1},
+    {CAT_No, 0x108FB, 4},
+    {CAT_Lo, 0x10900, 21},
+    {CAT_No, 0x10916, 5},
+    {CAT_Po, 0x1091F, 0},
+    {CAT_Lo, 0x10920, 25},
+    {CAT_Po, 0x1093F, 0},
+    {CAT_Lo, 0x10980, 55},
+    {CAT_No, 0x109BC, 1},
+    {CAT_Lo, 0x109BE, 1},
+    {CAT_No, 0x109C0, 15},
+    {CAT_No, 0x109D2, 45},
+    {CAT_Lo, 0x10A00, 0},
+    {CAT_Mn, 0x10A01, 2},
+    {CAT_Mn, 0x10A05, 1},
+    {CAT_Mn, 0x10A0C, 3},
+    {CAT_Lo, 0x10A10, 3},
+    {CAT_Lo, 0x10A15, 2},
+    {CAT_Lo, 0x10A19, 26},
+    {CAT_Mn, 0x10A38, 2},
+    {CAT_Mn, 0x10A3F, 0},
+    {CAT_No, 0x10A40, 7},
+    {CAT_Po, 0x10A50, 8},
+    {CAT_Lo, 0x10A60, 28},
+    {CAT_No, 0x10A7D, 1},
+    {CAT_Po, 0x10A7F, 0},
+    {CAT_Lo, 0x10A80, 28},
+    {CAT_No, 0x10A9D, 2},
+    {CAT_Lo, 0x10AC0, 7},
+    {CAT_So, 0x10AC8, 0},
+    {CAT_Lo, 0x10AC9, 27},
+    {CAT_Mn, 0x10AE5, 1},
+    {CAT_No, 0x10AEB, 4},
+    {CAT_Po, 0x10AF0, 6},
+    {CAT_Lo, 0x10B00, 53},
+    {CAT_Po, 0x10B39, 6},
+    {CAT_Lo, 0x10B40, 21},
+    {CAT_No, 0x10B58, 7},
+    {CAT_Lo, 0x10B60, 18},
+    {CAT_No, 0x10B78, 7},
+    {CAT_Lo, 0x10B80, 17},
+    {CAT_Po, 0x10B99, 3},
+    {CAT_No, 0x10BA9, 6},
+    {CAT_Lo, 0x10C00, 72},
+    {CAT_LC, 0x10C80, 50},
+    {CAT_LC, 0x10CC0, 50},
+    {CAT_No, 0x10CFA, 5},
+    {CAT_No, 0x10E60, 30},
+    {CAT_Mc, 0x11000, 0},
+    {CAT_Mn, 0x11001, 0},
+    {CAT_Mc, 0x11002, 0},
+    {CAT_Lo, 0x11003, 52},
+    {CAT_Mn, 0x11038, 14},
+    {CAT_Po, 0x11047, 6},
+    {CAT_No, 0x11052, 19},
+    {CAT_Nd, 0x11066, 9},
+    {CAT_Mn, 0x1107F, 2},
+    {CAT_Mc, 0x11082, 0},
+    {CAT_Lo, 0x11083, 44},
+    {CAT_Mc, 0x110B0, 2},
+    {CAT_Mn, 0x110B3, 3},
+    {CAT_Mc, 0x110B7, 1},
+    {CAT_Mn, 0x110B9, 1},
+    {CAT_Po, 0x110BB, 1},
+    {CAT_Cf, 0x110BD, 0},
+    {CAT_Po, 0x110BE, 3},
+    {CAT_Lo, 0x110D0, 24},
+    {CAT_Nd, 0x110F0, 9},
+    {CAT_Mn, 0x11100, 2},
+    {CAT_Lo, 0x11103, 35},
+    {CAT_Mn, 0x11127, 4},
+    {CAT_Mc, 0x1112C, 0},
+    {CAT_Mn, 0x1112D, 7},
+    {CAT_Nd, 0x11136, 9},
+    {CAT_Po, 0x11140, 3},
+    {CAT_Lo, 0x11150, 34},
+    {CAT_Mn, 0x11173, 0},
+    {CAT_Po, 0x11174, 1},
+    {CAT_Lo, 0x11176, 0},
+    {CAT_Mn, 0x11180, 1},
+    {CAT_Mc, 0x11182, 0},
+    {CAT_Lo, 0x11183, 47},
+    {CAT_Mc, 0x111B3, 2},
+    {CAT_Mn, 0x111B6, 8},
+    {CAT_Mc, 0x111BF, 1},
+    {CAT_Lo, 0x111C1, 3},
+    {CAT_Po, 0x111C5, 4},
+    {CAT_Mn, 0x111CA, 2},
+    {CAT_Po, 0x111CD, 0},
+    {CAT_Nd, 0x111D0, 9},
+    {CAT_Lo, 0x111DA, 0},
+    {CAT_Po, 0x111DB, 0},
+    {CAT_Lo, 0x111DC, 0},
+    {CAT_Po, 0x111DD, 2},
+    {CAT_No, 0x111E1, 19},
+    {CAT_Lo, 0x11200, 17},
+    {CAT_Lo, 0x11213, 24},
+    {CAT_Mc, 0x1122C, 2},
+    {CAT_Mn, 0x1122F, 2},
+    {CAT_Mc, 0x11232, 1},
+    {CAT_Mn, 0x11234, 0},
+    {CAT_Mc, 0x11235, 0},
+    {CAT_Mn, 0x11236, 1},
+    {CAT_Po, 0x11238, 5},
+    {CAT_Mn, 0x1123E, 0},
+    {CAT_Lo, 0x11280, 6},
+    {CAT_Lo, 0x11288, 0},
+    {CAT_Lo, 0x1128A, 3},
+    {CAT_Lo, 0x1128F, 14},
+    {CAT_Lo, 0x1129F, 9},
+    {CAT_Po, 0x112A9, 0},
+    {CAT_Lo, 0x112B0, 46},
+    {CAT_Mn, 0x112DF, 0},
+    {CAT_Mc, 0x112E0, 2},
+    {CAT_Mn, 0x112E3, 7},
+    {CAT_Nd, 0x112F0, 9},
+    {CAT_Mn, 0x11300, 1},
+    {CAT_Mc, 0x11302, 1},
+    {CAT_Lo, 0x11305, 7},
+    {CAT_Lo, 0x1130F, 1},
+    {CAT_Lo, 0x11313, 21},
+    {CAT_Lo, 0x1132A, 6},
+    {CAT_Lo, 0x11332, 1},
+    {CAT_Lo, 0x11335, 4},
+    {CAT_Mn, 0x1133C, 0},
+    {CAT_Lo, 0x1133D, 0},
+    {CAT_Mc, 0x1133E, 1},
+    {CAT_Mn, 0x11340, 0},
+    {CAT_Mc, 0x11341, 3},
+    {CAT_Mc, 0x11347, 1},
+    {CAT_Mc, 0x1134B, 2},
+    {CAT_Lo, 0x11350, 0},
+    {CAT_Mc, 0x11357, 0},
+    {CAT_Lo, 0x1135D, 4},
+    {CAT_Mc, 0x11362, 1},
+    {CAT_Mn, 0x11366, 6},
+    {CAT_Mn, 0x11370, 4},
+    {CAT_Lo, 0x11400, 52},
+    {CAT_Mc, 0x11435, 2},
+    {CAT_Mn, 0x11438, 7},
+    {CAT_Mc, 0x11440, 1},
+    {CAT_Mn, 0x11442, 2},
+    {CAT_Mc, 0x11445, 0},
+    {CAT_Mn, 0x11446, 0},
+    {CAT_Lo, 0x11447, 3},
+    {CAT_Po, 0x1144B, 4},
+    {CAT_Nd, 0x11450, 9},
+    {CAT_Po, 0x1145B, 0},
+    {CAT_Po, 0x1145D, 0},
+    {CAT_Lo, 0x11480, 47},
+    {CAT_Mc, 0x114B0, 2},
+    {CAT_Mn, 0x114B3, 5},
+    {CAT_Mc, 0x114B9, 0},
+    {CAT_Mn, 0x114BA, 0},
+    {CAT_Mc, 0x114BB, 3},
+    {CAT_Mn, 0x114BF, 1},
+    {CAT_Mc, 0x114C1, 0},
+    {CAT_Mn, 0x114C2, 1},
+    {CAT_Lo, 0x114C4, 1},
+    {CAT_Po, 0x114C6, 0},
+    {CAT_Lo, 0x114C7, 0},
+    {CAT_Nd, 0x114D0, 9},
+    {CAT_Lo, 0x11580, 46},
+    {CAT_Mc, 0x115AF, 2},
+    {CAT_Mn, 0x115B2, 3},
+    {CAT_Mc, 0x115B8, 3},
+    {CAT_Mn, 0x115BC, 1},
+    {CAT_Mc, 0x115BE, 0},
+    {CAT_Mn, 0x115BF, 1},
+    {CAT_Po, 0x115C1, 22},
+    {CAT_Lo, 0x115D8, 3},
+    {CAT_Mn, 0x115DC, 1},
+    {CAT_Lo, 0x11600, 47},
+    {CAT_Mc, 0x11630, 2},
+    {CAT_Mn, 0x11633, 7},
+    {CAT_Mc, 0x1163B, 1},
+    {CAT_Mn, 0x1163D, 0},
+    {CAT_Mc, 0x1163E, 0},
+    {CAT_Mn, 0x1163F, 1},
+    {CAT_Po, 0x11641, 2},
+    {CAT_Lo, 0x11644, 0},
+    {CAT_Nd, 0x11650, 9},
+    {CAT_Po, 0x11660, 12},
+    {CAT_Lo, 0x11680, 42},
+    {CAT_Mn, 0x116AB, 0},
+    {CAT_Mc, 0x116AC, 0},
+    {CAT_Mn, 0x116AD, 0},
+    {CAT_Mc, 0x116AE, 1},
+    {CAT_Mn, 0x116B0, 5},
+    {CAT_Mc, 0x116B6, 0},
+    {CAT_Mn, 0x116B7, 0},
+    {CAT_Nd, 0x116C0, 9},
+    {CAT_Lo, 0x11700, 25},
+    {CAT_Mn, 0x1171D, 2},
+    {CAT_Mc, 0x11720, 1},
+    {CAT_Mn, 0x11722, 3},
+    {CAT_Mc, 0x11726, 0},
+    {CAT_Mn, 0x11727, 4},
+    {CAT_Nd, 0x11730, 9},
+    {CAT_No, 0x1173A, 1},
+    {CAT_Po, 0x1173C, 2},
+    {CAT_So, 0x1173F, 0},
+    {CAT_LC, 0x118A0, 63},
+    {CAT_Nd, 0x118E0, 9},
+    {CAT_No, 0x118EA, 8},
+    {CAT_Lo, 0x118FF, 0},
+    {CAT_Lo, 0x11A00, 0},
+    {CAT_Mn, 0x11A01, 5},
+    {CAT_Mc, 0x11A07, 1},
+    {CAT_Mn, 0x11A09, 1},
+    {CAT_Lo, 0x11A0B, 39},
+    {CAT_Mn, 0x11A33, 5},
+    {CAT_Mc, 0x11A39, 0},
+    {CAT_Lo, 0x11A3A, 0},
+    {CAT_Mn, 0x11A3B, 3},
+    {CAT_Po, 0x11A3F, 7},
+    {CAT_Mn, 0x11A47, 0},
+    {CAT_Lo, 0x11A50, 0},
+    {CAT_Mn, 0x11A51, 5},
+    {CAT_Mc, 0x11A57, 1},
+    {CAT_Mn, 0x11A59, 2},
+    {CAT_Lo, 0x11A5C, 39},
+    {CAT_Lo, 0x11A86, 3},
+    {CAT_Mn, 0x11A8A, 12},
+    {CAT_Mc, 0x11A97, 0},
+    {CAT_Mn, 0x11A98, 1},
+    {CAT_Po, 0x11A9A, 2},
+    {CAT_Po, 0x11A9E, 4},
+    {CAT_Lo, 0x11AC0, 56},
+    {CAT_Lo, 0x11C00, 8},
+    {CAT_Lo, 0x11C0A, 36},
+    {CAT_Mc, 0x11C2F, 0},
+    {CAT_Mn, 0x11C30, 6},
+    {CAT_Mn, 0x11C38, 5},
+    {CAT_Mc, 0x11C3E, 0},
+    {CAT_Mn, 0x11C3F, 0},
+    {CAT_Lo, 0x11C40, 0},
+    {CAT_Po, 0x11C41, 4},
+    {CAT_Nd, 0x11C50, 9},
+    {CAT_No, 0x11C5A, 18},
+    {CAT_Po, 0x11C70, 1},
+    {CAT_Lo, 0x11C72, 29},
+    {CAT_Mn, 0x11C92, 21},
+    {CAT_Mc, 0x11CA9, 0},
+    {CAT_Mn, 0x11CAA, 6},
+    {CAT_Mc, 0x11CB1, 0},
+    {CAT_Mn, 0x11CB2, 1},
+    {CAT_Mc, 0x11CB4, 0},
+    {CAT_Mn, 0x11CB5, 1},
+    {CAT_Lo, 0x11D00, 6},
+    {CAT_Lo, 0x11D08, 1},
+    {CAT_Lo, 0x11D0B, 37},
+    {CAT_Mn, 0x11D31, 5},
+    {CAT_Mn, 0x11D3A, 0},
+    {CAT_Mn, 0x11D3C, 1},
+    {CAT_Mn, 0x11D3F, 6},
+    {CAT_Lo, 0x11D46, 0},
+    {CAT_Mn, 0x11D47, 0},
+    {CAT_Nd, 0x11D50, 9},
+    {CAT_Lo, 0x12000, 921},
+    {CAT_Nl, 0x12400, 110},
+    {CAT_Po, 0x12470, 4},
+    {CAT_Lo, 0x12480, 195},
+    {CAT_Lo, 0x13000, 1070},
+    {CAT_Lo, 0x14400, 582},
+    {CAT_Lo, 0x16800, 568},
+    {CAT_Lo, 0x16A40, 30},
+    {CAT_Nd, 0x16A60, 9},
+    {CAT_Po, 0x16A6E, 1},
+    {CAT_Lo, 0x16AD0, 29},
+    {CAT_Mn, 0x16AF0, 4},
+    {CAT_Po, 0x16AF5, 0},
+    {CAT_Lo, 0x16B00, 47},
+    {CAT_Mn, 0x16B30, 6},
+    {CAT_Po, 0x16B37, 4},
+    {CAT_So, 0x16B3C, 3},
+    {CAT_Lm, 0x16B40, 3},
+    {CAT_Po, 0x16B44, 0},
+    {CAT_So, 0x16B45, 0},
+    {CAT_Nd, 0x16B50, 9},
+    {CAT_No, 0x16B5B, 6},
+    {CAT_Lo, 0x16B63, 20},
+    {CAT_Lo, 0x16B7D, 18},
+    {CAT_Lo, 0x16F00, 68},
+    {CAT_Lo, 0x16F50, 0},
+    {CAT_Mc, 0x16F51, 45},
+    {CAT_Mn, 0x16F8F, 3},
+    {CAT_Lm, 0x16F93, 12},
+    {CAT_Lm, 0x16FE0, 1},
+    {CAT_Lo, 0x17000, 0},
+    {CAT_Lo, 0x187EC, 0},
+    {CAT_Lo, 0x18800, 754},
+    {CAT_Lo, 0x1B000, 286},
+    {CAT_Lo, 0x1B170, 395},
+    {CAT_Lo, 0x1BC00, 106},
+    {CAT_Lo, 0x1BC70, 12},
+    {CAT_Lo, 0x1BC80, 8},
+    {CAT_Lo, 0x1BC90, 9},
+    {CAT_So, 0x1BC9C, 0},
+    {CAT_Mn, 0x1BC9D, 1},
+    {CAT_Po, 0x1BC9F, 0},
+    {CAT_Cf, 0x1BCA0, 3},
+    {CAT_So, 0x1D000, 245},
+    {CAT_So, 0x1D100, 38},
+    {CAT_So, 0x1D129, 59},
+    {CAT_Mc, 0x1D165, 1},
+    {CAT_Mn, 0x1D167, 2},
+    {CAT_So, 0x1D16A, 2},
+    {CAT_Mc, 0x1D16D, 5},
+    {CAT_Cf, 0x1D173, 7},
+    {CAT_Mn, 0x1D17B, 7},
+    {CAT_So, 0x1D183, 1},
+    {CAT_Mn, 0x1D185, 6},
+    {CAT_So, 0x1D18C, 29},
+    {CAT_Mn, 0x1D1AA, 3},
+    {CAT_So, 0x1D1AE, 58},
+    {CAT_So, 0x1D200, 65},
+    {CAT_Mn, 0x1D242, 2},
+    {CAT_So, 0x1D245, 0},
+    {CAT_So, 0x1D300, 86},
+    {CAT_No, 0x1D360, 17},
+    {CAT_Lu, 0x1D400, 25},
+    {CAT_Ll, 0x1D41A, 25},
+    {CAT_Lu, 0x1D434, 25},
+    {CAT_Ll, 0x1D44E, 6},
+    {CAT_Ll, 0x1D456, 17},
+    {CAT_Lu, 0x1D468, 25},
+    {CAT_Ll, 0x1D482, 25},
+    {CAT_Lu, 0x1D49C, 0},
+    {CAT_Lu, 0x1D49E, 1},
+    {CAT_Lu, 0x1D4A2, 0},
+    {CAT_Lu, 0x1D4A5, 1},
+    {CAT_Lu, 0x1D4A9, 3},
+    {CAT_Lu, 0x1D4AE, 7},
+    {CAT_Ll, 0x1D4B6, 3},
+    {CAT_Ll, 0x1D4BB, 0},
+    {CAT_Ll, 0x1D4BD, 6},
+    {CAT_Ll, 0x1D4C5, 10},
+    {CAT_Lu, 0x1D4D0, 25},
+    {CAT_Ll, 0x1D4EA, 25},
+    {CAT_Lu, 0x1D504, 1},
+    {CAT_Lu, 0x1D507, 3},
+    {CAT_Lu, 0x1D50D, 7},
+    {CAT_Lu, 0x1D516, 6},
+    {CAT_Ll, 0x1D51E, 25},
+    {CAT_Lu, 0x1D538, 1},
+    {CAT_Lu, 0x1D53B, 3},
+    {CAT_Lu, 0x1D540, 4},
+    {CAT_Lu, 0x1D546, 0},
+    {CAT_Lu, 0x1D54A, 6},
+    {CAT_Ll, 0x1D552, 25},
+    {CAT_Lu, 0x1D56C, 25},
+    {CAT_Ll, 0x1D586, 25},
+    {CAT_Lu, 0x1D5A0, 25},
+    {CAT_Ll, 0x1D5BA, 25},
+    {CAT_Lu, 0x1D5D4, 25},
+    {CAT_Ll, 0x1D5EE, 25},
+    {CAT_Lu, 0x1D608, 25},
+    {CAT_Ll, 0x1D622, 25},
+    {CAT_Lu, 0x1D63C, 25},
+    {CAT_Ll, 0x1D656, 25},
+    {CAT_Lu, 0x1D670, 25},
+    {CAT_Ll, 0x1D68A, 27},
+    {CAT_Lu, 0x1D6A8, 24},
+    {CAT_Sm, 0x1D6C1, 0},
+    {CAT_Ll, 0x1D6C2, 24},
+    {CAT_Sm, 0x1D6DB, 0},
+    {CAT_Ll, 0x1D6DC, 5},
+    {CAT_Lu, 0x1D6E2, 24},
+    {CAT_Sm, 0x1D6FB, 0},
+    {CAT_Ll, 0x1D6FC, 24},
+    {CAT_Sm, 0x1D715, 0},
+    {CAT_Ll, 0x1D716, 5},
+    {CAT_Lu, 0x1D71C, 24},
+    {CAT_Sm, 0x1D735, 0},
+    {CAT_Ll, 0x1D736, 24},
+    {CAT_Sm, 0x1D74F, 0},
+    {CAT_Ll, 0x1D750, 5},
+    {CAT_Lu, 0x1D756, 24},
+    {CAT_Sm, 0x1D76F, 0},
+    {CAT_Ll, 0x1D770, 24},
+    {CAT_Sm, 0x1D789, 0},
+    {CAT_Ll, 0x1D78A, 5},
+    {CAT_Lu, 0x1D790, 24},
+    {CAT_Sm, 0x1D7A9, 0},
+    {CAT_Ll, 0x1D7AA, 24},
+    {CAT_Sm, 0x1D7C3, 0},
+    {CAT_Ll, 0x1D7C4, 5},
+    {CAT_Lu, 0x1D7CA, 0},
+    {CAT_Ll, 0x1D7CB, 0},
+    {CAT_Nd, 0x1D7CE, 49},
+    {CAT_So, 0x1D800, 511},
+    {CAT_Mn, 0x1DA00, 54},
+    {CAT_So, 0x1DA37, 3},
+    {CAT_Mn, 0x1DA3B, 49},
+    {CAT_So, 0x1DA6D, 7},
+    {CAT_Mn, 0x1DA75, 0},
+    {CAT_So, 0x1DA76, 13},
+    {CAT_Mn, 0x1DA84, 0},
+    {CAT_So, 0x1DA85, 1},
+    {CAT_Po, 0x1DA87, 4},
+    {CAT_Mn, 0x1DA9B, 4},
+    {CAT_Mn, 0x1DAA1, 14},
+    {CAT_Mn, 0x1E000, 6},
+    {CAT_Mn, 0x1E008, 16},
+    {CAT_Mn, 0x1E01B, 6},
+    {CAT_Mn, 0x1E023, 1},
+    {CAT_Mn, 0x1E026, 4},
+    {CAT_Lo, 0x1E800, 196},
+    {CAT_No, 0x1E8C7, 8},
+    {CAT_Mn, 0x1E8D0, 6},
+    {CAT_LC, 0x1E900, 67},
+    {CAT_Mn, 0x1E944, 6},
+    {CAT_Nd, 0x1E950, 9},
+    {CAT_Po, 0x1E95E, 1},
+    {CAT_Lo, 0x1EE00, 3},
+    {CAT_Lo, 0x1EE05, 26},
+    {CAT_Lo, 0x1EE21, 1},
+    {CAT_Lo, 0x1EE24, 0},
+    {CAT_Lo, 0x1EE27, 0},
+    {CAT_Lo, 0x1EE29, 9},
+    {CAT_Lo, 0x1EE34, 3},
+    {CAT_Lo, 0x1EE39, 0},
+    {CAT_Lo, 0x1EE3B, 0},
+    {CAT_Lo, 0x1EE42, 0},
+    {CAT_Lo, 0x1EE47, 0},
+    {CAT_Lo, 0x1EE49, 0},
+    {CAT_Lo, 0x1EE4B, 0},
+    {CAT_Lo, 0x1EE4D, 2},
+    {CAT_Lo, 0x1EE51, 1},
+    {CAT_Lo, 0x1EE54, 0},
+    {CAT_Lo, 0x1EE57, 0},
+    {CAT_Lo, 0x1EE59, 0},
+    {CAT_Lo, 0x1EE5B, 0},
+    {CAT_Lo, 0x1EE5D, 0},
+    {CAT_Lo, 0x1EE5F, 0},
+    {CAT_Lo, 0x1EE61, 1},
+    {CAT_Lo, 0x1EE64, 0},
+    {CAT_Lo, 0x1EE67, 3},
+    {CAT_Lo, 0x1EE6C, 6},
+    {CAT_Lo, 0x1EE74, 3},
+    {CAT_Lo, 0x1EE79, 3},
+    {CAT_Lo, 0x1EE7E, 0},
+    {CAT_Lo, 0x1EE80, 9},
+    {CAT_Lo, 0x1EE8B, 16},
+    {CAT_Lo, 0x1EEA1, 2},
+    {CAT_Lo, 0x1EEA5, 4},
+    {CAT_Lo, 0x1EEAB, 16},
+    {CAT_Sm, 0x1EEF0, 1},
+    {CAT_So, 0x1F000, 43},
+    {CAT_So, 0x1F030, 99},
+    {CAT_So, 0x1F0A0, 14},
+    {CAT_So, 0x1F0B1, 14},
+    {CAT_So, 0x1F0C1, 14},
+    {CAT_So, 0x1F0D1, 36},
+    {CAT_No, 0x1F100, 12},
+    {CAT_So, 0x1F110, 30},
+    {CAT_So, 0x1F130, 59},
+    {CAT_So, 0x1F170, 60},
+    {CAT_So, 0x1F1E6, 28},
+    {CAT_So, 0x1F210, 43},
+    {CAT_So, 0x1F240, 8},
+    {CAT_So, 0x1F250, 1},
+    {CAT_So, 0x1F260, 5},
+    {CAT_So, 0x1F300, 250},
+    {CAT_Sk, 0x1F3FB, 4},
+    {CAT_So, 0x1F400, 724},
+    {CAT_So, 0x1F6E0, 12},
+    {CAT_So, 0x1F6F0, 8},
+    {CAT_So, 0x1F700, 115},
+    {CAT_So, 0x1F780, 84},
+    {CAT_So, 0x1F800, 11},
+    {CAT_So, 0x1F810, 55},
+    {CAT_So, 0x1F850, 9},
+    {CAT_So, 0x1F860, 39},
+    {CAT_So, 0x1F890, 29},
+    {CAT_So, 0x1F900, 11},
+    {CAT_So, 0x1F910, 46},
+    {CAT_So, 0x1F940, 12},
+    {CAT_So, 0x1F950, 27},
+    {CAT_So, 0x1F980, 23},
+    {CAT_So, 0x1F9C0, 0},
+    {CAT_So, 0x1F9D0, 22},
+    {CAT_Lo, 0x20000, 0},
+    {CAT_Lo, 0x2A6D6, 0},
+    {CAT_Lo, 0x2A700, 0},
+    {CAT_Lo, 0x2B734, 0},
+    {CAT_Lo, 0x2B740, 0},
+    {CAT_Lo, 0x2B81D, 0},
+    {CAT_Lo, 0x2B820, 0},
+    {CAT_Lo, 0x2CEA1, 0},
+    {CAT_Lo, 0x2CEB0, 0},
+    {CAT_Lo, 0x2EBE0, 0},
+    {CAT_Lo, 0x2F800, 541},
+    {CAT_Cf, 0xE0001, 0},
+    {CAT_Cf, 0xE0020, 95},
+    {CAT_Mn, 0xE0100, 239},
Thomas Wolff March 7, 2018, 11:20 p.m. | #2
From 15999d30c011be9041821456d23807249981dd86 Mon Sep 17 00:00:00 2001
From: Thomas Wolff <towo@towo.net>
Date: Sun, 25 Feb 2018 17:11:44 +0100
Subject: [PATCH 3/6] remove hard-coded character data

---
 newlib/libc/ctype/iswalpha.c  | 439 --------------------------------
 newlib/libc/ctype/iswprint.c  | 501 ------------------------------------
 newlib/libc/ctype/towlower.c  | 575 ------------------------------------------
 newlib/libc/ctype/utf8alpha.h | 355 --------------------------
 newlib/libc/ctype/utf8print.h | 389 ----------------------------
 5 files changed, 2259 deletions(-)
 delete mode 100644 newlib/libc/ctype/iswalpha.c
 delete mode 100644 newlib/libc/ctype/iswprint.c
 delete mode 100644 newlib/libc/ctype/towlower.c
 delete mode 100644 newlib/libc/ctype/utf8alpha.h
 delete mode 100644 newlib/libc/ctype/utf8print.h

diff --git a/newlib/libc/ctype/iswalpha.c b/newlib/libc/ctype/iswalpha.c
deleted file mode 100644
index 2906cd1..0000000
--- a/newlib/libc/ctype/iswalpha.c
+++ /dev/null
@@ -1,439 +0,0 @@
-/* Copyright (c) 2002 Red Hat Incorporated.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-
-     Redistributions of source code must retain the above copyright
-     notice, this list of conditions and the following disclaimer.
-
-     Redistributions in binary form must reproduce the above copyright
-     notice, this list of conditions and the following disclaimer in the
-     documentation and/or other materials provided with the distribution.
-
-     The name of Red Hat Incorporated may not be used to endorse
-     or promote products derived from this software without specific
-     prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
-   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/*
-FUNCTION
-	<<iswalpha>>, <<iswalpha_l>>---alphabetic wide character test
-
-INDEX
-	iswalpha
-
-INDEX
-	iswalpha_l
-
-SYNOPSIS
-	#include <wctype.h>
-	int iswalpha(wint_t <[c]>);
-
-	#include <wctype.h>
-	int iswalpha_l(wint_t <[c]>, locale_t <[locale]>);
-
-DESCRIPTION
-<<iswalpha>> is a function which classifies wide-character values that
-are alphabetic.
-
-<<iswalpha_l>> is like <<iswalpha>> but performs the check based on the
-locale specified by the locale object locale.  If <[locale]> is
-LC_GLOBAL_LOCALE or not a valid locale object, the behaviour is undefined.
-
-RETURNS
-<<iswalpha>>, <<iswalpha_l>> return non-zero if <[c]> is an alphabetic
-wide character.
-
-PORTABILITY
-<<iswalpha>> is C99.
-<<iswalpha_l>> is POSIX-1.2008.
-
-No supporting OS subroutines are required.
-*/
-#include <_ansi.h>
-#include <newlib.h>
-#include <wctype.h>
-#include <string.h>
-#include <ctype.h>
-#include "local.h"
-
-#ifdef _MB_CAPABLE
-#include "utf8alpha.h"
-#endif /* _MB_CAPABLE */
-
-int
-iswalpha (wint_t c)
-{
-#ifdef _MB_CAPABLE
-  unsigned const char *table;
-  unsigned char *ptr;
-  unsigned char ctmp;
-  int size;
-  wint_t x;
-
-  c = _jp2uc (c);
-
-  /* Based on and tested against Unicode 5.2
-     See utf8alpha.h for a description how to fetch the data. */
-  x = (c >> 8);
-  /* for some large sections, all characters are alphabetic so handle them here */
-  if ((x >= 0x34 && x <= 0x4c) ||
-      (x >= 0x4e && x <= 0x9e) ||
-      (x >= 0xac && x <= 0xd6) ||
-      (x >= 0x120 && x <= 0x122) ||
-      (x >= 0x130 && x <= 0x133) ||
-      (x >= 0x200 && x <= 0x2a5) ||
-      (x >= 0x2a7 && x <= 0x2b6))
-    return 1;
-  
-  switch (x)
-    {
-    case 0x00:
-      table = u0;
-      size = sizeof(u0);
-      break;
-    case 0x01:
-    case 0x11:
-    case 0x15:
-    case 0x1e:
-    case 0xa0:
-    case 0xa1:
-    case 0xa2:
-    case 0xa3:
-    case 0xa5:
-    case 0xf9:
-    case 0xfc:
-    case 0x2f8:
-    case 0x2f9:
-      return 1;
-    case 0x02:
-      table = u2;
-      size = sizeof(u2);
-      break;
-    case 0x03:
-      table = u3;
-      size = sizeof(u3);
-      break;
-    case 0x04:
-      table = u4;
-      size = sizeof(u4);
-      break;
-    case 0x05:
-      table = u5;
-      size = sizeof(u5);
-      break;
-    case 0x06:
-      table = u6;
-      size = sizeof(u6);
-      break;
-    case 0x07:
-      table = u7;
-      size = sizeof(u7);
-      break;
-    case 0x08:
-      table = u8;
-      size = sizeof(u8);
-      break;
-    case 0x09:
-      table = u9;
-      size = sizeof(u9);
-      break;
-    case 0x0a:
-      table = ua;
-      size = sizeof(ua);
-      break;
-    case 0x0b:
-      table = ub;
-      size = sizeof(ub);
-      break;
-    case 0x0c:
-      table = uc;
-      size = sizeof(uc);
-      break;
-    case 0x0d:
-      table = ud;
-      size = sizeof(ud);
-      break;
-    case 0x0e:
-      table = ue;
-      size = sizeof(ue);
-      break;
-    case 0x0f:
-      table = uf;
-      size = sizeof(uf);
-      break;
-    case 0x10:
-      table = u10;
-      size = sizeof(u10);
-      break;
-    case 0x12:
-      table = u12;
-      size = sizeof(u12);
-      break;
-    case 0x13:
-      table = u13;
-      size = sizeof(u13);
-      break;
-    case 0x14:
-      table = u14;
-      size = sizeof(u14);
-      break;
-    case 0x16:
-      table = u16;
-      size = sizeof(u16);
-      break;
-    case 0x17:
-      table = u17;
-      size = sizeof(u17);
-      break;
-    case 0x18:
-      table = u18;
-      size = sizeof(u18);
-      break;
-    case 0x19:
-      table = u19;
-      size = sizeof(u19);
-      break;
-    case 0x1a:
-      table = u1a;
-      size = sizeof(u1a);
-      break;
-    case 0x1b:
-      table = u1b;
-      size = sizeof(u1b);
-      break;
-    case 0x1c:
-      table = u1c;
-      size = sizeof(u1c);
-      break;
-    case 0x1d:
-      table = u1d;
-      size = sizeof(u1d);
-      break;
-    case 0x1f:
-      table = u1f;
-      size = sizeof(u1f);
-      break;
-    case 0x20:
-      table = u20;
-      size = sizeof(u20);
-      break;
-    case 0x21:
-      table = u21;
-      size = sizeof(u21);
-      break;
-    case 0x24:
-      table = u24;
-      size = sizeof(u24);
-      break;
-    case 0x2c:
-      table = u2c;
-      size = sizeof(u2c);
-      break;
-    case 0x2d:
-      table = u2d;
-      size = sizeof(u2d);
-      break;
-    case 0x2e:
-      table = u2e;
-      size = sizeof(u2e);
-      break;
-    case 0x30:
-      table = u30;
-      size = sizeof(u30);
-      break;
-    case 0x31:
-      table = u31;
-      size = sizeof(u31);
-      break;
-    case 0x4d:
-      table = u4d;
-      size = sizeof(u4d);
-      break;
-    case 0x9f:
-      table = u9f;
-      size = sizeof(u9f);
-      break;
-    case 0xa4:
-      table = ua4;
-      size = sizeof(ua4);
-      break;
-    case 0xa6:
-      table = ua6;
-      size = sizeof(ua6);
-      break;
-    case 0xa7:
-      table = ua7;
-      size = sizeof(ua7);
-      break;
-    case 0xa8:
-      table = ua8;
-      size = sizeof(ua8);
-      break;
-    case 0xa9:
-      table = ua9;
-      size = sizeof(ua9);
-      break;
-    case 0xaa:
-      table = uaa;
-      size = sizeof(uaa);
-      break;
-    case 0xab:
-      table = uab;
-      size = sizeof(uab);
-      break;
-    case 0xd7:
-      table = ud7;
-      size = sizeof(ud7);
-      break;
-    case 0xfa:
-      table = ufa;
-      size = sizeof(ufa);
-      break;
-    case 0xfb:
-      table = ufb;
-      size = sizeof(ufb);
-      break;
-    case 0xfd:
-      table = ufd;
-      size = sizeof(ufd);
-      break;
-    case 0xfe:
-      table = ufe;
-      size = sizeof(ufe);
-      break;
-    case 0xff:
-      table = uff;
-      size = sizeof(uff);
-      break;
-    case 0x100:
-      table = u100;
-      size = sizeof(u100);
-      break;
-    case 0x101:
-      table = u101;
-      size = sizeof(u101);
-      break;
-    case 0x102:
-      table = u102;
-      size = sizeof(u102);
-      break;
-    case 0x103:
-      table = u103;
-      size = sizeof(u103);
-      break;
-    case 0x104:
-      table = u104;
-      size = sizeof(u104);
-      break;
-    case 0x108:
-      table = u108;
-      size = sizeof(u108);
-      break;
-    case 0x109:
-      table = u109;
-      size = sizeof(u109);
-      break;
-    case 0x10a:
-      table = u10a;
-      size = sizeof(u10a);
-      break;
-    case 0x10b:
-      table = u10b;
-      size = sizeof(u10b);
-      break;
-    case 0x10c:
-      table = u10c;
-      size = sizeof(u10c);
-      break;
-    case 0x110:
-      table = u110;
-      size = sizeof(u110);
-      break;
-    case 0x123:
-      table = u123;
-      size = sizeof(u123);
-      break;
-    case 0x124:
-      table = u124;
-      size = sizeof(u124);
-      break;
-    case 0x134:
-      table = u134;
-      size = sizeof(u134);
-      break;
-    case 0x1d4:
-      table = u1d4;
-      size = sizeof(u1d4);
-      break;
-    case 0x1d5:
-      table = u1d5;
-      size = sizeof(u1d5);
-      break;
-    case 0x1d6:
-      table = u1d6;
-      size = sizeof(u1d6);
-      break;
-    case 0x1d7:
-      table = u1d7;
-      size = sizeof(u1d7);
-      break;
-    case 0x1f1:
-      table = u1f1;
-      size = sizeof(u1f1);
-      break;
-    case 0x2a6:
-      table = u2a6;
-      size = sizeof(u2a6);
-      break;
-    case 0x2b7:
-      table = u2b7;
-      size = sizeof(u2b7);
-      break;
-    case 0x2fa:
-      table = u2fa;
-      size = sizeof(u2fa);
-      break;
-    default:
-      return 0;
-    }
-  /* we have narrowed down to a section of 256 characters to check */
-  /* now check if c matches the alphabetic wide-chars within that section */
-  ptr = (unsigned char *)table;
-  ctmp = (unsigned char)c;
-  while (ptr < table + size)
-    {
-      if (ctmp == *ptr)
-	return 1;
-      if (ctmp < *ptr)
-	return 0;
-      /* otherwise c > *ptr */
-      /* look for 0x0 as next element which indicates a range */
-      ++ptr;
-      if (ptr < table + size - 1 && *ptr == 0x0)
-	{
-	  /* we have a range..see if c falls within range */
-	  ++ptr;
-	  if (ctmp <= *ptr)
-	    return 1;
-	  ++ptr;
-	}
-    }
-  /* not in table */
-  return 0;
-#else
-  return (c < (wint_t)0x100 ? isalpha (c) : 0);
-#endif /* _MB_CAPABLE */
-}
diff --git a/newlib/libc/ctype/iswprint.c b/newlib/libc/ctype/iswprint.c
deleted file mode 100644
index c6050b5..0000000
--- a/newlib/libc/ctype/iswprint.c
+++ /dev/null
@@ -1,501 +0,0 @@
-/* Copyright (c) 2002 Red Hat Incorporated.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-
-     Redistributions of source code must retain the above copyright
-     notice, this list of conditions and the following disclaimer.
-
-     Redistributions in binary form must reproduce the above copyright
-     notice, this list of conditions and the following disclaimer in the
-     documentation and/or other materials provided with the distribution.
-
-     The name of Red Hat Incorporated may not be used to endorse
-     or promote products derived from this software without specific
-     prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
-   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/*
-FUNCTION
-	<<iswprint>>, <<iswprint_l>>---printable wide character test
-
-INDEX
-	iswprint
-
-INDEX
-	iswprint_l
-
-SYNOPSIS
-	#include <wctype.h>
-	int iswprint(wint_t <[c]>);
-
-	#include <wctype.h>
-	int iswprint_l(wint_t <[c]>, locale_t <[locale]>);
-
-DESCRIPTION
-<<iswprint>> is a function which classifies wide-character values that
-are printable.
-
-<<iswprint_l>> is like <<iswprint>> but performs the check based on the
-locale specified by the locale object locale.  If <[locale]> is
-LC_GLOBAL_LOCALE or not a valid locale object, the behaviour is undefined.
-
-RETURNS
-<<iswprint>>, <<iswprint_l>> return non-zero if <[c]> is a printable wide character.
-
-PORTABILITY
-<<iswprint>> is C99.
-<<iswprint_l>> is POSIX-1.2008.
-
-No supporting OS subroutines are required.
-*/
-#include <_ansi.h>
-#include <newlib.h>
-#include <wctype.h>
-#include <string.h>
-#include <ctype.h>
-#include "local.h"
-
-#ifdef _MB_CAPABLE
-#include "utf8print.h"
-#endif /* _MB_CAPABLE */
-
-int
-iswprint (wint_t c)
-{
-#ifdef _MB_CAPABLE
-  unsigned const char *table;
-  unsigned char *ptr;
-  unsigned char ctmp;
-  int size;
-  wint_t x;
-  
-  c = _jp2uc (c);
-
-  /* Based on and tested against Unicode 5.2
-     See utf8print.h for a description how to fetch the data. */
-  x = (c >> 8);
-  /* for some large sections, all characters are printuation so handle them here */
-  if ((x >= 0x33 && x <= 0x4c) ||
-      (x >= 0x4e && x <= 0x9e) ||
-      (x >= 0xa0 && x <= 0xa3) ||
-      (x >= 0xac && x <= 0xd6) ||
-      (x >= 0xe0 && x <= 0xf9) ||
-      (x >= 0x120 && x <= 0x122) ||
-      (x >= 0x130 && x <= 0x133) ||
-      (x >= 0x200 && x <= 0x2a5) ||
-      (x >= 0x2a7 && x <= 0x2b6) ||
-      (x >= 0xf00 && x <= 0xffe) ||
-      (x >= 0x1000 && x <= 0x10fe))
-    return 1;
-  
-  switch (x)
-    {
-    case 0x01:
-    case 0x02:
-    case 0x04:
-    case 0x11:
-    case 0x14:
-    case 0x15:
-    case 0x1e:
-    case 0x22:
-    case 0x25:
-    case 0x28:
-    case 0x29:
-    case 0x2a:
-    case 0xa5:
-    case 0xfc:
-    case 0x2f8:
-    case 0x2f9:
-      return 1;
-    case 0x00:
-      table = u0;
-      size = sizeof(u0);
-      break;
-    case 0x03:
-      table = u3;
-      size = sizeof(u3);
-      break;
-    case 0x05:
-      table = u5;
-      size = sizeof(u5);
-      break;
-    case 0x06:
-      table = u6;
-      size = sizeof(u6);
-      break;
-    case 0x07:
-      table = u7;
-      size = sizeof(u7);
-      break;
-    case 0x08:
-      table = u8;
-      size = sizeof(u8);
-      break;
-    case 0x09:
-      table = u9;
-      size = sizeof(u9);
-      break;
-    case 0x0a:
-      table = ua;
-      size = sizeof(ua);
-      break;
-    case 0x0b:
-      table = ub;
-      size = sizeof(ub);
-      break;
-    case 0x0c:
-      table = uc;
-      size = sizeof(uc);
-      break;
-    case 0x0d:
-      table = ud;
-      size = sizeof(ud);
-      break;
-    case 0x0e:
-      table = ue;
-      size = sizeof(ue);
-      break;
-    case 0x0f:
-      table = uf;
-      size = sizeof(uf);
-      break;
-    case 0x10:
-      table = u10;
-      size = sizeof(u10);
-      break;
-    case 0x12:
-      table = u12;
-      size = sizeof(u12);
-      break;
-    case 0x13:
-      table = u13;
-      size = sizeof(u13);
-      break;
-    case 0x16:
-      table = u16;
-      size = sizeof(u16);
-      break;
-    case 0x17:
-      table = u17;
-      size = sizeof(u17);
-      break;
-    case 0x18:
-      table = u18;
-      size = sizeof(u18);
-      break;
-    case 0x19:
-      table = u19;
-      size = sizeof(u19);
-      break;
-    case 0x1a:
-      table = u1a;
-      size = sizeof(u1a);
-      break;
-    case 0x1b:
-      table = u1b;
-      size = sizeof(u1b);
-      break;
-    case 0x1c:
-      table = u1c;
-      size = sizeof(u1c);
-      break;
-    case 0x1d:
-      table = u1d;
-      size = sizeof(u1d);
-      break;
-    case 0x1f:
-      table = u1f;
-      size = sizeof(u1f);
-      break;
-    case 0x20:
-      table = u20;
-      size = sizeof(u20);
-      break;
-    case 0x21:
-      table = u21;
-      size = sizeof(u21);
-      break;
-    case 0x23:
-      table = u23;
-      size = sizeof(u23);
-      break;
-    case 0x24:
-      table = u24;
-      size = sizeof(u24);
-      break;
-    case 0x26:
-      table = u26;
-      size = sizeof(u26);
-      break;
-    case 0x27:
-      table = u27;
-      size = sizeof(u27);
-      break;
-    case 0x2b:
-      table = u2b;
-      size = sizeof(u2b);
-      break;
-    case 0x2c:
-      table = u2c;
-      size = sizeof(u2c);
-      break;
-    case 0x2d:
-      table = u2d;
-      size = sizeof(u2d);
-      break;
-    case 0x2e:
-      table = u2e;
-      size = sizeof(u2e);
-      break;
-    case 0x2f:
-      table = u2f;
-      size = sizeof(u2f);
-      break;
-    case 0x30:
-      table = u30;
-      size = sizeof(u30);
-      break;
-    case 0x31:
-      table = u31;
-      size = sizeof(u31);
-      break;
-    case 0x32:
-      table = u32;
-      size = sizeof(u32);
-      break;
-    case 0x4d:
-      table = u4d;
-      size = sizeof(u4d);
-      break;
-    case 0x9f:
-      table = u9f;
-      size = sizeof(u9f);
-      break;
-    case 0xa4:
-      table = ua4;
-      size = sizeof(ua4);
-      break;
-    case 0xa6:
-      table = ua6;
-      size = sizeof(ua6);
-      break;
-    case 0xa7:
-      table = ua7;
-      size = sizeof(ua7);
-      break;
-    case 0xa8:
-      table = ua8;
-      size = sizeof(ua8);
-      break;
-    case 0xa9:
-      table = ua9;
-      size = sizeof(ua9);
-      break;
-    case 0xaa:
-      table = uaa;
-      size = sizeof(uaa);
-      break;
-    case 0xab:
-      table = uab;
-      size = sizeof(uab);
-      break;
-    case 0xd7:
-      table = ud7;
-      size = sizeof(ud7);
-      break;
-    case 0xfa:
-      table = ufa;
-      size = sizeof(ufa);
-      break;
-    case 0xfb:
-      table = ufb;
-      size = sizeof(ufb);
-      break;
-    case 0xfd:
-      table = ufd;
-      size = sizeof(ufd);
-      break;
-    case 0xfe:
-      table = ufe;
-      size = sizeof(ufe);
-      break;
-    case 0xff:
-      table = uff;
-      size = sizeof(uff);
-      break;
-    case 0x100:
-      table = u100;
-      size = sizeof(u100);
-      break;
-    case 0x101:
-      table = u101;
-      size = sizeof(u101);
-      break;
-    case 0x102:
-      table = u102;
-      size = sizeof(u102);
-      break;
-    case 0x103:
-      table = u103;
-      size = sizeof(u103);
-      break;
-    case 0x104:
-      table = u104;
-      size = sizeof(u104);
-      break;
-    case 0x108:
-      table = u108;
-      size = sizeof(u108);
-      break;
-    case 0x109:
-      table = u109;
-      size = sizeof(u109);
-      break;
-    case 0x10a:
-      table = u10a;
-      size = sizeof(u10a);
-      break;
-    case 0x10b:
-      table = u10b;
-      size = sizeof(u10b);
-      break;
-    case 0x10c:
-      table = u10c;
-      size = sizeof(u10c);
-      break;
-    case 0x10e:
-      table = u10e;
-      size = sizeof(u10e);
-      break;
-    case 0x110:
-      table = u110;
-      size = sizeof(u110);
-      break;
-    case 0x123:
-      table = u123;
-      size = sizeof(u123);
-      break;
-    case 0x124:
-      table = u124;
-      size = sizeof(u124);
-      break;
-    case 0x134:
-      table = u134;
-      size = sizeof(u134);
-      break;
-    case 0x1d0:
-      table = u1d0;
-      size = sizeof(u1d0);
-      break;
-    case 0x1d1:
-      table = u1d1;
-      size = sizeof(u1d1);
-      break;
-    case 0x1d2:
-      table = u1d2;
-      size = sizeof(u1d2);
-      break;
-    case 0x1d3:
-      table = u1d3;
-      size = sizeof(u1d3);
-      break;
-    case 0x1d4:
-      table = u1d4;
-      size = sizeof(u1d4);
-      break;
-    case 0x1d5:
-      table = u1d5;
-      size = sizeof(u1d5);
-      break;
-    case 0x1d6:
-      table = u1d6;
-      size = sizeof(u1d6);
-      break;
-    case 0x1d7:
-      table = u1d7;
-      size = sizeof(u1d7);
-      break;
-    case 0x1f0:
-      table = u1f0;
-      size = sizeof(u1f0);
-      break;
-    case 0x1f1:
-      table = u1f1;
-      size = sizeof(u1f1);
-      break;
-    case 0x1f2:
-      table = u1f2;
-      size = sizeof(u1f2);
-      break;
-    case 0x2a6:
-      table = u2a6;
-      size = sizeof(u2a6);
-      break;
-    case 0x2b7:
-      table = u2b7;
-      size = sizeof(u2b7);
-      break;
-    case 0x2fa:
-      table = u2fa;
-      size = sizeof(u2fa);
-      break;
-    case 0xe00:
-      table = ue00;
-      size = sizeof(ue00);
-      break;
-    case 0xe01:
-      table = ue01;
-      size = sizeof(ue01);
-      break;
-    case 0xfff:
-      table = ufff;
-      size = sizeof(ufff);
-      break;
-    case 0x10ff:
-      table = u10ff;
-      size = sizeof(u10ff);
-      break;
-    default:
-      return 0;
-    }
-  /* we have narrowed down to a section of 256 characters to check */
-  /* now check if c matches the printuation wide-chars within that section */
-  ptr = (unsigned char *)table;
-  ctmp = (unsigned char)c;
-  while (ptr < table + size)
-    {
-      if (ctmp == *ptr)
-	return 1;
-      if (ctmp < *ptr)
-	return 0;
-      /* otherwise c > *ptr */
-      /* look for 0x0 as next element which indicates a range */
-      ++ptr;
-      if (*ptr == 0x0)
-	{
-	  /* we have a range..see if c falls within range */
-	  ++ptr;
-	  if (ctmp <= *ptr)
-	    return 1;
-	  ++ptr;
-	}
-    }
-  /* not in table */
-  return 0;
-#else
-  return (c < (wint_t)0x100 ? isprint (c) : 0);
-#endif /* _MB_CAPABLE */
-}
diff --git a/newlib/libc/ctype/towlower.c b/newlib/libc/ctype/towlower.c
deleted file mode 100644
index db390db..0000000
--- a/newlib/libc/ctype/towlower.c
+++ /dev/null
@@ -1,575 +0,0 @@
-/* Copyright (c) 2002 Red Hat Incorporated.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-
-     Redistributions of source code must retain the above copyright
-     notice, this list of conditions and the following disclaimer.
-
-     Redistributions in binary form must reproduce the above copyright
-     notice, this list of conditions and the following disclaimer in the
-     documentation and/or other materials provided with the distribution.
-
-     The name of Red Hat Incorporated may not be used to endorse
-     or promote products derived from this software without specific
-     prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
-   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/*
-FUNCTION
-	<<towlower>>, <<towlower_l>>---translate wide characters to lowercase
-
-INDEX
-	towlower
-
-INDEX
-	towlower_l
-
-SYNOPSIS
-	#include <wctype.h>
-	wint_t towlower(wint_t <[c]>);
-
-	#include <wctype.h>
-	wint_t towlower_l(wint_t <[c]>, locale_t <[locale]>);
-
-
-DESCRIPTION
-<<towlower>> is a function which converts uppercase wide characters to
-lowercase, leaving all other characters unchanged.
-
-<<towlower_l>> is like <<towlower>> but performs the function based on the
-locale specified by the locale object locale.  If <[locale]> is
-LC_GLOBAL_LOCALE or not a valid locale object, the behaviour is undefined.
-
-RETURNS
-<<towlower>>, <<towlower_l>> return the lowercase equivalent of <[c]> when it is a
-uppercase wide character; otherwise, it returns the input character.
-
-PORTABILITY
-<<towlower>> is C99.
-<<towlower_l>> is POSIX-1.2008.
-
-No supporting OS subroutines are required.
-*/
-
-#include <_ansi.h>
-#include <newlib.h>
-#include <string.h>
-#include <reent.h>
-#include <ctype.h>
-#include <wctype.h>
-#include "local.h"
-
-wint_t
-towlower (wint_t c)
-{
-#ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-  /* Based on and tested against Unicode 5.2 */
-
-  /* Expression used to filter out the characters for the below code:
-
-     awk -F\; '{ if ( $14 != "" ) print $1; }' UnicodeData.txt
-  */
-  if (c < 0x100)
-    {
-      if ((c >= 0x0041 && c <= 0x005a) ||
-	  (c >= 0x00c0 && c <= 0x00d6) ||
-	  (c >= 0x00d8 && c <= 0x00de))
-	return (c + 0x20);
-
-      return c;
-    }
-  else if (c < 0x300)
-    {
-      if ((c >= 0x0100 && c <= 0x012e) ||
-	  (c >= 0x0132 && c <= 0x0136) ||
-	  (c >= 0x014a && c <= 0x0176) ||
-	  (c >= 0x01de && c <= 0x01ee) ||
-	  (c >= 0x01f8 && c <= 0x021e) ||
-	  (c >= 0x0222 && c <= 0x0232))
-	{
-	  if (!(c & 0x01))
-	    return (c + 1);
-	  return c;
-	}
-
-      if (c == 0x0130)
-	return 0x0069;
-
-      if ((c >= 0x0139 && c <= 0x0147) ||
-	  (c >= 0x01cd && c <= 0x01db))
-	{
-	  if (c & 0x01)
-	    return (c + 1);
-	  return c;
-	}
-      
-      if (c >= 0x178 && c <= 0x01f7)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x0178:
-	      k = 0x00ff;
-	      break;
-	    case 0x0179:
-	    case 0x017b:
-	    case 0x017d:
-	    case 0x0182:
-	    case 0x0184:
-	    case 0x0187:
-	    case 0x018b:
-	    case 0x0191:
-	    case 0x0198:
-	    case 0x01a0:
-	    case 0x01a2:
-	    case 0x01a4:
-	    case 0x01a7:
-	    case 0x01ac:
-	    case 0x01af:
-	    case 0x01b3:
-	    case 0x01b5:
-	    case 0x01b8:
-	    case 0x01bc:
-	    case 0x01c5:
-	    case 0x01c8:
-	    case 0x01cb:
-	    case 0x01cd:
-	    case 0x01cf:
-	    case 0x01d1:
-	    case 0x01d3:
-	    case 0x01d5:
-	    case 0x01d7:
-	    case 0x01d9:
-	    case 0x01db:
-	    case 0x01f2:
-	    case 0x01f4:
-	      k = c + 1;
-	      break;
-	    case 0x0181:
-	      k = 0x0253;
-	      break;
-	    case 0x0186:
-	      k = 0x0254;
-	      break;
-	    case 0x0189:
-	      k = 0x0256;
-	      break;
-	    case 0x018a:
-	      k = 0x0257;
-	      break;
-	    case 0x018e:
-	      k = 0x01dd;
-	      break;
-	    case 0x018f:
-	      k = 0x0259;
-	      break;
-	    case 0x0190:
-	      k = 0x025b;
-	      break;
-	    case 0x0193:
-	      k = 0x0260;
-	      break;
-	    case 0x0194:
-	      k = 0x0263;
-	      break;
-	    case 0x0196:
-	      k = 0x0269;
-	      break;
-	    case 0x0197:
-	      k = 0x0268;
-	      break;
-	    case 0x019c:
-	      k = 0x026f;
-	      break;
-	    case 0x019d:
-	      k = 0x0272;
-	      break;
-	    case 0x019f:
-	      k = 0x0275;
-	      break;
-	    case 0x01a6:
-	      k = 0x0280;
-	      break;
-	    case 0x01a9:
-	      k = 0x0283;
-	      break;
-	    case 0x01ae:
-	      k = 0x0288;
-	      break;
-	    case 0x01b1:
-	      k = 0x028a;
-	      break;
-	    case 0x01b2:
-	      k = 0x028b;
-	      break;
-	    case 0x01b7:
-	      k = 0x0292;
-	      break;
-	    case 0x01c4:
-	    case 0x01c7:
-	    case 0x01ca:
-	    case 0x01f1:
-	      k = c + 2;
-	      break;
-	    case 0x01f6:
-	      k = 0x0195;
-	      break;
-	    case 0x01f7:
-	      k = 0x01bf;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-      else if (c == 0x0220)
-      	return 0x019e;
-      else if (c >= 0x023a && c <= 0x024e)
-      	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x023a:
-	      k = 0x2c65;
-	      break;
-	    case 0x023b:
-	    case 0x0241:
-	    case 0x0246:
-	    case 0x0248:
-	    case 0x024a:
-	    case 0x024c:
-	    case 0x024e:
-	      k = c + 1;
-	      break;
-	    case 0x023d:
-	      k = 0x019a;
-	      break;
-	    case 0x023e:
-	      k = 0x2c66;
-	      break;
-	    case 0x0243:
-	      k = 0x0180;
-	      break;
-	    case 0x0244:
-	      k = 0x0289;
-	      break;
-	    case 0x0245:
-	      k = 0x028c;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x0400)
-    {
-      if (c == 0x0370 || c == 0x0372 || c == 0x0376)
-      	return (c + 1);
-      if (c >= 0x0391 && c <= 0x03ab && c != 0x03a2)
-	return (c + 0x20);
-      if (c >= 0x03d8 && c <= 0x03ee && !(c & 0x01))
-	return (c + 1);
-      if (c >= 0x0386 && c <= 0x03ff)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x0386:
-	      k = 0x03ac;
-	      break;
-	    case 0x0388:
-	      k = 0x03ad;
-	      break;
-	    case 0x0389:
-	      k = 0x03ae;
-	      break;
-	    case 0x038a:
-	      k = 0x03af;
-	      break;
-	    case 0x038c:
-	      k = 0x03cc;
-	      break;
-	    case 0x038e:
-	      k = 0x03cd;
-	      break;
-	    case 0x038f:
-	      k = 0x03ce;
-	      break;
-	    case 0x03cf:
-	      k = 0x03d7;
-	      break;
-	    case 0x03f4:
-	      k = 0x03b8;
-	      break;
-	    case 0x03f7:
-	      k = 0x03f8;
-	      break;
-	    case 0x03f9:
-	      k = 0x03f2;
-	      break;
-	    case 0x03fa:
-	      k = 0x03fb;
-	      break;
-	    case 0x03fd:
-	      k = 0x037b;
-	      break;
-	    case 0x03fe:
-	      k = 0x037c;
-	      break;
-	    case 0x03ff:
-	      k = 0x037d;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x500)
-    {
-      if (c >= 0x0400 && c <= 0x040f)
-	return (c + 0x50);
-      
-      if (c >= 0x0410 && c <= 0x042f)
-	return (c + 0x20);
-      
-      if ((c >= 0x0460 && c <= 0x0480) ||
-	  (c >= 0x048a && c <= 0x04be) ||
-	  (c >= 0x04d0 && c <= 0x04fe))
-	{
-	  if (!(c & 0x01))
-	    return (c + 1);
-	  return c;
-	}
-      
-      if (c == 0x04c0)
-	return 0x04cf;
-
-      if (c >= 0x04c1 && c <= 0x04cd)
-	{
-	  if (c & 0x01)
-	    return (c + 1);
-	  return c;
-	}
-    }
-  else if (c < 0x1f00)
-    {
-      if ((c >= 0x0500 && c <= 0x050e) ||
-	  (c >= 0x0510 && c <= 0x0524) ||
-	  (c >= 0x1e00 && c <= 0x1e94) ||
-	  (c >= 0x1ea0 && c <= 0x1ef8))
-	{
-	  if (!(c & 0x01))
-	    return (c + 1);
-	  return c;
-	}
-      
-      if (c >= 0x0531 && c <= 0x0556)
-	return (c + 0x30);
-
-      if (c >= 0x10a0 && c <= 0x10c5)
-	return (c + 0x1c60);
-
-      if (c == 0x1e9e)
-	return 0x00df;
-
-      if (c >= 0x1efa && c <= 0x1efe && !(c & 0x01))
-	return (c + 1);
-    }
-  else if (c < 0x2000)
-    {
-      if ((c >= 0x1f08 && c <= 0x1f0f) ||
-	  (c >= 0x1f18 && c <= 0x1f1d) ||
-	  (c >= 0x1f28 && c <= 0x1f2f) ||
-	  (c >= 0x1f38 && c <= 0x1f3f) ||
-	  (c >= 0x1f48 && c <= 0x1f4d) ||
-	  (c >= 0x1f68 && c <= 0x1f6f) ||
-	  (c >= 0x1f88 && c <= 0x1f8f) ||
-	  (c >= 0x1f98 && c <= 0x1f9f) ||
-	  (c >= 0x1fa8 && c <= 0x1faf))
-	return (c - 0x08);
-
-      if (c >= 0x1f59 && c <= 0x1f5f)
-	{
-	  if (c & 0x01)
-	    return (c - 0x08);
-	  return c;
-	}
-    
-      if (c >= 0x1fb8 && c <= 0x1ffc)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x1fb8:
-	    case 0x1fb9:
-	    case 0x1fd8:
-	    case 0x1fd9:
-	    case 0x1fe8:
-	    case 0x1fe9:
-	      k = c - 0x08;
-	      break;
-	    case 0x1fba:
-	    case 0x1fbb:
-	      k = c - 0x4a;
-	      break;
-	    case 0x1fbc:
-	      k = 0x1fb3;
-	      break;
-	    case 0x1fc8:
-	    case 0x1fc9:
-	    case 0x1fca:
-	    case 0x1fcb:
-	      k = c - 0x56;
-	      break;
-	    case 0x1fcc:
-	      k = 0x1fc3;
-	      break;
-	    case 0x1fda:
-	    case 0x1fdb:
-	      k = c - 0x64;
-	      break;
-	    case 0x1fea:
-	    case 0x1feb:
-	      k = c - 0x70;
-	      break;
-	    case 0x1fec:
-	      k = 0x1fe5;
-	      break;
-	    case 0x1ff8:
-	    case 0x1ff9:
-	      k = c - 0x80;
-	      break;
-	    case 0x1ffa:
-	    case 0x1ffb:
-	      k = c - 0x7e;
-	      break;
-	    case 0x1ffc:
-	      k = 0x1ff3;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x2c00)
-    {
-      if (c >= 0x2160 && c <= 0x216f)
-	return (c + 0x10);
-
-      if (c >= 0x24b6 && c <= 0x24cf)
-	return (c + 0x1a);
-      
-      switch (c)
-      	{
-	case 0x2126:
-	  return 0x03c9;
-	case 0x212a:
-	  return 0x006b;
-	case 0x212b:
-	  return 0x00e5;
-	case 0x2132:
-	  return 0x214e;
-	case 0x2183:
-	  return 0x2184;
-	}
-    }
-  else if (c < 0x2d00)
-    {
-      if (c >= 0x2c00 && c <= 0x2c2e)
-	return (c + 0x30);
-
-      if (c >= 0x2c80 && c <= 0x2ce2 && !(c & 0x01))
-	return (c + 1);
-
-      switch (c)
-      	{
-	case 0x2c60:
-	  return 0x2c61;
-	case 0x2c62:
-	  return 0x026b;
-	case 0x2c63:
-	  return 0x1d7d;
-	case 0x2c64:
-	  return 0x027d;
-	case 0x2c67:
-	case 0x2c69:
-	case 0x2c6b:
-	case 0x2c72:
-	case 0x2c75:
-	case 0x2ceb:
-	case 0x2ced:
-	  return c + 1;
-	case 0x2c6d:
-	  return 0x0251;
-	case 0x2c6e:
-	  return 0x0271;
-	case 0x2c6f:
-	  return 0x0250;
-	case 0x2c70:
-	  return 0x0252;
-	case 0x2c7e:
-	  return 0x023f;
-	case 0x2c7f:
-	  return 0x0240;
-	}
-    }
-  else if (c >= 0xa600 && c < 0xa800)
-    {
-      if ((c >= 0xa640 && c <= 0xa65e) ||
-	  (c >= 0xa662 && c <= 0xa66c) ||
-	  (c >= 0xa680 && c <= 0xa696) ||
-	  (c >= 0xa722 && c <= 0xa72e) ||
-	  (c >= 0xa732 && c <= 0xa76e) ||
-	  (c >= 0xa77f && c <= 0xa786))
-	{
-	  if (!(c & 1))
-	    return (c + 1);
-	  return c;
-	}
-
-      switch (c)
-      	{
-	case 0xa779:
-	case 0xa77b:
-	case 0xa77e:
-	case 0xa78b:
-	  return (c + 1);
-	case 0xa77d:
-	  return 0x1d79;
-	}
-    }
-  else
-    {
-      if (c >= 0xff21 && c <= 0xff3a)
-	return (c + 0x20);
-      
-      if (c >= 0x10400 && c <= 0x10427)
-	return (c + 0x28);
-    }
-  return c;
-#else
-  return (c < 0x00ff ? (wint_t)(tolower ((int)c)) : c);
-#endif /* _MB_CAPABLE */
-}
-
diff --git a/newlib/libc/ctype/utf8alpha.h b/newlib/libc/ctype/utf8alpha.h
deleted file mode 100644
index d9306b7..0000000
--- a/newlib/libc/ctype/utf8alpha.h
+++ /dev/null
@@ -1,355 +0,0 @@
-/* Copyright (c) 2002 Red Hat Incorporated.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-
-     Redistributions of source code must retain the above copyright
-     notice, this list of conditions and the following disclaimer.
-
-     Redistributions in binary form must reproduce the above copyright
-     notice, this list of conditions and the following disclaimer in the
-     documentation and/or other materials provided with the distribution.
-
-     The name of Red Hat Incorporated may not be used to endorse
-     or promote products derived from this software without specific
-     prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
-   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Generated using UnicodeData.txt 5.2 */
-
-/* Expression used to filter out the characters for the below tables:
-
-  awk -F\; \
-  '{ \
-    VAL = strtonum (sprintf("0x%s", $1)); \
-    # All of general category "L", except for two Thai characters which \
-    # are actually punctuation characters.  Old Unicode weirdness. \
-    # The character "COMBINING GREEK YPOGEGRAMMENI", as well as all Thai \
-    # characters which are in "Mn" category.  Old Unicode weirdness. \
-    # All numerical digit or letter characters, except the ASCII variants. \
-    # This is necessary due to the unfortunate ISO C definition for the \
-    # iswdigit class, otherwise these characters are missing in iswalnum. \
-    # All "Other Symbols" which are named as "LETTER" characters. \
-    # \
-    # Before running this test, make sure to expand all Unicode blocks \
-    # which are just marked by their first and last character! \
-    # \
-    if (   (match($3, "^L") && VAL != 0x0e2f && VAL != 0x0e46) \
-	|| (match($3, "^Mn") && (VAL == 0x0345 || match($2, "\\<CHARACTER\\>"))) \
-	|| (match($3, "^N[dl]") && VAL >= 0x100) \
-	|| (match($3, "^So") && match($2, "\\<LETTER\\>"))) \
-      print $1; \
-  }' UnicodeData.txt
-*/
-
-static const unsigned char u0[] = {
-  0x41, 0x0, 0x5a, 0x61, 0x0, 0x7a, 0xaa, 0xb5, 
-  0xba, 0xc0, 0x0, 0xd6, 0xd8, 0x0, 0xf6, 0xf8, 
-  0x0, 0xff };
-/* u1 all alphabetic */
-static const unsigned char u2[] = {
-  0x00, 0x0, 0xc1, 0xc6, 0x0, 0xd1,
-  0xe0, 0x0, 0xe4, 0xec, 0xee };
-static const unsigned char u3[] = {
-  0x45, 0x70, 0x0, 0x74, 0x76, 0x77,
-  0x7a, 0x0, 0x7d, 0x86, 0x88, 0x0, 0x8a, 0x8c,
-  0x8e, 0x0, 0xa1, 0xa3, 0x0, 0xf5,
-  0xf7, 0x0, 0xff };
-static const unsigned char u4[] = { 
-  0x00, 0x0, 0x81, 0x8a, 0x0, 0xff };
-static const unsigned char u5[] = { 
-  0x00, 0x0, 0x25, 0x31, 0x0, 0x56, 0x59, 
-  0x61, 0x0, 0x87, 0xd0, 0x0, 0xea,
-  0xf0, 0x0, 0xf2 };
-static const unsigned char u6[] = { 
-  0x21, 0x0, 0x4a, 0x60, 0x0, 0x69,
-  0x6e, 0x0, 0x6f, 0x71, 0x0, 0xd3, 
-  0xd5, 0xe5, 0x0, 0xe6, 0xee, 0x0, 0xfc, 0xff };
-static const unsigned char u7[] = { 
-  0x10, 0x12, 0x0, 0x2f, 0x4d, 0x0, 0xa5, 0xb1,
-  0xc0, 0x0, 0xea, 0xf4, 0xf5, 0xfa };
-static const unsigned char u8[] = { 
-  0x00, 0x0, 0x15, 0x1a, 0x24, 0x28 };
-static const unsigned char u9[] = { 
-  0x04, 0x0, 0x39, 0x3d, 0x50, 0x58, 0x0, 0x61,
-  0x66, 0x0, 0x6f, 0x71, 0x72, 0x79, 0x0, 0x7f,
-  0x85, 0x0, 0x8c, 0x8f, 0x0, 0x90, 
-  0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0, 0xb2,
-  0xb6, 0x0, 0xb9, 0xbd, 0xce, 0xdc, 0x0, 0xdd,
-  0xdf, 0x0, 0xe1, 0xe6, 0x0, 0xf1 };
-static const unsigned char ua[] = { 
-  0x05, 0x0, 0x0a, 0x0f, 0x0, 0x10,
-  0x13, 0x0, 0x28, 0x2a, 0x0, 0x30,
-  0x32, 0x0, 0x33, 0x35, 0x0, 0x36,
-  0x38, 0x0, 0x39, 0x59, 0x0, 0x5c,
-  0x5e, 0x66, 0x0, 0x6f, 0x72, 0x0, 0x74,
-  0x85, 0x0, 0x8d, 0x8f, 0x0, 0x91,
-  0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0,
-  0xb2, 0x0, 0xb3, 0xb5, 0x0, 0xb9,
-  0xbd, 0xd0, 0xe0, 0xe1, 0xe6, 0x0, 0xef };
-static const unsigned char ub[] = { 
-  0x05, 0x0, 0x0c, 0x0f, 0x0, 0x10,
-  0x13, 0x0, 0x28, 0x2a, 0x0, 0x30,
-  0x32, 0x0, 0x33, 0x35, 0x0, 0x39, 0x3d,
-  0x5c, 0x0, 0x5d, 0x5f, 0x0, 0x61,
-  0x66, 0x0, 0x6f, 0x71, 0x83, 0x85, 0x0, 0x8a,
-  0x8e, 0x0, 0x90, 0x92, 0x0, 0x95,
-  0x99, 0x0, 0x9a, 0x9c, 0x9e, 0x0, 0x9f,
-  0xa3, 0x0, 0xa4, 0xa8, 0x0, 0xaa,
-  0xae, 0x0, 0xb9, 0xd0, 0xe6, 0x0, 0xef };
-static const unsigned char uc[] = { 
-  0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10,
-  0x12, 0x0, 0x28, 0x2a, 0x0, 0x33,
-  0x35, 0x0, 0x39, 0x3d, 0x58, 0x59,
-  0x60, 0x0, 0x61, 0x66, 0x0, 0x6f,
-  0x85, 0x0, 0x8c, 0x8e, 0x0, 0x90,
-  0x92, 0x0, 0xa8, 0xaa, 0x0, 0xb3,
-  0xb5, 0x0, 0xb9, 0xbd, 0xde, 0xe0, 0x0, 0xe1,
-  0xe6, 0x0, 0xef };
-static const unsigned char ud[] = { 
-  0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10, 
-  0x12, 0x0, 0x28, 0x2a, 0x0, 0x39, 0x3d,
-  0x60, 0x0, 0x61, 0x66, 0x0, 0x6f,
-  0x7a, 0x0, 0x7f, 0x85, 0x0, 0x96, 0x9a, 
-  0x0, 0xb1, 0xb3, 0x0, 0xbb, 0xbd,
-  0xc0, 0x0, 0xc6 };
-static const unsigned char ue[] = { 
-  0x01, 0x0, 0x2e, 0x30, 0x0, 0x3a, 0x40, 
-  0x0, 0x45, 0x47, 0x0, 0x4e, 0x50, 0x0, 0x59, 
-  0x81, 0x0, 0x82, 0x84, 0x87, 0x0, 0x88, 0x8a, 
-  0x8d, 0x94, 0x0, 0x97, 0x99, 0x0, 0x9f, 0xa1, 
-  0x0, 0xa3, 0xa5, 0xa7, 0xaa, 0x0, 0xab, 0xad, 
-  0x0, 0xb0, 0xb2, 0x0, 0xb3, 0xbd, 0xc0, 0x0, 
-  0xc4, 0xc6, 0xd0, 0x0, 0xd9, 0xdc, 0x0, 0xdd }; 
-static const unsigned char uf[] = {
-  0x00, 0x20, 0x0, 0x29, 0x40, 0x0, 0x47, 0x49, 
-  0x0, 0x6c, 0x88, 0x0, 0x8b };
-static const unsigned char u10[] = { 
-  0x00, 0x0, 0x2a, 0x3f, 0x0, 0x49,
-  0x50, 0x0, 0x55, 0x5a, 0x0, 0x5d,
-  0x61, 0x65, 0x66, 0x6e, 0x0, 0x70,
-  0x75, 0x0, 0x81, 0x8e, 0x90, 0x0, 0x99,
-  0xa0, 0x0, 0xc5, 0xd0, 0x0, 0xfa, 0xfc };
-/* u11 all alphabetic */
-static const unsigned char u12[] = { 
-  0x00, 0x0, 0x48, 0x4a, 0x0, 0x4d,
-  0x50, 0x0, 0x56, 0x58, 0x5a, 0x0, 0x5d,
-  0x60, 0x0, 0x88, 0x8a, 0x0, 0x8d,
-  0x90, 0x0, 0xb0, 0xb2, 0x0, 0xb5,
-  0xb8, 0x0, 0xbe, 0xc0, 0xc2, 0x0, 0xc5,
-  0xc8, 0x0, 0xd6, 0xd8, 0x0, 0xff };
-static const unsigned char u13[] = { 
-  0x00, 0x0, 0x10, 0x12, 0x0, 0x15,
-  0x18, 0x0, 0x5a, 0x80, 0x0, 0x8f,
-  0xa0, 0x0, 0xf4 };
-static const unsigned char u14[] = { 
-  0x01, 0x0, 0xff };
-/* u15 all alphabetic */
-static const unsigned char u16[] = { 
-  0x00, 0x0, 0x6c, 0x6f, 0x0, 0x7f, 
-  0x81, 0x0, 0x9a, 0xa0, 0x0, 0xea,
-  0xee, 0x0, 0xf0 };
-static const unsigned char u17[] = { 
-  0x00, 0x0, 0x0c, 0x0e, 0x0, 0x11,
-  0x20, 0x0, 0x31, 0x40, 0x0, 0x51,
-  0x60, 0x0, 0x6c, 0x6e, 0x0, 0x70,
-  0x80, 0x0, 0xb3, 0xd7, 0xdc, 0xe0, 0x0, 0xe9 };
-static const unsigned char u18[] = { 
-  0x10, 0x0, 0x19, 0x20, 0x0, 0x77,
-  0x80, 0x0, 0xa8, 0xaa, 0xb0, 0x0, 0xf5 };
-static const unsigned char u19[] = { 
-  0x00, 0x0, 0x1c, 0x46, 0x0, 0x6d,
-  0x70, 0x0, 0x74, 0x80, 0x0, 0xab,
-  0xc1, 0x0, 0xc7, 0xd0, 0x0, 0xda };
-static const unsigned char u1a[] = { 
-  0x00, 0x0, 0x16, 0x20, 0x0, 0x54,
-  0x80, 0x0, 0x89, 0x90, 0x0, 0x99, 0xa7 };
-static const unsigned char u1b[] = { 
-  0x05, 0x0, 0x33, 0x45, 0x0, 0x4b,
-  0x50, 0x0, 0x59, 0x83, 0x0, 0xa0,
-  0xae, 0x0, 0xb9 };
-static const unsigned char u1c[] = { 
-  0x00, 0x0, 0x23, 0x40, 0x0, 0x49,
-  0x4d, 0x0, 0x7d, 0xe9, 0x0, 0xec,
-  0xee, 0x0, 0xf1 };
-static const unsigned char u1d[] = { 
-  0x00, 0x0, 0xbf };
-/* u1e all alphabetic */
-static const unsigned char u1f[] = { 
-  0x00, 0x0, 0x15, 0x18, 0x0, 0x1d, 
-  0x20, 0x0, 0x45, 0x48, 0x0, 0x4d, 0x50, 0x0, 0x57, 0x59, 
-  0x5b, 0x5d, 0x5f, 0x0, 0x7d, 0x80, 0x0, 0xb4, 
-  0xb6, 0x0, 0xbc, 0xbe, 0xc2, 0x0, 0xc4, 0xc6, 
-  0x0, 0xcc, 0xd0, 0x0, 0xd3, 0xd6, 0x0, 0xdb, 
-  0xe0, 0x0, 0xec, 0xf2, 0x0, 0xf4, 0xf6, 0x0, 
-  0xfc };
-static const unsigned char u20[] = { 
-  0x71, 0x7f, 0x90, 0x0, 0x94 };
-static const unsigned char u21[] = { 
-  0x02, 0x07, 0x0a, 0x0, 0x13, 0x15,
-  0x19, 0x0, 0x1d, 0x24, 0x26, 0x28, 0x0, 0x2d,
-  0x2f, 0x0, 0x39, 0x3c, 0x0, 0x3f,
-  0x45, 0x0, 0x49, 0x4e, 0x60, 0x0, 0x88 }; 
-static const unsigned char u24[] = { 
-  0x9c, 0x0, 0xe9 };
-static const unsigned char u2c[] = { 
-  0x00, 0x0, 0x2e, 0x30, 0x0, 0x5e,
-  0x60, 0x0, 0xe4, 0xeb, 0x0, 0xee };
-static const unsigned char u2d[] = { 
-  0x00, 0x0, 0x25, 0x30, 0x0, 0x65, 0x6f,
-  0x80, 0x0, 0x96, 0xa0, 0x0, 0xa6,
-  0xa8, 0x0, 0xae, 0xb0, 0x0, 0xb6,
-  0xb8, 0x0, 0xbe, 0xc0, 0x0, 0xc6,
-  0xc8, 0x0, 0xce, 0xd0, 0x0, 0xd6,
-  0xd8, 0x0, 0xde };
-static const unsigned char u2e[] = {
-  0x2f };
-static const unsigned char u30[] = { 
-  0x05, 0x0, 0x07, 0x21, 0x0, 
-  0x29, 0x31, 0x0, 0x35, 0x38, 0x0, 0x3c, 0x41, 
-  0x0, 0x96, 0x9d, 0x0, 0x9f, 0xa1, 0x0, 0xfa, 
-  0xfc, 0x0, 0xff };
-static const unsigned char u31[] = { 
-  0x05, 0x0, 0x2d, 0x31, 0x0, 
-  0x8e, 0xa0, 0x0, 0xb7, 0xf0, 0x0, 0xff };
-/* u34 to u4c all alphabetic */
-static const unsigned char u4d[] = { 
-  0x00, 0x0, 0xb5 };
-/* u4e to u9e all alphabetic */
-static const unsigned char u9f[] = { 
-  0x00, 0x0, 0xcb };
-/* ua0 to ua3 all alphabetic */
-static const unsigned char ua4[] = { 
-  0x00, 0x0, 0x8c, 0xd0, 0x0, 0xfd }; 
-/* ua5 all alphabetic */
-static const unsigned char ua6[] = {
-  0x00, 0x0, 0x0c, 0x10, 0x0, 0x2b,
-  0x40, 0x0, 0x5f, 0x62, 0x0, 0x6e,
-  0x7f, 0x0, 0x97, 0xa0, 0x0, 0xef };
-static const unsigned char ua7[] = {
-  0x17, 0x0, 0x1f, 0x22, 0x0, 0x88,
-  0x8b, 0x8c,
-  0xfb, 0x0, 0xff };
-static const unsigned char ua8[] = {
-  0x00, 0x01, 0x03, 0x0, 0x05, 0x07, 0x0, 0x0a,
-  0x0c, 0x0, 0x22, 0x40, 0x0, 0x73,
-  0x82, 0x0, 0xb3, 0xd0, 0x0, 0xd9,
-  0xf2, 0x0, 0xf7, 0xfb };
-static const unsigned char ua9[] = {
-  0x00, 0x0, 0x25, 0x30, 0x0, 0x46,
-  0x60, 0x0, 0x7c, 0x84, 0x0, 0xb2,
-  0xcf, 0x0, 0xd9 };
-static const unsigned char uaa[] = {
-  0x00, 0x0, 0x28, 0x40, 0x0, 0x42,
-  0x44, 0x0, 0x4b, 0x50, 0x0, 0x59,
-  0x60, 0x0, 0x76, 0x7a, 0x80, 0x0, 0xaf,
-  0xb1, 0xb5, 0xb6, 0xb9, 0x0, 0xbd,
-  0xc0, 0xc2, 0xdb, 0x0, 0xdd };
-static const unsigned char uab[] = {
-  0xc0, 0x0, 0xe2, 0xf0, 0x0, 0xf9 };
-/* uac to ud6 all alphabetic */
-static const unsigned char ud7[] = { 
-  0x00, 0x0, 0xa3, 0xb0, 0x0, 0xc6,
-  0xcb, 0x0, 0xfb };
-/* uf9 all alphabetic */
-static const unsigned char ufa[] = { 
-  0x00, 0x0, 0x2d, 0x30, 0x0, 0x6d,
-  0x70, 0x0, 0xd9 };
-static const unsigned char ufb[] = { 
-  0x00, 0x0, 0x06, 0x13, 0x0, 0x17, 0x1d, 
-  0x1f, 0x0, 0x28, 0x2a, 0x0, 0x36, 0x38, 0x0, 
-  0x3c, 0x3e, 0x40, 0x0, 0x41, 0x43, 0x0, 0x44, 
-  0x46, 0x0, 0xb1, 0xd3, 0x0, 0xff };
-/* ufc all alphabetic */
-static const unsigned char ufd[] = { 
-  0x00, 0x0, 0x3d, 0x50, 0x0, 
-  0x8f, 0x92, 0x0, 0xc7, 0xf0, 0x0, 0xfb };
-static const unsigned char ufe[] = { 
-  0x70, 
-  0x0, 0x74, 0x76, 0x0, 0xfc };
-static const unsigned char uff[] = { 
-  0x10, 0x0, 0x19, 
-  0x21, 0x0, 0x3a, 0x41, 0x0, 0x5a, 0x66, 0x0, 
-  0xbe, 0xc2, 0x0, 0xc7, 0xca, 0x0, 0xcf, 0xd2, 
-  0x0, 0xd7, 0xda, 0x0, 0xdc };
-static const unsigned char u100[] = { 
-  0x00, 0x0, 0x0b, 0x0d, 0x0, 0x26,
-  0x28, 0x0, 0x3a, 0x3c, 0x3d, 0x3f, 0x0, 0x4d,
-  0x50, 0x0, 0x5d, 0x80, 0x0, 0xfa };
-static const unsigned char u101[] = { 
-  0x40, 0x0, 0x74 };
-static const unsigned char u102[] = { 
-  0x80, 0x0, 0x9c, 0xa0, 0x0, 0xd0 };
-static const unsigned char u103[] = { 
-  0x00, 0x0, 0x1e, 0x30, 0x0, 0x4a,
-  0x80, 0x0, 0x9d, 0xa0, 0x0, 0xc3,
-  0xc8, 0x0, 0xcf, 0xd1, 0x0, 0xd5 };
-static const unsigned char u104[] = { 
-  0x00, 0x0, 0x9d, 0xa0, 0x0, 0xa9 };
-static const unsigned char u108[] = { 
-  0x00, 0x0, 0x05, 0x08, 0x0a, 0x0, 0x35,
-  0x37, 0x38, 0x3c, 0x3f, 0x0, 0x55 };
-static const unsigned char u109[] = {
-  0x00, 0x0, 0x15, 0x20, 0x0, 0x39 };
-static const unsigned char u10a[] = {
-  0x00, 0x10, 0x0, 0x13, 0x15, 0x0, 0x17,
-  0x19, 0x0, 0x33, 0x60, 0x0, 0x7c };
-static const unsigned char u10b[] = {
-  0x00, 0x0, 0x35, 0x40, 0x0, 0x55,
-  0x60, 0x0, 0x72 };
-static const unsigned char u10c[] = {
-  0x00, 0x0, 0x48 };
-static const unsigned char u110[] = {
-  0x83, 0x0, 0xaf };
-/* u120 to u122 all alphabetic */
-static const unsigned char u123[] = { 
-  0x00, 0x0, 0x6e };
-static const unsigned char u124[] = { 
-  0x00, 0x0, 0x62 };
-/* u130 to u133 all alphabetic */
-static const unsigned char u134[] = {
-  0x00, 0x0, 0x2e };
-static const unsigned char u1d4[] = { 
-  0x00, 0x0, 0x54, 0x56, 0x0, 0x9c,
-  0x9e, 0x0, 0x9f, 0xa2, 0xa5, 0x0, 0xa6,
-  0xa9, 0x0, 0xac, 0xae, 0x0, 0xb9, 0xbb,
-  0xbd, 0x0, 0xc3, 0xc5, 0x0, 0xff };
-static const unsigned char u1d5[] = { 
-  0x00, 0x0, 0x05, 0x07, 0x0, 
-  0x0a, 0x0d, 0x0, 0x14, 0x16, 0x0, 0x1c, 0x1e, 
-  0x0, 0x39, 0x3b, 0x0, 0x3e, 0x40, 0x0, 0x44, 
-  0x46, 0x4a, 0x0, 0x50, 0x52, 0x0, 0xff }; 
-static const unsigned char u1d6[] = { 
-  0x00, 0x0, 0xa5, 0xa8, 0x0, 0xc0,
-  0xc2, 0x0, 0xda, 0xdc, 0x0, 0xfa, 
-  0xfc, 0x0, 0xff };
-static const unsigned char u1d7[] = { 
-  0x00, 0x0, 0x14, 0x16, 0x0, 0x34,
-  0x36, 0x0, 0x4e, 0x50, 0x0, 0x6e, 
-  0x70, 0x0, 0x88, 0x8a, 0x0, 0xa8,
-  0xaa, 0x0, 0xc2, 0xc4, 0x0, 0xcb,
-  0xce, 0x0, 0xff };
-static const unsigned char u1f1[] = {
-  0x10, 0x0, 0x2c, 0x31, 0x3d, 0x3f, 0x42, 0x46,
-  0x57, 0x5f, 0x79, 0x7b, 0x7c, 0x7f, 0x8a };
-/* u200 to u2a5 all alphabetic */
-static const unsigned char u2a6[] = { 
-  0x00, 0x0, 0xd6 };
-/* u2a7 to u2b6 all alphabetic */
-static const unsigned char u2b7[] = {
-  0x00, 0x0, 0x34 };
-/* u2f8 to u2f9 all alphabetic */
-static const unsigned char u2fa[] = { 
-  0x00, 0x0, 0x1d };
diff --git a/newlib/libc/ctype/utf8print.h b/newlib/libc/ctype/utf8print.h
deleted file mode 100644
index abeb81c..0000000
--- a/newlib/libc/ctype/utf8print.h
+++ /dev/null
@@ -1,389 +0,0 @@
-/* Copyright (c) 2002 Red Hat Incorporated.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-
-     Redistributions of source code must retain the above copyright
-     notice, this list of conditions and the following disclaimer.
-
-     Redistributions in binary form must reproduce the above copyright
-     notice, this list of conditions and the following disclaimer in the
-     documentation and/or other materials provided with the distribution.
-
-     The name of Red Hat Incorporated may not be used to endorse
-     or promote products derived from this software without specific
-     prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
-   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Generated using UnicodeData.txt 5.2 */
-
-/* Expression used to filter out the characters for the below tables:
-
-   awk -F\; \
-   '{ \
-     VAL = strtonum (sprintf("0x%s", $1)); \
-     # All valid characters except from categories Cc (C0 or C1 control code), \
-     # Cs (Surrogates), Zl (Line separator), and Zp (Paragraph separator).\
-     # \
-     # Before running this test, make sure to expand all Unicode blocks \
-     # which are just marked by their first and last character! \
-     # \
-     if (!match($3, "^C[cs]") && !match($3, "^Z[lp]")) \
-       print $1; \
-   }' UnicodeData.txt
-*/
-static const unsigned char u0[] = {
-  0x20, 0x0, 0x7e, 0xa0, 0x0, 0xff };
-/* u1 is all-print */
-/* u2 is all-print */
-static const unsigned char u3[] = { 
-  0x00, 0x0, 0x77, 0x7a, 0x0, 0x7e,
-  0x84, 0x0, 0x8a, 0x8c, 0x8e, 0x0, 
-  0xa1, 0xa3, 0x0, 0xff };
-/* u4 is all-print */
-static const unsigned char u5[] = {
-  0x00, 0x0, 0x25, 0x31, 0x0, 
-  0x56, 0x59, 0x0, 0x5f, 0x61, 0x0, 0x87, 0x89, 
-  0x0, 0x8a, 0x91, 0x0, 0xc7, 0xd0, 0x0, 0xea,
-  0xf0, 0x0, 0xf4 };
-static const unsigned char u6[] = {
-  0x00, 0x0, 0x03, 0x06, 0x0, 0x1b, 0x1e, 0x1f,
-  0x21, 0x0, 0x5e, 0x60, 0x0, 0xff };
-static const unsigned char u7[] = { 
-  0x00, 0x0, 0x0d, 0x0f, 0x0, 0x4a, 0x4d, 0x0, 0xb1,
-  0xc0, 0x0, 0xfa };
-static const unsigned char u8[] = { 
-  0x00, 0x0, 0x2d, 0x30, 0x0, 0x3e, };
-static const unsigned char u9[] = {
-  0x00, 0x0, 0x39, 0x3c, 0x0, 0x4e, 0x50, 0x0, 0x55, 
-  0x58, 0x0, 0x72, 0x79, 0x0, 0x7f, 0x81, 0x0, 0x83,
-  0x85, 0x0, 0x8c, 0x8f, 0x0, 0x90, 0x93, 0x0, 0xa8,
-  0xaa, 0x0, 0xb0, 0xb2, 0xb6, 0x0, 0xb9, 0xbc, 
-  0x0, 0xc4, 0xc7, 0xc8, 0xcb, 0x0, 0xce, 
-  0xd7, 0xdc, 0x0, 0xdd, 0xdf, 0x0, 0xe3, 0xe6, 
-  0x0, 0xfb };
-static const unsigned char ua[] = { 
-  0x01, 0x0, 0x03, 0x05, 0x0, 0x0a, 0x0f, 0x0, 
-  0x10, 0x13, 0x0, 0x28, 0x2a, 0x0, 0x30, 0x32, 
-  0x0, 0x33, 0x35, 0x0, 0x36, 0x38, 0x0, 0x39, 
-  0x3c, 0x3e, 0x0, 0x42, 0x47, 0x0, 0x48, 0x4b, 
-  0x0, 0x4d, 0x51, 0x59, 0x0, 0x5c, 0x5e, 0x66, 0x0, 
-  0x75, 0x81, 0x0, 0x83, 0x85, 0x0, 0x8d,
-  0x8f, 0x0, 0x91, 0x93, 0x0, 0xa8, 0xaa, 0x0, 
-  0xb0, 0xb2, 0x0, 0xb3, 0xb5, 0x0, 0xb9, 0xbc, 
-  0x0, 0xc5, 0xc7, 0x0, 0xc9, 0xcb, 0x0, 0xcd, 
-  0xd0, 0xe0, 0x0, 0xe3, 0xe6, 0x0, 0xef, 0xf1 };
-static const unsigned char ub[] = {
-  0x01, 0x0, 0x03, 
-  0x05, 0x0, 0x0c, 0x0f, 0x0, 0x10, 0x13, 0x0, 
-  0x28, 0x2a, 0x0, 0x30, 0x32, 0x0, 0x33, 0x35, 
-  0x0, 0x39, 0x3c, 0x0, 0x44, 0x47, 0x0, 0x48, 
-  0x4b, 0x0, 0x4d, 0x56, 0x0, 0x57, 0x5c, 0x0, 
-  0x5d, 0x5f, 0x0, 0x63, 0x66, 0x0, 0x71, 0x82, 
-  0x0, 0x83, 0x85, 0x0, 0x8a, 0x8e, 0x0, 0x90, 
-  0x92, 0x0, 0x95, 0x99, 0x0, 0x9a, 0x9c, 0x9e, 
-  0x0, 0x9f, 0xa3, 0x0, 0xa4, 0xa8, 0x0, 0xaa, 
-  0xae, 0x0, 0xb9, 0xbe, 0x0, 
-  0xc2, 0xc6, 0x0, 0xc8, 0xca, 0x0, 0xcd, 0xd0,
-  0xd7, 0xe6, 0xe7, 0x0, 0xfa };
-static const unsigned char uc[] = {
-  0x01, 0x0, 0x03, 0x05, 0x0, 
-  0x0c, 0x0e, 0x0, 0x10, 0x12, 0x0, 0x28, 0x2a, 
-  0x0, 0x33, 0x35, 0x0, 0x39, 0x3d, 0x0, 0x44, 
-  0x46, 0x0, 0x48, 0x4a, 0x0, 0x4d, 0x55, 0x0, 
-  0x56, 0x58, 0x59, 0x60, 0x0, 0x63, 0x66, 0x0, 0x6f,
-  0x78, 0x0, 0x7f, 0x82, 0x83, 0x85, 0x0, 0x8c,
-  0x8e, 0x0, 0x90, 0x92, 0x0, 0xa8, 0xaa, 0x0, 0xb3,
-  0xb5, 0x0, 0xb9, 0xbc, 0x0, 0xc4, 0xc6, 0x0, 0xc8,
-  0xca, 0x0, 0xcd, 0xd5, 0x0, 0xd6, 0xde, 0xe0, 0x0, 
-  0xe3, 0xe6, 0x0, 0xef, 0xf1, 0xf2 };
-static const unsigned char ud[] = {
-  0x02, 0x0, 0x03, 0x05, 
-  0x0, 0x0c, 0x0e, 0x0, 0x10, 0x12, 0x0, 0x28, 
-  0x2a, 0x0, 0x39, 0x3d, 0x0, 0x44, 0x46, 0x0, 
-  0x48, 0x4a, 0x0, 0x4d, 0x57, 0x60, 0x0, 0x63, 
-  0x66, 0x0, 0x75, 0x79, 0x0, 0x7f, 0x82, 0x0, 0x83,
-  0x85, 0x0, 0x96, 0x9a, 0x0, 0xb1, 0xb3, 0x0, 0xbb,
-  0xbd, 0xc0, 0x0, 0xc6, 0xca, 0xcf, 0x0, 0xd4, 0xd6, 
-  0xd8, 0x0, 0xdf, 0xf2, 0x0, 0xf4 };
-static const unsigned char ue[] = {
-  0x01, 0x0, 
-  0x3a, 0x3f, 0x0, 0x5b, 0x81, 0x0, 0x82, 0x84, 
-  0x87, 0x0, 0x88, 0x8a, 0x8d, 0x94, 0x0, 0x97, 
-  0x99, 0x0, 0x9f, 0xa1, 0x0, 0xa3, 0xa5, 0xa7, 
-  0xaa, 0x0, 0xab, 0xad, 0x0, 0xb9, 0xbb, 0x0, 
-  0xbd, 0xc0, 0x0, 0xc4, 0xc6, 0xc8, 0x0, 0xcd, 
-  0xd0, 0x0, 0xd9, 0xdc, 0x0, 0xdd };
-static const unsigned char uf[] = {
-  0x00, 0x0, 0x47, 0x49, 0x0, 0x6c,
-  0x71, 0x0, 0x8b, 0x90, 0x0, 0x97,
-  0x99, 0x0, 0xbc, 0xbe, 0x0, 0xcc, 
-  0xce, 0x0, 0xd8 };
-static const unsigned char u10[] = {
-  0x00, 0x0, 0xc5, 0xd0, 0x0, 0xfc };
-/* u11 is all-print */
-static const unsigned char u12[] = {
-  0x00, 0x0, 0x48, 0x4a, 0x0, 0x4d, 0x50, 0x0, 0x56, 
-  0x58, 0x5a, 0x0, 0x5d, 0x60, 0x0, 0x88, 
-  0x8a, 0x0, 0x8d, 0x90, 0x0, 0xb0, 0xb2, 
-  0x0, 0xb5, 0xb8, 0x0, 0xbe, 0xc0, 0xc2, 0x0, 
-  0xc5, 0xc8, 0x0, 0xd6, 0xd8, 0x0, 0xff };
-static const unsigned char u13[] = {
-  0x00, 0x0, 0x10, 0x12, 0x0, 0x15,
-  0x18, 0x0, 0x5a, 0x5f, 0x0, 0x7c,
-  0x80, 0x0, 0x99, 0xa0, 0x0, 0xf4 };
-/* u14 is all-print */
-/* u15 is all-print */
-static const unsigned char u16[] = {
-  0x00, 0x0, 0x9c, 0xa0, 0x0, 0xf0 };
-static const unsigned char u17[] = {
-  0x00, 0x0, 0x0c, 0x0e, 0x0, 0x14, 0x20, 
-  0x0, 0x36, 0x40, 0x0, 0x53, 0x60, 0x0, 0x6c, 
-  0x6e, 0x0, 0x70, 0x72, 0x0, 0x73, 0x80, 0x0, 
-  0xdd, 0xe0, 0x0, 0xe9, 0xf0, 0x0, 0xf9 };
-static const unsigned char u18[] = {
-  0x00, 0x0, 0x0e, 0x10, 
-  0x0, 0x19, 0x20, 0x0, 0x77, 0x80, 0x0, 0xaa,
-  0xb0, 0x0, 0xf5 };
-static const unsigned char u19[] = {
-  0x00, 0x0, 0x1c, 0x20, 0x0, 0x2b,
-  0x30, 0x0, 0x3b, 0x40, 0x44, 0x0, 0x6d,
-  0x70, 0x0, 0x74, 0x80, 0x0, 0xab,
-  0xb0, 0x0, 0xc9, 0xd0, 0x0, 0xda,
-  0xde, 0x0, 0xff };
-static const unsigned char u1a[] = {
-  0x00, 0x0, 0x1b, 0x1e, 0x0, 0x5e,
-  0x60, 0x0, 0x7c, 0x7f, 0x0, 0x89,
-  0x90, 0x0, 0x99, 0xa0, 0x0, 0xad };
-static const unsigned char u1b[] = {
-  0x00, 0x0, 0x4b, 0x50, 0x0, 0x7c,
-  0x80, 0x0, 0xaa, 0xae, 0x0, 0xb9 };
-static const unsigned char u1c[] = {
-  0x00, 0x0, 0x37, 0x3b, 0x0, 0x49,
-  0x4d, 0x0, 0x7f, 0xd0, 0x0, 0xf2 };
-static const unsigned char u1d[] = { 
-  0x00, 0x0, 0xe6, 0xfd, 0x0, 0xff };
-/* u1e is all-print */
-static const unsigned char u1f[] = {
-  0x00, 0x0, 
-  0x15, 0x18, 0x0, 0x1d, 0x20, 0x0, 0x45, 0x48, 
-  0x0, 0x4d, 0x50, 0x0, 0x57, 0x59, 0x5b, 0x5d, 
-  0x5f, 0x0, 0x7d, 0x80, 0x0, 0xb4, 0xb6, 0x0, 
-  0xc4, 0xc6, 0x0, 0xd3, 0xd6, 0x0, 0xdb, 0xdd, 
-  0x0, 0xef, 0xf2, 0x0, 0xf4, 0xf6, 0x0, 0xfe };
-static const unsigned char u20[] = { 
-  0x00, 0x0, 0x27, 0x2a, 0x0, 0x64,
-  0x6a, 0x0, 0x71, 0x74, 0x0, 0x8e, 
-  0x90, 0x0, 0x94, 0xa0, 0x0, 0xb8,
-  0xd0, 0x0, 0xf0 };
-static const unsigned char u21[] = {
-  0x00, 0x0, 0x89, 0x90, 0x0, 0xff };
-/* u22 is all-print */
-static const unsigned char u23[] = {
-  0x00, 0x0, 0xe8 };
-static const unsigned char u24[] = {
-  0x00, 0x0, 0x26, 0x40, 0x0, 0x4a, 
-  0x60, 0x0, 0xff };
-/* u25 is all-print */
-static const unsigned char u26[] = {
-  0x00, 0x0, 0xcd, 0xcf, 0x0, 0xe1,
-  0xe3, 0xe8, 0x0, 0xff };
-static const unsigned char u27[] = {
-  0x01, 0x0, 0x04, 0x06, 0x0, 0x09,
-  0x0c, 0x0, 0x27, 0x29, 0x0, 0x4b, 0x4d,
-  0x4f, 0x0, 0x52, 0x56, 0x0, 0x5e,
-  0x61, 0x0, 0x94, 0x98, 0x0, 0xaf,
-  0xb1, 0x0, 0xbe, 0xc0, 0x0, 0xca, 0xcc,
-  0xd0, 0x0, 0xff };
-/* u28 to u2a are all-print */
-static const unsigned char u2b[] = {
-  0x00, 0x0, 0x4c, 0x50, 0x0, 0x59 };
-static const unsigned char u2c[] = {
-  0x00, 0x0, 0x2e, 0x30, 0x0, 0x5e,
-  0x60, 0x0, 0xf1, 0xf9, 0x0, 0xff };
-static const unsigned char u2d[] = {
-  0x00, 0x0, 0x25, 0x30, 0x0, 0x65, 0x6f,
-  0x80, 0x0, 0x96, 0xa0, 0x0, 0xa6,
-  0xa8, 0x0, 0xae, 0xb0, 0x0, 0xb6,
-  0xb8, 0x0, 0xbe, 0xc0, 0x0, 0xc6,
-  0xc8, 0x0, 0xce, 0xd0, 0x0, 0xd6,
-  0xd8, 0x0, 0xde, 0xe0, 0x0, 0xff };
-static const unsigned char u2e[] = {
-  0x00, 0x0, 0x31, 0x80, 0x0, 0x99,
-  0x9b, 0x0, 0xf3 };
-static const unsigned char u2f[] = { 
-  0x00, 0x0, 0xd5, 0xf0, 0x0, 0xfb };
-static const unsigned char u30[] = {
-  0x00, 0x0, 
-  0x3f, 0x41, 0x0, 0x96, 0x99, 0x0, 0xff };
-static const unsigned char u31[] = {
-  0x05, 0x0, 0x2d, 0x31, 0x0, 0x8e,
-  0x90, 0x0, 0xb7, 0xc0, 0x0, 0xe3,
-  0xf0, 0x0, 0xff };
-static const unsigned char u32[] = {
-  0x00, 0x0, 0x1e, 0x20, 0x0, 0xfe };
-/* u33 to u4c is all-print */
-static const unsigned char u4d[] = { 
-  0x00, 0x0, 0xb5, 0xc0, 0x0, 0xff };
-/* u4e to u9e is all-print */
-static const unsigned char u9f[] = {
-  0x00, 0x0, 0xcb };
-/* ua0 to ua3 is all-print */
-static const unsigned char ua4[] = {
-  0x00, 0x0, 0x8c, 0x90, 0x0, 0xc6,
-  0xd0, 0x0, 0xff };
-/* ua5 is all-print */
-static const unsigned char ua6[] = {
-  0x00, 0x0, 0x2b, 0x40, 0x0, 0x5f,
-  0x62, 0x0, 0x73, 0x7c, 0x0, 0x97,
-  0xa0, 0x0, 0xf7 };
-static const unsigned char ua7[] = {
-  0x00, 0x0, 0x8c, 0xfb, 0x0, 0xff };
-static const unsigned char ua8[] = {
-  0x00, 0x0, 0x2b, 0x30, 0x0, 0x39,
-  0x40, 0x0, 0x77, 0x80, 0x0, 0xc4,
-  0xce, 0x0, 0xd9, 0xe0, 0x0, 0xfb };
-static const unsigned char ua9[] = {
-  0x00, 0x0, 0x53, 0x5f, 0x0, 0x7c,
-  0x80, 0x0, 0xcd, 0xcf, 0x0, 0xd9,
-  0xde, 0xdf };
-static const unsigned char uaa[] = {
-  0x00, 0x0, 0x36, 0x40, 0x0, 0x4d,
-  0x50, 0x0, 0x59, 0x5c, 0x0, 0x7b,
-  0x80, 0x0, 0xc2, 0xdb, 0x0, 0xdf };
-static const unsigned char uab[] = {
-  0xc0, 0x0, 0xed, 0xf0, 0x0, 0xf9 };
-/* uac to ud6 is all-print */
-static const unsigned char ud7[] = {
-  0x00, 0x0, 0xa3, 0xb0, 0x0, 0xc6,
-  0xcb, 0x0, 0xfb };
-/* ud8 to udf are UTF-16 surrogates, non-printable */
-/* ue0 to uf9 is all-print */
-static const unsigned char ufa[] = {
-  0x00, 0x0, 0x2d, 0x30, 0x0, 0x6d,
-  0x70, 0x0, 0xd9 };
-static const unsigned char ufb[] = {
-  0x00, 0x0, 0x06, 0x13, 0x0, 0x17,
-  0x1d, 0x0, 0x36, 0x38, 0x0, 0x3c,
-  0x3e, 0x40, 0x41, 0x43, 0x44, 
-  0x46, 0x0, 0xb1, 0xd3, 0x0, 0xff };
-/* ufc is all-print */
-static const unsigned char ufd[] = {
-  0x00, 0x0, 0x3f, 0x50, 0x0, 0x8f,
-  0x92, 0x0, 0xc7, 0xf0, 0x0, 0xfd };
-static const unsigned char ufe[] = {
-  0x00, 0x0, 0x19, 0x20, 0x0, 0x26,
-  0x30, 0x0, 0x52, 0x54, 0x0, 0x66,
-  0x68, 0x0, 0x6b, 0x70, 0x0, 0x74,
-  0x76, 0x0, 0xfc, 0xff };
-static const unsigned char uff[] = {
-  0x01, 0x0, 0xbe, 0xc2, 0x0, 0xc7, 0xca, 0x0, 
-  0xcf, 0xd2, 0x0, 0xd7, 0xda, 0x0, 0xdc, 0xe0, 
-  0x0, 0xe6, 0xe8, 0x0, 0xee, 0xf9, 0x0, 0xfd }; 
-static const unsigned char u100[] = {
-  0x00, 0x0, 0x0b, 0x0d, 0x0, 0x26,
-  0x28, 0x0, 0x3a, 0x3c, 0x3d, 0x3f, 0x0, 0x4d,
-  0x50, 0x0, 0x5d, 0x80, 0x0, 0xfa };
-static const unsigned char u101[] = {
-  0x00, 0x0, 0x02, 0x07, 0x0, 0x33,
-  0x37, 0x0, 0x8a, 0x90, 0x0, 0x9b,
-  0xd0, 0x0, 0xfd };
-static const unsigned char u102[] = {
-  0x80, 0x0, 0x9c, 0xa0, 0x0, 0xd0 };
-static const unsigned char u103[] = {
-  0x00, 0x0, 0x1e, 0x20, 0x0, 0x23,
-  0x30, 0x0, 0x4a, 0x80, 0x0, 0x9d,
-  0x9f, 0x0, 0xc3, 0xc8, 0x0, 0xd5 };
-static const unsigned char u104[] = {
-  0x00, 0x0, 0x9d, 0xa0, 0x0, 0xa9 };
-static const unsigned char u108[] = {
-  0x00, 0x0, 0x05, 0x08, 0x0a, 0x0, 0x35,
-  0x37, 0x38, 0x3c, 0x3f, 0x0, 0x55,
-  0x57, 0x0, 0x5f };
-static const unsigned char u109[] = {
-  0x00, 0x0, 0x1b, 0x1f, 0x0, 0x39, 0x3f };
-static const unsigned char u10a[] = {
-  0x00, 0x0, 0x03, 0x05, 0x06, 0x0c, 0x0, 0x13,
-  0x15, 0x0, 0x17, 0x19, 0x0, 0x33,
-  0x38, 0x0, 0x3a, 0x3f, 0x0, 0x47,
-  0x50, 0x0, 0x58, 0x60, 0x0, 0x7f };
-static const unsigned char u10b[] = {
-  0x00, 0x0, 0x35, 0x39, 0x0, 0x55,
-  0x58, 0x0, 0x72, 0x78, 0x0, 0x7f };
-static const unsigned char u10c[] = {
-  0x00, 0x0, 0x48 };
-static const unsigned char u10e[] = {
-  0x60, 0x0, 0x7e };
-static const unsigned char u110[] = {
-  0x80, 0x0, 0xc1 };
-/* u120 to u122 is all-print */
-static const unsigned char u123[] = {
-  0x00, 0x0, 0x6e };
-static const unsigned char u124[] = {
-  0x00, 0x0, 0x62, 0x70, 0x0, 0x73 };
-/* u130 to u133 is all-print */
-static const unsigned char u134[] = {
-  0x00, 0x0, 0x2e };
-static const unsigned char u1d0[] = {
-  0x00, 0x0, 0xf5 };
-static const unsigned char u1d1[] = {
-  0x00, 0x0, 0x26, 0x29, 0x0, 0xdd };
-static const unsigned char u1d2[] = {
-  0x00, 0x0, 0x45 };
-static const unsigned char u1d3[] = {
-  0x00, 0x0, 0x56, 0x60, 0x0, 0x71 };
-static const unsigned char u1d4[] = { 
-  0x00, 0x0, 0x54, 0x56, 0x0, 0x9c, 0x9e, 0x0, 
-  0x9f, 0xa2, 0xa5, 0x0, 0xa6, 0xa9, 0x0, 0xac, 
-  0xae, 0x0, 0xb9, 0xbb, 0xbd, 0x0, 0xc3,
-  0xc5, 0x0, 0xff };
-static const unsigned char u1d5[] = {
-  0x00, 0x0, 0x05, 0x07, 0x0, 0x0a, 
-  0x0d, 0x0, 0x14, 0x16, 0x0, 0x1c, 0x1e, 0x0, 
-  0x39, 0x3b, 0x0, 0x3e, 0x40, 0x0, 0x44, 0x46, 
-  0x4a, 0x0, 0x50, 0x52, 0x0, 0xff };
-static const unsigned char u1d6[] = {
-  0x00, 0x0, 0xa5, 0xa8, 0x0, 0xff };
-static const unsigned char u1d7[] = {
-  0x00, 0x0, 0xcb, 0xce, 0x0, 0xff };
-static const unsigned char u1f0[] = {
-  0x00, 0x0, 0x2b, 0x30, 0x0, 0x93 };
-static const unsigned char u1f1[] = {
-  0x00, 0x0, 0x0a, 0x10, 0x0, 0x2e,
-  0x31, 0x3d, 0x3f, 0x42, 0x46, 0x4a, 0x0, 0x4e,
-  0x57, 0x5f, 0x79, 0x7b, 0x7c, 0x7f, 0x8a, 0x0,
-  0x8c, 0x8d, 0x90 };
-static const unsigned char u1f2[] = {
-  0x00, 0x10, 0x0, 0x31, 0x40, 0x0, 0x48 };
-/* u200 to u2a5 is all-print */
-static const unsigned char u2a6[] = {
-  0x00, 0x0, 0xd6 };
-/* u2a7 to u2b6 is all-print */
-static const unsigned char u2b7[] = {
-  0x00, 0x0, 0x34 };
-/* u2f8 to u2f9 is all-print */
-static const unsigned char u2fa[] = {
-  0x00, 
-  0x0, 0x1d };
-static const unsigned char ue00[] = {
-  0x01, 0x20, 0x0, 0x7f };
-static const unsigned char ue01[] = {
-  0x00, 0x0, 0xef };
-/* uf00 to uffe is all-print */
-static const unsigned char ufff[] = {
-  0x00, 0x0, 0xfd };
-/* u1000 to u10fe is all-print */
-static const unsigned char u10ff[] = {
-  0x00, 0x0, 0xfd };
Thomas Wolff March 7, 2018, 11:21 p.m. | #3
From 58a9cfcb253165d7073a9ed25e143daa2e979c10 Mon Sep 17 00:00:00 2001
From: Thomas Wolff <towo@towo.net>
Date: Sun, 25 Feb 2018 17:22:34 +0100
Subject: [PATCH 4/6] use generated character data

---
 newlib/libc/ctype/categories.c  |  39 +++
 newlib/libc/ctype/categories.h  |   7 +
 newlib/libc/ctype/iswalnum.c    |   2 +-
 newlib/libc/ctype/iswalnum_l.c  |  19 +-
 newlib/libc/ctype/iswalpha.c    |  73 ++++++
 newlib/libc/ctype/iswalpha_l.c  |  17 +-
 newlib/libc/ctype/iswblank.c    |  19 +-
 newlib/libc/ctype/iswblank_l.c  |  16 +-
 newlib/libc/ctype/iswcntrl.c    |  17 +-
 newlib/libc/ctype/iswcntrl_l.c  |  16 +-
 newlib/libc/ctype/iswctype_l.c  |  37 ++-
 newlib/libc/ctype/iswdigit.c    |   3 +-
 newlib/libc/ctype/iswdigit_l.c  |   2 +-
 newlib/libc/ctype/iswgraph.c    |   3 +-
 newlib/libc/ctype/iswgraph_l.c  |  19 +-
 newlib/libc/ctype/iswlower.c    |   4 +-
 newlib/libc/ctype/iswlower_l.c  |  16 +-
 newlib/libc/ctype/iswprint.c    |  72 ++++++
 newlib/libc/ctype/iswprint_l.c  |  17 +-
 newlib/libc/ctype/iswpunct.c    |   7 +-
 newlib/libc/ctype/iswpunct_l.c  |  22 +-
 newlib/libc/ctype/iswspace.c    |  20 +-
 newlib/libc/ctype/iswspace_l.c  |  17 +-
 newlib/libc/ctype/iswupper.c    |   6 +-
 newlib/libc/ctype/iswupper_l.c  |  16 +-
 newlib/libc/ctype/iswxdigit.c   |   6 +-
 newlib/libc/ctype/jp2uc.c       |  51 +++-
 newlib/libc/ctype/local.h       |  19 +-
 newlib/libc/ctype/towctrans.c   |  16 +-
 newlib/libc/ctype/towctrans_l.c |  97 +++++++-
 newlib/libc/ctype/towlower.c    |  81 +++++++
 newlib/libc/ctype/towlower_l.c  |   7 +-
 newlib/libc/ctype/towupper.c    | 515 +---------------------------------------
 newlib/libc/ctype/towupper_l.c  |   8 +-
 34 files changed, 650 insertions(+), 639 deletions(-)
 create mode 100644 newlib/libc/ctype/categories.c
 create mode 100644 newlib/libc/ctype/categories.h
 create mode 100644 newlib/libc/ctype/iswalpha.c
 create mode 100644 newlib/libc/ctype/iswprint.c
 create mode 100644 newlib/libc/ctype/towlower.c

diff --git a/newlib/libc/ctype/categories.c b/newlib/libc/ctype/categories.c
new file mode 100644
index 0000000..db285d7
--- /dev/null
+++ b/newlib/libc/ctype/categories.c
@@ -0,0 +1,39 @@
+#include <wctype.h>
+#include "categories.h"
+
+struct _category {
+  enum category cat: 11;	/* category tag shared by the whole range */
+  unsigned int first: 21;	/* first code point of the range */
+  unsigned short delta;		/* range is [first, first + delta], inclusive */
+} __attribute__((packed));
+
+static const struct _category categories[] = {	/* bisearch_cat needs ascending order */
+#include "categories.t"
+};
+/* Find the [first, first + delta] range holding ucs; -1 if there is none. */
+static enum category
+bisearch_cat(wint_t ucs, const struct _category *table, int max)	/* max: last index */
+{
+  int min = 0;
+  int mid;
+  /* Outside the table altogether (e.g. WEOF): no category, not 0 == CAT_Cc. */
+  if (ucs < table[0].first || ucs > table[max].first + table[max].delta)
+    return -1;
+  while (max >= min)
+    {
+      mid = (min + max) / 2;
+      if (ucs > table[mid].first + table[mid].delta)
+	min = mid + 1;		/* ucs lies above this range */
+      else if (ucs < table[mid].first)
+	max = mid - 1;		/* ucs lies below this range */
+      else
+	return table[mid].cat;
+    }
+  return -1;			/* fell into a gap between two ranges */
+}
+
+enum category category(wint_t ucs)	/* table lookup; declared in categories.h */
+{
+  return bisearch_cat(ucs, categories,
+		      sizeof(categories) / sizeof(*categories) - 1);	/* last index */
+}
diff --git a/newlib/libc/ctype/categories.h b/newlib/libc/ctype/categories.h
new file mode 100644
index 0000000..271038e
--- /dev/null
+++ b/newlib/libc/ctype/categories.h
@@ -0,0 +1,7 @@
+/* category data */
+
+enum category {
+#include "categories.cat"
+};
+
+extern enum category category(wint_t ucs);
diff --git a/newlib/libc/ctype/iswalnum.c b/newlib/libc/ctype/iswalnum.c
index 45273a8..7b2cac7 100644
--- a/newlib/libc/ctype/iswalnum.c
+++ b/newlib/libc/ctype/iswalnum.c
@@ -39,5 +39,5 @@ No supporting OS subroutines are required.
 int
 iswalnum (wint_t c)
 {
-  return (iswalpha (c) || iswdigit (c));
+  return iswalnum_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswalnum_l.c b/newlib/libc/ctype/iswalnum_l.c
index e4ab3dd..8802273 100644
--- a/newlib/libc/ctype/iswalnum_l.c
+++ b/newlib/libc/ctype/iswalnum_l.c
@@ -1,10 +1,23 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswalnum_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswalpha (c) || iswdigit (c);
+#ifdef _MB_CAPABLE
+  //return iswalpha (c) || iswdigit (c);
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat == CAT_LC || cat == CAT_Lu || cat == CAT_Ll || cat == CAT_Lt
+      || cat == CAT_Lm || cat == CAT_Lo
+      || cat == CAT_Nl // Letter_Number
+      || cat == CAT_Nd // Decimal_Number
+      ;
+#else
+  return c < (wint_t)0x100 ? isalnum (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswalpha.c b/newlib/libc/ctype/iswalpha.c
new file mode 100644
index 0000000..3928772
--- /dev/null
+++ b/newlib/libc/ctype/iswalpha.c
@@ -0,0 +1,73 @@
+/* Copyright (c) 2002 Red Hat Incorporated.
+   All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+     Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+     Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+
+     The name of Red Hat Incorporated may not be used to endorse
+     or promote products derived from this software without specific
+     prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
+   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+FUNCTION
+	<<iswalpha>>, <<iswalpha_l>>---alphabetic wide character test
+
+INDEX
+	iswalpha
+
+INDEX
+	iswalpha_l
+
+SYNOPSIS
+	#include <wctype.h>
+	int iswalpha(wint_t <[c]>);
+
+	#include <wctype.h>
+	int iswalpha_l(wint_t <[c]>, locale_t <[locale]>);
+
+DESCRIPTION
+<<iswalpha>> is a function which classifies wide-character values that
+are alphabetic.
+
+<<iswalpha_l>> is like <<iswalpha>> but performs the check based on the
+locale specified by the locale object locale.  If <[locale]> is
+LC_GLOBAL_LOCALE or not a valid locale object, the behaviour is undefined.
+
+RETURNS
+<<iswalpha>>, <<iswalpha_l>> return non-zero if <[c]> is an alphabetic
+wide character.
+
+PORTABILITY
+<<iswalpha>> is C99.
+<<iswalpha_l>> is POSIX-1.2008.
+
+No supporting OS subroutines are required.
+*/
+#include <_ansi.h>
+#include <wctype.h>
+
+int
+iswalpha (wint_t c)
+{
+  return iswalpha_l (c, 0);
+}
diff --git a/newlib/libc/ctype/iswalpha_l.c b/newlib/libc/ctype/iswalpha_l.c
index efcb95a..922983e 100644
--- a/newlib/libc/ctype/iswalpha_l.c
+++ b/newlib/libc/ctype/iswalpha_l.c
@@ -1,10 +1,21 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswalpha_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswalpha (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat == CAT_LC || cat == CAT_Lu || cat == CAT_Ll || cat == CAT_Lt
+      || cat == CAT_Lm || cat == CAT_Lo
+      || cat == CAT_Nl // Letter_Number
+      ;
+#else
+  return c < (wint_t)0x100 ? isalpha (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswblank.c b/newlib/libc/ctype/iswblank.c
index ef91572..31779d2 100644
--- a/newlib/libc/ctype/iswblank.c
+++ b/newlib/libc/ctype/iswblank.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,26 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <ctype.h>
-#include <string.h>
-#include "local.h"
 
 int
 iswblank (wint_t c)
 {
-#ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-  /* Based on Unicode 5.2.  Control char 09, plus all characters
-     from general category "Zs", which are not marked as decomposition
-     type "noBreak". */
-  return (c == 0x0009 || c == 0x0020 ||
-	  c == 0x1680 || c == 0x180e ||
-	  (c >= 0x2000 && c <= 0x2006) ||
-	  (c >= 0x2008 && c <= 0x200a) ||
-	  c == 0x205f || c == 0x3000);
-#else
-  return (c < 0x100 ? isblank (c) : 0);
-#endif /* _MB_CAPABLE */
+  return iswblank_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswblank_l.c b/newlib/libc/ctype/iswblank_l.c
index 6960693..b27ed82 100644
--- a/newlib/libc/ctype/iswblank_l.c
+++ b/newlib/libc/ctype/iswblank_l.c
@@ -1,10 +1,20 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswblank_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswblank (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  // exclude "<noBreak>"?
+  return cat == CAT_Zs
+      || c == '\t';
+#else
+  return c < 0x100 ? isblank (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswcntrl.c b/newlib/libc/ctype/iswcntrl.c
index 249a0a8..d4b0147 100644
--- a/newlib/libc/ctype/iswcntrl.c
+++ b/newlib/libc/ctype/iswcntrl.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,24 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <ctype.h>
-#include <string.h>
-#include "local.h"
 
 int
 iswcntrl (wint_t c)
 {
-#ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-
-  /* Based on Unicode 5.2.  All characters from general category "Cc", "Zl",
-     and "Zp".  */
-  return ((c >= 0x0000 && c <= 0x001f) || 
-	  (c >= 0x007f && c <= 0x009f) ||
-	  c == 0x2028 || c == 0x2029);
-#else
-  return (c < 0x100 ? iscntrl (c) : 0);
-#endif /* _MB_CAPABLE */
+  return iswcntrl_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswcntrl_l.c b/newlib/libc/ctype/iswcntrl_l.c
index 37caba8..6a900a7 100644
--- a/newlib/libc/ctype/iswcntrl_l.c
+++ b/newlib/libc/ctype/iswcntrl_l.c
@@ -1,10 +1,20 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswcntrl_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswcntrl (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat == CAT_Cc
+      || cat == CAT_Zl || cat == CAT_Zp // Line/Paragraph Separator
+      ;
+#else
+  return c < 0x100 ? iscntrl (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswctype_l.c b/newlib/libc/ctype/iswctype_l.c
index d9e7b2e..506972d 100644
--- a/newlib/libc/ctype/iswctype_l.c
+++ b/newlib/libc/ctype/iswctype_l.c
@@ -1,10 +1,41 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <wctype.h>
+#include "local.h"
 
 int
 iswctype_l (wint_t c, wctype_t desc, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswctype (c, desc);
+  switch (desc)
+    {
+    case WC_ALNUM:
+      return iswalnum_l (c, locale);
+    case WC_ALPHA:
+      return iswalpha_l (c, locale);
+    case WC_BLANK:
+      return iswblank_l (c, locale);
+    case WC_CNTRL:
+      return iswcntrl_l (c, locale);
+    case WC_DIGIT:
+      return iswdigit_l (c, locale);
+    case WC_GRAPH:
+      return iswgraph_l (c, locale);
+    case WC_LOWER:
+      return iswlower_l (c, locale);
+    case WC_PRINT:
+      return iswprint_l (c, locale);
+    case WC_PUNCT:
+      return iswpunct_l (c, locale);
+    case WC_SPACE:
+      return iswspace_l (c, locale);
+    case WC_UPPER:
+      return iswupper_l (c, locale);
+    case WC_XDIGIT:
+      return iswxdigit_l (c, locale);
+    default:
+      return 0; /* eliminate warning */
+    }
+
+  /* otherwise unknown */
+  return 0;
 }
diff --git a/newlib/libc/ctype/iswdigit.c b/newlib/libc/ctype/iswdigit.c
index 2b26141..d3562f8 100644
--- a/newlib/libc/ctype/iswdigit.c
+++ b/newlib/libc/ctype/iswdigit.c
@@ -38,5 +38,6 @@ No supporting OS subroutines are required.
 int
 iswdigit (wint_t c)
 {
-  return (c >= (wint_t)'0' && c <= (wint_t)'9');
+  return c >= (wint_t)'0' && c <= (wint_t)'9';
+  // category (c) == CAT_Nd not to be included as of C-99
 }
diff --git a/newlib/libc/ctype/iswdigit_l.c b/newlib/libc/ctype/iswdigit_l.c
index 98dd94e..29de9d3 100644
--- a/newlib/libc/ctype/iswdigit_l.c
+++ b/newlib/libc/ctype/iswdigit_l.c
@@ -4,5 +4,5 @@
 int
 iswdigit_l (wint_t c, struct __locale_t *locale)
 {
-  return (c >= (wint_t)'0' && c <= (wint_t)'9');
+  return c >= (wint_t)'0' && c <= (wint_t)'9';
 }
diff --git a/newlib/libc/ctype/iswgraph.c b/newlib/libc/ctype/iswgraph.c
index e0df4aa..bb21c21 100644
--- a/newlib/libc/ctype/iswgraph.c
+++ b/newlib/libc/ctype/iswgraph.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -67,5 +68,5 @@ No supporting OS subroutines are required.
 int
 iswgraph (wint_t c)
 {
-  return (iswprint (c) && !iswspace (c));
+  return iswgraph_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswgraph_l.c b/newlib/libc/ctype/iswgraph_l.c
index 9803c18..b8a5866 100644
--- a/newlib/libc/ctype/iswgraph_l.c
+++ b/newlib/libc/ctype/iswgraph_l.c
@@ -1,10 +1,23 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswgraph_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswprint (c) && !iswspace (c);
+#ifdef _MB_CAPABLE
+  //return iswprint (c, locale) && !iswspace (c, locale);
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat != -1
+      && cat != CAT_Cc && cat != CAT_Cf
+      && cat != CAT_Cs // Surrogate
+      && cat != CAT_Zs
+      && cat != CAT_Zl && cat != CAT_Zp // Line/Paragraph Separator
+      ;
+#else
+  return iswprint_l (c, locale) && !iswspace_l (c, locale);
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswlower.c b/newlib/libc/ctype/iswlower.c
index 8b38835..e1d926b 100644
--- a/newlib/libc/ctype/iswlower.c
+++ b/newlib/libc/ctype/iswlower.c
@@ -17,7 +17,7 @@ SYNOPSIS
 
 DESCRIPTION
 <<iswlower>> is a function which classifies wide-character values that
-have uppercase translations.
+are categorized as lowercase.
 
 <<iswlower_l>> is like <<iswlower>> but performs the check based on the
 locale specified by the locale object locale.  If <[locale]> is
@@ -38,5 +38,5 @@ No supporting OS subroutines are required.
 int
 iswlower (wint_t c)
 {
-	return (towupper (c) != c);
+  return iswlower_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswlower_l.c b/newlib/libc/ctype/iswlower_l.c
index d69615b..64f77a3 100644
--- a/newlib/libc/ctype/iswlower_l.c
+++ b/newlib/libc/ctype/iswlower_l.c
@@ -1,10 +1,20 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswlower_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return (towupper (c) != c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  // The wide-character class "lower" contains at least those characters wc 
+  // which are equal to towlower(wc) and different from towupper(wc).
+  enum category cat = category (c);
+  return cat == CAT_Ll || (cat == CAT_LC && towlower (c) == c);
+#else
+  return c < 0x100 ? islower (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswprint.c b/newlib/libc/ctype/iswprint.c
new file mode 100644
index 0000000..5e468fe
--- /dev/null
+++ b/newlib/libc/ctype/iswprint.c
@@ -0,0 +1,72 @@
+/* Copyright (c) 2002 Red Hat Incorporated.
+   All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+     Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+     Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+
+     The name of Red Hat Incorporated may not be used to endorse
+     or promote products derived from this software without specific
+     prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
+   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+FUNCTION
+	<<iswprint>>, <<iswprint_l>>---printable wide character test
+
+INDEX
+	iswprint
+
+INDEX
+	iswprint_l
+
+SYNOPSIS
+	#include <wctype.h>
+	int iswprint(wint_t <[c]>);
+
+	#include <wctype.h>
+	int iswprint_l(wint_t <[c]>, locale_t <[locale]>);
+
+DESCRIPTION
+<<iswprint>> is a function which classifies wide-character values that
+are printable.
+
+<<iswprint_l>> is like <<iswprint>> but performs the check based on the
+locale specified by the locale object locale.  If <[locale]> is
+LC_GLOBAL_LOCALE or not a valid locale object, the behaviour is undefined.
+
+RETURNS
+<<iswprint>>, <<iswprint_l>> return non-zero if <[c]> is a printable wide character.
+
+PORTABILITY
+<<iswprint>> is C99.
+<<iswprint_l>> is POSIX-1.2008.
+
+No supporting OS subroutines are required.
+*/
+#include <_ansi.h>
+#include <wctype.h>
+
+int
+iswprint (wint_t c)
+{
+  return iswprint_l (c, 0);
+}
diff --git a/newlib/libc/ctype/iswprint_l.c b/newlib/libc/ctype/iswprint_l.c
index a8d8686..cdf027b 100644
--- a/newlib/libc/ctype/iswprint_l.c
+++ b/newlib/libc/ctype/iswprint_l.c
@@ -1,10 +1,21 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswprint_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswprint (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat != -1
+      && cat != CAT_Cc && cat != CAT_Cf
+      && cat != CAT_Cs // Surrogate
+      ;
+#else
+  return c < (wint_t)0x100 ? isprint (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswpunct.c b/newlib/libc/ctype/iswpunct.c
index 8ab7038..27a6d65 100644
--- a/newlib/libc/ctype/iswpunct.c
+++ b/newlib/libc/ctype/iswpunct.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,14 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <string.h>
-#include <ctype.h>
-#include "local.h"
 
 int
 iswpunct (wint_t c)
 {
-  return (!iswalnum (c) && iswgraph (c));
+  return iswpunct_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswpunct_l.c b/newlib/libc/ctype/iswpunct_l.c
index c7acc4e..4adc1ed 100644
--- a/newlib/libc/ctype/iswpunct_l.c
+++ b/newlib/libc/ctype/iswpunct_l.c
@@ -1,10 +1,26 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswpunct_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return !iswalnum (c) && iswgraph (c);
+#ifdef _MB_CAPABLE
+  //return !iswalnum (c) && iswgraph (c);
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat == CAT_Pc || cat == CAT_Pd || cat == CAT_Pe || cat == CAT_Pf || cat == CAT_Pi || cat == CAT_Po || cat == CAT_Ps
+      || cat == CAT_Sm // Math Symbols
+      // the following are included for backwards consistency:
+      || cat == CAT_Sc // Currency Symbols
+      || cat == CAT_Sk // Modifier_Symbol
+      || cat == CAT_So // Other_Symbol
+      || cat == CAT_No // Other_Number
+      ;
+#else
+  return c < (wint_t)0x100 ? ispunct (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswspace.c b/newlib/libc/ctype/iswspace.c
index ae3841a..ca6a887 100644
--- a/newlib/libc/ctype/iswspace.c
+++ b/newlib/libc/ctype/iswspace.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,27 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <ctype.h>
-#include <string.h>
-#include "local.h"
 
 int
 iswspace (wint_t c)
 {
-#ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-  /* Based on Unicode 5.2.  Control chars 09-0D, plus all characters
-     from general category "Zs", which are not marked as decomposition
-     type "noBreak". */
-  return ((c >= 0x0009 && c <= 0x000d) || c == 0x0020 ||
-	  c == 0x1680 || c == 0x180e ||
-	  (c >= 0x2000 && c <= 0x2006) ||
-	  (c >= 0x2008 && c <= 0x200a) ||
-	  c == 0x2028 || c == 0x2029 ||
-	  c == 0x205f || c == 0x3000);
-#else
-  return (c < 0x100 ? isspace (c) : 0);
-#endif /* _MB_CAPABLE */
+  return iswspace_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswspace_l.c b/newlib/libc/ctype/iswspace_l.c
index 0c30242..e7f47ee 100644
--- a/newlib/libc/ctype/iswspace_l.c
+++ b/newlib/libc/ctype/iswspace_l.c
@@ -1,10 +1,21 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswspace_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswspace (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  // exclude "<noBreak>"?
+  return cat == CAT_Zs
+      || cat == CAT_Zl || cat == CAT_Zp // Line/Paragraph Separator
+      || (c >= 0x9 && c <= 0xD);
+#else
+  return c < 0x100 ? isspace (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswupper.c b/newlib/libc/ctype/iswupper.c
index c4969a3..96b5a0c 100644
--- a/newlib/libc/ctype/iswupper.c
+++ b/newlib/libc/ctype/iswupper.c
@@ -17,14 +17,14 @@ SYNOPSIS
 
 DESCRIPTION
 <<iswupper>> is a function which classifies wide-character values that
-have uppercase translations.
+are categorized as uppercase.
 
 <<iswupper_l>> is like <<iswupper>> but performs the check based on the
 locale specified by the locale object locale.  If <[locale]> is
 LC_GLOBAL_LOCALE or not a valid locale object, the behaviour is undefined.
 
 RETURNS
-<<iswupper>>, <<iswupper_l>> return non-zero if <[c]> is a uppercase wide character.
+<<iswupper>>, <<iswupper_l>> return non-zero if <[c]> is an uppercase wide character.
 
 PORTABILITY
 <<iswupper>> is C99.
@@ -38,5 +38,5 @@ No supporting OS subroutines are required.
 int
 iswupper (wint_t c)
 {
-  return (towlower (c) != c);
+  return iswupper_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswupper_l.c b/newlib/libc/ctype/iswupper_l.c
index 2555cd0..7ce8b5e 100644
--- a/newlib/libc/ctype/iswupper_l.c
+++ b/newlib/libc/ctype/iswupper_l.c
@@ -1,10 +1,20 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswupper_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return towlower (c) != c;
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  // The wide-character class "upper" contains at least those characters wc
+  // which are equal to towupper(wc) and different from towlower(wc).
+  enum category cat = category (c);
+  return cat == CAT_Lu || (cat == CAT_LC && towupper (c) == c);
+#else
+  return c < 0x100 ? isupper (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswxdigit.c b/newlib/libc/ctype/iswxdigit.c
index 4367186..fce2a4d 100644
--- a/newlib/libc/ctype/iswxdigit.c
+++ b/newlib/libc/ctype/iswxdigit.c
@@ -38,7 +38,7 @@ No supporting OS subroutines are required.
 int
 iswxdigit (wint_t c)
 {
-  return ((c >= (wint_t)'0' && c <= (wint_t)'9') ||
-	  (c >= (wint_t)'a' && c <= (wint_t)'f') ||
-	  (c >= (wint_t)'A' && c <= (wint_t)'F'));
+  return (c >= (wint_t)'0' && c <= (wint_t)'9')
+      || (c >= (wint_t)'a' && c <= (wint_t)'f')
+      || (c >= (wint_t)'A' && c <= (wint_t)'F');
 }
diff --git a/newlib/libc/ctype/jp2uc.c b/newlib/libc/ctype/jp2uc.c
index 29eec0f..b89b5ea 100644
--- a/newlib/libc/ctype/jp2uc.c
+++ b/newlib/libc/ctype/jp2uc.c
@@ -1,7 +1,8 @@
-/* Routine to translate from Japanese characters to Unicode */
+/* Routine to translate between Japanese characters and Unicode */
 
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff: consider locale, add dummy uc2jp
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -25,7 +26,7 @@
    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
@@ -40,13 +41,15 @@
 #include <string.h>
 #include <wctype.h>
 #include "local.h"
-#include "jp2uc.h"
 
 /* Japanese encoding types supported */
 #define JP_JIS		1
 #define JP_SJIS		2
 #define JP_EUCJP	3
 
+/* Japanese to Unicode conversion routine */
+#include "jp2uc.h"
+
 static wint_t
 __jp2uc (wint_t c, int type)
 {
@@ -104,7 +107,7 @@ __jp2uc (wint_t c, int type)
       return d02f4[index];
     }
 
-  /* handle smaller ranges here */    
+  /* handle smaller ranges here */
   switch (byte1)
     {
     case 0xA1:
@@ -148,20 +151,50 @@ __jp2uc (wint_t c, int type)
       return WEOF;
     }
 
-  return WEOF; 
+  return WEOF;
+}
+
+/* Unicode to Japanese conversion routine */
+static wint_t
+__uc2jp (wint_t c, int type)
+{
+#warning back-conversion Unicode to Japanese not implemented; needed for towupper/towlower
+  return c;
 }
 
+/* Japanese to Unicode conversion; l == 0 means use the current locale charset */
 wint_t
-_jp2uc (wint_t c)
+_jp2uc_l (wint_t c, struct __locale_t * l)
 {
-  if (!strcmp (__current_locale_charset (), "JIS"))
+  const char * cs = l ? __locale_charset(l) : __current_locale_charset();
+  if (0 == strcmp (cs, "JIS"))
     c = __jp2uc (c, JP_JIS);
-  else if (!strcmp (__current_locale_charset (), "SJIS"))
+  else if (0 == strcmp (cs, "SJIS"))
     c = __jp2uc (c, JP_SJIS);
-  else if (!strcmp (__current_locale_charset (), "EUCJP"))
+  else if (0 == strcmp (cs, "EUCJP"))
     c = __jp2uc (c, JP_EUCJP);
   return c;
 }
 
+wint_t
+_jp2uc (wint_t c)
+{
+  return _jp2uc_l (c, 0);
+}
+
+/* Unicode to Japanese conversion; l == 0 means use the current locale charset */
+wint_t
+_uc2jp_l (wint_t c, struct __locale_t * l)
+{
+  const char * cs = l ? __locale_charset(l) : __current_locale_charset();
+  if (0 == strcmp (cs, "JIS"))
+    c = __uc2jp (c, JP_JIS);
+  else if (0 == strcmp (cs, "SJIS"))
+    c = __uc2jp (c, JP_SJIS);
+  else if (0 == strcmp (cs, "EUCJP"))
+    c = __uc2jp (c, JP_EUCJP);
+  return c;
+}
+
 #endif /* !__CYGWIN__ */
 #endif /* _MB_CAPABLE */
diff --git a/newlib/libc/ctype/local.h b/newlib/libc/ctype/local.h
index 62d2b15..aa8f533 100644
--- a/newlib/libc/ctype/local.h
+++ b/newlib/libc/ctype/local.h
@@ -1,3 +1,5 @@
+/* Modified (m) 2017 Thomas Wolff: fixed locale/wchar handling */
+
 /* wctrans constants */
 
 #include <_ansi.h>
@@ -21,11 +23,22 @@
 #define WC_UPPER	11
 #define WC_XDIGIT	12
 
-/* internal function to translate JP to Unicode */
+/* internal functions to translate between JP and Unicode */
+/* note this is not applicable to Cygwin, where wchar_t is always Unicode,
+   and should not be applicable to most other platforms either;
+   * platforms for which wchar_t is not Unicode should be explicitly listed
+   * the transformation should be applied to all non-Unicode locales 
+     (also Chinese, Korean, and even 8-bit locales such as *.CP1252)
+   * for towupper and towlower, the result must be back-transformed 
+     into the respective locale encoding; currently NOT IMPLEMENTED
+*/
 #ifdef __CYGWIN__
-/* Under Cygwin, the incoming wide character is already given in UTF due
-   to the requirements of the underlying OS. */
+/* Under Cygwin, wchar_t (or its extension wint_t) is Unicode */
 #define _jp2uc(c) (c)
+#define _jp2uc_l(c, l) (c)
+#define _uc2jp_l(c, l) (c)
 #else
 wint_t _jp2uc (wint_t);
+wint_t _jp2uc_l (wint_t, struct __locale_t *);
+wint_t _uc2jp_l (wint_t, struct __locale_t *);
 #endif
diff --git a/newlib/libc/ctype/towctrans.c b/newlib/libc/ctype/towctrans.c
index edbdfce..176aa3d 100644
--- a/newlib/libc/ctype/towctrans.c
+++ b/newlib/libc/ctype/towctrans.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -69,10 +70,9 @@ No supporting OS subroutines are required.
 */
 
 #include <_ansi.h>
-#include <string.h>
 #include <reent.h>
 #include <wctype.h>
-#include <errno.h>
+//#include <errno.h>
 #include "local.h"
 
 wint_t
@@ -80,13 +80,13 @@ _towctrans_r (struct _reent *r,
 	wint_t c,
 	wctrans_t w)
 {
-  if (w == WCT_TOLOWER)
-    return towlower (c);
-  else if (w == WCT_TOUPPER)
-    return towupper (c);
+  if (w == WCT_TOLOWER || w == WCT_TOUPPER)
+    return towctrans_l (c, w, 0);
   else
     {
-      r->_errno = EINVAL;
+      // skipping this because it was causing trouble (cygwin crash)
+      // and there is no errno specified for towctrans
+      //r->_errno = EINVAL;
       return c;
     }
 }
@@ -94,7 +94,7 @@ _towctrans_r (struct _reent *r,
 #ifndef _REENT_ONLY
 wint_t
 towctrans (wint_t c,
-        wctrans_t w)
+	wctrans_t w)
 {
   return _towctrans_r (_REENT, c, w);
 }
diff --git a/newlib/libc/ctype/towctrans_l.c b/newlib/libc/ctype/towctrans_l.c
index d7369e1..8da372f 100644
--- a/newlib/libc/ctype/towctrans_l.c
+++ b/newlib/libc/ctype/towctrans_l.c
@@ -1,10 +1,101 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <wctype.h>
+//#include <errno.h>
+#include "local.h"
+
+enum {EVENCAP, ODDCAP};	/* TO1 sub-kind kept in delta: parity of the capital */
+enum {TO1, TOLO, TOUP, TOBOTH};	/* values of the mode field */
+static struct caseconv_entry {
+  unsigned int first: 21;	/* first code point of the range */
+  unsigned short diff: 8;	/* last - first of the range (see last() below) */
+  unsigned char mode: 2;	/* one of TO1, TOLO, TOUP, TOBOTH */
+  int delta: 17;	/* TOLO/TOUP: offset added to c; TO1: EVENCAP or ODDCAP */
+} __attribute__ ((packed))
+caseconv_table [] = {
+#include "caseconv.t"
+};
+#define first(ce)	ce.first
+#define last(ce)	(ce.first + ce.diff)
+
+/* Binary search in table[0..max] (assumed sorted by code point) for the
+   entry whose range first..last contains ucs; returns 0 if there is none. */
+static const struct caseconv_entry *
+bisearch(wint_t ucs, const struct caseconv_entry *table, int max)
+{
+  int lo = 0, hi = max;
+  /* quick reject: ucs lies outside the span covered by the whole table */
+  if (ucs < first(table[0]) || ucs > last(table[max]))
+    return 0;
+  while (lo <= hi)
+    {
+      int probe = (lo + hi) / 2;
+      if (ucs < first(table[probe]))
+	hi = probe - 1;
+      else if (ucs > last(table[probe]))
+	lo = probe + 1;
+      else
+	return &table[probe];
+    }
+  return 0;
+}
+
+/* Lowercase counterpart of Unicode code point c, or c itself if none. */
+static wint_t
+toulower (wint_t c)
+{
+  const struct caseconv_entry * cce =
+    bisearch(c, caseconv_table,
+             sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
+  if (cce)
+    switch (cce->mode) {
+      case TOLO: return c + cce->delta;
+      case TOBOTH: return c + 1;
+      case TO1: switch (cce->delta) {
+        case EVENCAP: if (!(c & 1)) return c + 1; break;
+        case ODDCAP: if (c & 1) return c + 1; break;
+      }
+    }
+  return c; /* no entry, a TOUP entry, or a TO1 member that is not a capital */
+}
+
+/* Uppercase counterpart of Unicode code point c, or c itself if none. */
+static wint_t
+touupper (wint_t c)
+{
+  const struct caseconv_entry * cce =
+    bisearch(c, caseconv_table,
+             sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
+  if (cce)
+    switch (cce->mode) {
+      case TOUP: return c + cce->delta;
+      case TOBOTH: return c - 1;
+      case TO1: switch (cce->delta) {
+        case EVENCAP: if (c & 1) return c - 1; break;
+        case ODDCAP: if (!(c & 1)) return c - 1; break;
+      }
+    }
+  return c; /* no entry, a TOLO entry, or a TO1 member that is the capital */
+}
 
 wint_t
 towctrans_l (wint_t c, wctrans_t w, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return towctrans (c, w);
+  wint_t u = _jp2uc_l (c, locale);	/* identity macro under Cygwin; presumably c as Unicode elsewhere */
+  wint_t res;
+  if (w == WCT_TOLOWER)
+    res = toulower (u);
+  else if (w == WCT_TOUPPER)
+    res = touupper (u);
+  else
+    {
+      // skipping the errno setting that was previously involved
+      // by delegating to towctrans; it was causing trouble (cygwin crash)
+      // and there is no errno specified for towctrans
+      return c;
+    }
+  if (res != u)	/* convert back only when the case mapping changed the value */
+    return _uc2jp_l (res, locale);
+  else
+    return c;	/* unchanged: hand back the caller's original value */
 }
diff --git a/newlib/libc/ctype/towlower.c b/newlib/libc/ctype/towlower.c
new file mode 100644
index 0000000..01de1bd
--- /dev/null
+++ b/newlib/libc/ctype/towlower.c
@@ -0,0 +1,81 @@
+/* Copyright (c) 2002 Red Hat Incorporated.
+   All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+     Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+     Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+
+     The name of Red Hat Incorporated may not be used to endorse
+     or promote products derived from this software without specific
+     prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
+   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+FUNCTION
+	<<towlower>>, <<towlower_l>>---translate wide characters to lowercase
+
+INDEX
+	towlower
+
+INDEX
+	towlower_l
+
+SYNOPSIS
+	#include <wctype.h>
+	wint_t towlower(wint_t <[c]>);
+
+	#include <wctype.h>
+	wint_t towlower_l(wint_t <[c]>, locale_t <[locale]>);
+
+
+DESCRIPTION
+<<towlower>> is a function which converts uppercase wide characters to
+lowercase, leaving all other characters unchanged.
+
+<<towlower_l>> is like <<towlower>> but performs the function based on the
+locale specified by the locale object locale.  If <[locale]> is
+LC_GLOBAL_LOCALE or not a valid locale object, the behaviour is undefined.
+
+RETURNS
+<<towlower>>, <<towlower_l>> return the lowercase equivalent of <[c]> when it is
+an uppercase wide character; otherwise, they return the input character.
+
+PORTABILITY
+<<towlower>> is C99.
+<<towlower_l>> is POSIX-1.2008.
+
+No supporting OS subroutines are required.
+*/
+
+#include <_ansi.h>
+#include <ctype.h>
+#include <wctype.h>
+#include "local.h"
+
+wint_t
+towlower (wint_t c)
+{
+#ifdef _MB_CAPABLE
+  return towctrans (c, WCT_TOLOWER);	/* delegate to the generic case-mapping entry point */
+#else
+  return c < 0x00ff ? (wint_t)(tolower ((int)c)) : c;	/* below 0xff: use tolower; others pass through */
+#endif /* _MB_CAPABLE */
+}
diff --git a/newlib/libc/ctype/towlower_l.c b/newlib/libc/ctype/towlower_l.c
index 2e89ec9..46e024d 100644
--- a/newlib/libc/ctype/towlower_l.c
+++ b/newlib/libc/ctype/towlower_l.c
@@ -1,3 +1,4 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <newlib.h>
 #include <wctype.h>
@@ -6,7 +7,9 @@
 wint_t
 towlower_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
+#ifdef _MB_CAPABLE
+  return towctrans_l (c, WCT_TOLOWER, locale);
+#else
   return towlower (c);
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/towupper.c b/newlib/libc/ctype/towupper.c
index 306f72b..a60e62b 100644
--- a/newlib/libc/ctype/towupper.c
+++ b/newlib/libc/ctype/towupper.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -76,518 +77,8 @@ wint_t
 towupper (wint_t c)
 {
 #ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-  /* Based on and tested against Unicode 5.2 */
-
-  /* Expression used to filter out the characters for the below code:
-
-     awk -F\; '{ if ( $13 != "" ) print $1; }' UnicodeData.txt
-  */
-  if (c < 0x100)
-    {
-      if (c == 0x00b5)
-	return 0x039c;
-      
-      if ((c >= 0x00e0 && c <= 0x00fe && c != 0x00f7) ||
-	  (c >= 0x0061 && c <= 0x007a))
-	return (c - 0x20);
-      
-      if (c == 0xff)
-	return 0x0178;
-      
-      return c;
-    }
-  else if (c < 0x300)
-    {
-      if ((c >= 0x0101 && c <= 0x012f) ||
-	  (c >= 0x0133 && c <= 0x0137) ||
-	  (c >= 0x014b && c <= 0x0177) ||
-	  (c >= 0x01df && c <= 0x01ef) ||
-	  (c >= 0x01f9 && c <= 0x021f) ||
-	  (c >= 0x0223 && c <= 0x0233) ||
-	  (c >= 0x0247 && c <= 0x024f))
-	{
-	  if (c & 0x01)
-	    return (c - 1);
-	  return c;
-	}
-
-      if ((c >= 0x013a && c <= 0x0148) ||
-	  (c >= 0x01ce && c <= 0x01dc) ||
-	  c == 0x023c || c == 0x0242)
-	{
-	  if (!(c & 0x01))
-	    return (c - 1);
-	  return c;
-	}
-      
-      if (c == 0x0131)
-	return 0x0049;
-      
-      if (c == 0x017a || c == 0x017c || c == 0x017e)
-	return (c - 1);
-      
-      if (c >= 0x017f && c <= 0x0292)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x017f:
-	      k = 0x0053;
-	      break;
-	    case 0x0180:
-	      k = 0x0243;
-	      break;
-	    case 0x0183:
-	      k = 0x0182;
-	      break;
-	    case 0x0185:
-	      k = 0x0184;
-	      break;
-	    case 0x0188:
-	      k = 0x0187;
-	      break;
-	    case 0x018c:
-	      k = 0x018b;
-	      break;
-	    case 0x0192:
-	      k = 0x0191;
-	      break;
-	    case 0x0195:
-	      k = 0x01f6;
-	      break;
-	    case 0x0199:
-	      k = 0x0198;
-	      break;
-	    case 0x019a:
-	      k = 0x023d;
-	      break;
-	    case 0x019e:
-	      k = 0x0220;
-	      break;
-	    case 0x01a1:
-	    case 0x01a3:
-	    case 0x01a5:
-	    case 0x01a8:
-	    case 0x01ad:
-	    case 0x01b0:
-	    case 0x01b4:
-	    case 0x01b6:
-	    case 0x01b9:
-	    case 0x01bd:
-	    case 0x01c5:
-	    case 0x01c8:
-	    case 0x01cb:
-	    case 0x01f2:
-	    case 0x01f5:
-	      k = c - 1;
-	      break;
-	    case 0x01bf:
-	      k = 0x01f7;
-	      break;
-	    case 0x01c6:
-	    case 0x01c9:
-	    case 0x01cc:
-	      k = c - 2;
-	      break;
-	    case 0x01dd:
-	      k = 0x018e;
-	      break;
-	    case 0x01f3:
-	      k = 0x01f1;
-	      break;
-	    case 0x023f:
-	      k = 0x2c7e;
-	      break;
-	    case 0x0240:
-	      k = 0x2c7f;
-	      break;
-	    case 0x0250:
-	      k = 0x2c6f;
-	      break;
-	    case 0x0251:
-	      k = 0x2c6d;
-	      break;
-	    case 0x0252:
-	      k = 0x2c70;
-	      break;
-	    case 0x0253:
-	      k = 0x0181;
-	      break;
-	    case 0x0254:
-	      k = 0x0186;
-	      break;
-	    case 0x0256:
-	      k = 0x0189;
-	      break;
-	    case 0x0257:
-	      k = 0x018a;
-	      break;
-	    case 0x0259:
-	      k = 0x018f;
-	      break;
-	    case 0x025b:
-	      k = 0x0190;
-	      break;
-	    case 0x0260:
-	      k = 0x0193;
-	      break;
-	    case 0x0263:
-	      k = 0x0194;
-	      break;
-	    case 0x0268:
-	      k = 0x0197;
-	      break;
-	    case 0x0269:
-	      k = 0x0196;
-	      break;
-	    case 0x026b:
-	      k = 0x2c62;
-	      break;
-	    case 0x026f:
-	      k = 0x019c;
-	      break;
-	    case 0x0271:
-	      k = 0x2c6e;
-	      break;
-	    case 0x0272:
-	      k = 0x019d;
-	      break;
-	    case 0x0275:
-	      k = 0x019f;
-	      break;
-	    case 0x027d:
-	      k = 0x2c64;
-	      break;
-	    case 0x0280:
-	      k = 0x01a6;
-	      break;
-	    case 0x0283:
-	      k = 0x01a9;
-	      break;
-	    case 0x0288:
-	      k = 0x01ae;
-	      break;
-	    case 0x0289:
-	      k = 0x0244;
-	      break;
-	    case 0x028a:
-	      k = 0x01b1;
-	      break;
-	    case 0x028b:
-	      k = 0x01b2;
-	      break;
-	    case 0x028c:
-	      k = 0x0245;
-	      break;
-	    case 0x0292:
-	      k = 0x01b7;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x0400)
-    {
-      wint_t k;
-
-      if (c >= 0x03ad && c <= 0x03af)
-      	return (c - 0x25);
-
-      if (c >= 0x03b1 && c <= 0x03cb && c != 0x03c2)
-	return (c - 0x20);
-      
-      if (c >= 0x03d9 && c <= 0x03ef && (c & 1))
-	return (c - 1);
-
-      switch (c)
-	{
-	case 0x0345:
-	  k = 0x0399;
-	  break;
-	case 0x0371:
-	case 0x0373:
-	case 0x0377:
-	case 0x03f8:
-	case 0x03fb:
-	  k = c - 1;
-	  break;
-	case 0x037b:
-	case 0x037c:
-	case 0x037d:
-	  k = c + 0x82;
-	  break;
-	case 0x03ac:
-	  k = 0x0386;
-	  break;
-	case 0x03c2:
-	  k = 0x03a3;
-	  break;
-	case 0x03cc:
-	  k = 0x038c;
-	  break;
-	case 0x03cd:
-	case 0x03ce:
-	  k = c - 0x3f;
-	  break;
-	case 0x03d0:
-	  k = 0x0392;
-	  break;
-	case 0x03d1:
-	  k = 0x0398;
-	  break;
-	case 0x03d5:
-	  k = 0x03a6;
-	  break;
-	case 0x03d6:
-	  k = 0x03a0;
-	  break;
-	case 0x03d7:
-	  k = 0x03cf;
-	  break;
-	case 0x03f0:
-	  k = 0x039a;
-	  break;
-	case 0x03f1:
-	  k = 0x03a1;
-	  break;
-	case 0x03f2:
-	  k = 0x03f9;
-	  break;
-	case 0x03f5:
-	  k = 0x0395;
-	  break;
-	default:
-	  k = 0;
-	}
-      if (k != 0)
-	return k;
-    }
-  else if (c < 0x500)
-    {
-      if (c >= 0x0430 && c <= 0x044f)
-	return (c - 0x20);
-      
-      if (c >= 0x0450 && c <= 0x045f)
-	return (c - 0x50);
-      
-      if ((c >= 0x0461 && c <= 0x0481) ||
-	  (c >= 0x048b && c <= 0x04bf) ||
-	  (c >= 0x04d1 && c <= 0x04ff))
-	{
-	  if (c & 0x01)
-	    return (c - 1);
-	  return c;
-	}
-      
-      if (c >= 0x04c2 && c <= 0x04ce)
-	{
-	  if (!(c & 0x01))
-	    return (c - 1);
-	  return c;
-	}
-      
-      if (c == 0x04cf)
-      	return 0x04c0;
-    }
-  else if (c < 0x0600)
-    {
-      if (c >= 0x0501 && c <= 0x0525 && (c & 1))
-      	return c - 1;
-
-      if (c >= 0x0561 && c <= 0x0586)
-	return (c - 0x30);
-    }
-  else if (c < 0x1f00)
-    {
-      if (c == 0x1d79)
-      	return 0xa77d;
-
-      if (c == 0x1d7d)
-      	return 0x2c63;
-
-      if ((c >= 0x1e01 && c <= 0x1e95) ||
-	  (c >= 0x1ea1 && c <= 0x1eff))
-	{
-	  if (c & 0x01)
-	    return (c - 1);
-	  return c;
-	}
-      
-      if (c == 0x1e9b)
-	return 0x1e60;
-    }
-  else if (c < 0x2000)
-    {
-      
-      if ((c >= 0x1f00 && c <= 0x1f07) ||
-	  (c >= 0x1f10 && c <= 0x1f15) ||
-	  (c >= 0x1f20 && c <= 0x1f27) ||
-	  (c >= 0x1f30 && c <= 0x1f37) ||
-	  (c >= 0x1f40 && c <= 0x1f45) ||
-	  (c >= 0x1f60 && c <= 0x1f67) ||
-	  (c >= 0x1f80 && c <= 0x1f87) ||
-	  (c >= 0x1f90 && c <= 0x1f97) ||
-	  (c >= 0x1fa0 && c <= 0x1fa7))
-	return (c + 0x08);
-
-      if (c >= 0x1f51 && c <= 0x1f57 && (c & 0x01))
-	return (c + 0x08);
-      
-      if (c >= 0x1f70 && c <= 0x1ff3)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x1fb0:
-	      k = 0x1fb8;
-	      break;
-	    case 0x1fb1:
-	      k = 0x1fb9;
-	      break;
-	    case 0x1f70:
-	      k = 0x1fba;
-	      break;
-	    case 0x1f71:
-	      k = 0x1fbb;
-	      break;
-	    case 0x1fb3:
-	      k = 0x1fbc;
-	      break;
-	    case 0x1fbe:
-	      k = 0x0399;
-	      break;
-	    case 0x1f72:
-	      k = 0x1fc8;
-	      break;
-	    case 0x1f73:
-	      k = 0x1fc9;
-	      break;
-	    case 0x1f74:
-	      k = 0x1fca;
-	      break;
-	    case 0x1f75:
-	      k = 0x1fcb;
-	      break;
-	    case 0x1fc3:
-	      k = 0x1fcc;
-	      break;
-	    case 0x1fd0:
-	      k = 0x1fd8;
-	      break;
-	    case 0x1fd1:
-	      k = 0x1fd9;
-	      break;
-	    case 0x1f76:
-	      k = 0x1fda;
-	      break;
-	    case 0x1f77:
-	      k = 0x1fdb;
-	      break;
-	    case 0x1fe0:
-	      k = 0x1fe8;
-	      break;
-	    case 0x1fe1:
-	      k = 0x1fe9;
-	      break;
-	    case 0x1f7a:
-	      k = 0x1fea;
-	      break;
-	    case 0x1f7b:
-	      k = 0x1feb;
-	      break;
-	    case 0x1fe5:
-	      k = 0x1fec;
-	      break;
-	    case 0x1f78:
-	      k = 0x1ff8;
-	      break;
-	    case 0x1f79:
-	      k = 0x1ff9;
-	      break;
-	    case 0x1f7c:
-	      k = 0x1ffa;
-	      break;
-	    case 0x1f7d:
-	      k = 0x1ffb;
-	      break;
-	    case 0x1ff3:
-	      k = 0x1ffc;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x3000)
-    {
-      if (c == 0x214e)
-      	return 0x2132;
-
-      if (c == 0x2184)
-      	return 0x2183;
-
-      if (c >= 0x2170 && c <= 0x217f)
-	return (c - 0x10);
-      
-      if (c >= 0x24d0 && c <= 0x24e9)
-	return (c - 0x1a);
-      
-      if (c >= 0x2c30 && c <= 0x2c5e)
-	return (c - 0x30);
-
-      if ((c >= 0x2c68 && c <= 0x2c6c && !(c & 1)) ||
-	  (c >= 0x2c81 && c <= 0x2ce3 &&  (c & 1)) ||
-	  c == 0x2c73 || c == 0x2c76 ||
-	  c == 0x2cec || c == 0x2cee)
-      	return (c - 1);
-
-      if (c >= 0x2c81 && c <= 0x2ce3 && (c & 1))
-	return (c - 1);
-
-      if (c >= 0x2d00 && c <= 0x2d25)
-      	return (c - 0x1c60);
-
-      switch (c)
-      	{
-	case 0x2c61:
-	  return 0x2c60;
-	case 0x2c65:
-	  return 0x023a;
-	case 0x2c66:
-	  return 0x023e;
-	}
-    }
-  else if (c >= 0xa000 && c < 0xb000)
-    {
-      if (((c >= 0xa641 && c <= 0xa65f) ||
-           (c >= 0xa663 && c <= 0xa66d) ||
-           (c >= 0xa681 && c <= 0xa697) ||
-           (c >= 0xa723 && c <= 0xa72f) ||
-           (c >= 0xa733 && c <= 0xa76f) ||
-           (c >= 0xa77f && c <= 0xa787)) &&
-	  (c & 1))
-	return (c - 1);
-      	
-      if (c == 0xa77a || c == 0xa77c || c == 0xa78c)
-	return (c - 1);
-    }
-  else
-    {
-      if (c >= 0xff41 && c <= 0xff5a)
-	return (c - 0x20);
-      
-      if (c >= 0x10428 && c <= 0x1044f)
-	return (c - 0x28);
-    }
-  return c;
+  return towctrans (c, WCT_TOUPPER);
 #else
-  return (c < 0x00ff ? (wint_t)(toupper ((int)c)) : c);
+  return c < 0x00ff ? (wint_t)(toupper ((int)c)) : c;
 #endif /* _MB_CAPABLE */
 }
-
diff --git a/newlib/libc/ctype/towupper_l.c b/newlib/libc/ctype/towupper_l.c
index 5a8384c..d7c1adb 100644
--- a/newlib/libc/ctype/towupper_l.c
+++ b/newlib/libc/ctype/towupper_l.c
@@ -1,10 +1,14 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <wctype.h>
+#include "local.h"
 
 wint_t
 towupper_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
+#ifdef _MB_CAPABLE
+  return towctrans_l (c, WCT_TOUPPER, locale);
+#else
   return towupper (c);
+#endif /* _MB_CAPABLE */
 }
Thomas Wolff March 7, 2018, 11:21 p.m. | #4
From 00403bc7a6c5d32026dd1e118bbb4f0d0d998c67 Mon Sep 17 00:00:00 2001
From: Thomas Wolff <towo@towo.net>
Date: Mon, 26 Feb 2018 22:08:44 +0100
Subject: [PATCH 5/6] use generated character data, Makefiles

---
 newlib/libc/ctype/Makefile.am |  1 +
 newlib/libc/ctype/Makefile.in | 12 ++++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/newlib/libc/ctype/Makefile.am b/newlib/libc/ctype/Makefile.am
index 8986935..fa6a70d 100644
--- a/newlib/libc/ctype/Makefile.am
+++ b/newlib/libc/ctype/Makefile.am
@@ -24,6 +24,7 @@ if ELIX_LEVEL_1
 ELIX_SOURCES =
 else
 ELIX_SOURCES = \
+	categories.c	\
 	isalnum_l.c	\
 	isalpha_l.c	\
 	isascii.c 	\
diff --git a/newlib/libc/ctype/Makefile.in b/newlib/libc/ctype/Makefile.in
index 2b23317..9932a94 100644
--- a/newlib/libc/ctype/Makefile.in
+++ b/newlib/libc/ctype/Makefile.in
@@ -79,7 +79,8 @@ am__objects_1 = lib_a-ctype_.$(OBJEXT) lib_a-isalnum.$(OBJEXT) \
 	lib_a-ispunct.$(OBJEXT) lib_a-isspace.$(OBJEXT) \
 	lib_a-isxdigit.$(OBJEXT) lib_a-tolower.$(OBJEXT) \
 	lib_a-toupper.$(OBJEXT)
-@ELIX_LEVEL_1_FALSE@am__objects_2 = lib_a-isalnum_l.$(OBJEXT) \
+@ELIX_LEVEL_1_FALSE@am__objects_2 = lib_a-categories.$(OBJEXT) \
+@ELIX_LEVEL_1_FALSE@	lib_a-isalnum_l.$(OBJEXT) \
 @ELIX_LEVEL_1_FALSE@	lib_a-isalpha_l.$(OBJEXT) \
 @ELIX_LEVEL_1_FALSE@	lib_a-isascii.$(OBJEXT) \
 @ELIX_LEVEL_1_FALSE@	lib_a-isascii_l.$(OBJEXT) \
@@ -142,7 +143,7 @@ libctype_la_LIBADD =
 am__objects_3 = ctype_.lo isalnum.lo isalpha.lo iscntrl.lo isdigit.lo \
 	islower.lo isupper.lo isprint.lo ispunct.lo isspace.lo \
 	isxdigit.lo tolower.lo toupper.lo
-@ELIX_LEVEL_1_FALSE@am__objects_4 = isalnum_l.lo isalpha_l.lo \
+@ELIX_LEVEL_1_FALSE@am__objects_4 = categories.lo isalnum_l.lo isalpha_l.lo \
 @ELIX_LEVEL_1_FALSE@	isascii.lo isascii_l.lo isblank.lo \
 @ELIX_LEVEL_1_FALSE@	isblank_l.lo iscntrl_l.lo isdigit_l.lo \
 @ELIX_LEVEL_1_FALSE@	islower_l.lo isupper_l.lo isprint_l.lo \
@@ -351,6 +352,7 @@ GENERAL_SOURCES = \
 	toupper.c
 
 @ELIX_LEVEL_1_FALSE@ELIX_SOURCES = \
+@ELIX_LEVEL_1_FALSE@	categories.c	\
 @ELIX_LEVEL_1_FALSE@	isalnum_l.c	\
 @ELIX_LEVEL_1_FALSE@	isalpha_l.c	\
 @ELIX_LEVEL_1_FALSE@	isascii.c 	\
@@ -609,6 +611,12 @@ lib_a-toupper.o: toupper.c
 lib_a-toupper.obj: toupper.c
 	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-toupper.obj `if test -f 'toupper.c'; then $(CYGPATH_W) 'toupper.c'; else $(CYGPATH_W) '$(srcdir)/toupper.c'; fi`
 
+lib_a-categories.o: categories.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-categories.o `test -f 'categories.c' || echo '$(srcdir)/'`categories.c
+
+lib_a-categories.obj: categories.c
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-categories.obj `if test -f 'categories.c'; then $(CYGPATH_W) 'categories.c'; else $(CYGPATH_W) '$(srcdir)/categories.c'; fi`
+
 lib_a-isalnum_l.o: isalnum_l.c
 	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-isalnum_l.o `test -f 'isalnum_l.c' || echo '$(srcdir)/'`isalnum_l.c
Thomas Wolff March 7, 2018, 11:21 p.m. | #5
From 46e8063732f0589d2470c02d5dfab92d75b8e2ca Mon Sep 17 00:00:00 2001
From: Thomas Wolff <towo@towo.net>
Date: Thu, 8 Mar 2018 00:02:35 +0100
Subject: [PATCH 6/6] character data generation

---
 newlib/libc/ctype/mkcaseconv   | 128 +++++++++++++++++++++++++++++++++++++++++
 newlib/libc/ctype/mkcategories |  69 ++++++++++++++++++++++
 newlib/libc/ctype/mkunidata    |  40 +++++++++++++
 3 files changed, 237 insertions(+)
 create mode 100755 newlib/libc/ctype/mkcaseconv
 create mode 100755 newlib/libc/ctype/mkcategories
 create mode 100755 newlib/libc/ctype/mkunidata

diff --git a/newlib/libc/ctype/mkcaseconv b/newlib/libc/ctype/mkcaseconv
new file mode 100755
index 0000000..a5a23a9
--- /dev/null
+++ b/newlib/libc/ctype/mkcaseconv
@@ -0,0 +1,128 @@
+#! /bin/sh -f
+
+# generate a table for Unicode case conversion; entries:
+# struct caseconv_entry defined in towctrans_l.c
+
+if [ -r UnicodeData.txt ]
+then	UnicodeData=UnicodeData.txt
+elif [ -r /usr/share/unicode/ucd/UnicodeData.txt ]
+then	UnicodeData=/usr/share/unicode/ucd/UnicodeData.txt
+else	echo UnicodeData.txt not found >&2
+	exit 1
+fi
+
+LC_ALL=C
+export LC_ALL
+
+compact=true
+
+#0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061;
+#0061;LATIN SMALL LETTER A;Ll;0;L;;;;;N;;;0041;;0041
+#0130;LATIN CAPITAL LETTER I WITH DOT ABOVE;Lu;0;L;0049 0307;;;;N;LATIN CAPITAL LETTER I DOT;;;0069;
+#01C4;LATIN CAPITAL LETTER DZ WITH CARON;Lu;0;L;<compat> 0044 017D;;;;N;LATIN CAPITAL LETTER D Z HACEK;;;01C6;01C5
+#01C5;LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON;Lt;0;L;<compat> 0044 017E;;;;N;LATIN LETTER CAPITAL D SMALL Z HACEK;;01C4;01C6;01C5
+#01C6;LATIN SMALL LETTER DZ WITH CARON;Ll;0;L;<compat> 0064 017E;;;;N;LATIN SMALL LETTER D Z HACEK;;01C4;;01C5
+
+tr -d '\015' < $UnicodeData |
+sed \
+-e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;][^;]*\);\([^;]*\);\([^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
+-e t \
+-e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);\([^;][^;]*\);\([^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
+-e t \
+-e 's,^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);\([^;]*\);\([^;][^;]*\)$,src \1 upper "\2" lower "\3" title "\4",' \
+-e t \
+-e d |
+(#src 01C5 upper "01C4" lower "01C6" title "01C5"
+if $compact
+then
+  (
+  cat <<\/EOS
+  src () {
+    if [ -n "$3" ]
+    then	tohi=$(( 0x0$3 - 0x0$1 ))
+    else	tohi=0
+    fi
+    if [ -n "$5" ]
+    then	tolo=$(( 0x0$5 - 0x0$1 ))
+    else	tolo=0
+    fi
+    case "$tolo.$tohi" in
+    0.0)	true;;
+    0.*)
+    	case "$1.$tohi" in
+    	*[02468ACE].1)	echo "'#error' U+$1 ODDSML";;
+    	*[02468ACE].-1)	echo "  0x$1 TO1 ODDCAP";;
+    	*[13579BDF].1)	echo "'#error' U+$1 EVENSML";;
+    	*[13579BDF].-1)	echo "  0x$1 TO1 EVENCAP";;
+    	*)		echo "  0x$1 TOUP $tohi";;
+    	esac;;
+    *.0)
+    	case "$1.$tolo" in
+    	*[02468ACE].1)	echo "  0x$1 TO1 EVENCAP";;
+    	*[02468ACE].-1)	echo "'#error' U+$1 EVENSML";;
+    	*[13579BDF].1)	echo "  0x$1 TO1 ODDCAP";;
+    	*[13579BDF].-1)	echo "'#error' U+$1 ODDSML";;
+    	*)		echo "  0x$1 TOLO $tolo";;
+    	esac;;
+    *)	case "$tolo.$tohi" in
+    	1.-1)		echo "  0x$1 TOBOTH 0";;
+    	*)		echo "'#error' U+$1";;
+    	esac;;
+    esac
+  }
+/EOS
+  cat
+  ) | sh |
+  uniq -f1 --group=append | sed -e "s,^$,range," -e t -e "s,^,item ," |
+  (
+  cat <<\/EOS
+  first=
+  diff=-1
+  max=255
+  range () {
+	# $diff == $(($last - $first))
+	if [ "$diff" -ge 0 ]
+	then	# we have items at all
+		echo "  {$first, $diff, $v2, $v3},"
+	fi
+	first=
+	diff=-1
+  }
+  item () {
+	if [ "$1" = "#error" ]	# POSIX test has "=", not "=="; pass markers through
+	then	echo "$*"
+		return
+	fi
+
+	if [ $diff -eq $max ]	# range is full: emit it before adding another item
+	then	range
+	elif [ -n "$first" ]
+	then	if [ $(( $1 )) -ne $(( ${last-0} + 1 )) ]	# not contiguous with last
+		then	range
+		fi
+	fi
+
+	if [ -z "$first" ]	# no open range: this item starts a new one
+	then	first=$1
+		v2=$2
+		v3=$3
+	fi
+
+	last=$1
+	diff=$(( $diff + 1 ))
+  }
+/EOS
+  cat
+  ) | sh
+elif false
+then
+  sed -e 's/src \([^ ]*\) upper "\([^ ]*\)" lower "\([^ ]*\)" title "\([^ ]*\)"/  {0x\1, 0x\2 - 0x\1, 0x\3 - 0x\1},/' \
+      -e 's/0x - 0x[^ ,}]*/0/g' -e 's/0x}/0}/' \
+      -e 's/\(0x[0-9A-F][0-9A-F]*\) - \(0x[0-9A-F][0-9A-F]*\)/$((`printf %d \1` - `printf %d \2`))/g' \
+      -e 's/^/echo "/' -e 's/$/"/' |
+  sh
+else
+  sed -e 's/src \([^ ]*\) upper "\([^ ]*\)" lower "\([^ ]*\)" title "\([^ ]*\)"/  {0x\1, 0x\2 - 0x\1, 0x\3 - 0x\1},/' \
+      -e 's/0x - 0x[^ ,}]*/0/g' -e 's/0x}/0}/'
+fi
+) > caseconv.t
diff --git a/newlib/libc/ctype/mkcategories b/newlib/libc/ctype/mkcategories
new file mode 100755
index 0000000..9aceeeb
--- /dev/null
+++ b/newlib/libc/ctype/mkcategories
@@ -0,0 +1,69 @@
+#! /bin/sh
+
+# generate table of Unicode character category ranges;
+# note: undefined characters between two characters of the same category 
+# are associated to the same category, e.g.
+#0A0A;GURMUKHI LETTER UU;Lo
+#0A0B..0A0E           -> Lo
+#0A0F;GURMUKHI LETTER EE;Lo
+
+if [ -r UnicodeData.txt ]
+then	UnicodeData=UnicodeData.txt
+elif [ -r /usr/share/unicode/ucd/UnicodeData.txt ]
+then	UnicodeData=/usr/share/unicode/ucd/UnicodeData.txt
+else	echo UnicodeData.txt not found >&2
+	exit 1
+fi
+
+# the code assumes foldall=false, foldcase=true
+foldall=false
+foldcase=true
+
+(
+cat <<\/EOS
+first=
+item () {
+	if [ -n "$first" ]
+	then	if [ $(( 0x$1 )) -ne $(( 0x${last-0} + 1 )) ]
+		then	range
+		fi
+	fi
+
+	if [ -z "$first" ]
+	then	first=$1
+		val=$2
+	fi
+
+	last=$1
+}
+range () {
+#	echo "    {0x$first, 0x$last, CAT_$val},"
+#	echo "    {0x$first, $((0x$last - 0x$first)), CAT_$val},"
+#	echo "    {0x$first | (CAT_$val << 24), $((0x$last - 0x$first))},"
+	echo "    {CAT_$val, 0x$first, $((0x$last - 0x$first))},"
+	first=
+}
+/EOS
+
+cat "$UnicodeData" |
+if $foldall
+then sed -e "s,;L[lu];,;LC;," -e "s,;C[fs];,;Cfs;," \
+	 -e "s,;L[mo];,;Lmo;," -e "s,;Nl;,;Lmo;," \
+	 -e "s,;P.;,;P;,"  -e "s,;No;,;P;," \
+	 -e "s,;S.;,;S;," -e "s,;Z[lp];,;Zlp;," \
+	 -e "s,;C[no];,;X;," -e "s,;M[cen];,;M;,"
+elif $foldcase
+then
+# fold Lu/Ll to LC only if lower/upper conversion is available
+ sed -e '/^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);\([^;][^;]*\);.*/ s/;Lu;/;LC;/' \
+     -e '/^\([^;]*\);[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;[^;]*;\([^;][^;]*\);\([^;]*\);.*/ s/;Ll;/;LC;/' \
+     -e '/;Co;/ d'
+else cat
+fi |
+sed -e "s,^\([^;]*\);[^;]*;\([^;]*\);.*,\1	\2," |
+uniq -f1 --group=append | sed -e "s,^$,range," -e t -e "s,^,item ,"
+) | sh > categories.t
+
+sed -e "s/.*\(CAT_[A-Za-z]*\).*/  \1,/" categories.t |
+sort | uniq > categories.cat
+
diff --git a/newlib/libc/ctype/mkunidata b/newlib/libc/ctype/mkunidata
new file mode 100755
index 0000000..ea18e67
--- /dev/null
+++ b/newlib/libc/ctype/mkunidata
@@ -0,0 +1,40 @@
+#! /bin/sh
+
+echo generating Unicode character properties data for newlib/libc/ctype
+
+cd `dirname $0`
+
+#############################################################################
+# checks and (with option -u) download
+
+case "$1" in
+-u)
+	# -R keeps the remote timestamp, -O saves under the remote file name;
+	# "-z FILE" (added per file below) only fetches if newer than FILE.
+	WGET="curl -R -O --connect-timeout 55"
+
+	echo downloading data from unicode.org
+	for data in UnicodeData.txt
+	do	$WGET -z "$data" "http://unicode.org/Public/UNIDATA/$data"
+	done
+	;;
+*)	echo checking package unicode-ucd
+	grep unicode-ucd /etc/setup/installed.db || exit 9
+	;;
+esac
+
+for data in UnicodeData.txt
+do	test -r $data || ln -s /usr/share/unicode/ucd/$data . || exit 9
+done
+
+#############################################################################
+# table generation
+
+echo generating character category table for "isw*.c"
+	sh ./mkcategories
+
+echo generating case conversion table for "tow*.c"
+	sh ./mkcaseconv
+
+#############################################################################
+# end
Corinna Vinschen March 8, 2018, 8:04 a.m. | #6
On Mar  8 00:20, Thomas Wolff wrote:
> 


> From 15999d30c011be9041821456d23807249981dd86 Mon Sep 17 00:00:00 2001

> From: Thomas Wolff <towo@towo.net>

> Date: Sun, 25 Feb 2018 17:11:44 +0100

> Subject: [PATCH 3/6] remove hard-coded character data


Sorry, but no.  I like the idea of this and the followup patch in that
you call, e.g., iswalpha_l from iswalpha, rather than vice versa as
today.  However, don't delete and re-introduce files, just overwrite
them with the new code, for sake of a sane history.

Also, these two patches(*) should get a descriptive text in the git log.
Especially the fact that it turns around the calling order is worth a
couple of words.


Thanks,
Corinna

(*) ...which should actually be only one as outlined above.  The
    easiest way to accomplish that is probably an interactive rebase
    with squashing the second patch into the first.

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Thomas Wolff March 8, 2018, 10:05 p.m. | #7
Am 08.03.2018 um 09:04 schrieb Corinna Vinschen:
> On Mar  8 00:20, Thomas Wolff wrote:

>>  From 15999d30c011be9041821456d23807249981dd86 Mon Sep 17 00:00:00 2001

>> From: Thomas Wolff <towo@towo.net>

>> Date: Sun, 25 Feb 2018 17:11:44 +0100

>> Subject: [PATCH 3/6] remove hard-coded character data

> Sorry, but no.  ... don't delete and re-introduce files, just overwrite

> them with the new code, for sake of a sane history.

The only reason for the split was that I recall a 64KB mail size
limitation on this mailing list;
given that git format-patch quotes all deleted lines, this would have
been exceeded,
so how should we handle this? Would format-patch option -D be
acceptable? Can you drop that limitation?

> Also, these two patches(*) should get a descriptive text in the git log.

> Especially the fact that it turns around the calling order is worth a

> couple of words.

OK.

> Thanks,

> Corinna

>

> (*) ...which should actually be only one as outlined above.  The

>      easiest way to accomplish that is probably an interactive rebase

>      with squashing the second patch into the first.

As I'm not a master at git fiddling, for me the easiest way is rather
poking the files into a fresh git clone...
To be honest, I think this git format-patch is quite bothersome when
things need to be iterated...
Maybe you'd like to merge these two patches? I'll contribute an
additional description of course.

Thomas


---
Diese E-Mail wurde von Avast Antivirus-Software auf Viren geprüft.
https://www.avast.com/antivirus
Thomas Wolff March 9, 2018, 7:11 a.m. | #8
Am 08.03.2018 um 23:05 schrieb Thomas Wolff:
> Am 08.03.2018 um 09:04 schrieb Corinna Vinschen:

>> On Mar  8 00:20, Thomas Wolff wrote:

>>>  From 15999d30c011be9041821456d23807249981dd86 Mon Sep 17 00:00:00 2001

>>> From: Thomas Wolff <towo@towo.net>

>>> Date: Sun, 25 Feb 2018 17:11:44 +0100

>>> Subject: [PATCH 3/6] remove hard-coded character data

>> Sorry, but no.  ... don't delete and re-introduce files, just overwrite

>> them with the new code, for sake of a sane history.

> The only reason for the split was that I recall a 64KB mail size

> limitation on this mailing list;

> given that git format-patch quotes all deleted lines, this would have

> been exceeded,

> so how should we handle this? Would format-patch option -D be

> acceptable? Can you drop that limitation?

>

>> Also, these two patches(*) should get a descriptive text in the git log.

>> Especially the fact that it turns around the calling order is worth a

>> couple of words.

> OK.

Extended commit description:

The tow* functions use an included case conversion table which can be
generated from Unicode data.
The isw* functions use a character categories table (provided by 
categories.c)
which can be generated from Unicode data.
Delegation between current-locale and locale-dependent functions was
reversed: the current-locale functions now call the generic
locale-dependent functions; this is however only relevant on systems
with non-Unicode wide character locales, thus not on Cygwin.

>

>> Thanks,

>> Corinna

>>

>> (*) ...which should actually be only one as outlined above.  The

>>      easiest way to accomplish that is probably an interactive rebase

>>      with squashing the second patch into the first.

> As I'm not a master at git fiddling, for me the easiest way is rather

> poking the files into a fresh git clone...

> To be honest, I think this git format-patch is quite bothersome when

> things need to be iterated...

> Maybe you'd like to merge these two patches? I'll contribute an

> additional description of course.

>

> Thomas

>

>

> ---

> Diese E-Mail wurde von Avast Antivirus-Software auf Viren geprüft.

> https://www.avast.com/antivirus
Corinna Vinschen March 9, 2018, 8:48 a.m. | #9
On Mar  8 23:05, Thomas Wolff wrote:
> Am 08.03.2018 um 09:04 schrieb Corinna Vinschen:

> > On Mar  8 00:20, Thomas Wolff wrote:

> > >  From 15999d30c011be9041821456d23807249981dd86 Mon Sep 17 00:00:00 2001

> > > From: Thomas Wolff <towo@towo.net>

> > > Date: Sun, 25 Feb 2018 17:11:44 +0100

> > > Subject: [PATCH 3/6] remove hard-coded character data

> > Sorry, but no.  ... don't delete and re-introduce files, just overwrite

> > them with the new code, for sake of a sane history.

> The only reason for the split was that I recall a 64KB mail size

> limitation on this mailing list;


There is no 64K limit.  The limit is 1 Meg.


Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Corinna Vinschen March 9, 2018, 11:06 a.m. | #10
On Mar  8 23:05, Thomas Wolff wrote:
> Am 08.03.2018 um 09:04 schrieb Corinna Vinschen:

> > On Mar  8 00:20, Thomas Wolff wrote:

> > >  From 15999d30c011be9041821456d23807249981dd86 Mon Sep 17 00:00:00 2001

> > > From: Thomas Wolff <towo@towo.net>

> > > Date: Sun, 25 Feb 2018 17:11:44 +0100

> > > Subject: [PATCH 3/6] remove hard-coded character data

> > Sorry, but no.  ... don't delete and re-introduce files, just overwrite

> > them with the new code, for sake of a sane history.

> The only reason for the split was that I recall a 64KB mail size

> limitation on this mailing list;

> given that git format-patch quotes all deleted lines, this would have

> been exceeded,

> so how should we handle this? Would format-patch option -D be

> acceptable? Can you drop that limitation?

> 

> > Also, these two patches(*) should get a descriptive text in the git log.

> > Especially the fact that it turns around the calling order is worth a

> > couple of words.

> OK.

> 

> > Thanks,

> > Corinna

> > 

> > (*) ...which should actually be only one as outlined above.  The

> >      easiest way to accomplish that is probably an interactive rebase

> >      with squashing the second patch into the first.

> As I'm not a master at git fiddling, for me the easiest way is rather

> poking the files into a fresh git clone...


Btw., why don't you just try my suggestion?  The beauty of git is that
you can test and try in a separate branch locally without breaking
anything important in a jiffy.  `git rebase --interactive` is a wonderful
instrument, well worth learning.

Here's an excellent online book for learning git:

  https://git-scm.com/book/en/v2

also available in German:

  https://git-scm.com/book/de/v2

> To be honest, I think this git format-patch is quite bothersome when

> things need to be iterated...


Not in conjunction with git send-email...


Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Thomas Wolff March 9, 2018, 10:54 p.m. | #11
Am 09.03.2018 um 08:11 schrieb Thomas Wolff:
> Am 08.03.2018 um 23:05 schrieb Thomas Wolff:

>> Am 08.03.2018 um 09:04 schrieb Corinna Vinschen:

>>> On Mar  8 00:20, Thomas Wolff wrote:

>>>>  From 15999d30c011be9041821456d23807249981dd86 Mon Sep 17 00:00:00 

>>>> 2001

>>>> From: Thomas Wolff <towo@towo.net>

>>>> Date: Sun, 25 Feb 2018 17:11:44 +0100

>>>> Subject: [PATCH 3/6] remove hard-coded character data

>>> Sorry, but no.  ... don't delete and re-introduce files, just overwrite

>>> them with the new code, for sake of a sane history.

>> The only reason for the split was that I recall a 64KB mail size

>> limitation on this mailing list;

>> given that git format-patch quotes all deleted lines, this would have

>> been exceeded,

>> so how should we handle this? Would format-patch option -D be

>> acceptable? Can you drop that limitation?

>>

>>> Also, these two patches(*) should get a descriptive text in the git 

>>> log.

>>> Especially the fact that it turns around the calling order is worth a

>>> couple of words.

>> OK.

> Extended commit description:

>

> The tow* functions use an included case conversion table which can be

> generated from Unicode data.

> The isw* functions use a character categories table (provided by 

> categories.c)

> which can be generated from Unicode data.

> Delegation between current-locale and locale-dependent functions was

> reverted towards the generic locale-dependent functions; this is however

> only relevant on systems with non-Unicode wide character locales,

> thus not on Cygwin.

>

>>

>>> Thanks,

>>> Corinna

>>>

>>> (*) ...which should actually be only one as outlined above. The

>>>      easiest way to accomplish that is probably an interactive rebase

>>>      with squashing the second patch into the first.

>> As I'm not a master at git fiddling, for me the easiest way is rather 

>> poking the files into a fresh git clone...

Here is the updated common ctype patch, replacing previous patches 3/6 
and 4/6.
Thomas


---
Diese E-Mail wurde von Avast Antivirus-Software auf Viren geprüft.
https://www.avast.com/antivirus
From 0ba4bd6ba6dcd0e0eceb12928e54a0e186e3f27b Mon Sep 17 00:00:00 2001
From: Thomas Wolff <mintty@users.noreply.github.com>
Date: Fri, 9 Mar 2018 13:30:33 +0100
Subject: [PATCH 3+4/6] use generated character data

The tow* functions use an included case conversion table which can be
generated from Unicode data.
The isw* functions use a character categories table (provided by
categories.c) which can be generated from Unicode data.
Delegation between current-locale and specific-locale-dependent functions
was reversed: the current-locale functions now call the generic
locale-dependent functions (*_l.c); this is however only relevant on
systems with non-Unicode wide character locales, thus not on Cygwin.
---
 newlib/libc/ctype/categories.c  |  39 +++
 newlib/libc/ctype/categories.h  |   7 +
 newlib/libc/ctype/iswalnum.c    |   2 +-
 newlib/libc/ctype/iswalnum_l.c  |  19 +-
 newlib/libc/ctype/iswalpha.c    | 370 +----------------------------
 newlib/libc/ctype/iswalpha_l.c  |  17 +-
 newlib/libc/ctype/iswblank.c    |  19 +-
 newlib/libc/ctype/iswblank_l.c  |  16 +-
 newlib/libc/ctype/iswcntrl.c    |  17 +-
 newlib/libc/ctype/iswcntrl_l.c  |  16 +-
 newlib/libc/ctype/iswctype_l.c  |  37 ++-
 newlib/libc/ctype/iswdigit.c    |   3 +-
 newlib/libc/ctype/iswdigit_l.c  |   2 +-
 newlib/libc/ctype/iswgraph.c    |   3 +-
 newlib/libc/ctype/iswgraph_l.c  |  19 +-
 newlib/libc/ctype/iswlower.c    |   4 +-
 newlib/libc/ctype/iswlower_l.c  |  16 +-
 newlib/libc/ctype/iswprint.c    | 433 +--------------------------------
 newlib/libc/ctype/iswprint_l.c  |  17 +-
 newlib/libc/ctype/iswpunct.c    |   7 +-
 newlib/libc/ctype/iswpunct_l.c  |  22 +-
 newlib/libc/ctype/iswspace.c    |  20 +-
 newlib/libc/ctype/iswspace_l.c  |  17 +-
 newlib/libc/ctype/iswupper.c    |   6 +-
 newlib/libc/ctype/iswupper_l.c  |  16 +-
 newlib/libc/ctype/iswxdigit.c   |   6 +-
 newlib/libc/ctype/jp2uc.c       |  51 +++-
 newlib/libc/ctype/local.h       |  19 +-
 newlib/libc/ctype/towctrans.c   |  16 +-
 newlib/libc/ctype/towctrans_l.c |  97 +++++++-
 newlib/libc/ctype/towlower.c    | 500 +-------------------------------------
 newlib/libc/ctype/towlower_l.c  |   7 +-
 newlib/libc/ctype/towupper.c    | 515 +---------------------------------------
 newlib/libc/ctype/towupper_l.c  |   8 +-
 newlib/libc/ctype/utf8alpha.h   | 355 ---------------------------
 newlib/libc/ctype/utf8print.h   | 389 ------------------------------
 36 files changed, 428 insertions(+), 2679 deletions(-)
 create mode 100644 newlib/libc/ctype/categories.c
 create mode 100644 newlib/libc/ctype/categories.h
 delete mode 100644 newlib/libc/ctype/utf8alpha.h
 delete mode 100644 newlib/libc/ctype/utf8print.h

diff --git a/newlib/libc/ctype/categories.c b/newlib/libc/ctype/categories.c
new file mode 100644
index 0000000..db285d7
--- /dev/null
+++ b/newlib/libc/ctype/categories.c
@@ -0,0 +1,39 @@
+#include <wctype.h>
+#include "categories.h"
+
+struct _category {
+  enum category cat: 11;
+  unsigned int first: 21;
+  unsigned short delta;
+} __attribute__((packed));
+
+static const struct _category categories[] = {
+#include "categories.t"
+};
+
+static enum category /* category of ucs, or -1 if no entry of table[0..max] covers it */
+bisearch_cat(wint_t ucs, const struct _category *table, int max)
+{
+  int min = 0;
+  int mid;
+  /* binary search; entry i covers table[i].first .. table[i].first + table[i].delta */
+  if (ucs < table[0].first || ucs > table[max].first + table[max].delta)
+    return -1; /* outside the table: report "not found" like a gap inside it, not 0 */
+  while (max >= min)
+    {
+      mid = (min + max) / 2;
+      if (ucs > table[mid].first + table[mid].delta)
+	min = mid + 1;
+      else if (ucs < table[mid].first)
+	max = mid - 1;
+      else
+	return table[mid].cat;
+    }
+  return -1;
+}
+
+enum category category(wint_t ucs)
+{
+  return bisearch_cat(ucs, categories,
+		      sizeof(categories) / sizeof(*categories) - 1);
+}
diff --git a/newlib/libc/ctype/categories.h b/newlib/libc/ctype/categories.h
new file mode 100644
index 0000000..271038e
--- /dev/null
+++ b/newlib/libc/ctype/categories.h
@@ -0,0 +1,7 @@
+/* category data */
+
+enum category {
+#include "categories.cat"
+};
+
+extern enum category category(wint_t ucs);
diff --git a/newlib/libc/ctype/iswalnum.c b/newlib/libc/ctype/iswalnum.c
index 45273a8..7b2cac7
--- a/newlib/libc/ctype/iswalnum.c
+++ b/newlib/libc/ctype/iswalnum.c
@@ -39,5 +39,5 @@ No supporting OS subroutines are required.
 int
 iswalnum (wint_t c)
 {
-  return (iswalpha (c) || iswdigit (c));
+  return iswalnum_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswalnum_l.c b/newlib/libc/ctype/iswalnum_l.c
index e4ab3dd..8802273
--- a/newlib/libc/ctype/iswalnum_l.c
+++ b/newlib/libc/ctype/iswalnum_l.c
@@ -1,10 +1,23 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswalnum_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswalpha (c) || iswdigit (c);
+#ifdef _MB_CAPABLE
+  //return iswalpha (c) || iswdigit (c);
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat == CAT_LC || cat == CAT_Lu || cat == CAT_Ll || cat == CAT_Lt
+      || cat == CAT_Lm || cat == CAT_Lo
+      || cat == CAT_Nl // Letter_Number
+      || cat == CAT_Nd // Decimal_Number
+      ;
+#else
+  return c < (wint_t)0x100 ? isalnum (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswalpha.c b/newlib/libc/ctype/iswalpha.c
index 2906cd1..3928772
--- a/newlib/libc/ctype/iswalpha.c
+++ b/newlib/libc/ctype/iswalpha.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -63,377 +64,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <string.h>
-#include <ctype.h>
-#include "local.h"
-
-#ifdef _MB_CAPABLE
-#include "utf8alpha.h"
-#endif /* _MB_CAPABLE */
 
 int
 iswalpha (wint_t c)
 {
-#ifdef _MB_CAPABLE
-  unsigned const char *table;
-  unsigned char *ptr;
-  unsigned char ctmp;
-  int size;
-  wint_t x;
-
-  c = _jp2uc (c);
-
-  /* Based on and tested against Unicode 5.2
-     See utf8alpha.h for a description how to fetch the data. */
-  x = (c >> 8);
-  /* for some large sections, all characters are alphabetic so handle them here */
-  if ((x >= 0x34 && x <= 0x4c) ||
-      (x >= 0x4e && x <= 0x9e) ||
-      (x >= 0xac && x <= 0xd6) ||
-      (x >= 0x120 && x <= 0x122) ||
-      (x >= 0x130 && x <= 0x133) ||
-      (x >= 0x200 && x <= 0x2a5) ||
-      (x >= 0x2a7 && x <= 0x2b6))
-    return 1;
-  
-  switch (x)
-    {
-    case 0x00:
-      table = u0;
-      size = sizeof(u0);
-      break;
-    case 0x01:
-    case 0x11:
-    case 0x15:
-    case 0x1e:
-    case 0xa0:
-    case 0xa1:
-    case 0xa2:
-    case 0xa3:
-    case 0xa5:
-    case 0xf9:
-    case 0xfc:
-    case 0x2f8:
-    case 0x2f9:
-      return 1;
-    case 0x02:
-      table = u2;
-      size = sizeof(u2);
-      break;
-    case 0x03:
-      table = u3;
-      size = sizeof(u3);
-      break;
-    case 0x04:
-      table = u4;
-      size = sizeof(u4);
-      break;
-    case 0x05:
-      table = u5;
-      size = sizeof(u5);
-      break;
-    case 0x06:
-      table = u6;
-      size = sizeof(u6);
-      break;
-    case 0x07:
-      table = u7;
-      size = sizeof(u7);
-      break;
-    case 0x08:
-      table = u8;
-      size = sizeof(u8);
-      break;
-    case 0x09:
-      table = u9;
-      size = sizeof(u9);
-      break;
-    case 0x0a:
-      table = ua;
-      size = sizeof(ua);
-      break;
-    case 0x0b:
-      table = ub;
-      size = sizeof(ub);
-      break;
-    case 0x0c:
-      table = uc;
-      size = sizeof(uc);
-      break;
-    case 0x0d:
-      table = ud;
-      size = sizeof(ud);
-      break;
-    case 0x0e:
-      table = ue;
-      size = sizeof(ue);
-      break;
-    case 0x0f:
-      table = uf;
-      size = sizeof(uf);
-      break;
-    case 0x10:
-      table = u10;
-      size = sizeof(u10);
-      break;
-    case 0x12:
-      table = u12;
-      size = sizeof(u12);
-      break;
-    case 0x13:
-      table = u13;
-      size = sizeof(u13);
-      break;
-    case 0x14:
-      table = u14;
-      size = sizeof(u14);
-      break;
-    case 0x16:
-      table = u16;
-      size = sizeof(u16);
-      break;
-    case 0x17:
-      table = u17;
-      size = sizeof(u17);
-      break;
-    case 0x18:
-      table = u18;
-      size = sizeof(u18);
-      break;
-    case 0x19:
-      table = u19;
-      size = sizeof(u19);
-      break;
-    case 0x1a:
-      table = u1a;
-      size = sizeof(u1a);
-      break;
-    case 0x1b:
-      table = u1b;
-      size = sizeof(u1b);
-      break;
-    case 0x1c:
-      table = u1c;
-      size = sizeof(u1c);
-      break;
-    case 0x1d:
-      table = u1d;
-      size = sizeof(u1d);
-      break;
-    case 0x1f:
-      table = u1f;
-      size = sizeof(u1f);
-      break;
-    case 0x20:
-      table = u20;
-      size = sizeof(u20);
-      break;
-    case 0x21:
-      table = u21;
-      size = sizeof(u21);
-      break;
-    case 0x24:
-      table = u24;
-      size = sizeof(u24);
-      break;
-    case 0x2c:
-      table = u2c;
-      size = sizeof(u2c);
-      break;
-    case 0x2d:
-      table = u2d;
-      size = sizeof(u2d);
-      break;
-    case 0x2e:
-      table = u2e;
-      size = sizeof(u2e);
-      break;
-    case 0x30:
-      table = u30;
-      size = sizeof(u30);
-      break;
-    case 0x31:
-      table = u31;
-      size = sizeof(u31);
-      break;
-    case 0x4d:
-      table = u4d;
-      size = sizeof(u4d);
-      break;
-    case 0x9f:
-      table = u9f;
-      size = sizeof(u9f);
-      break;
-    case 0xa4:
-      table = ua4;
-      size = sizeof(ua4);
-      break;
-    case 0xa6:
-      table = ua6;
-      size = sizeof(ua6);
-      break;
-    case 0xa7:
-      table = ua7;
-      size = sizeof(ua7);
-      break;
-    case 0xa8:
-      table = ua8;
-      size = sizeof(ua8);
-      break;
-    case 0xa9:
-      table = ua9;
-      size = sizeof(ua9);
-      break;
-    case 0xaa:
-      table = uaa;
-      size = sizeof(uaa);
-      break;
-    case 0xab:
-      table = uab;
-      size = sizeof(uab);
-      break;
-    case 0xd7:
-      table = ud7;
-      size = sizeof(ud7);
-      break;
-    case 0xfa:
-      table = ufa;
-      size = sizeof(ufa);
-      break;
-    case 0xfb:
-      table = ufb;
-      size = sizeof(ufb);
-      break;
-    case 0xfd:
-      table = ufd;
-      size = sizeof(ufd);
-      break;
-    case 0xfe:
-      table = ufe;
-      size = sizeof(ufe);
-      break;
-    case 0xff:
-      table = uff;
-      size = sizeof(uff);
-      break;
-    case 0x100:
-      table = u100;
-      size = sizeof(u100);
-      break;
-    case 0x101:
-      table = u101;
-      size = sizeof(u101);
-      break;
-    case 0x102:
-      table = u102;
-      size = sizeof(u102);
-      break;
-    case 0x103:
-      table = u103;
-      size = sizeof(u103);
-      break;
-    case 0x104:
-      table = u104;
-      size = sizeof(u104);
-      break;
-    case 0x108:
-      table = u108;
-      size = sizeof(u108);
-      break;
-    case 0x109:
-      table = u109;
-      size = sizeof(u109);
-      break;
-    case 0x10a:
-      table = u10a;
-      size = sizeof(u10a);
-      break;
-    case 0x10b:
-      table = u10b;
-      size = sizeof(u10b);
-      break;
-    case 0x10c:
-      table = u10c;
-      size = sizeof(u10c);
-      break;
-    case 0x110:
-      table = u110;
-      size = sizeof(u110);
-      break;
-    case 0x123:
-      table = u123;
-      size = sizeof(u123);
-      break;
-    case 0x124:
-      table = u124;
-      size = sizeof(u124);
-      break;
-    case 0x134:
-      table = u134;
-      size = sizeof(u134);
-      break;
-    case 0x1d4:
-      table = u1d4;
-      size = sizeof(u1d4);
-      break;
-    case 0x1d5:
-      table = u1d5;
-      size = sizeof(u1d5);
-      break;
-    case 0x1d6:
-      table = u1d6;
-      size = sizeof(u1d6);
-      break;
-    case 0x1d7:
-      table = u1d7;
-      size = sizeof(u1d7);
-      break;
-    case 0x1f1:
-      table = u1f1;
-      size = sizeof(u1f1);
-      break;
-    case 0x2a6:
-      table = u2a6;
-      size = sizeof(u2a6);
-      break;
-    case 0x2b7:
-      table = u2b7;
-      size = sizeof(u2b7);
-      break;
-    case 0x2fa:
-      table = u2fa;
-      size = sizeof(u2fa);
-      break;
-    default:
-      return 0;
-    }
-  /* we have narrowed down to a section of 256 characters to check */
-  /* now check if c matches the alphabetic wide-chars within that section */
-  ptr = (unsigned char *)table;
-  ctmp = (unsigned char)c;
-  while (ptr < table + size)
-    {
-      if (ctmp == *ptr)
-	return 1;
-      if (ctmp < *ptr)
-	return 0;
-      /* otherwise c > *ptr */
-      /* look for 0x0 as next element which indicates a range */
-      ++ptr;
-      if (ptr < table + size - 1 && *ptr == 0x0)
-	{
-	  /* we have a range..see if c falls within range */
-	  ++ptr;
-	  if (ctmp <= *ptr)
-	    return 1;
-	  ++ptr;
-	}
-    }
-  /* not in table */
-  return 0;
-#else
-  return (c < (wint_t)0x100 ? isalpha (c) : 0);
-#endif /* _MB_CAPABLE */
+  return iswalpha_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswalpha_l.c b/newlib/libc/ctype/iswalpha_l.c
index efcb95a..922983e
--- a/newlib/libc/ctype/iswalpha_l.c
+++ b/newlib/libc/ctype/iswalpha_l.c
@@ -1,10 +1,21 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswalpha_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswalpha (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat == CAT_LC || cat == CAT_Lu || cat == CAT_Ll || cat == CAT_Lt
+      || cat == CAT_Lm || cat == CAT_Lo
+      || cat == CAT_Nl // Letter_Number
+      ;
+#else
+  return c < (wint_t)0x100 ? isalpha (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswblank.c b/newlib/libc/ctype/iswblank.c
index ef91572..31779d2
--- a/newlib/libc/ctype/iswblank.c
+++ b/newlib/libc/ctype/iswblank.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,26 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <ctype.h>
-#include <string.h>
-#include "local.h"
 
 int
 iswblank (wint_t c)
 {
-#ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-  /* Based on Unicode 5.2.  Control char 09, plus all characters
-     from general category "Zs", which are not marked as decomposition
-     type "noBreak". */
-  return (c == 0x0009 || c == 0x0020 ||
-	  c == 0x1680 || c == 0x180e ||
-	  (c >= 0x2000 && c <= 0x2006) ||
-	  (c >= 0x2008 && c <= 0x200a) ||
-	  c == 0x205f || c == 0x3000);
-#else
-  return (c < 0x100 ? isblank (c) : 0);
-#endif /* _MB_CAPABLE */
+  return iswblank_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswblank_l.c b/newlib/libc/ctype/iswblank_l.c
index 6960693..b27ed82
--- a/newlib/libc/ctype/iswblank_l.c
+++ b/newlib/libc/ctype/iswblank_l.c
@@ -1,10 +1,20 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswblank_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswblank (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  // exclude "<noBreak>"?
+  return cat == CAT_Zs
+      || c == '\t';
+#else
+  return c < 0x100 ? isblank (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswcntrl.c b/newlib/libc/ctype/iswcntrl.c
index 249a0a8..d4b0147
--- a/newlib/libc/ctype/iswcntrl.c
+++ b/newlib/libc/ctype/iswcntrl.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,24 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <ctype.h>
-#include <string.h>
-#include "local.h"
 
 int
 iswcntrl (wint_t c)
 {
-#ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-
-  /* Based on Unicode 5.2.  All characters from general category "Cc", "Zl",
-     and "Zp".  */
-  return ((c >= 0x0000 && c <= 0x001f) || 
-	  (c >= 0x007f && c <= 0x009f) ||
-	  c == 0x2028 || c == 0x2029);
-#else
-  return (c < 0x100 ? iscntrl (c) : 0);
-#endif /* _MB_CAPABLE */
+  return iswcntrl_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswcntrl_l.c b/newlib/libc/ctype/iswcntrl_l.c
index 37caba8..6a900a7
--- a/newlib/libc/ctype/iswcntrl_l.c
+++ b/newlib/libc/ctype/iswcntrl_l.c
@@ -1,10 +1,20 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswcntrl_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswcntrl (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat == CAT_Cc
+      || cat == CAT_Zl || cat == CAT_Zp // Line/Paragraph Separator
+      ;
+#else
+  return c < 0x100 ? iscntrl (c) : 0;
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswctype_l.c b/newlib/libc/ctype/iswctype_l.c
index d9e7b2e..506972d
--- a/newlib/libc/ctype/iswctype_l.c
+++ b/newlib/libc/ctype/iswctype_l.c
@@ -1,10 +1,41 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <wctype.h>
+#include "local.h"
 
 int
 iswctype_l (wint_t c, wctype_t desc, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswctype (c, desc);
+  switch (desc)
+    {
+    case WC_ALNUM:
+      return iswalnum_l (c, locale);
+    case WC_ALPHA:
+      return iswalpha_l (c, locale);
+    case WC_BLANK:
+      return iswblank_l (c, locale);
+    case WC_CNTRL:
+      return iswcntrl_l (c, locale);
+    case WC_DIGIT:
+      return iswdigit_l (c, locale);
+    case WC_GRAPH:
+      return iswgraph_l (c, locale);
+    case WC_LOWER:
+      return iswlower_l (c, locale);
+    case WC_PRINT:
+      return iswprint_l (c, locale);
+    case WC_PUNCT:
+      return iswpunct_l (c, locale);
+    case WC_SPACE:
+      return iswspace_l (c, locale);
+    case WC_UPPER:
+      return iswupper_l (c, locale);
+    case WC_XDIGIT:
+      return iswxdigit_l (c, locale);
+    default:
+      return 0; /* eliminate warning */
+    }
+
+  /* otherwise unknown */
+  return 0;
 }
diff --git a/newlib/libc/ctype/iswdigit.c b/newlib/libc/ctype/iswdigit.c
index 2b26141..d3562f8
--- a/newlib/libc/ctype/iswdigit.c
+++ b/newlib/libc/ctype/iswdigit.c
@@ -38,5 +38,6 @@ No supporting OS subroutines are required.
 int
 iswdigit (wint_t c)
 {
-  return (c >= (wint_t)'0' && c <= (wint_t)'9');
+  return c >= (wint_t)'0' && c <= (wint_t)'9';
+  // category (c) == CAT_Nd not to be included as of C-99
 }
diff --git a/newlib/libc/ctype/iswdigit_l.c b/newlib/libc/ctype/iswdigit_l.c
index 98dd94e..29de9d3
--- a/newlib/libc/ctype/iswdigit_l.c
+++ b/newlib/libc/ctype/iswdigit_l.c
@@ -4,5 +4,5 @@
 int
 iswdigit_l (wint_t c, struct __locale_t *locale)
 {
-  return (c >= (wint_t)'0' && c <= (wint_t)'9');
+  return c >= (wint_t)'0' && c <= (wint_t)'9';
 }
diff --git a/newlib/libc/ctype/iswgraph.c b/newlib/libc/ctype/iswgraph.c
index e0df4aa..bb21c21
--- a/newlib/libc/ctype/iswgraph.c
+++ b/newlib/libc/ctype/iswgraph.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -67,5 +68,5 @@ No supporting OS subroutines are required.
 int
 iswgraph (wint_t c)
 {
-  return (iswprint (c) && !iswspace (c));
+  return iswgraph_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswgraph_l.c b/newlib/libc/ctype/iswgraph_l.c
index 9803c18..b8a5866
--- a/newlib/libc/ctype/iswgraph_l.c
+++ b/newlib/libc/ctype/iswgraph_l.c
@@ -1,10 +1,23 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswgraph_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswprint (c) && !iswspace (c);
+#ifdef _MB_CAPABLE
+  /* intended to match iswprint_l (c, locale) && !iswspace_l (c, locale) */
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat != -1 // presumably "no category"; confirm against category ()
+      && cat != CAT_Cc && cat != CAT_Cf // Control, Format
+      && cat != CAT_Cs // Surrogate
+      && cat != CAT_Zs // Space Separator
+      && cat != CAT_Zl && cat != CAT_Zp // Line/Paragraph Separator
+      ;
+#else
+  return iswprint_l (c, locale) && !iswspace_l (c, locale);
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswlower.c b/newlib/libc/ctype/iswlower.c
index 8b38835..e1d926b
--- a/newlib/libc/ctype/iswlower.c
+++ b/newlib/libc/ctype/iswlower.c
@@ -17,7 +17,7 @@ SYNOPSIS
 
 DESCRIPTION
 <<iswlower>> is a function which classifies wide-character values that
-have uppercase translations.
+are categorized as lowercase.
 
 <<iswlower_l>> is like <<iswlower>> but performs the check based on the
 locale specified by the locale object locale.  If <[locale]> is
@@ -38,5 +38,5 @@ No supporting OS subroutines are required.
 int
 iswlower (wint_t c)
 {
-	return (towupper (c) != c);
+  return iswlower_l (c, 0); /* 0: no explicit locale object */
 }
diff --git a/newlib/libc/ctype/iswlower_l.c b/newlib/libc/ctype/iswlower_l.c
index d69615b..64f77a3
--- a/newlib/libc/ctype/iswlower_l.c
+++ b/newlib/libc/ctype/iswlower_l.c
@@ -1,10 +1,20 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswlower_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return (towupper (c) != c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  // The wide-character class "lower" contains at least those characters wc
+  // which are equal to towlower(wc) and different from towupper(wc).
+  enum category cat = category (c);
+  return cat == CAT_Ll || (cat == CAT_LC && towlower (c) == c);
+#else
+  return c < 0x100 ? islower (c) : 0; /* single-byte fallback via islower */
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswprint.c b/newlib/libc/ctype/iswprint.c
index c6050b5..5e468fe
--- a/newlib/libc/ctype/iswprint.c
+++ b/newlib/libc/ctype/iswprint.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,440 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <string.h>
-#include <ctype.h>
-#include "local.h"
-
-#ifdef _MB_CAPABLE
-#include "utf8print.h"
-#endif /* _MB_CAPABLE */
 
 int
 iswprint (wint_t c)
 {
-#ifdef _MB_CAPABLE
-  unsigned const char *table;
-  unsigned char *ptr;
-  unsigned char ctmp;
-  int size;
-  wint_t x;
-  
-  c = _jp2uc (c);
-
-  /* Based on and tested against Unicode 5.2
-     See utf8print.h for a description how to fetch the data. */
-  x = (c >> 8);
-  /* for some large sections, all characters are printuation so handle them here */
-  if ((x >= 0x33 && x <= 0x4c) ||
-      (x >= 0x4e && x <= 0x9e) ||
-      (x >= 0xa0 && x <= 0xa3) ||
-      (x >= 0xac && x <= 0xd6) ||
-      (x >= 0xe0 && x <= 0xf9) ||
-      (x >= 0x120 && x <= 0x122) ||
-      (x >= 0x130 && x <= 0x133) ||
-      (x >= 0x200 && x <= 0x2a5) ||
-      (x >= 0x2a7 && x <= 0x2b6) ||
-      (x >= 0xf00 && x <= 0xffe) ||
-      (x >= 0x1000 && x <= 0x10fe))
-    return 1;
-  
-  switch (x)
-    {
-    case 0x01:
-    case 0x02:
-    case 0x04:
-    case 0x11:
-    case 0x14:
-    case 0x15:
-    case 0x1e:
-    case 0x22:
-    case 0x25:
-    case 0x28:
-    case 0x29:
-    case 0x2a:
-    case 0xa5:
-    case 0xfc:
-    case 0x2f8:
-    case 0x2f9:
-      return 1;
-    case 0x00:
-      table = u0;
-      size = sizeof(u0);
-      break;
-    case 0x03:
-      table = u3;
-      size = sizeof(u3);
-      break;
-    case 0x05:
-      table = u5;
-      size = sizeof(u5);
-      break;
-    case 0x06:
-      table = u6;
-      size = sizeof(u6);
-      break;
-    case 0x07:
-      table = u7;
-      size = sizeof(u7);
-      break;
-    case 0x08:
-      table = u8;
-      size = sizeof(u8);
-      break;
-    case 0x09:
-      table = u9;
-      size = sizeof(u9);
-      break;
-    case 0x0a:
-      table = ua;
-      size = sizeof(ua);
-      break;
-    case 0x0b:
-      table = ub;
-      size = sizeof(ub);
-      break;
-    case 0x0c:
-      table = uc;
-      size = sizeof(uc);
-      break;
-    case 0x0d:
-      table = ud;
-      size = sizeof(ud);
-      break;
-    case 0x0e:
-      table = ue;
-      size = sizeof(ue);
-      break;
-    case 0x0f:
-      table = uf;
-      size = sizeof(uf);
-      break;
-    case 0x10:
-      table = u10;
-      size = sizeof(u10);
-      break;
-    case 0x12:
-      table = u12;
-      size = sizeof(u12);
-      break;
-    case 0x13:
-      table = u13;
-      size = sizeof(u13);
-      break;
-    case 0x16:
-      table = u16;
-      size = sizeof(u16);
-      break;
-    case 0x17:
-      table = u17;
-      size = sizeof(u17);
-      break;
-    case 0x18:
-      table = u18;
-      size = sizeof(u18);
-      break;
-    case 0x19:
-      table = u19;
-      size = sizeof(u19);
-      break;
-    case 0x1a:
-      table = u1a;
-      size = sizeof(u1a);
-      break;
-    case 0x1b:
-      table = u1b;
-      size = sizeof(u1b);
-      break;
-    case 0x1c:
-      table = u1c;
-      size = sizeof(u1c);
-      break;
-    case 0x1d:
-      table = u1d;
-      size = sizeof(u1d);
-      break;
-    case 0x1f:
-      table = u1f;
-      size = sizeof(u1f);
-      break;
-    case 0x20:
-      table = u20;
-      size = sizeof(u20);
-      break;
-    case 0x21:
-      table = u21;
-      size = sizeof(u21);
-      break;
-    case 0x23:
-      table = u23;
-      size = sizeof(u23);
-      break;
-    case 0x24:
-      table = u24;
-      size = sizeof(u24);
-      break;
-    case 0x26:
-      table = u26;
-      size = sizeof(u26);
-      break;
-    case 0x27:
-      table = u27;
-      size = sizeof(u27);
-      break;
-    case 0x2b:
-      table = u2b;
-      size = sizeof(u2b);
-      break;
-    case 0x2c:
-      table = u2c;
-      size = sizeof(u2c);
-      break;
-    case 0x2d:
-      table = u2d;
-      size = sizeof(u2d);
-      break;
-    case 0x2e:
-      table = u2e;
-      size = sizeof(u2e);
-      break;
-    case 0x2f:
-      table = u2f;
-      size = sizeof(u2f);
-      break;
-    case 0x30:
-      table = u30;
-      size = sizeof(u30);
-      break;
-    case 0x31:
-      table = u31;
-      size = sizeof(u31);
-      break;
-    case 0x32:
-      table = u32;
-      size = sizeof(u32);
-      break;
-    case 0x4d:
-      table = u4d;
-      size = sizeof(u4d);
-      break;
-    case 0x9f:
-      table = u9f;
-      size = sizeof(u9f);
-      break;
-    case 0xa4:
-      table = ua4;
-      size = sizeof(ua4);
-      break;
-    case 0xa6:
-      table = ua6;
-      size = sizeof(ua6);
-      break;
-    case 0xa7:
-      table = ua7;
-      size = sizeof(ua7);
-      break;
-    case 0xa8:
-      table = ua8;
-      size = sizeof(ua8);
-      break;
-    case 0xa9:
-      table = ua9;
-      size = sizeof(ua9);
-      break;
-    case 0xaa:
-      table = uaa;
-      size = sizeof(uaa);
-      break;
-    case 0xab:
-      table = uab;
-      size = sizeof(uab);
-      break;
-    case 0xd7:
-      table = ud7;
-      size = sizeof(ud7);
-      break;
-    case 0xfa:
-      table = ufa;
-      size = sizeof(ufa);
-      break;
-    case 0xfb:
-      table = ufb;
-      size = sizeof(ufb);
-      break;
-    case 0xfd:
-      table = ufd;
-      size = sizeof(ufd);
-      break;
-    case 0xfe:
-      table = ufe;
-      size = sizeof(ufe);
-      break;
-    case 0xff:
-      table = uff;
-      size = sizeof(uff);
-      break;
-    case 0x100:
-      table = u100;
-      size = sizeof(u100);
-      break;
-    case 0x101:
-      table = u101;
-      size = sizeof(u101);
-      break;
-    case 0x102:
-      table = u102;
-      size = sizeof(u102);
-      break;
-    case 0x103:
-      table = u103;
-      size = sizeof(u103);
-      break;
-    case 0x104:
-      table = u104;
-      size = sizeof(u104);
-      break;
-    case 0x108:
-      table = u108;
-      size = sizeof(u108);
-      break;
-    case 0x109:
-      table = u109;
-      size = sizeof(u109);
-      break;
-    case 0x10a:
-      table = u10a;
-      size = sizeof(u10a);
-      break;
-    case 0x10b:
-      table = u10b;
-      size = sizeof(u10b);
-      break;
-    case 0x10c:
-      table = u10c;
-      size = sizeof(u10c);
-      break;
-    case 0x10e:
-      table = u10e;
-      size = sizeof(u10e);
-      break;
-    case 0x110:
-      table = u110;
-      size = sizeof(u110);
-      break;
-    case 0x123:
-      table = u123;
-      size = sizeof(u123);
-      break;
-    case 0x124:
-      table = u124;
-      size = sizeof(u124);
-      break;
-    case 0x134:
-      table = u134;
-      size = sizeof(u134);
-      break;
-    case 0x1d0:
-      table = u1d0;
-      size = sizeof(u1d0);
-      break;
-    case 0x1d1:
-      table = u1d1;
-      size = sizeof(u1d1);
-      break;
-    case 0x1d2:
-      table = u1d2;
-      size = sizeof(u1d2);
-      break;
-    case 0x1d3:
-      table = u1d3;
-      size = sizeof(u1d3);
-      break;
-    case 0x1d4:
-      table = u1d4;
-      size = sizeof(u1d4);
-      break;
-    case 0x1d5:
-      table = u1d5;
-      size = sizeof(u1d5);
-      break;
-    case 0x1d6:
-      table = u1d6;
-      size = sizeof(u1d6);
-      break;
-    case 0x1d7:
-      table = u1d7;
-      size = sizeof(u1d7);
-      break;
-    case 0x1f0:
-      table = u1f0;
-      size = sizeof(u1f0);
-      break;
-    case 0x1f1:
-      table = u1f1;
-      size = sizeof(u1f1);
-      break;
-    case 0x1f2:
-      table = u1f2;
-      size = sizeof(u1f2);
-      break;
-    case 0x2a6:
-      table = u2a6;
-      size = sizeof(u2a6);
-      break;
-    case 0x2b7:
-      table = u2b7;
-      size = sizeof(u2b7);
-      break;
-    case 0x2fa:
-      table = u2fa;
-      size = sizeof(u2fa);
-      break;
-    case 0xe00:
-      table = ue00;
-      size = sizeof(ue00);
-      break;
-    case 0xe01:
-      table = ue01;
-      size = sizeof(ue01);
-      break;
-    case 0xfff:
-      table = ufff;
-      size = sizeof(ufff);
-      break;
-    case 0x10ff:
-      table = u10ff;
-      size = sizeof(u10ff);
-      break;
-    default:
-      return 0;
-    }
-  /* we have narrowed down to a section of 256 characters to check */
-  /* now check if c matches the printuation wide-chars within that section */
-  ptr = (unsigned char *)table;
-  ctmp = (unsigned char)c;
-  while (ptr < table + size)
-    {
-      if (ctmp == *ptr)
-	return 1;
-      if (ctmp < *ptr)
-	return 0;
-      /* otherwise c > *ptr */
-      /* look for 0x0 as next element which indicates a range */
-      ++ptr;
-      if (*ptr == 0x0)
-	{
-	  /* we have a range..see if c falls within range */
-	  ++ptr;
-	  if (ctmp <= *ptr)
-	    return 1;
-	  ++ptr;
-	}
-    }
-  /* not in table */
-  return 0;
-#else
-  return (c < (wint_t)0x100 ? isprint (c) : 0);
-#endif /* _MB_CAPABLE */
+  return iswprint_l (c, 0);
 }
diff --git a/newlib/libc/ctype/iswprint_l.c b/newlib/libc/ctype/iswprint_l.c
index a8d8686..cdf027b
--- a/newlib/libc/ctype/iswprint_l.c
+++ b/newlib/libc/ctype/iswprint_l.c
@@ -1,10 +1,21 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswprint_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswprint (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat != -1 // presumably "no category"; confirm against category ()
+      && cat != CAT_Cc && cat != CAT_Cf // Control, Format
+      && cat != CAT_Cs // Surrogate
+      ;
+#else
+  return c < (wint_t)0x100 ? isprint (c) : 0; /* single-byte fallback */
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswpunct.c b/newlib/libc/ctype/iswpunct.c
index 8ab7038..27a6d65
--- a/newlib/libc/ctype/iswpunct.c
+++ b/newlib/libc/ctype/iswpunct.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,14 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <string.h>
-#include <ctype.h>
-#include "local.h"
 
 int
 iswpunct (wint_t c)
 {
-  return (!iswalnum (c) && iswgraph (c));
+  return iswpunct_l (c, 0); /* 0: no explicit locale object */
 }
diff --git a/newlib/libc/ctype/iswpunct_l.c b/newlib/libc/ctype/iswpunct_l.c
index c7acc4e..4adc1ed
--- a/newlib/libc/ctype/iswpunct_l.c
+++ b/newlib/libc/ctype/iswpunct_l.c
@@ -1,10 +1,26 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswpunct_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return !iswalnum (c) && iswgraph (c);
+#ifdef _MB_CAPABLE
+  /* formerly !iswalnum (c) && iswgraph (c); now an explicit category list */
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  return cat == CAT_Pc || cat == CAT_Pd || cat == CAT_Pe || cat == CAT_Pf || cat == CAT_Pi || cat == CAT_Po || cat == CAT_Ps // Punctuation
+      || cat == CAT_Sm // Math Symbols
+      // the following are included for backwards consistency:
+      || cat == CAT_Sc // Currency Symbols
+      || cat == CAT_Sk // Modifier_Symbol
+      || cat == CAT_So // Other_Symbol
+      || cat == CAT_No // Other_Number
+      ;
+#else
+  return c < (wint_t)0x100 ? ispunct (c) : 0; /* single-byte fallback */
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswspace.c b/newlib/libc/ctype/iswspace.c
index ae3841a..ca6a887
--- a/newlib/libc/ctype/iswspace.c
+++ b/newlib/libc/ctype/iswspace.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -62,27 +63,10 @@ PORTABILITY
 No supporting OS subroutines are required.
 */
 #include <_ansi.h>
-#include <newlib.h>
 #include <wctype.h>
-#include <ctype.h>
-#include <string.h>
-#include "local.h"
 
 int
 iswspace (wint_t c)
 {
-#ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-  /* Based on Unicode 5.2.  Control chars 09-0D, plus all characters
-     from general category "Zs", which are not marked as decomposition
-     type "noBreak". */
-  return ((c >= 0x0009 && c <= 0x000d) || c == 0x0020 ||
-	  c == 0x1680 || c == 0x180e ||
-	  (c >= 0x2000 && c <= 0x2006) ||
-	  (c >= 0x2008 && c <= 0x200a) ||
-	  c == 0x2028 || c == 0x2029 ||
-	  c == 0x205f || c == 0x3000);
-#else
-  return (c < 0x100 ? isspace (c) : 0);
-#endif /* _MB_CAPABLE */
+  return iswspace_l (c, 0); /* 0: no explicit locale object */
 }
diff --git a/newlib/libc/ctype/iswspace_l.c b/newlib/libc/ctype/iswspace_l.c
index 0c30242..e7f47ee
--- a/newlib/libc/ctype/iswspace_l.c
+++ b/newlib/libc/ctype/iswspace_l.c
@@ -1,10 +1,21 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswspace_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return iswspace (c);
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  enum category cat = category (c);
+  // NOTE(review): Zs characters of type <noBreak> are not excluded -- confirm
+  return cat == CAT_Zs // Space Separator
+      || cat == CAT_Zl || cat == CAT_Zp // Line/Paragraph Separator
+      || (c >= 0x9 && c <= 0xD); // TAB, LF, VT, FF, CR
+#else
+  return c < 0x100 ? isspace (c) : 0; /* single-byte fallback via isspace */
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswupper.c b/newlib/libc/ctype/iswupper.c
index c4969a3..96b5a0c
--- a/newlib/libc/ctype/iswupper.c
+++ b/newlib/libc/ctype/iswupper.c
@@ -17,14 +17,14 @@ SYNOPSIS
 
 DESCRIPTION
 <<iswupper>> is a function which classifies wide-character values that
-have uppercase translations.
+are categorized as uppercase.
 
 <<iswupper_l>> is like <<iswupper>> but performs the check based on the
 locale specified by the locale object locale.  If <[locale]> is
 LC_GLOBAL_LOCALE or not a valid locale object, the behaviour is undefined.
 
 RETURNS
-<<iswupper>>, <<iswupper_l>> return non-zero if <[c]> is a uppercase wide character.
+<<iswupper>>, <<iswupper_l>> return non-zero if <[c]> is an uppercase wide character.
 
 PORTABILITY
 <<iswupper>> is C99.
@@ -38,5 +38,5 @@ No supporting OS subroutines are required.
 int
 iswupper (wint_t c)
 {
-  return (towlower (c) != c);
+  return iswupper_l (c, 0); /* 0: no explicit locale object */
 }
diff --git a/newlib/libc/ctype/iswupper_l.c b/newlib/libc/ctype/iswupper_l.c
index 2555cd0..7ce8b5e
--- a/newlib/libc/ctype/iswupper_l.c
+++ b/newlib/libc/ctype/iswupper_l.c
@@ -1,10 +1,20 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
+#include <ctype.h>
 #include <wctype.h>
+#include "local.h"
+#include "categories.h"
 
 int
 iswupper_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return towlower (c) != c;
+#ifdef _MB_CAPABLE
+  c = _jp2uc_l (c, locale);
+  // The wide-character class "upper" contains at least those characters wc
+  // which are equal to towupper(wc) and different from towlower(wc).
+  enum category cat = category (c);
+  return cat == CAT_Lu || (cat == CAT_LC && towupper (c) == c);
+#else
+  return c < 0x100 ? isupper (c) : 0; /* single-byte fallback via isupper */
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/iswxdigit.c b/newlib/libc/ctype/iswxdigit.c
index 4367186..fce2a4d
--- a/newlib/libc/ctype/iswxdigit.c
+++ b/newlib/libc/ctype/iswxdigit.c
@@ -38,7 +38,7 @@ No supporting OS subroutines are required.
 int
 iswxdigit (wint_t c)
 {
-  return ((c >= (wint_t)'0' && c <= (wint_t)'9') ||
-	  (c >= (wint_t)'a' && c <= (wint_t)'f') ||
-	  (c >= (wint_t)'A' && c <= (wint_t)'F'));
+  return (c >= (wint_t)'0' && c <= (wint_t)'9')  /* decimal digits */
+      || (c >= (wint_t)'a' && c <= (wint_t)'f')  /* lowercase hex letters */
+      || (c >= (wint_t)'A' && c <= (wint_t)'F'); /* uppercase hex letters */
 }
diff --git a/newlib/libc/ctype/jp2uc.c b/newlib/libc/ctype/jp2uc.c
index 29eec0f..b89b5ea
--- a/newlib/libc/ctype/jp2uc.c
+++ b/newlib/libc/ctype/jp2uc.c
@@ -1,7 +1,8 @@
-/* Routine to translate from Japanese characters to Unicode */
+/* Routine to translate between Japanese characters and Unicode */
 
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff: consider locale, add dummy uc2jp
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -25,7 +26,7 @@
    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
@@ -40,13 +41,15 @@
 #include <string.h>
 #include <wctype.h>
 #include "local.h"
-#include "jp2uc.h"
 
 /* Japanese encoding types supported */
 #define JP_JIS		1
 #define JP_SJIS		2
 #define JP_EUCJP	3
 
+/* Japanese to Unicode conversion routine */
+#include "jp2uc.h"
+
 static wint_t
 __jp2uc (wint_t c, int type)
 {
@@ -104,7 +107,7 @@ __jp2uc (wint_t c, int type)
       return d02f4[index];
     }
 
-  /* handle smaller ranges here */    
+  /* handle smaller ranges here */
   switch (byte1)
     {
     case 0xA1:
@@ -148,20 +151,50 @@ __jp2uc (wint_t c, int type)
       return WEOF;
     }
 
-  return WEOF; 
+  return WEOF;
+}
+
+/* Unicode to Japanese conversion routine: placeholder, returns c unchanged */
+static wint_t
+__uc2jp (wint_t c, int type)
+{
+#warning back-conversion Unicode to Japanese not implemented; needed for towupper/towlower
+  return c; /* type is not used by this placeholder */
 }
 
+/* Japanese (JIS/SJIS/EUCJP) to Unicode; l == 0 means the current locale */
 wint_t
-_jp2uc (wint_t c)
+_jp2uc_l (wint_t c, struct __locale_t * l)
 {
-  if (!strcmp (__current_locale_charset (), "JIS"))
+  const char * cs = l ? __locale_charset(l) : __current_locale_charset();
+  if (0 == strcmp (cs, "JIS"))
     c = __jp2uc (c, JP_JIS);
-  else if (!strcmp (__current_locale_charset (), "SJIS"))
+  else if (0 == strcmp (cs, "SJIS"))
     c = __jp2uc (c, JP_SJIS);
-  else if (!strcmp (__current_locale_charset (), "EUCJP"))
+  else if (0 == strcmp (cs, "EUCJP"))
     c = __jp2uc (c, JP_EUCJP);
   return c;
 }
 
+wint_t
+_jp2uc (wint_t c)
+{
+  return _jp2uc_l (c, 0); /* 0: convert according to the current locale */
+}
+
+/* Unicode to Japanese (JIS/SJIS/EUCJP); l == 0 means the current locale */
+wint_t
+_uc2jp_l (wint_t c, struct __locale_t * l)
+{
+  const char * cs = l ? __locale_charset(l) : __current_locale_charset();
+  if (0 == strcmp (cs, "JIS"))
+    c = __uc2jp (c, JP_JIS);
+  else if (0 == strcmp (cs, "SJIS"))
+    c = __uc2jp (c, JP_SJIS);
+  else if (0 == strcmp (cs, "EUCJP"))
+    c = __uc2jp (c, JP_EUCJP);
+  return c;
+}
+
 #endif /* !__CYGWIN__ */
 #endif /* _MB_CAPABLE */
diff --git a/newlib/libc/ctype/local.h b/newlib/libc/ctype/local.h
index 62d2b15..aa8f533
--- a/newlib/libc/ctype/local.h
+++ b/newlib/libc/ctype/local.h
@@ -1,3 +1,5 @@
+/* Modified (m) 2017 Thomas Wolff: fixed locale/wchar handling */
+
 /* wctrans constants */
 
 #include <_ansi.h>
@@ -21,11 +23,22 @@
 #define WC_UPPER	11
 #define WC_XDIGIT	12
 
-/* internal function to translate JP to Unicode */
+/* internal functions to translate between JP and Unicode */
+/* note this is not applicable to Cygwin, where wchar_t is always Unicode,
+   and should not be applicable to most other platforms either;
+   * platforms for which wchar_t is not Unicode should be explicitly listed
+   * the transformation should be applied to all non-Unicode locales 
+     (also Chinese, Korean, and even 8-bit locales such as *.CP1252)
+   * for towupper and towlower, the result must be back-transformed 
+     into the respective locale encoding; currently NOT IMPLEMENTED
+*/
 #ifdef __CYGWIN__
-/* Under Cygwin, the incoming wide character is already given in UTF due
-   to the requirements of the underlying OS. */
+/* Under Cygwin, wchar_t (or its extension wint_t) is Unicode */
 #define _jp2uc(c) (c)
+#define _jp2uc_l(c, l) (c)
+#define _uc2jp_l(c, l) (c)
 #else
 wint_t _jp2uc (wint_t);
+wint_t _jp2uc_l (wint_t, struct __locale_t *);
+wint_t _uc2jp_l (wint_t, struct __locale_t *);
 #endif
diff --git a/newlib/libc/ctype/towctrans.c b/newlib/libc/ctype/towctrans.c
index edbdfce..176aa3d
--- a/newlib/libc/ctype/towctrans.c
+++ b/newlib/libc/ctype/towctrans.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -69,10 +70,9 @@ No supporting OS subroutines are required.
 */
 
 #include <_ansi.h>
-#include <string.h>
 #include <reent.h>
 #include <wctype.h>
-#include <errno.h>
+//#include <errno.h>
 #include "local.h"
 
 wint_t
@@ -80,13 +80,13 @@ _towctrans_r (struct _reent *r,
 	wint_t c,
 	wctrans_t w)
 {
-  if (w == WCT_TOLOWER)
-    return towlower (c);
-  else if (w == WCT_TOUPPER)
-    return towupper (c);
+  if (w == WCT_TOLOWER || w == WCT_TOUPPER)
+    return towctrans_l (c, w, 0);
   else
     {
-      r->_errno = EINVAL;
+      // skipping this because it was causing trouble (cygwin crash)
+      // and there is no errno specified for towctrans
+      //r->_errno = EINVAL;
       return c;
     }
 }
@@ -94,7 +94,7 @@ _towctrans_r (struct _reent *r,
 #ifndef _REENT_ONLY
 wint_t
 towctrans (wint_t c,
-        wctrans_t w)
+	wctrans_t w)
 {
   return _towctrans_r (_REENT, c, w);
 }
diff --git a/newlib/libc/ctype/towctrans_l.c b/newlib/libc/ctype/towctrans_l.c
index d7369e1..8da372f 100644
--- a/newlib/libc/ctype/towctrans_l.c
+++ b/newlib/libc/ctype/towctrans_l.c
@@ -1,10 +1,101 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <wctype.h>
+//#include <errno.h>
+#include "local.h"
+
+enum {EVENCAP, ODDCAP};         /* TO1 delta: which parity is the capital */
+enum {TO1, TOLO, TOUP, TOBOTH}; /* values of caseconv_entry.mode */
+static const struct caseconv_entry { /* table is only ever read */
+  unsigned int first: 21;  /* first code point of the range */
+  unsigned short diff: 8;  /* last code point of the range is first + diff */
+  unsigned char mode: 2;   /* one of TO1, TOLO, TOUP, TOBOTH */
+  int delta: 17;           /* TOLO/TOUP: signed offset; TO1: EVENCAP/ODDCAP */
+} __attribute__ ((packed))
+caseconv_table [] = {
+#include "caseconv.t"
+};
+#define first(ce)	((ce).first)
+#define last(ce)	((ce).first + (ce).diff)
+
+/* Binary-search table[0..max] for the range entry containing ucs; 0 if none */
+static const struct caseconv_entry *
+bisearch(wint_t ucs, const struct caseconv_entry *table, int max)
+{
+  int min = 0;
+  int mid;
+
+  if (ucs < first(table[0]) || ucs > last(table[max])) /* outside the table */
+    return 0;
+  while (max >= min)
+    {
+      mid = (min + max) / 2;
+      if (ucs > last(table[mid])) /* ucs lies above this range */
+	min = mid + 1;
+      else if (ucs < first(table[mid])) /* ucs lies below this range */
+	max = mid - 1;
+      else
+	return &table[mid]; /* first <= ucs <= last */
+    }
+  return 0; /* ucs falls between two ranges */
+}
+
+static wint_t
+toulower (wint_t c) /* Unicode code point -> lowercase mapping, else c */
+{
+  const struct caseconv_entry * cce =
+    bisearch(c, caseconv_table,
+             sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
+  if (cce)
+    switch (cce->mode) {
+      case TOLO: return c + cce->delta; /* table offset to lowercase */
+      case TOBOTH: return c + 1; /* lowercase is the next code point */
+      case TO1: switch (cce->delta) { /* adjacent capital/small pairs */
+        case EVENCAP: if (!(c & 1)) return c + 1; break; /* even = capital */
+        case ODDCAP: if (c & 1) return c + 1; break; /* odd = capital */
+      }
+    }
+  /* no entry, or none that maps c to lowercase: c is returned unchanged */
+  return c;
+}
+
+static wint_t
+touupper (wint_t c) /* Unicode code point -> uppercase mapping, else c */
+{
+  const struct caseconv_entry * cce =
+    bisearch(c, caseconv_table,
+             sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
+  if (cce)
+    switch (cce->mode) {
+      case TOUP: return c + cce->delta; /* table offset to uppercase */
+      case TOBOTH: return c - 1; /* uppercase is the previous code point */
+      case TO1: switch (cce->delta) { /* adjacent capital/small pairs */
+        case EVENCAP: if (c & 1) return c - 1; break; /* odd = small letter */
+        case ODDCAP: if (!(c & 1)) return c - 1; break; /* even = small letter */
+      }
+    }
+  /* no entry, or none that maps c to uppercase: c is returned unchanged */
+  return c;
+}
 
 wint_t
 towctrans_l (wint_t c, wctrans_t w, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
-  return towctrans (c, w);
+  wint_t u = c, res; /* u: c as Unicode; res: u after the case mapping */
+#ifdef _MB_CAPABLE /* jp2uc.c defines _jp2uc_l/_uc2jp_l only in this case */
+  u = _jp2uc_l (c, locale);
+#endif
+  if (w == WCT_TOLOWER)
+    res = toulower (u);
+  else if (w == WCT_TOUPPER)
+    res = touupper (u);
+  else
+    /* unknown w: c unchanged; no errno (none specified, it crashed Cygwin) */
+    return c;
+  if (res == u)
+    return c; /* nothing mapped: hand back the caller's own encoding */
+#ifdef _MB_CAPABLE
+  res = _uc2jp_l (res, locale);
+#endif
+  return res;
 }
diff --git a/newlib/libc/ctype/towlower.c b/newlib/libc/ctype/towlower.c
index db390db..01de1bd
--- a/newlib/libc/ctype/towlower.c
+++ b/newlib/libc/ctype/towlower.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -65,9 +66,6 @@ No supporting OS subroutines are required.
 */
 
 #include <_ansi.h>
-#include <newlib.h>
-#include <string.h>
-#include <reent.h>
 #include <ctype.h>
 #include <wctype.h>
 #include "local.h"
@@ -76,500 +74,8 @@ wint_t
 towlower (wint_t c)
 {
 #ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-  /* Based on and tested against Unicode 5.2 */
-
-  /* Expression used to filter out the characters for the below code:
-
-     awk -F\; '{ if ( $14 != "" ) print $1; }' UnicodeData.txt
-  */
-  if (c < 0x100)
-    {
-      if ((c >= 0x0041 && c <= 0x005a) ||
-	  (c >= 0x00c0 && c <= 0x00d6) ||
-	  (c >= 0x00d8 && c <= 0x00de))
-	return (c + 0x20);
-
-      return c;
-    }
-  else if (c < 0x300)
-    {
-      if ((c >= 0x0100 && c <= 0x012e) ||
-	  (c >= 0x0132 && c <= 0x0136) ||
-	  (c >= 0x014a && c <= 0x0176) ||
-	  (c >= 0x01de && c <= 0x01ee) ||
-	  (c >= 0x01f8 && c <= 0x021e) ||
-	  (c >= 0x0222 && c <= 0x0232))
-	{
-	  if (!(c & 0x01))
-	    return (c + 1);
-	  return c;
-	}
-
-      if (c == 0x0130)
-	return 0x0069;
-
-      if ((c >= 0x0139 && c <= 0x0147) ||
-	  (c >= 0x01cd && c <= 0x01db))
-	{
-	  if (c & 0x01)
-	    return (c + 1);
-	  return c;
-	}
-      
-      if (c >= 0x178 && c <= 0x01f7)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x0178:
-	      k = 0x00ff;
-	      break;
-	    case 0x0179:
-	    case 0x017b:
-	    case 0x017d:
-	    case 0x0182:
-	    case 0x0184:
-	    case 0x0187:
-	    case 0x018b:
-	    case 0x0191:
-	    case 0x0198:
-	    case 0x01a0:
-	    case 0x01a2:
-	    case 0x01a4:
-	    case 0x01a7:
-	    case 0x01ac:
-	    case 0x01af:
-	    case 0x01b3:
-	    case 0x01b5:
-	    case 0x01b8:
-	    case 0x01bc:
-	    case 0x01c5:
-	    case 0x01c8:
-	    case 0x01cb:
-	    case 0x01cd:
-	    case 0x01cf:
-	    case 0x01d1:
-	    case 0x01d3:
-	    case 0x01d5:
-	    case 0x01d7:
-	    case 0x01d9:
-	    case 0x01db:
-	    case 0x01f2:
-	    case 0x01f4:
-	      k = c + 1;
-	      break;
-	    case 0x0181:
-	      k = 0x0253;
-	      break;
-	    case 0x0186:
-	      k = 0x0254;
-	      break;
-	    case 0x0189:
-	      k = 0x0256;
-	      break;
-	    case 0x018a:
-	      k = 0x0257;
-	      break;
-	    case 0x018e:
-	      k = 0x01dd;
-	      break;
-	    case 0x018f:
-	      k = 0x0259;
-	      break;
-	    case 0x0190:
-	      k = 0x025b;
-	      break;
-	    case 0x0193:
-	      k = 0x0260;
-	      break;
-	    case 0x0194:
-	      k = 0x0263;
-	      break;
-	    case 0x0196:
-	      k = 0x0269;
-	      break;
-	    case 0x0197:
-	      k = 0x0268;
-	      break;
-	    case 0x019c:
-	      k = 0x026f;
-	      break;
-	    case 0x019d:
-	      k = 0x0272;
-	      break;
-	    case 0x019f:
-	      k = 0x0275;
-	      break;
-	    case 0x01a6:
-	      k = 0x0280;
-	      break;
-	    case 0x01a9:
-	      k = 0x0283;
-	      break;
-	    case 0x01ae:
-	      k = 0x0288;
-	      break;
-	    case 0x01b1:
-	      k = 0x028a;
-	      break;
-	    case 0x01b2:
-	      k = 0x028b;
-	      break;
-	    case 0x01b7:
-	      k = 0x0292;
-	      break;
-	    case 0x01c4:
-	    case 0x01c7:
-	    case 0x01ca:
-	    case 0x01f1:
-	      k = c + 2;
-	      break;
-	    case 0x01f6:
-	      k = 0x0195;
-	      break;
-	    case 0x01f7:
-	      k = 0x01bf;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-      else if (c == 0x0220)
-      	return 0x019e;
-      else if (c >= 0x023a && c <= 0x024e)
-      	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x023a:
-	      k = 0x2c65;
-	      break;
-	    case 0x023b:
-	    case 0x0241:
-	    case 0x0246:
-	    case 0x0248:
-	    case 0x024a:
-	    case 0x024c:
-	    case 0x024e:
-	      k = c + 1;
-	      break;
-	    case 0x023d:
-	      k = 0x019a;
-	      break;
-	    case 0x023e:
-	      k = 0x2c66;
-	      break;
-	    case 0x0243:
-	      k = 0x0180;
-	      break;
-	    case 0x0244:
-	      k = 0x0289;
-	      break;
-	    case 0x0245:
-	      k = 0x028c;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x0400)
-    {
-      if (c == 0x0370 || c == 0x0372 || c == 0x0376)
-      	return (c + 1);
-      if (c >= 0x0391 && c <= 0x03ab && c != 0x03a2)
-	return (c + 0x20);
-      if (c >= 0x03d8 && c <= 0x03ee && !(c & 0x01))
-	return (c + 1);
-      if (c >= 0x0386 && c <= 0x03ff)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x0386:
-	      k = 0x03ac;
-	      break;
-	    case 0x0388:
-	      k = 0x03ad;
-	      break;
-	    case 0x0389:
-	      k = 0x03ae;
-	      break;
-	    case 0x038a:
-	      k = 0x03af;
-	      break;
-	    case 0x038c:
-	      k = 0x03cc;
-	      break;
-	    case 0x038e:
-	      k = 0x03cd;
-	      break;
-	    case 0x038f:
-	      k = 0x03ce;
-	      break;
-	    case 0x03cf:
-	      k = 0x03d7;
-	      break;
-	    case 0x03f4:
-	      k = 0x03b8;
-	      break;
-	    case 0x03f7:
-	      k = 0x03f8;
-	      break;
-	    case 0x03f9:
-	      k = 0x03f2;
-	      break;
-	    case 0x03fa:
-	      k = 0x03fb;
-	      break;
-	    case 0x03fd:
-	      k = 0x037b;
-	      break;
-	    case 0x03fe:
-	      k = 0x037c;
-	      break;
-	    case 0x03ff:
-	      k = 0x037d;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x500)
-    {
-      if (c >= 0x0400 && c <= 0x040f)
-	return (c + 0x50);
-      
-      if (c >= 0x0410 && c <= 0x042f)
-	return (c + 0x20);
-      
-      if ((c >= 0x0460 && c <= 0x0480) ||
-	  (c >= 0x048a && c <= 0x04be) ||
-	  (c >= 0x04d0 && c <= 0x04fe))
-	{
-	  if (!(c & 0x01))
-	    return (c + 1);
-	  return c;
-	}
-      
-      if (c == 0x04c0)
-	return 0x04cf;
-
-      if (c >= 0x04c1 && c <= 0x04cd)
-	{
-	  if (c & 0x01)
-	    return (c + 1);
-	  return c;
-	}
-    }
-  else if (c < 0x1f00)
-    {
-      if ((c >= 0x0500 && c <= 0x050e) ||
-	  (c >= 0x0510 && c <= 0x0524) ||
-	  (c >= 0x1e00 && c <= 0x1e94) ||
-	  (c >= 0x1ea0 && c <= 0x1ef8))
-	{
-	  if (!(c & 0x01))
-	    return (c + 1);
-	  return c;
-	}
-      
-      if (c >= 0x0531 && c <= 0x0556)
-	return (c + 0x30);
-
-      if (c >= 0x10a0 && c <= 0x10c5)
-	return (c + 0x1c60);
-
-      if (c == 0x1e9e)
-	return 0x00df;
-
-      if (c >= 0x1efa && c <= 0x1efe && !(c & 0x01))
-	return (c + 1);
-    }
-  else if (c < 0x2000)
-    {
-      if ((c >= 0x1f08 && c <= 0x1f0f) ||
-	  (c >= 0x1f18 && c <= 0x1f1d) ||
-	  (c >= 0x1f28 && c <= 0x1f2f) ||
-	  (c >= 0x1f38 && c <= 0x1f3f) ||
-	  (c >= 0x1f48 && c <= 0x1f4d) ||
-	  (c >= 0x1f68 && c <= 0x1f6f) ||
-	  (c >= 0x1f88 && c <= 0x1f8f) ||
-	  (c >= 0x1f98 && c <= 0x1f9f) ||
-	  (c >= 0x1fa8 && c <= 0x1faf))
-	return (c - 0x08);
-
-      if (c >= 0x1f59 && c <= 0x1f5f)
-	{
-	  if (c & 0x01)
-	    return (c - 0x08);
-	  return c;
-	}
-    
-      if (c >= 0x1fb8 && c <= 0x1ffc)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x1fb8:
-	    case 0x1fb9:
-	    case 0x1fd8:
-	    case 0x1fd9:
-	    case 0x1fe8:
-	    case 0x1fe9:
-	      k = c - 0x08;
-	      break;
-	    case 0x1fba:
-	    case 0x1fbb:
-	      k = c - 0x4a;
-	      break;
-	    case 0x1fbc:
-	      k = 0x1fb3;
-	      break;
-	    case 0x1fc8:
-	    case 0x1fc9:
-	    case 0x1fca:
-	    case 0x1fcb:
-	      k = c - 0x56;
-	      break;
-	    case 0x1fcc:
-	      k = 0x1fc3;
-	      break;
-	    case 0x1fda:
-	    case 0x1fdb:
-	      k = c - 0x64;
-	      break;
-	    case 0x1fea:
-	    case 0x1feb:
-	      k = c - 0x70;
-	      break;
-	    case 0x1fec:
-	      k = 0x1fe5;
-	      break;
-	    case 0x1ff8:
-	    case 0x1ff9:
-	      k = c - 0x80;
-	      break;
-	    case 0x1ffa:
-	    case 0x1ffb:
-	      k = c - 0x7e;
-	      break;
-	    case 0x1ffc:
-	      k = 0x1ff3;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x2c00)
-    {
-      if (c >= 0x2160 && c <= 0x216f)
-	return (c + 0x10);
-
-      if (c >= 0x24b6 && c <= 0x24cf)
-	return (c + 0x1a);
-      
-      switch (c)
-      	{
-	case 0x2126:
-	  return 0x03c9;
-	case 0x212a:
-	  return 0x006b;
-	case 0x212b:
-	  return 0x00e5;
-	case 0x2132:
-	  return 0x214e;
-	case 0x2183:
-	  return 0x2184;
-	}
-    }
-  else if (c < 0x2d00)
-    {
-      if (c >= 0x2c00 && c <= 0x2c2e)
-	return (c + 0x30);
-
-      if (c >= 0x2c80 && c <= 0x2ce2 && !(c & 0x01))
-	return (c + 1);
-
-      switch (c)
-      	{
-	case 0x2c60:
-	  return 0x2c61;
-	case 0x2c62:
-	  return 0x026b;
-	case 0x2c63:
-	  return 0x1d7d;
-	case 0x2c64:
-	  return 0x027d;
-	case 0x2c67:
-	case 0x2c69:
-	case 0x2c6b:
-	case 0x2c72:
-	case 0x2c75:
-	case 0x2ceb:
-	case 0x2ced:
-	  return c + 1;
-	case 0x2c6d:
-	  return 0x0251;
-	case 0x2c6e:
-	  return 0x0271;
-	case 0x2c6f:
-	  return 0x0250;
-	case 0x2c70:
-	  return 0x0252;
-	case 0x2c7e:
-	  return 0x023f;
-	case 0x2c7f:
-	  return 0x0240;
-	}
-    }
-  else if (c >= 0xa600 && c < 0xa800)
-    {
-      if ((c >= 0xa640 && c <= 0xa65e) ||
-	  (c >= 0xa662 && c <= 0xa66c) ||
-	  (c >= 0xa680 && c <= 0xa696) ||
-	  (c >= 0xa722 && c <= 0xa72e) ||
-	  (c >= 0xa732 && c <= 0xa76e) ||
-	  (c >= 0xa77f && c <= 0xa786))
-	{
-	  if (!(c & 1))
-	    return (c + 1);
-	  return c;
-	}
-
-      switch (c)
-      	{
-	case 0xa779:
-	case 0xa77b:
-	case 0xa77e:
-	case 0xa78b:
-	  return (c + 1);
-	case 0xa77d:
-	  return 0x1d79;
-	}
-    }
-  else
-    {
-      if (c >= 0xff21 && c <= 0xff3a)
-	return (c + 0x20);
-      
-      if (c >= 0x10400 && c <= 0x10427)
-	return (c + 0x28);
-    }
-  return c;
+  return towctrans (c, WCT_TOLOWER);
 #else
-  return (c < 0x00ff ? (wint_t)(tolower ((int)c)) : c);
+  return c < 0x00ff ? (wint_t)(tolower ((int)c)) : c;
 #endif /* _MB_CAPABLE */
 }
-
diff --git a/newlib/libc/ctype/towlower_l.c b/newlib/libc/ctype/towlower_l.c
index 2e89ec9..46e024d
--- a/newlib/libc/ctype/towlower_l.c
+++ b/newlib/libc/ctype/towlower_l.c
@@ -1,3 +1,4 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <newlib.h>
 #include <wctype.h>
@@ -6,7 +7,9 @@
 wint_t
 towlower_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
+#ifdef _MB_CAPABLE
+  return towctrans_l (c, WCT_TOLOWER, locale);
+#else
   return towlower (c);
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/towupper.c b/newlib/libc/ctype/towupper.c
index 306f72b..a60e62b
--- a/newlib/libc/ctype/towupper.c
+++ b/newlib/libc/ctype/towupper.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2002 Red Hat Incorporated.
    All rights reserved.
+   Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
@@ -76,518 +77,8 @@ wint_t
 towupper (wint_t c)
 {
 #ifdef _MB_CAPABLE
-  c = _jp2uc (c);
-  /* Based on and tested against Unicode 5.2 */
-
-  /* Expression used to filter out the characters for the below code:
-
-     awk -F\; '{ if ( $13 != "" ) print $1; }' UnicodeData.txt
-  */
-  if (c < 0x100)
-    {
-      if (c == 0x00b5)
-	return 0x039c;
-      
-      if ((c >= 0x00e0 && c <= 0x00fe && c != 0x00f7) ||
-	  (c >= 0x0061 && c <= 0x007a))
-	return (c - 0x20);
-      
-      if (c == 0xff)
-	return 0x0178;
-      
-      return c;
-    }
-  else if (c < 0x300)
-    {
-      if ((c >= 0x0101 && c <= 0x012f) ||
-	  (c >= 0x0133 && c <= 0x0137) ||
-	  (c >= 0x014b && c <= 0x0177) ||
-	  (c >= 0x01df && c <= 0x01ef) ||
-	  (c >= 0x01f9 && c <= 0x021f) ||
-	  (c >= 0x0223 && c <= 0x0233) ||
-	  (c >= 0x0247 && c <= 0x024f))
-	{
-	  if (c & 0x01)
-	    return (c - 1);
-	  return c;
-	}
-
-      if ((c >= 0x013a && c <= 0x0148) ||
-	  (c >= 0x01ce && c <= 0x01dc) ||
-	  c == 0x023c || c == 0x0242)
-	{
-	  if (!(c & 0x01))
-	    return (c - 1);
-	  return c;
-	}
-      
-      if (c == 0x0131)
-	return 0x0049;
-      
-      if (c == 0x017a || c == 0x017c || c == 0x017e)
-	return (c - 1);
-      
-      if (c >= 0x017f && c <= 0x0292)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x017f:
-	      k = 0x0053;
-	      break;
-	    case 0x0180:
-	      k = 0x0243;
-	      break;
-	    case 0x0183:
-	      k = 0x0182;
-	      break;
-	    case 0x0185:
-	      k = 0x0184;
-	      break;
-	    case 0x0188:
-	      k = 0x0187;
-	      break;
-	    case 0x018c:
-	      k = 0x018b;
-	      break;
-	    case 0x0192:
-	      k = 0x0191;
-	      break;
-	    case 0x0195:
-	      k = 0x01f6;
-	      break;
-	    case 0x0199:
-	      k = 0x0198;
-	      break;
-	    case 0x019a:
-	      k = 0x023d;
-	      break;
-	    case 0x019e:
-	      k = 0x0220;
-	      break;
-	    case 0x01a1:
-	    case 0x01a3:
-	    case 0x01a5:
-	    case 0x01a8:
-	    case 0x01ad:
-	    case 0x01b0:
-	    case 0x01b4:
-	    case 0x01b6:
-	    case 0x01b9:
-	    case 0x01bd:
-	    case 0x01c5:
-	    case 0x01c8:
-	    case 0x01cb:
-	    case 0x01f2:
-	    case 0x01f5:
-	      k = c - 1;
-	      break;
-	    case 0x01bf:
-	      k = 0x01f7;
-	      break;
-	    case 0x01c6:
-	    case 0x01c9:
-	    case 0x01cc:
-	      k = c - 2;
-	      break;
-	    case 0x01dd:
-	      k = 0x018e;
-	      break;
-	    case 0x01f3:
-	      k = 0x01f1;
-	      break;
-	    case 0x023f:
-	      k = 0x2c7e;
-	      break;
-	    case 0x0240:
-	      k = 0x2c7f;
-	      break;
-	    case 0x0250:
-	      k = 0x2c6f;
-	      break;
-	    case 0x0251:
-	      k = 0x2c6d;
-	      break;
-	    case 0x0252:
-	      k = 0x2c70;
-	      break;
-	    case 0x0253:
-	      k = 0x0181;
-	      break;
-	    case 0x0254:
-	      k = 0x0186;
-	      break;
-	    case 0x0256:
-	      k = 0x0189;
-	      break;
-	    case 0x0257:
-	      k = 0x018a;
-	      break;
-	    case 0x0259:
-	      k = 0x018f;
-	      break;
-	    case 0x025b:
-	      k = 0x0190;
-	      break;
-	    case 0x0260:
-	      k = 0x0193;
-	      break;
-	    case 0x0263:
-	      k = 0x0194;
-	      break;
-	    case 0x0268:
-	      k = 0x0197;
-	      break;
-	    case 0x0269:
-	      k = 0x0196;
-	      break;
-	    case 0x026b:
-	      k = 0x2c62;
-	      break;
-	    case 0x026f:
-	      k = 0x019c;
-	      break;
-	    case 0x0271:
-	      k = 0x2c6e;
-	      break;
-	    case 0x0272:
-	      k = 0x019d;
-	      break;
-	    case 0x0275:
-	      k = 0x019f;
-	      break;
-	    case 0x027d:
-	      k = 0x2c64;
-	      break;
-	    case 0x0280:
-	      k = 0x01a6;
-	      break;
-	    case 0x0283:
-	      k = 0x01a9;
-	      break;
-	    case 0x0288:
-	      k = 0x01ae;
-	      break;
-	    case 0x0289:
-	      k = 0x0244;
-	      break;
-	    case 0x028a:
-	      k = 0x01b1;
-	      break;
-	    case 0x028b:
-	      k = 0x01b2;
-	      break;
-	    case 0x028c:
-	      k = 0x0245;
-	      break;
-	    case 0x0292:
-	      k = 0x01b7;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x0400)
-    {
-      wint_t k;
-
-      if (c >= 0x03ad && c <= 0x03af)
-      	return (c - 0x25);
-
-      if (c >= 0x03b1 && c <= 0x03cb && c != 0x03c2)
-	return (c - 0x20);
-      
-      if (c >= 0x03d9 && c <= 0x03ef && (c & 1))
-	return (c - 1);
-
-      switch (c)
-	{
-	case 0x0345:
-	  k = 0x0399;
-	  break;
-	case 0x0371:
-	case 0x0373:
-	case 0x0377:
-	case 0x03f8:
-	case 0x03fb:
-	  k = c - 1;
-	  break;
-	case 0x037b:
-	case 0x037c:
-	case 0x037d:
-	  k = c + 0x82;
-	  break;
-	case 0x03ac:
-	  k = 0x0386;
-	  break;
-	case 0x03c2:
-	  k = 0x03a3;
-	  break;
-	case 0x03cc:
-	  k = 0x038c;
-	  break;
-	case 0x03cd:
-	case 0x03ce:
-	  k = c - 0x3f;
-	  break;
-	case 0x03d0:
-	  k = 0x0392;
-	  break;
-	case 0x03d1:
-	  k = 0x0398;
-	  break;
-	case 0x03d5:
-	  k = 0x03a6;
-	  break;
-	case 0x03d6:
-	  k = 0x03a0;
-	  break;
-	case 0x03d7:
-	  k = 0x03cf;
-	  break;
-	case 0x03f0:
-	  k = 0x039a;
-	  break;
-	case 0x03f1:
-	  k = 0x03a1;
-	  break;
-	case 0x03f2:
-	  k = 0x03f9;
-	  break;
-	case 0x03f5:
-	  k = 0x0395;
-	  break;
-	default:
-	  k = 0;
-	}
-      if (k != 0)
-	return k;
-    }
-  else if (c < 0x500)
-    {
-      if (c >= 0x0430 && c <= 0x044f)
-	return (c - 0x20);
-      
-      if (c >= 0x0450 && c <= 0x045f)
-	return (c - 0x50);
-      
-      if ((c >= 0x0461 && c <= 0x0481) ||
-	  (c >= 0x048b && c <= 0x04bf) ||
-	  (c >= 0x04d1 && c <= 0x04ff))
-	{
-	  if (c & 0x01)
-	    return (c - 1);
-	  return c;
-	}
-      
-      if (c >= 0x04c2 && c <= 0x04ce)
-	{
-	  if (!(c & 0x01))
-	    return (c - 1);
-	  return c;
-	}
-      
-      if (c == 0x04cf)
-      	return 0x04c0;
-    }
-  else if (c < 0x0600)
-    {
-      if (c >= 0x0501 && c <= 0x0525 && (c & 1))
-      	return c - 1;
-
-      if (c >= 0x0561 && c <= 0x0586)
-	return (c - 0x30);
-    }
-  else if (c < 0x1f00)
-    {
-      if (c == 0x1d79)
-      	return 0xa77d;
-
-      if (c == 0x1d7d)
-      	return 0x2c63;
-
-      if ((c >= 0x1e01 && c <= 0x1e95) ||
-	  (c >= 0x1ea1 && c <= 0x1eff))
-	{
-	  if (c & 0x01)
-	    return (c - 1);
-	  return c;
-	}
-      
-      if (c == 0x1e9b)
-	return 0x1e60;
-    }
-  else if (c < 0x2000)
-    {
-      
-      if ((c >= 0x1f00 && c <= 0x1f07) ||
-	  (c >= 0x1f10 && c <= 0x1f15) ||
-	  (c >= 0x1f20 && c <= 0x1f27) ||
-	  (c >= 0x1f30 && c <= 0x1f37) ||
-	  (c >= 0x1f40 && c <= 0x1f45) ||
-	  (c >= 0x1f60 && c <= 0x1f67) ||
-	  (c >= 0x1f80 && c <= 0x1f87) ||
-	  (c >= 0x1f90 && c <= 0x1f97) ||
-	  (c >= 0x1fa0 && c <= 0x1fa7))
-	return (c + 0x08);
-
-      if (c >= 0x1f51 && c <= 0x1f57 && (c & 0x01))
-	return (c + 0x08);
-      
-      if (c >= 0x1f70 && c <= 0x1ff3)
-	{
-	  wint_t k;
-	  switch (c)
-	    {
-	    case 0x1fb0:
-	      k = 0x1fb8;
-	      break;
-	    case 0x1fb1:
-	      k = 0x1fb9;
-	      break;
-	    case 0x1f70:
-	      k = 0x1fba;
-	      break;
-	    case 0x1f71:
-	      k = 0x1fbb;
-	      break;
-	    case 0x1fb3:
-	      k = 0x1fbc;
-	      break;
-	    case 0x1fbe:
-	      k = 0x0399;
-	      break;
-	    case 0x1f72:
-	      k = 0x1fc8;
-	      break;
-	    case 0x1f73:
-	      k = 0x1fc9;
-	      break;
-	    case 0x1f74:
-	      k = 0x1fca;
-	      break;
-	    case 0x1f75:
-	      k = 0x1fcb;
-	      break;
-	    case 0x1fc3:
-	      k = 0x1fcc;
-	      break;
-	    case 0x1fd0:
-	      k = 0x1fd8;
-	      break;
-	    case 0x1fd1:
-	      k = 0x1fd9;
-	      break;
-	    case 0x1f76:
-	      k = 0x1fda;
-	      break;
-	    case 0x1f77:
-	      k = 0x1fdb;
-	      break;
-	    case 0x1fe0:
-	      k = 0x1fe8;
-	      break;
-	    case 0x1fe1:
-	      k = 0x1fe9;
-	      break;
-	    case 0x1f7a:
-	      k = 0x1fea;
-	      break;
-	    case 0x1f7b:
-	      k = 0x1feb;
-	      break;
-	    case 0x1fe5:
-	      k = 0x1fec;
-	      break;
-	    case 0x1f78:
-	      k = 0x1ff8;
-	      break;
-	    case 0x1f79:
-	      k = 0x1ff9;
-	      break;
-	    case 0x1f7c:
-	      k = 0x1ffa;
-	      break;
-	    case 0x1f7d:
-	      k = 0x1ffb;
-	      break;
-	    case 0x1ff3:
-	      k = 0x1ffc;
-	      break;
-	    default:
-	      k = 0;
-	    }
-	  if (k != 0)
-	    return k;
-	}
-    }
-  else if (c < 0x3000)
-    {
-      if (c == 0x214e)
-      	return 0x2132;
-
-      if (c == 0x2184)
-      	return 0x2183;
-
-      if (c >= 0x2170 && c <= 0x217f)
-	return (c - 0x10);
-      
-      if (c >= 0x24d0 && c <= 0x24e9)
-	return (c - 0x1a);
-      
-      if (c >= 0x2c30 && c <= 0x2c5e)
-	return (c - 0x30);
-
-      if ((c >= 0x2c68 && c <= 0x2c6c && !(c & 1)) ||
-	  (c >= 0x2c81 && c <= 0x2ce3 &&  (c & 1)) ||
-	  c == 0x2c73 || c == 0x2c76 ||
-	  c == 0x2cec || c == 0x2cee)
-      	return (c - 1);
-
-      if (c >= 0x2c81 && c <= 0x2ce3 && (c & 1))
-	return (c - 1);
-
-      if (c >= 0x2d00 && c <= 0x2d25)
-      	return (c - 0x1c60);
-
-      switch (c)
-      	{
-	case 0x2c61:
-	  return 0x2c60;
-	case 0x2c65:
-	  return 0x023a;
-	case 0x2c66:
-	  return 0x023e;
-	}
-    }
-  else if (c >= 0xa000 && c < 0xb000)
-    {
-      if (((c >= 0xa641 && c <= 0xa65f) ||
-           (c >= 0xa663 && c <= 0xa66d) ||
-           (c >= 0xa681 && c <= 0xa697) ||
-           (c >= 0xa723 && c <= 0xa72f) ||
-           (c >= 0xa733 && c <= 0xa76f) ||
-           (c >= 0xa77f && c <= 0xa787)) &&
-	  (c & 1))
-	return (c - 1);
-      	
-      if (c == 0xa77a || c == 0xa77c || c == 0xa78c)
-	return (c - 1);
-    }
-  else
-    {
-      if (c >= 0xff41 && c <= 0xff5a)
-	return (c - 0x20);
-      
-      if (c >= 0x10428 && c <= 0x1044f)
-	return (c - 0x28);
-    }
-  return c;
+  return towctrans (c, WCT_TOUPPER);
 #else
-  return (c < 0x00ff ? (wint_t)(toupper ((int)c)) : c);
+  return c < 0x00ff ? (wint_t)(toupper ((int)c)) : c;
 #endif /* _MB_CAPABLE */
 }
-
diff --git a/newlib/libc/ctype/towupper_l.c b/newlib/libc/ctype/towupper_l.c
index 5a8384c..d7c1adb
--- a/newlib/libc/ctype/towupper_l.c
+++ b/newlib/libc/ctype/towupper_l.c
@@ -1,10 +1,14 @@
+/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
 #include <_ansi.h>
 #include <wctype.h>
+#include "local.h"
 
 wint_t
 towupper_l (wint_t c, struct __locale_t *locale)
 {
-  /* We're using a locale-independent representation of upper/lower case
-     based on Unicode data.  Thus, the locale doesn't matter. */
+#ifdef _MB_CAPABLE
+  return towctrans_l (c, WCT_TOUPPER, locale);
+#else
   return towupper (c);
+#endif /* _MB_CAPABLE */
 }
diff --git a/newlib/libc/ctype/utf8alpha.h b/newlib/libc/ctype/utf8alpha.h
deleted file mode 100644
index d9306b7..0000000
--- a/newlib/libc/ctype/utf8alpha.h
+++ /dev/null
@@ -1,355 +0,0 @@
-/* Copyright (c) 2002 Red Hat Incorporated.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-
-     Redistributions of source code must retain the above copyright
-     notice, this list of conditions and the following disclaimer.
-
-     Redistributions in binary form must reproduce the above copyright
-     notice, this list of conditions and the following disclaimer in the
-     documentation and/or other materials provided with the distribution.
-
-     The name of Red Hat Incorporated may not be used to endorse
-     or promote products derived from this software without specific
-     prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
-   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Generated using UnicodeData.txt 5.2 */
-
-/* Expression used to filter out the characters for the below tables:
-
-  awk -F\; \
-  '{ \
-    VAL = strtonum (sprintf("0x%s", $1)); \
-    # All of general category "L", except for two Thai characters which \
-    # are actually punctuation characters.  Old Unicode weirdness. \
-    # The character "COMBINING GREEK YPOGEGRAMMENI", as well as all Thai \
-    # characters which are in "Mn" category.  Old Unicode weirdness. \
-    # All numerical digit or letter characters, except the ASCII variants. \
-    # This is necessary due to the unfortunate ISO C definition for the \
-    # iswdigit class, otherwise these characters are missing in iswalnum. \
-    # All "Other Symbols" which are named as "LETTER" characters. \
-    # \
-    # Before running this test, make sure to expand all Unicode blocks \
-    # which are just marked by their first and last character! \
-    # \
-    if (   (match($3, "^L") && VAL != 0x0e2f && VAL != 0x0e46) \
-	|| (match($3, "^Mn") && (VAL == 0x0345 || match($2, "\\<CHARACTER\\>"))) \
-	|| (match($3, "^N[dl]") && VAL >= 0x100) \
-	|| (match($3, "^So") && match($2, "\\<LETTER\\>"))) \
-      print $1; \
-  }' UnicodeData.txt
-*/
-
-static const unsigned char u0[] = {
-  0x41, 0x0, 0x5a, 0x61, 0x0, 0x7a, 0xaa, 0xb5, 
-  0xba, 0xc0, 0x0, 0xd6, 0xd8, 0x0, 0xf6, 0xf8, 
-  0x0, 0xff };
-/* u1 all alphabetic */
-static const unsigned char u2[] = {
-  0x00, 0x0, 0xc1, 0xc6, 0x0, 0xd1,
-  0xe0, 0x0, 0xe4, 0xec, 0xee };
-static const unsigned char u3[] = {
-  0x45, 0x70, 0x0, 0x74, 0x76, 0x77,
-  0x7a, 0x0, 0x7d, 0x86, 0x88, 0x0, 0x8a, 0x8c,
-  0x8e, 0x0, 0xa1, 0xa3, 0x0, 0xf5,
-  0xf7, 0x0, 0xff };
-static const unsigned char u4[] = { 
-  0x00, 0x0, 0x81, 0x8a, 0x0, 0xff };
-static const unsigned char u5[] = { 
-  0x00, 0x0, 0x25, 0x31, 0x0, 0x56, 0x59, 
-  0x61, 0x0, 0x87, 0xd0, 0x0, 0xea,
-  0xf0, 0x0, 0xf2 };
-static const unsigned char u6[] = { 
-  0x21, 0x0, 0x4a, 0x60, 0x0, 0x69,
-  0x6e, 0x0, 0x6f, 0x71, 0x0, 0xd3, 
-  0xd5, 0xe5, 0x0, 0xe6, 0xee, 0x0, 0xfc, 0xff };
-static const unsigned char u7[] = { 
-  0x10, 0x12, 0x0, 0x2f, 0x4d, 0x0, 0xa5, 0xb1,
-  0xc0, 0x0, 0xea, 0xf4, 0xf5, 0xfa };
-static const unsigned char u8[] = { 
-  0x00, 0x0, 0x15, 0x1a, 0x24, 0x28 };
-static const unsigned char u9[] = { 
-  0x04, 0x0, 0x39, 0x3d, 0x50, 0x58, 0x0, 0x61,
-  0x66, 0x0, 0x6f, 0x71, 0x72, 0x79, 0x0, 0x7f,
-  0x85, 0x0, 0x8c, 0x8f, 0x0, 0x90, 
-  0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0, 0xb2,
-  0xb6, 0x0, 0xb9, 0xbd, 0xce, 0xdc, 0x0, 0xdd,
-  0xdf, 0x0, 0xe1, 0xe6, 0x0, 0xf1 };
-static const unsigned char ua[] = { 
-  0x05, 0x0, 0x0a, 0x0f, 0x0, 0x10,
-  0x13, 0x0, 0x28, 0x2a, 0x0, 0x30,
-  0x32, 0x0, 0x33, 0x35, 0x0, 0x36,
-  0x38, 0x0, 0x39, 0x59, 0x0, 0x5c,
-  0x5e, 0x66, 0x0, 0x6f, 0x72, 0x0, 0x74,
-  0x85, 0x0, 0x8d, 0x8f, 0x0, 0x91,
-  0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0,
-  0xb2, 0x0, 0xb3, 0xb5, 0x0, 0xb9,
-  0xbd, 0xd0, 0xe0, 0xe1, 0xe6, 0x0, 0xef };
-static const unsigned char ub[] = { 
-  0x05, 0x0, 0x0c, 0x0f, 0x0, 0x10,
-  0x13, 0x0, 0x28, 0x2a, 0x0, 0x30,
-  0x32, 0x0, 0x33, 0x35, 0x0, 0x39, 0x3d,
-  0x5c, 0x0, 0x5d, 0x5f, 0x0, 0x61,
-  0x66, 0x0, 0x6f, 0x71, 0x83, 0x85, 0x0, 0x8a,
-  0x8e, 0x0, 0x90, 0x92, 0x0, 0x95,
-  0x99, 0x0, 0x9a, 0x9c, 0x9e, 0x0, 0x9f,
-  0xa3, 0x0, 0xa4, 0xa8, 0x0, 0xaa,
-  0xae, 0x0, 0xb9, 0xd0, 0xe6, 0x0, 0xef };
-static const unsigned char uc[] = { 
-  0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10,
-  0x12, 0x0, 0x28, 0x2a, 0x0, 0x33,
-  0x35, 0x0, 0x39, 0x3d, 0x58, 0x59,
-  0x60, 0x0, 0x61, 0x66, 0x0, 0x6f,
-  0x85, 0x0, 0x8c, 0x8e, 0x0, 0x90,
-  0x92, 0x0, 0xa8, 0xaa, 0x0, 0xb3,
-  0xb5, 0x0, 0xb9, 0xbd, 0xde, 0xe0, 0x0, 0xe1,
-  0xe6, 0x0, 0xef };
-static const unsigned char ud[] = { 
-  0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10, 
-  0x12, 0x0, 0x28, 0x2a, 0x0, 0x39, 0x3d,
-  0x60, 0x0, 0x61, 0x66, 0x0, 0x6f,
-  0x7a, 0x0, 0x7f, 0x85, 0x0, 0x96, 0x9a, 
-  0x0, 0xb1, 0xb3, 0x0, 0xbb, 0xbd,
-  0xc0, 0x0, 0xc6 };
-static const unsigned char ue[] = { 
-  0x01, 0x0, 0x2e, 0x30, 0x0, 0x3a, 0x40, 
-  0x0, 0x45, 0x47, 0x0, 0x4e, 0x50, 0x0, 0x59, 
-  0x81, 0x0, 0x82, 0x84, 0x87, 0x0, 0x88, 0x8a, 
-  0x8d, 0x94, 0x0, 0x97, 0x99, 0x0, 0x9f, 0xa1, 
-  0x0, 0xa3, 0xa5, 0xa7, 0xaa, 0x0, 0xab, 0xad, 
-  0x0, 0xb0, 0xb2, 0x0, 0xb3, 0xbd, 0xc0, 0x0, 
-  0xc4, 0xc6, 0xd0, 0x0, 0xd9, 0xdc, 0x0, 0xdd }; 
-static const unsigned char uf[] = {
-  0x00, 0x20, 0x0, 0x29, 0x40, 0x0, 0x47, 0x49, 
-  0x0, 0x6c, 0x88, 0x0, 0x8b };
-static const unsigned char u10[] = { 
-  0x00, 0x0, 0x2a, 0x3f, 0x0, 0x49,
-  0x50, 0x0, 0x55, 0x5a, 0x0, 0x5d,
-  0x61, 0x65, 0x66, 0x6e, 0x0, 0x70,
-  0x75, 0x0, 0x81, 0x8e, 0x90, 0x0, 0x99,
-  0xa0, 0x0, 0xc5, 0xd0, 0x0, 0xfa, 0xfc };
-/* u11 all alphabetic */
-static const unsigned char u12[] = { 
-  0x00, 0x0, 0x48, 0x4a, 0x0, 0x4d,
-  0x50, 0x0, 0x56, 0x58, 0x5a, 0x0, 0x5d,
-  0x60, 0x0, 0x88, 0x8a, 0x0, 0x8d,
-  0x90, 0x0, 0xb0, 0xb2, 0x0, 0xb5,
-  0xb8, 0x0, 0xbe, 0xc0, 0xc2, 0x0, 0xc5,
-  0xc8, 0x0, 0xd6, 0xd8, 0x0, 0xff };
-static const unsigned char u13[] = { 
-  0x00, 0x0, 0x10, 0x12, 0x0, 0x15,
-  0x18, 0x0, 0x5a, 0x80, 0x0, 0x8f,
-  0xa0, 0x0, 0xf4 };
-static const unsigned char u14[] = { 
-  0x01, 0x0, 0xff };
-/* u15 all alphabetic */
-static const unsigned char u16[] = { 
-  0x00, 0x0, 0x6c, 0x6f, 0x0, 0x7f, 
-  0x81, 0x0, 0x9a, 0xa0, 0x0, 0xea,
-  0xee, 0x0, 0xf0 };
-static const unsigned char u17[] = { 
-  0x00, 0x0, 0x0c, 0x0e, 0x0, 0x11,
-  0x20, 0x0, 0x31, 0x40, 0x0, 0x51,
-  0x60, 0x0, 0x6c, 0x6e, 0x0, 0x70,
-  0x80, 0x0, 0xb3, 0xd7, 0xdc, 0xe0, 0x0, 0xe9 };
-static const unsigned char u18[] = { 
-  0x10, 0x0, 0x19, 0x20, 0x0, 0x77,
-  0x80, 0x0, 0xa8, 0xaa, 0xb0, 0x0, 0xf5 };
-static const unsigned char u19[] = { 
-  0x00, 0x0, 0x1c, 0x46, 0x0, 0x6d,
-  0x70, 0x0, 0x74, 0x80, 0x0, 0xab,
-  0xc1, 0x0, 0xc7, 0xd0, 0x0, 0xda };
-static const unsigned char u1a[] = { 
-  0x00, 0x0, 0x16, 0x20, 0x0, 0x54,
-  0x80, 0x0, 0x89, 0x90, 0x0, 0x99, 0xa7 };
-static const unsigned char u1b[] = { 
-  0x05, 0x0, 0x33, 0x45, 0x0, 0x4b,
-  0x50, 0x0, 0x59, 0x83, 0x0, 0xa0,
-  0xae, 0x0, 0xb9 };
-static const unsigned char u1c[] = { 
-  0x00, 0x0, 0x23, 0x40, 0x0, 0x49,
-  0x4d, 0x0, 0x7d, 0xe9, 0x0, 0xec,
-  0xee, 0x0, 0xf1 };
-static const unsigned char u1d[] = { 
-  0x00, 0x0, 0xbf };
-/* u1e all alphabetic */
-static const unsigned char u1f[] = { 
-  0x00, 0x0, 0x15, 0x18, 0x0, 0x1d, 
-  0x20, 0x0, 0x45, 0x48, 0x0, 0x4d, 0x50, 0x0, 0x57, 0x59, 
-  0x5b, 0x5d, 0x5f, 0x0, 0x7d, 0x80, 0x0, 0xb4, 
-  0xb6, 0x0, 0xbc, 0xbe, 0xc2, 0x0, 0xc4, 0xc6, 
-  0x0, 0xcc, 0xd0, 0x0, 0xd3, 0xd6, 0x0, 0xdb, 
-  0xe0, 0x0, 0xec, 0xf2, 0x0, 0xf4, 0xf6, 0x0, 
-  0xfc };
-static const unsigned char u20[] = { 
-  0x71, 0x7f, 0x90, 0x0, 0x94 };
-static const unsigned char u21[] = { 
-  0x02, 0x07, 0x0a, 0x0, 0x13, 0x15,
-  0x19, 0x0, 0x1d, 0x24, 0x26, 0x28, 0x0, 0x2d,
-  0x2f, 0x0, 0x39, 0x3c, 0x0, 0x3f,
-  0x45, 0x0, 0x49, 0x4e, 0x60, 0x0, 0x88 }; 
-static const unsigned char u24[] = { 
-  0x9c, 0x0, 0xe9 };
-static const unsigned char u2c[] = { 
-  0x00, 0x0, 0x2e, 0x30, 0x0, 0x5e,
-  0x60, 0x0, 0xe4, 0xeb, 0x0, 0xee };
-static const unsigned char u2d[] = { 
-  0x00, 0x0, 0x25, 0x30, 0x0, 0x65, 0x6f,
-  0x80, 0x0, 0x96, 0xa0, 0x0, 0xa6,
-  0xa8, 0x0, 0xae, 0xb0, 0x0, 0xb6,
-  0xb8, 0x0, 0xbe, 0xc0, 0x0, 0xc6,
-  0xc8, 0x0, 0xce, 0xd0, 0x0, 0xd6,
-  0xd8, 0x0, 0xde };
-static const unsigned char u2e[] = {
-  0x2f };
-static const unsigned char u30[] = { 
-  0x05, 0x0, 0x07, 0x21, 0x0, 
-  0x29, 0x31, 0x0, 0x35, 0x38, 0x0, 0x3c, 0x41, 
-  0x0, 0x96, 0x9d, 0x0, 0x9f, 0xa1, 0x0, 0xfa, 
-  0xfc, 0x0, 0xff };
-static const unsigned char u31[] = { 
-  0x05, 0x0, 0x2d, 0x31, 0x0, 
-  0x8e, 0xa0, 0x0, 0xb7, 0xf0, 0x0, 0xff };
-/* u34 to u4c all alphabetic */
-static const unsigned char u4d[] = { 
-  0x00, 0x0, 0xb5 };
-/* u4e to u9e all alphabetic */
-static const unsigned char u9f[] = { 
-  0x00, 0x0, 0xcb };
-/* ua0 to ua3 all alphabetic */
-static const unsigned char ua4[] = { 
-  0x00, 0x0, 0x8c, 0xd0, 0x0, 0xfd }; 
-/* ua5 all alphabetic */
-static const unsigned char ua6[] = {
-  0x00, 0x0, 0x0c, 0x10, 0x0, 0x2b,
-  0x40, 0x0, 0x5f, 0x62, 0x0, 0x6e,
-  0x7f, 0x0, 0x97, 0xa0, 0x0, 0xef };
-static const unsigned char ua7[] = {
-  0x17, 0x0, 0x1f, 0x22, 0x0, 0x88,
-  0x8b, 0x8c,
-  0xfb, 0x0, 0xff };
-static const unsigned char ua8[] = {
-  0x00, 0x01, 0x03, 0x0, 0x05, 0x07, 0x0, 0x0a,
-  0x0c, 0x0, 0x22, 0x40, 0x0, 0x73,
-  0x82, 0x0, 0xb3, 0xd0, 0x0, 0xd9,
-  0xf2, 0x0, 0xf7, 0xfb };
-static const unsigned char ua9[] = {
-  0x00, 0x0, 0x25, 0x30, 0x0, 0x46,
-  0x60, 0x0, 0x7c, 0x84, 0x0, 0xb2,
-  0xcf, 0x0, 0xd9 };
-static const unsigned char uaa[] = {
-  0x00, 0x0, 0x28, 0x40, 0x0, 0x42,
-  0x44, 0x0, 0x4b, 0x50, 0x0, 0x59,
-  0x60, 0x0, 0x76, 0x7a, 0x80, 0x0, 0xaf,
-  0xb1, 0xb5, 0xb6, 0xb9, 0x0, 0xbd,
-  0xc0, 0xc2, 0xdb, 0x0, 0xdd };
-static const unsigned char uab[] = {
-  0xc0, 0x0, 0xe2, 0xf0, 0x0, 0xf9 };
-/* uac to ud6 all alphabetic */
-static const unsigned char ud7[] = { 
-  0x00, 0x0, 0xa3, 0xb0, 0x0, 0xc6,
-  0xcb, 0x0, 0xfb };
-/* uf9 all alphabetic */
-static const unsigned char ufa[] = { 
-  0x00, 0x0, 0x2d, 0x30, 0x0, 0x6d,
-  0x70, 0x0, 0xd9 };
-static const unsigned char ufb[] = { 
-  0x00, 0x0, 0x06, 0x13, 0x0, 0x17, 0x1d, 
-  0x1f, 0x0, 0x28, 0x2a, 0x0, 0x36, 0x38, 0x0, 
-  0x3c, 0x3e, 0x40, 0x0, 0x41, 0x43, 0x0, 0x44, 
-  0x46, 0x0, 0xb1, 0xd3, 0x0, 0xff };
-/* ufc all alphabetic */
-static const unsigned char ufd[] = { 
-  0x00, 0x0, 0x3d, 0x50, 0x0, 
-  0x8f, 0x92, 0x0, 0xc7, 0xf0, 0x0, 0xfb };
-static const unsigned char ufe[] = { 
-  0x70, 
-  0x0, 0x74, 0x76, 0x0, 0xfc };
-static const unsigned char uff[] = { 
-  0x10, 0x0, 0x19, 
-  0x21, 0x0, 0x3a, 0x41, 0x0, 0x5a, 0x66, 0x0, 
-  0xbe, 0xc2, 0x0, 0xc7, 0xca, 0x0, 0xcf, 0xd2, 
-  0x0, 0xd7, 0xda, 0x0, 0xdc };
-static const unsigned char u100[] = { 
-  0x00, 0x0, 0x0b, 0x0d, 0x0, 0x26,
-  0x28, 0x0, 0x3a, 0x3c, 0x3d, 0x3f, 0x0, 0x4d,
-  0x50, 0x0, 0x5d, 0x80, 0x0, 0xfa };
-static const unsigned char u101[] = { 
-  0x40, 0x0, 0x74 };
-static const unsigned char u102[] = { 
-  0x80, 0x0, 0x9c, 0xa0, 0x0, 0xd0 };
-static const unsigned char u103[] = { 
-  0x00, 0x0, 0x1e, 0x30, 0x0, 0x4a,
-  0x80, 0x0, 0x9d, 0xa0, 0x0, 0xc3,
-  0xc8, 0x0, 0xcf, 0xd1, 0x0, 0xd5 };
-static const unsigned char u104[] = { 
-  0x00, 0x0, 0x9d, 0xa0, 0x0, 0xa9 };
-static const unsigned char u108[] = { 
-  0x00, 0x0, 0x05, 0x08, 0x0a, 0x0, 0x35,
-  0x37, 0x38, 0x3c, 0x3f, 0x0, 0x55 };
-static const unsigned char u109[] = {
-  0x00, 0x0, 0x15, 0x20, 0x0, 0x39 };
-static const unsigned char u10a[] = {
-  0x00, 0x10, 0x0, 0x13, 0x15, 0x0, 0x17,
-  0x19, 0x0, 0x33, 0x60, 0x0, 0x7c };
-static const unsigned char u10b[] = {
-  0x00, 0x0, 0x35, 0x40, 0x0, 0x55,
-  0x60, 0x0, 0x72 };
-static const unsigned char u10c[] = {
-  0x00, 0x0, 0x48 };
-static const unsigned char u110[] = {
-  0x83, 0x0, 0xaf };
-/* u120 to u122 all alphabetic */
-static const unsigned char u123[] = { 
-  0x00, 0x0, 0x6e };
-static const unsigned char u124[] = { 
-  0x00, 0x0, 0x62 };
-/* u130 to u133 all alphabetic */
-static const unsigned char u134[] = {
-  0x00, 0x0, 0x2e };
-static const unsigned char u1d4[] = { 
-  0x00, 0x0, 0x54, 0x56, 0x0, 0x9c,
-  0x9e, 0x0, 0x9f, 0xa2, 0xa5, 0x0, 0xa6,
-  0xa9, 0x0, 0xac, 0xae, 0x0, 0xb9, 0xbb,
-  0xbd, 0x0, 0xc3, 0xc5, 0x0, 0xff };
-static const unsigned char u1d5[] = { 
-  0x00, 0x0, 0x05, 0x07, 0x0, 
-  0x0a, 0x0d, 0x0, 0x14, 0x16, 0x0, 0x1c, 0x1e, 
-  0x0, 0x39, 0x3b, 0x0, 0x3e, 0x40, 0x0, 0x44, 
-  0x46, 0x4a, 0x0, 0x50, 0x52, 0x0, 0xff }; 
-static const unsigned char u1d6[] = { 
-  0x00, 0x0, 0xa5, 0xa8, 0x0, 0xc0,
-  0xc2, 0x0, 0xda, 0xdc, 0x0, 0xfa, 
-  0xfc, 0x0, 0xff };
-static const unsigned char u1d7[] = { 
-  0x00, 0x0, 0x14, 0x16, 0x0, 0x34,
-  0x36, 0x0, 0x4e, 0x50, 0x0, 0x6e, 
-  0x70, 0x0, 0x88, 0x8a, 0x0, 0xa8,
-  0xaa, 0x0, 0xc2, 0xc4, 0x0, 0xcb,
-  0xce, 0x0, 0xff };
-static const unsigned char u1f1[] = {
-  0x10, 0x0, 0x2c, 0x31, 0x3d, 0x3f, 0x42, 0x46,
-  0x57, 0x5f, 0x79, 0x7b, 0x7c, 0x7f, 0x8a };
-/* u200 to u2a5 all alphabetic */
-static const unsigned char u2a6[] = { 
-  0x00, 0x0, 0xd6 };
-/* u2a7 to u2b6 all alphabetic */
-static const unsigned char u2b7[] = {
-  0x00, 0x0, 0x34 };
-/* u2f8 to u2f9 all alphabetic */
-static const unsigned char u2fa[] = { 
-  0x00, 0x0, 0x1d };
diff --git a/newlib/libc/ctype/utf8print.h b/newlib/libc/ctype/utf8print.h
deleted file mode 100644
index abeb81c..0000000
--- a/newlib/libc/ctype/utf8print.h
+++ /dev/null
@@ -1,389 +0,0 @@
-/* Copyright (c) 2002 Red Hat Incorporated.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-
-     Redistributions of source code must retain the above copyright
-     notice, this list of conditions and the following disclaimer.
-
-     Redistributions in binary form must reproduce the above copyright
-     notice, this list of conditions and the following disclaimer in the
-     documentation and/or other materials provided with the distribution.
-
-     The name of Red Hat Incorporated may not be used to endorse
-     or promote products derived from this software without specific
-     prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED.  IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
-   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Generated using UnicodeData.txt 5.2 */
-
-/* Expression used to filter out the characters for the below tables:
-
-   awk -F\; \
-   '{ \
-     VAL = strtonum (sprintf("0x%s", $1)); \
-     # All valid characters except from categories Cc (C0 or C1 control code), \
-     # Cs (Surrogates), Zl (Line separator), and Zp (Paragraph separator).\
-     # \
-     # Before running this test, make sure to expand all Unicode blocks \
-     # which are just marked by their first and last character! \
-     # \
-     if (!match($3, "^C[cs]") && !match($3, "^Z[lp]")) \
-       print $1; \
-   }' UnicodeData.txt
-*/
-static const unsigned char u0[] = {
-  0x20, 0x0, 0x7e, 0xa0, 0x0, 0xff };
-/* u1 is all-print */
-/* u2 is all-print */
-static const unsigned char u3[] = { 
-  0x00, 0x0, 0x77, 0x7a, 0x0, 0x7e,
-  0x84, 0x0, 0x8a, 0x8c, 0x8e, 0x0, 
-  0xa1, 0xa3, 0x0, 0xff };
-/* u4 is all-print */
-static const unsigned char u5[] = {
-  0x00, 0x0, 0x25, 0x31, 0x0, 
-  0x56, 0x59, 0x0, 0x5f, 0x61, 0x0, 0x87, 0x89, 
-  0x0, 0x8a, 0x91, 0x0, 0xc7, 0xd0, 0x0, 0xea,
-  0xf0, 0x0, 0xf4 };
-static const unsigned char u6[] = {
-  0x00, 0x0, 0x03, 0x06, 0x0, 0x1b, 0x1e, 0x1f,
-  0x21, 0x0, 0x5e, 0x60, 0x0, 0xff };
-static const unsigned char u7[] = { 
-  0x00, 0x0, 0x0d, 0x0f, 0x0, 0x4a, 0x4d, 0x0, 0xb1,
-  0xc0, 0x0, 0xfa };
-static const unsigned char u8[] = { 
-  0x00, 0x0, 0x2d, 0x30, 0x0, 0x3e, };
-static const unsigned char u9[] = {
-  0x00, 0x0, 0x39, 0x3c, 0x0, 0x4e, 0x50, 0x0, 0x55, 
-  0x58, 0x0, 0x72, 0x79, 0x0, 0x7f, 0x81, 0x0, 0x83,
-  0x85, 0x0, 0x8c, 0x8f, 0x0, 0x90, 0x93, 0x0, 0xa8,
-  0xaa, 0x0, 0xb0, 0xb2, 0xb6, 0x0, 0xb9, 0xbc, 
-  0x0, 0xc4, 0xc7, 0xc8, 0xcb, 0x0, 0xce, 
-  0xd7, 0xdc, 0x0, 0xdd, 0xdf, 0x0, 0xe3, 0xe6, 
-  0x0, 0xfb };
-static const unsigned char ua[] = { 
-  0x01, 0x0, 0x03, 0x05, 0x0, 0x0a, 0x0f, 0x0, 
-  0x10, 0x13, 0x0, 0x28, 0x2a, 0x0, 0x30, 0x32, 
-  0x0, 0x33, 0x35, 0x0, 0x36, 0x38, 0x0, 0x39, 
-  0x3c, 0x3e, 0x0, 0x42, 0x47, 0x0, 0x48, 0x4b, 
-  0x0, 0x4d, 0x51, 0x59, 0x0, 0x5c, 0x5e, 0x66, 0x0, 
-  0x75, 0x81, 0x0, 0x83, 0x85, 0x0, 0x8d,
-  0x8f, 0x0, 0x91, 0x93, 0x0, 0xa8, 0xaa, 0x0, 
-  0xb0, 0xb2, 0x0, 0xb3, 0xb5, 0x0, 0xb9, 0xbc, 
-  0x0, 0xc5, 0xc7, 0x0, 0xc9, 0xcb, 0x0, 0xcd, 
-  0xd0, 0xe0, 0x0, 0xe3, 0xe6, 0x0, 0xef, 0xf1 };
-static const unsigned char ub[] = {
-  0x01, 0x0, 0x03, 
-  0x05, 0x0, 0x0c, 0x0f, 0x0, 0x10, 0x13, 0x0, 
-  0x28, 0x2a, 0x0, 0x30, 0x32, 0x0, 0x33, 0x35, 
-  0x0, 0x39, 0x3c, 0x0, 0x44, 0x47, 0x0, 0x48, 
-  0x4b, 0x0, 0x4d, 0x56, 0x0, 0x57, 0x5c, 0x0, 
-  0x5d, 0x5f, 0x0, 0x63, 0x66, 0x0, 0x71, 0x82, 
-  0x0, 0x83, 0x85, 0x0, 0x8a, 0x8e, 0x0, 0x90, 
-  0x92, 0x0, 0x95, 0x99, 0x0, 0x9a, 0x9c, 0x9e, 
-  0x0, 0x9f, 0xa3, 0x0, 0xa4, 0xa8, 0x0, 0xaa, 
-  0xae, 0x0, 0xb9, 0xbe, 0x0, 
-  0xc2, 0xc6, 0x0, 0xc8, 0xca, 0x0, 0xcd, 0xd0,
-  0xd7, 0xe6, 0xe7, 0x0, 0xfa };
-static const unsigned char uc[] = {
-  0x01, 0x0, 0x03, 0x05, 0x0, 
-  0x0c, 0x0e, 0x0, 0x10, 0x12, 0x0, 0x28, 0x2a, 
-  0x0, 0x33, 0x35, 0x0, 0x39, 0x3d, 0x0, 0x44, 
-  0x46, 0x0, 0x48, 0x4a, 0x0, 0x4d, 0x55, 0x0, 
-  0x56, 0x58, 0x59, 0x60, 0x0, 0x63, 0x66, 0x0, 0x6f,
-  0x78, 0x0, 0x7f, 0x82, 0x83, 0x85, 0x0, 0x8c,
-  0x8e, 0x0, 0x90, 0x92, 0x0, 0xa8, 0xaa, 0x0, 0xb3,
-  0xb5, 0x0, 0xb9, 0xbc, 0x0, 0xc4, 0xc6, 0x0, 0xc8,
-  0xca, 0x0, 0xcd, 0xd5, 0x0, 0xd6, 0xde, 0xe0, 0x0, 
-  0xe3, 0xe6, 0x0, 0xef, 0xf1, 0xf2 };
-static const unsigned char ud[] = {
-  0x02, 0x0, 0x03, 0x05, 
-  0x0, 0x0c, 0x0e, 0x0, 0x10, 0x12, 0x0, 0x28, 
-  0x2a, 0x0, 0x39, 0x3d, 0x0, 0x44, 0x46, 0x0, 
-  0x48, 0x4a, 0x0, 0x4d, 0x57, 0x60, 0x0, 0x63, 
-  0x66, 0x0, 0x75, 0x79, 0x0, 0x7f, 0x82, 0x0, 0x83,
-  0x85, 0x0, 0x96, 0x9a, 0x0, 0xb1, 0xb3, 0x0, 0xbb,
-  0xbd, 0xc0, 0x0, 0xc6, 0xca, 0xcf, 0x0, 0xd4, 0xd6, 
-  0xd8, 0x0, 0xdf, 0xf2, 0x0, 0xf4 };
-static const unsigned char ue[] = {
-  0x01, 0x0, 
-  0x3a, 0x3f, 0x0, 0x5b, 0x81, 0x0, 0x82, 0x84, 
-  0x87, 0x0, 0x88, 0x8a, 0x8d, 0x94, 0x0, 0x97, 
-  0x99, 0x0, 0x9f, 0xa1, 0x0, 0xa3, 0xa5, 0xa7, 
-  0xaa, 0x0, 0xab, 0xad, 0x0, 0xb9, 0xbb, 0x0, 
-  0xbd, 0xc0, 0x0, 0xc4, 0xc6, 0xc8, 0x0, 0xcd, 
-  0xd0, 0x0, 0xd9, 0xdc, 0x0, 0xdd };
-static const unsigned char uf[] = {
-  0x00, 0x0, 0x47, 0x49, 0x0, 0x6c,
-  0x71, 0x0, 0x8b, 0x90, 0x0, 0x97,
-  0x99, 0x0, 0xbc, 0xbe, 0x0, 0xcc, 
-  0xce, 0x0, 0xd8 };
-static const unsigned char u10[] = {
-  0x00, 0x0, 0xc5, 0xd0, 0x0, 0xfc };
-/* u11 is all-print */
-static const unsigned char u12[] = {
-  0x00, 0x0, 0x48, 0x4a, 0x0, 0x4d, 0x50, 0x0, 0x56, 
-  0x58, 0x5a, 0x0, 0x5d, 0x60, 0x0, 0x88, 
-  0x8a, 0x0, 0x8d, 0x90, 0x0, 0xb0, 0xb2, 
-  0x0, 0xb5, 0xb8, 0x0, 0xbe, 0xc0, 0xc2, 0x0, 
-  0xc5, 0xc8, 0x0, 0xd6, 0xd8, 0x0, 0xff };
-static const unsigned char u13[] = {
-  0x00, 0x0, 0x10, 0x12, 0x0, 0x15,
-  0x18, 0x0, 0x5a, 0x5f, 0x0, 0x7c,
-  0x80, 0x0, 0x99, 0xa0, 0x0, 0xf4 };
-/* u14 is all-print */
-/* u15 is all-print */
-static const unsigned char u16[] = {
-  0x00, 0x0, 0x9c, 0xa0, 0x0, 0xf0 };
-static const unsigned char u17[] = {
-  0x00, 0x0, 0x0c, 0x0e, 0x0, 0x14, 0x20, 
-  0x0, 0x36, 0x40, 0x0, 0x53, 0x60, 0x0, 0x6c, 
-  0x6e, 0x0, 0x70, 0x72, 0x0, 0x73, 0x80, 0x0, 
-  0xdd, 0xe0, 0x0, 0xe9, 0xf0, 0x0, 0xf9 };
-static const unsigned char u18[] = {
-  0x00, 0x0, 0x0e, 0x10, 
-  0x0, 0x19, 0x20, 0x0, 0x77, 0x80, 0x0, 0xaa,
-  0xb0, 0x0, 0xf5 };
-static const unsigned char u19[] = {
-  0x00, 0x0, 0x1c, 0x20, 0x0, 0x2b,
-  0x30, 0x0, 0x3b, 0x40, 0x44, 0x0, 0x6d,
-  0x70, 0x0, 0x74, 0x80, 0x0, 0xab,
-  0xb0, 0x0, 0xc9, 0xd0, 0x0, 0xda,
-  0xde, 0x0, 0xff };
-static const unsigned char u1a[] = {
-  0x00, 0x0, 0x1b, 0x1e, 0x0, 0x5e,
-  0x60, 0x0, 0x7c, 0x7f, 0x0, 0x89,
-  0x90, 0x0, 0x99, 0xa0, 0x0, 0xad };
-static const unsigned char u1b[] = {
-  0x00, 0x0, 0x4b, 0x50, 0x0, 0x7c,
-  0x80, 0x0, 0xaa, 0xae, 0x0, 0xb9 };
-static const unsigned char u1c[] = {
-  0x00, 0x0, 0x37, 0x3b, 0x0, 0x49,
-  0x4d, 0x0, 0x7f, 0xd0, 0x0, 0xf2 };
-static const unsigned char u1d[] = { 
-  0x00, 0x0, 0xe6, 0xfd, 0x0, 0xff };
-/* u1e is all-print */
-static const unsigned char u1f[] = {
-  0x00, 0x0, 
-  0x15, 0x18, 0x0, 0x1d, 0x20, 0x0, 0x45, 0x48, 
-  0x0, 0x4d, 0x50, 0x0, 0x57, 0x59, 0x5b, 0x5d, 
-  0x5f, 0x0, 0x7d, 0x80, 0x0, 0xb4, 0xb6, 0x0, 
-  0xc4, 0xc6, 0x0, 0xd3, 0xd6, 0x0, 0xdb, 0xdd, 
-  0x0, 0xef, 0xf2, 0x0, 0xf4, 0xf6, 0x0, 0xfe };
-static const unsigned char u20[] = { 
-  0x00, 0x0, 0x27, 0x2a, 0x0, 0x64,
-  0x6a, 0x0, 0x71, 0x74, 0x0, 0x8e, 
-  0x90, 0x0, 0x94, 0xa0, 0x0, 0xb8,
-  0xd0, 0x0, 0xf0 };
-static const unsigned char u21[] = {
-  0x00, 0x0, 0x89, 0x90, 0x0, 0xff };
-/* u22 is all-print */
-static const unsigned char u23[] = {
-  0x00, 0x0, 0xe8 };
-static const unsigned char u24[] = {
-  0x00, 0x0, 0x26, 0x40, 0x0, 0x4a, 
-  0x60, 0x0, 0xff };
-/* u25 is all-print */
-static const unsigned char u26[] = {
-  0x00, 0x0, 0xcd, 0xcf, 0x0, 0xe1,
-  0xe3, 0xe8, 0x0, 0xff };
-static const unsigned char u27[] = {
-  0x01, 0x0, 0x04, 0x06, 0x0, 0x09,
-  0x0c, 0x0, 0x27, 0x29, 0x0, 0x4b, 0x4d,
-  0x4f, 0x0, 0x52, 0x56, 0x0, 0x5e,
-  0x61, 0x0, 0x94, 0x98, 0x0, 0xaf,
-  0xb1, 0x0, 0xbe, 0xc0, 0x0, 0xca, 0xcc,
-  0xd0, 0x0, 0xff };
-/* u28 to u2a are all-print */
-static const unsigned char u2b[] = {
-  0x00, 0x0, 0x4c, 0x50, 0x0, 0x59 };
-static const unsigned char u2c[] = {
-  0x00, 0x0, 0x2e, 0x30, 0x0, 0x5e,
-  0x60, 0x0, 0xf1, 0xf9, 0x0, 0xff };
-static const unsigned char u2d[] = {
-  0x00, 0x0, 0x25, 0x30, 0x0, 0x65, 0x6f,
-  0x80, 0x0, 0x96, 0xa0, 0x0, 0xa6,
-  0xa8, 0x0, 0xae, 0xb0, 0x0, 0xb6,
-  0xb8, 0x0, 0xbe, 0xc0, 0x0, 0xc6,
-  0xc8, 0x0, 0xce, 0xd0, 0x0, 0xd6,
-  0xd8, 0x0, 0xde, 0xe0, 0x0, 0xff };
-static const unsigned char u2e[] = {
-  0x00, 0x0, 0x31, 0x80, 0x0, 0x99,
-  0x9b, 0x0, 0xf3 };
-static const unsigned char u2f[] = { 
-  0x00, 0x0, 0xd5, 0xf0, 0x0, 0xfb };
-static const unsigned char u30[] = {
-  0x00, 0x0, 
-  0x3f, 0x41, 0x0, 0x96, 0x99, 0x0, 0xff };
-static const unsigned char u31[] = {
-  0x05, 0x0, 0x2d, 0x31, 0x0, 0x8e,
-  0x90, 0x0, 0xb7, 0xc0, 0x0, 0xe3,
-  0xf0, 0x0, 0xff };
-static const unsigned char u32[] = {
-  0x00, 0x0, 0x1e, 0x20, 0x0, 0xfe };
-/* u33 to u4c is all-print */
-static const unsigned char u4d[] = { 
-  0x00, 0x0, 0xb5, 0xc0, 0x0, 0xff };
-/* u4e to u9e is all-print */
-static const unsigned char u9f[] = {
-  0x00, 0x0, 0xcb };
-/* ua0 to ua3 is all-print */
-static const unsigned char ua4[] = {
-  0x00, 0x0, 0x8c, 0x90, 0x0, 0xc6,
-  0xd0, 0x0, 0xff };
-/* ua5 is all-print */
-static const unsigned char ua6[] = {
-  0x00, 0x0, 0x2b, 0x40, 0x0, 0x5f,
-  0x62, 0x0, 0x73, 0x7c, 0x0, 0x97,
-  0xa0, 0x0, 0xf7 };
-static const unsigned char ua7[] = {
-  0x00, 0x0, 0x8c, 0xfb, 0x0, 0xff };
-static const unsigned char ua8[] = {
-  0x00, 0x0, 0x2b, 0x30, 0x0, 0x39,
-  0x40, 0x0, 0x77, 0x80, 0x0, 0xc4,
-  0xce, 0x0, 0xd9, 0xe0, 0x0, 0xfb };
-static const unsigned char ua9[] = {
-  0x00, 0x0, 0x53, 0x5f, 0x0, 0x7c,
-  0x80, 0x0, 0xcd, 0xcf, 0x0, 0xd9,
-  0xde, 0xdf };
-static const unsigned char uaa[] = {
-  0x00, 0x0, 0x36, 0x40, 0x0, 0x4d,
-  0x50, 0x0, 0x59, 0x5c, 0x0, 0x7b,
-  0x80, 0x0, 0xc2, 0xdb, 0x0, 0xdf };
-static const unsigned char uab[] = {
-  0xc0, 0x0, 0xed, 0xf0, 0x0, 0xf9 };
-/* uac to ud6 is all-print */
-static const unsigned char ud7[] = {
-  0x00, 0x0, 0xa3, 0xb0, 0x0, 0xc6,
-  0xcb, 0x0, 0xfb };
-/* ud8 to udf are UTF-16 surrogates, non-printable */
-/* ue0 to uf9 is all-print */
-static const unsigned char ufa[] = {
-  0x00, 0x0, 0x2d, 0x30, 0x0, 0x6d,
-  0x70, 0x0, 0xd9 };
-static const unsigned char ufb[] = {
-  0x00, 0x0, 0x06, 0x13, 0x0, 0x17,
-  0x1d, 0x0, 0x36, 0x38, 0x0, 0x3c,
-  0x3e, 0x40, 0x41, 0x43, 0x44, 
-  0x46, 0x0, 0xb1, 0xd3, 0x0, 0xff };
-/* ufc is all-print */
-static const unsigned char ufd[] = {
-  0x00, 0x0, 0x3f, 0x50, 0x0, 0x8f,
-  0x92, 0x0, 0xc7, 0xf0, 0x0, 0xfd };
-static const unsigned char ufe[] = {
-  0x00, 0x0, 0x19, 0x20, 0x0, 0x26,
-  0x30, 0x0, 0x52, 0x54, 0x0, 0x66,
-  0x68, 0x0, 0x6b, 0x70, 0x0, 0x74,
-  0x76, 0x0, 0xfc, 0xff };
-static const unsigned char uff[] = {
-  0x01, 0x0, 0xbe, 0xc2, 0x0, 0xc7, 0xca, 0x0, 
-  0xcf, 0xd2, 0x0, 0xd7, 0xda, 0x0, 0xdc, 0xe0, 
-  0x0, 0xe6, 0xe8, 0x0, 0xee, 0xf9, 0x0, 0xfd }; 
-static const unsigned char u100[] = {
-  0x00, 0x0, 0x0b, 0x0d, 0x0, 0x26,
-  0x28, 0x0, 0x3a, 0x3c, 0x3d, 0x3f, 0x0, 0x4d,
-  0x50, 0x0, 0x5d, 0x80, 0x0, 0xfa };
-static const unsigned char u101[] = {
-  0x00, 0x0, 0x02, 0x07, 0x0, 0x33,
-  0x37, 0x0, 0x8a, 0x90, 0x0, 0x9b,
-  0xd0, 0x0, 0xfd };
-static const unsigned char u102[] = {
-  0x80, 0x0, 0x9c, 0xa0, 0x0, 0xd0 };
-static const unsigned char u103[] = {
-  0x00, 0x0, 0x1e, 0x20, 0x0, 0x23,
-  0x30, 0x0, 0x4a, 0x80, 0x0, 0x9d,
-  0x9f, 0x0, 0xc3, 0xc8, 0x0, 0xd5 };
-static const unsigned char u104[] = {
-  0x00, 0x0, 0x9d, 0xa0, 0x0, 0xa9 };
-static const unsigned char u108[] = {
-  0x00, 0x0, 0x05, 0x08, 0x0a, 0x0, 0x35,
-  0x37, 0x38, 0x3c, 0x3f, 0x0, 0x55,
-  0x57, 0x0, 0x5f };
-static const unsigned char u109[] = {
-  0x00, 0x0, 0x1b, 0x1f, 0x0, 0x39, 0x3f };
-static const unsigned char u10a[] = {
-  0x00, 0x0, 0x03, 0x05, 0x06, 0x0c, 0x0, 0x13,
-  0x15, 0x0, 0x17, 0x19, 0x0, 0x33,
-  0x38, 0x0, 0x3a, 0x3f, 0x0, 0x47,
-  0x50, 0x0, 0x58, 0x60, 0x0, 0x7f };
-static const unsigned char u10b[] = {
-  0x00, 0x0, 0x35, 0x39, 0x0, 0x55,
-  0x58, 0x0, 0x72, 0x78, 0x0, 0x7f };
-static const unsigned char u10c[] = {
-  0x00, 0x0, 0x48 };
-static const unsigned char u10e[] = {
-  0x60, 0x0, 0x7e };
-static const unsigned char u110[] = {
-  0x80, 0x0, 0xc1 };
-/* u120 to u122 is all-print */
-static const unsigned char u123[] = {
-  0x00, 0x0, 0x6e };
-static const unsigned char u124[] = {
-  0x00, 0x0, 0x62, 0x70, 0x0, 0x73 };
-/* u130 to u133 is all-print */
-static const unsigned char u134[] = {
-  0x00, 0x0, 0x2e };
-static const unsigned char u1d0[] = {
-  0x00, 0x0, 0xf5 };
-static const unsigned char u1d1[] = {
-  0x00, 0x0, 0x26, 0x29, 0x0, 0xdd };
-static const unsigned char u1d2[] = {
-  0x00, 0x0, 0x45 };
-static const unsigned char u1d3[] = {
-  0x00, 0x0, 0x56, 0x60, 0x0, 0x71 };
-static const unsigned char u1d4[] = { 
-  0x00, 0x0, 0x54, 0x56, 0x0, 0x9c, 0x9e, 0x0, 
-  0x9f, 0xa2, 0xa5, 0x0, 0xa6, 0xa9, 0x0, 0xac, 
-  0xae, 0x0, 0xb9, 0xbb, 0xbd, 0x0, 0xc3,
-  0xc5, 0x0, 0xff };
-static const unsigned char u1d5[] = {
-  0x00, 0x0, 0x05, 0x07, 0x0, 0x0a, 
-  0x0d, 0x0, 0x14, 0x16, 0x0, 0x1c, 0x1e, 0x0, 
-  0x39, 0x3b, 0x0, 0x3e, 0x40, 0x0, 0x44, 0x46, 
-  0x4a, 0x0, 0x50, 0x52, 0x0, 0xff };
-static const unsigned char u1d6[] = {
-  0x00, 0x0, 0xa5, 0xa8, 0x0, 0xff };
-static const unsigned char u1d7[] = {
-  0x00, 0x0, 0xcb, 0xce, 0x0, 0xff };
-static const unsigned char u1f0[] = {
-  0x00, 0x0, 0x2b, 0x30, 0x0, 0x93 };
-static const unsigned char u1f1[] = {
-  0x00, 0x0, 0x0a, 0x10, 0x0, 0x2e,
-  0x31, 0x3d, 0x3f, 0x42, 0x46, 0x4a, 0x0, 0x4e,
-  0x57, 0x5f, 0x79, 0x7b, 0x7c, 0x7f, 0x8a, 0x0,
-  0x8c, 0x8d, 0x90 };
-static const unsigned char u1f2[] = {
-  0x00, 0x10, 0x0, 0x31, 0x40, 0x0, 0x48 };
-/* u200 to u2a5 is all-print */
-static const unsigned char u2a6[] = {
-  0x00, 0x0, 0xd6 };
-/* u2a7 to u2b6 is all-print */
-static const unsigned char u2b7[] = {
-  0x00, 0x0, 0x34 };
-/* u2f8 to u2f9 is all-print */
-static const unsigned char u2fa[] = {
-  0x00, 
-  0x0, 0x1d };
-static const unsigned char ue00[] = {
-  0x01, 0x20, 0x0, 0x7f };
-static const unsigned char ue01[] = {
-  0x00, 0x0, 0xef };
-/* uf00 to uffe is all-print */
-static const unsigned char ufff[] = {
-  0x00, 0x0, 0xfd };
-/* u1000 to u10fe is all-print */
-static const unsigned char u10ff[] = {
-  0x00, 0x0, 0xfd };
Corinna Vinschen March 12, 2018, 10:42 a.m. | #12
On Mar  8 00:18, Thomas Wolff wrote:
> Makefile add-ons for both patch series (libc/string and libc/ctype) will be

> sent separately.


> From 4cd871bea1c6cf677d57587a7e844bb9cb3b19be Mon Sep 17 00:00:00 2001

> From: Thomas Wolff <towo@towo.net>

> Date: Sun, 25 Feb 2018 16:29:33 +0100

> Subject: [PATCH 1/6] generated case conversion data, Unicode 10.0

> [...]


Patchset pushed.  I just squashed the makefile patch into the previous
patch to avoid a repository state not building.


Thanks,
Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Can Finner March 13, 2018, 10:33 a.m. | #13
On Mon, Mar 12, 2018 at 10:42 AM, Corinna Vinschen <vinschen@redhat.com> wrote:
> On Mar  8 00:18, Thomas Wolff wrote:

>> Makefile add-ons for both patch series (libc/string and libc/ctype) will be

>> sent separately.

>

>> From 4cd871bea1c6cf677d57587a7e844bb9cb3b19be Mon Sep 17 00:00:00 2001

>> From: Thomas Wolff <towo@towo.net>

>> Date: Sun, 25 Feb 2018 16:29:33 +0100

>> Subject: [PATCH 1/6] generated case conversion data, Unicode 10.0

>> [...]

>

> Patchset pushed.  I just squashed the makefile patch into the previous

> patch to avoid a repository state not building.

Hi,
This patch breaks arm-none-eabi cross toolchain build with below error message:

/data/.../obj/gcc1/gcc/xgcc -B/data/.../obj/gcc1/gcc/
-B/data/.../obj/newlib/arm-none-eabi/thumb/newlib/ -isystem
/data/.../obj/newlib/arm-none-eabi/thumb/newlib/targ-include -isystem
/data/.../newlib-cygwin/newlib/libc/include
-B/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/arm
-L/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/libnosys
-L/data/.../newlib-cygwin/libgloss/arm  -mthumb
-DPACKAGE_NAME=\"newlib\" -DPACKAGE_TARNAME=\"newlib\"
-DPACKAGE_VERSION=\"3.0.0\" -DPACKAGE_STRING=\"newlib\ 3.0.0\"
-DPACKAGE_BUGREPORT=\"\" -DPACKAGE_URL=\"\" -I.
-I/data/.../newlib-cygwin/newlib/libc/ctype -D__NO_SYSCALLS__
-D_COMPILING_NEWLIB -fno-builtin      -g -ffunction-sections
-fdata-sections -O2  -mthumb -c -o lib_a-categories.o `test -f
'categories.c' || echo
'/data/.../newlib-cygwin/newlib/libc/ctype/'`categories.c
/data/.../newlib-cygwin/newlib/libc/ctype/categories.c:5:17: error:
width of 'cat' exceeds its type
   enum category cat: 11;
                 ^~~
make[8]: *** [lib_a-categories.o] Error 1

Newlib is configured as:

$ grep TOPLEVEL config.log
TOPLEVEL_CONFIGURE_ARGUMENTS='/data/.../newlib-cygwin/configure
--disable-newlib-supplied-syscalls --enable-newlib-io-long-long
--enable-newlib-io-c99-formats --enable-newlib-mb
--target=arm-none-eabi --prefix=/ --with-pkgversion=unknown'

Also, it might be better to send patch series in the form of:
[PATCH 0/n] Subject_0
[PATCH 1/n] Subject_1
...
[PATCH m/n] Subject_m

So each patch is described, sent and reviewed as an independent
thread.  It's really hard for people who do not understand the code to find
which patch breaks which code if patches are sent as reply messages
to the previous one.

Thanks,
bin
>

>

> Thanks,

> Corinna

>

> --

> Corinna Vinschen

> Cygwin Maintainer

> Red Hat




-- 
Regards.
Corinna Vinschen March 13, 2018, 10:40 a.m. | #14
On Mar 13 10:33, Can Finner wrote:
> On Mon, Mar 12, 2018 at 10:42 AM, Corinna Vinschen <vinschen@redhat.com> wrote:

> > On Mar  8 00:18, Thomas Wolff wrote:

> >> Makefile add-ons for both patch series (libc/string and libc/ctype) will be

> >> sent separately.

> >

> >> From 4cd871bea1c6cf677d57587a7e844bb9cb3b19be Mon Sep 17 00:00:00 2001

> >> From: Thomas Wolff <towo@towo.net>

> >> Date: Sun, 25 Feb 2018 16:29:33 +0100

> >> Subject: [PATCH 1/6] generated case conversion data, Unicode 10.0

> >> [...]

> >

> > Patchset pushed.  I just squashed the makefile patch into the previous

> > patch to avoid a repository state not building.

> Hi,

> This patch breaks arm-none-eabi cross toolchain build with below error message:

> 

> /data/.../obj/gcc1/gcc/xgcc -B/data/.../obj/gcc1/gcc/

> -B/data/.../obj/newlib/arm-none-eabi/thumb/newlib/ -isystem

> /data/.../obj/newlib/arm-none-eabi/thumb/newlib/targ-include -isystem

> /data/.../newlib-cygwin/newlib/libc/include

> -B/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/arm

> -L/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/libnosys

> -L/data/.../newlib-cygwin/libgloss/arm  -mthumb

> -DPACKAGE_NAME=\"newlib\" -DPACKAGE_TARNAME=\"newlib\"

> -DPACKAGE_VERSION=\"3.0.0\" -DPACKAGE_STRING=\"newlib\ 3.0.0\"

> -DPACKAGE_BUGREPORT=\"\" -DPACKAGE_URL=\"\" -I.

> -I/data/.../newlib-cygwin/newlib/libc/ctype -D__NO_SYSCALLS__

> -D_COMPILING_NEWLIB -fno-builtin      -g -ffunction-sections

> -fdata-sections -O2  -mthumb -c -o lib_a-categories.o `test -f

> 'categories.c' || echo

> '/data/.../newlib-cygwin/newlib/libc/ctype/'`categories.c

> /data/.../newlib-cygwin/newlib/libc/ctype/categories.c:5:17: error:

> width of 'cat' exceeds its type

>    enum category cat: 11;

>                  ^~~


I don't understand this error.  Why is an enum < 11 bits?!?


Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Corinna Vinschen March 13, 2018, 10:48 a.m. | #15
On Mar 13 11:40, Corinna Vinschen wrote:
> On Mar 13 10:33, Can Finner wrote:

> > On Mon, Mar 12, 2018 at 10:42 AM, Corinna Vinschen <vinschen@redhat.com> wrote:

> > > On Mar  8 00:18, Thomas Wolff wrote:

> > >> Makefile add-ons for both patch series (libc/string and libc/ctype) will be

> > >> sent separately.

> > >

> > >> From 4cd871bea1c6cf677d57587a7e844bb9cb3b19be Mon Sep 17 00:00:00 2001

> > >> From: Thomas Wolff <towo@towo.net>

> > >> Date: Sun, 25 Feb 2018 16:29:33 +0100

> > >> Subject: [PATCH 1/6] generated case conversion data, Unicode 10.0

> > >> [...]

> > >

> > > Patchset pushed.  I just squashed the makefile patch into the previous

> > > patch to avoid a repository state not building.

> > Hi,

> > This patch breaks arm-none-eabi cross toolchain build with below error message:

> > 

> > /data/.../obj/gcc1/gcc/xgcc -B/data/.../obj/gcc1/gcc/

> > -B/data/.../obj/newlib/arm-none-eabi/thumb/newlib/ -isystem

> > /data/.../obj/newlib/arm-none-eabi/thumb/newlib/targ-include -isystem

> > /data/.../newlib-cygwin/newlib/libc/include

> > -B/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/arm

> > -L/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/libnosys

> > -L/data/.../newlib-cygwin/libgloss/arm  -mthumb

> > -DPACKAGE_NAME=\"newlib\" -DPACKAGE_TARNAME=\"newlib\"

> > -DPACKAGE_VERSION=\"3.0.0\" -DPACKAGE_STRING=\"newlib\ 3.0.0\"

> > -DPACKAGE_BUGREPORT=\"\" -DPACKAGE_URL=\"\" -I.

> > -I/data/.../newlib-cygwin/newlib/libc/ctype -D__NO_SYSCALLS__

> > -D_COMPILING_NEWLIB -fno-builtin      -g -ffunction-sections

> > -fdata-sections -O2  -mthumb -c -o lib_a-categories.o `test -f

> > 'categories.c' || echo

> > '/data/.../newlib-cygwin/newlib/libc/ctype/'`categories.c

> > /data/.../newlib-cygwin/newlib/libc/ctype/categories.c:5:17: error:

> > width of 'cat' exceeds its type

> >    enum category cat: 11;

> >                  ^~~

> 

> I don't understand this error.  Why is an enum < 11 bits?!?


As a followup, I have no idea how to fix this, building only for targets
with enums 32 bits wide.  So a patch to make sure this enum is at least
16 bits on your targets as well would be welcome.


Thanks,
Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Can Finner March 13, 2018, 11:21 a.m. | #16
On Tue, Mar 13, 2018 at 10:40 AM, Corinna Vinschen <vinschen@redhat.com> wrote:
> On Mar 13 10:33, Can Finner wrote:

>> On Mon, Mar 12, 2018 at 10:42 AM, Corinna Vinschen <vinschen@redhat.com> wrote:

>> > On Mar  8 00:18, Thomas Wolff wrote:

>> >> Makefile add-ons for both patch series (libc/string and libc/ctype) will be

>> >> sent separately.

>> >

>> >> From 4cd871bea1c6cf677d57587a7e844bb9cb3b19be Mon Sep 17 00:00:00 2001

>> >> From: Thomas Wolff <towo@towo.net>

>> >> Date: Sun, 25 Feb 2018 16:29:33 +0100

>> >> Subject: [PATCH 1/6] generated case conversion data, Unicode 10.0

>> >> [...]

>> >

>> > Patchset pushed.  I just squashed the makefile patch into the previous

>> > patch to avoid a repository state not building.

>> Hi,

>> This patch breaks arm-none-eabi cross toolchain build with below error message:

>>

>> /data/.../obj/gcc1/gcc/xgcc -B/data/.../obj/gcc1/gcc/

>> -B/data/.../obj/newlib/arm-none-eabi/thumb/newlib/ -isystem

>> /data/.../obj/newlib/arm-none-eabi/thumb/newlib/targ-include -isystem

>> /data/.../newlib-cygwin/newlib/libc/include

>> -B/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/arm

>> -L/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/libnosys

>> -L/data/.../newlib-cygwin/libgloss/arm  -mthumb

>> -DPACKAGE_NAME=\"newlib\" -DPACKAGE_TARNAME=\"newlib\"

>> -DPACKAGE_VERSION=\"3.0.0\" -DPACKAGE_STRING=\"newlib\ 3.0.0\"

>> -DPACKAGE_BUGREPORT=\"\" -DPACKAGE_URL=\"\" -I.

>> -I/data/.../newlib-cygwin/newlib/libc/ctype -D__NO_SYSCALLS__

>> -D_COMPILING_NEWLIB -fno-builtin      -g -ffunction-sections

>> -fdata-sections -O2  -mthumb -c -o lib_a-categories.o `test -f

>> 'categories.c' || echo

>> '/data/.../newlib-cygwin/newlib/libc/ctype/'`categories.c

>> /data/.../newlib-cygwin/newlib/libc/ctype/categories.c:5:17: error:

>> width of 'cat' exceeds its type

>>    enum category cat: 11;

>>                  ^~~

>

> I don't understand this error.  Why is an enum < 11 bits?!?

To be honest, I don't either.  Will collect more information about this.

Thanks,
bin
>

>

> Corinna

>

> --

> Corinna Vinschen

> Cygwin Maintainer

> Red Hat




-- 
Regards.
Can Finner March 13, 2018, 12:02 p.m. | #17
On Tue, Mar 13, 2018 at 11:21 AM, Can Finner <can.finner@gmail.com> wrote:
> On Tue, Mar 13, 2018 at 10:40 AM, Corinna Vinschen <vinschen@redhat.com> wrote:

>> On Mar 13 10:33, Can Finner wrote:

>>> On Mon, Mar 12, 2018 at 10:42 AM, Corinna Vinschen <vinschen@redhat.com> wrote:

>>> > On Mar  8 00:18, Thomas Wolff wrote:

>>> >> Makefile add-ons for both patch series (libc/string and libc/ctype) will be

>>> >> sent separately.

>>> >

>>> >> From 4cd871bea1c6cf677d57587a7e844bb9cb3b19be Mon Sep 17 00:00:00 2001

>>> >> From: Thomas Wolff <towo@towo.net>

>>> >> Date: Sun, 25 Feb 2018 16:29:33 +0100

>>> >> Subject: [PATCH 1/6] generated case conversion data, Unicode 10.0

>>> >> [...]

>>> >

>>> > Patchset pushed.  I just squashed the makefile patch into the previous

>>> > patch to avoid a repository state not building.

>>> Hi,

>>> This patch breaks arm-none-eabi cross toolchain build with below error message:

>>>

>>> /data/.../obj/gcc1/gcc/xgcc -B/data/.../obj/gcc1/gcc/

>>> -B/data/.../obj/newlib/arm-none-eabi/thumb/newlib/ -isystem

>>> /data/.../obj/newlib/arm-none-eabi/thumb/newlib/targ-include -isystem

>>> /data/.../newlib-cygwin/newlib/libc/include

>>> -B/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/arm

>>> -L/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/libnosys

>>> -L/data/.../newlib-cygwin/libgloss/arm  -mthumb

>>> -DPACKAGE_NAME=\"newlib\" -DPACKAGE_TARNAME=\"newlib\"

>>> -DPACKAGE_VERSION=\"3.0.0\" -DPACKAGE_STRING=\"newlib\ 3.0.0\"

>>> -DPACKAGE_BUGREPORT=\"\" -DPACKAGE_URL=\"\" -I.

>>> -I/data/.../newlib-cygwin/newlib/libc/ctype -D__NO_SYSCALLS__

>>> -D_COMPILING_NEWLIB -fno-builtin      -g -ffunction-sections

>>> -fdata-sections -O2  -mthumb -c -o lib_a-categories.o `test -f

>>> 'categories.c' || echo

>>> '/data/.../newlib-cygwin/newlib/libc/ctype/'`categories.c

>>> /data/.../newlib-cygwin/newlib/libc/ctype/categories.c:5:17: error:

>>> width of 'cat' exceeds its type

>>>    enum category cat: 11;

>>>                  ^~~

>>

>> I don't understand this error.  Why is an enum < 11 bits?!?

> To be honest, I don't either.  Will collect more information about this.

So it looks like the ARM ABI requires -fshort-enums for the bare-metal
toolchain, which will pack an enum type into a small integer type.
In this case, does the enum category below have fewer than 256 entries in
all cases?
enum category {
#include "categories.cat"
};
If so, can we change it to the following?  If not, is there any macro that
can be used to differentiate the situations?
struct _category {
  enum category cat: 8;
  unsigned int first: 24;
  unsigned short delta;
} __attribute__((packed));

Another choice would be adding a last entry with value larger than 255
in the enum.

Any suggestions?

Thanks,
bin
>

> Thanks,

> bin

>>

>>

>> Corinna

>>

>> --

>> Corinna Vinschen

>> Cygwin Maintainer

>> Red Hat

>

>

>

> --

> Regards.




-- 
Regards.
Corinna Vinschen March 13, 2018, 12:49 p.m. | #18
On Mar 13 12:02, Can Finner wrote:
> On Tue, Mar 13, 2018 at 11:21 AM, Can Finner <can.finner@gmail.com> wrote:

> > On Tue, Mar 13, 2018 at 10:40 AM, Corinna Vinschen <vinschen@redhat.com> wrote:

> >> On Mar 13 10:33, Can Finner wrote:

> >>> On Mon, Mar 12, 2018 at 10:42 AM, Corinna Vinschen <vinschen@redhat.com> wrote:

> >>> > On Mar  8 00:18, Thomas Wolff wrote:

> >>> >> Makefile add-ons for both patch series (libc/string and libc/ctype) will be

> >>> >> sent separately.

> >>> >

> >>> >> From 4cd871bea1c6cf677d57587a7e844bb9cb3b19be Mon Sep 17 00:00:00 2001

> >>> >> From: Thomas Wolff <towo@towo.net>

> >>> >> Date: Sun, 25 Feb 2018 16:29:33 +0100

> >>> >> Subject: [PATCH 1/6] generated case conversion data, Unicode 10.0

> >>> >> [...]

> >>> >

> >>> > Patchset pushed.  I just squashed the makefile patch into the previous

> >>> > patch to avoid a repository state not building.

> >>> Hi,

> >>> This patch breaks arm-none-eabi cross toolchain build with below error message:

> >>>

> >>> /data/.../obj/gcc1/gcc/xgcc -B/data/.../obj/gcc1/gcc/

> >>> -B/data/.../obj/newlib/arm-none-eabi/thumb/newlib/ -isystem

> >>> /data/.../obj/newlib/arm-none-eabi/thumb/newlib/targ-include -isystem

> >>> /data/.../newlib-cygwin/newlib/libc/include

> >>> -B/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/arm

> >>> -L/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/libnosys

> >>> -L/data/.../newlib-cygwin/libgloss/arm  -mthumb

> >>> -DPACKAGE_NAME=\"newlib\" -DPACKAGE_TARNAME=\"newlib\"

> >>> -DPACKAGE_VERSION=\"3.0.0\" -DPACKAGE_STRING=\"newlib\ 3.0.0\"

> >>> -DPACKAGE_BUGREPORT=\"\" -DPACKAGE_URL=\"\" -I.

> >>> -I/data/.../newlib-cygwin/newlib/libc/ctype -D__NO_SYSCALLS__

> >>> -D_COMPILING_NEWLIB -fno-builtin      -g -ffunction-sections

> >>> -fdata-sections -O2  -mthumb -c -o lib_a-categories.o `test -f

> >>> 'categories.c' || echo

> >>> '/data/.../newlib-cygwin/newlib/libc/ctype/'`categories.c

> >>> /data/.../newlib-cygwin/newlib/libc/ctype/categories.c:5:17: error:

> >>> width of 'cat' exceeds its type

> >>>    enum category cat: 11;

> >>>                  ^~~

> >>

> >> I don't understand this error.  Why is an enum < 11 bits?!?

> > To be honest, I don't either.  Will collect more information about this.

> So it looks like arm ABI requires -fshort-enums for bare-metal

> toolchain, which will pack enum type into small int-type.

> In this case, the enum category as below has fewer than 256 entries in

> call cases?

> enum category {

> #include "categories.cat"

> };


Have a look at the file, it has barely 32 categories, so even a :5
would suffice.

Thomas, what was the idea here?  11 + 21 = 32, so was it just to
fill the struct?  If so, we may want to redefine the struct, as
suggested by Can:

> If so, can we change it into below?  If not, is there any macro can be

> used to differentiate situations?

> struct _category {

>   enum category cat: 8;

>   unsigned int first: 24;

>   unsigned short delta;

> } __attribute__((packed));


Thanks,
Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Thomas Wolff March 13, 2018, 1:49 p.m. | #19
On 13.03.2018 13:49, Corinna Vinschen wrote:
> On Mar 13 12:02, Can Finner wrote:

>> On Tue, Mar 13, 2018 at 11:21 AM, Can Finner <can.finner@gmail.com> wrote:

>>> On Tue, Mar 13, 2018 at 10:40 AM, Corinna Vinschen <vinschen@redhat.com> wrote:

>>>> On Mar 13 10:33, Can Finner wrote:

>>>>> On Mon, Mar 12, 2018 at 10:42 AM, Corinna Vinschen <vinschen@redhat.com> wrote:

>>>>>> On Mar  8 00:18, Thomas Wolff wrote:

>>>>>>> Makefile add-ons for both patch series (libc/string and libc/ctype) will be

>>>>>>> sent separately.

>>>>>>>  From 4cd871bea1c6cf677d57587a7e844bb9cb3b19be Mon Sep 17 00:00:00 2001

>>>>>>> From: Thomas Wolff <towo@towo.net>

>>>>>>> Date: Sun, 25 Feb 2018 16:29:33 +0100

>>>>>>> Subject: [PATCH 1/6] generated case conversion data, Unicode 10.0

>>>>>>> [...]

>>>>>> Patchset pushed.  I just squashed the makefile patch into the previous

>>>>>> patch to avoid a repository state not building.

>>>>> Hi,

>>>>> This patch breaks arm-none-eabi cross toolchain build with below error message:

>>>>>

>>>>> /data/.../obj/gcc1/gcc/xgcc -B/data/.../obj/gcc1/gcc/

>>>>> -B/data/.../obj/newlib/arm-none-eabi/thumb/newlib/ -isystem

>>>>> /data/.../obj/newlib/arm-none-eabi/thumb/newlib/targ-include -isystem

>>>>> /data/.../newlib-cygwin/newlib/libc/include

>>>>> -B/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/arm

>>>>> -L/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/libnosys

>>>>> -L/data/.../newlib-cygwin/libgloss/arm  -mthumb

>>>>> -DPACKAGE_NAME=\"newlib\" -DPACKAGE_TARNAME=\"newlib\"

>>>>> -DPACKAGE_VERSION=\"3.0.0\" -DPACKAGE_STRING=\"newlib\ 3.0.0\"

>>>>> -DPACKAGE_BUGREPORT=\"\" -DPACKAGE_URL=\"\" -I.

>>>>> -I/data/.../newlib-cygwin/newlib/libc/ctype -D__NO_SYSCALLS__

>>>>> -D_COMPILING_NEWLIB -fno-builtin      -g -ffunction-sections

>>>>> -fdata-sections -O2  -mthumb -c -o lib_a-categories.o `test -f

>>>>> 'categories.c' || echo

>>>>> '/data/.../newlib-cygwin/newlib/libc/ctype/'`categories.c

>>>>> /data/.../newlib-cygwin/newlib/libc/ctype/categories.c:5:17: error:

>>>>> width of 'cat' exceeds its type

>>>>>     enum category cat: 11;

>>>>>                   ^~~

>>>> I don't understand this error.  Why is an enum < 11 bits?!?

>>> To be honest, I don't either.  Will collect more information about this.

>> So it looks like arm ABI requires -fshort-enums for bare-metal

>> toolchain, which will pack enum type into small int-type.

>> In this case, the enum category as below has fewer than 256 entries in

>> call cases?

>> enum category {

>> #include "categories.cat"

>> };

> Have a look at the file, it has barely 32 categories, so even a :5

> would suffice.

>

> Thomas, what was the idea here?  11 + 21 = 32, so was it just to

> fill the struct?

Yes, and to keep the Unicode value right-aligned. But 8/24 will do
just as well; please change it accordingly.
Thomas

>    If so, we may want to redefine the struct, as

> suggested by Can:

>

>> If so, can we change it into below?  If not, is there any macro can be

>> used to differentiate situations?

>> struct _category {

>>    enum category cat: 8;

>>    unsigned int first: 24;

>>    unsigned short delta;

>> } __attribute__((packed));

> Thanks,

> Corinna

>
Can Finner March 13, 2018, 2:05 p.m. | #20
On Tue, Mar 13, 2018 at 1:49 PM, Thomas Wolff <towo@towo.net> wrote:
> On 13.03.2018 13:49, Corinna Vinschen wrote:

>>

>> On Mar 13 12:02, Can Finner wrote:

>>>

>>> On Tue, Mar 13, 2018 at 11:21 AM, Can Finner <can.finner@gmail.com>

>>> wrote:

>>>>

>>>> On Tue, Mar 13, 2018 at 10:40 AM, Corinna Vinschen <vinschen@redhat.com>

>>>> wrote:

>>>>>

>>>>> On Mar 13 10:33, Can Finner wrote:

>>>>>>

>>>>>> On Mon, Mar 12, 2018 at 10:42 AM, Corinna Vinschen

>>>>>> <vinschen@redhat.com> wrote:

>>>>>>>

>>>>>>> On Mar  8 00:18, Thomas Wolff wrote:

>>>>>>>>

>>>>>>>> Makefile add-ons for both patch series (libc/string and libc/ctype)

>>>>>>>> will be

>>>>>>>> sent separately.

>>>>>>>>  From 4cd871bea1c6cf677d57587a7e844bb9cb3b19be Mon Sep 17 00:00:00

>>>>>>>> 2001

>>>>>>>> From: Thomas Wolff <towo@towo.net>

>>>>>>>> Date: Sun, 25 Feb 2018 16:29:33 +0100

>>>>>>>> Subject: [PATCH 1/6] generated case conversion data, Unicode 10.0

>>>>>>>> [...]

>>>>>>>

>>>>>>> Patchset pushed.  I just squashed the makefile patch into the

>>>>>>> previous

>>>>>>> patch to avoid a repository state not building.

>>>>>>

>>>>>> Hi,

>>>>>> This patch breaks arm-none-eabi cross toolchain build with below error

>>>>>> message:

>>>>>>

>>>>>> /data/.../obj/gcc1/gcc/xgcc -B/data/.../obj/gcc1/gcc/

>>>>>> -B/data/.../obj/newlib/arm-none-eabi/thumb/newlib/ -isystem

>>>>>> /data/.../obj/newlib/arm-none-eabi/thumb/newlib/targ-include -isystem

>>>>>> /data/.../newlib-cygwin/newlib/libc/include

>>>>>> -B/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/arm

>>>>>> -L/data/.../obj/newlib/arm-none-eabi/thumb/libgloss/libnosys

>>>>>> -L/data/.../newlib-cygwin/libgloss/arm  -mthumb

>>>>>> -DPACKAGE_NAME=\"newlib\" -DPACKAGE_TARNAME=\"newlib\"

>>>>>> -DPACKAGE_VERSION=\"3.0.0\" -DPACKAGE_STRING=\"newlib\ 3.0.0\"

>>>>>> -DPACKAGE_BUGREPORT=\"\" -DPACKAGE_URL=\"\" -I.

>>>>>> -I/data/.../newlib-cygwin/newlib/libc/ctype -D__NO_SYSCALLS__

>>>>>> -D_COMPILING_NEWLIB -fno-builtin      -g -ffunction-sections

>>>>>> -fdata-sections -O2  -mthumb -c -o lib_a-categories.o `test -f

>>>>>> 'categories.c' || echo

>>>>>> '/data/.../newlib-cygwin/newlib/libc/ctype/'`categories.c

>>>>>> /data/.../newlib-cygwin/newlib/libc/ctype/categories.c:5:17: error:

>>>>>> width of 'cat' exceeds its type

>>>>>>     enum category cat: 11;

>>>>>>                   ^~~

>>>>>

>>>>> I don't understand this error.  Why is an enum < 11 bits?!?

>>>>

>>>> To be honest, I don't either.  Will collect more information about this.

>>>

>>> So it looks like arm ABI requires -fshort-enums for bare-metal

>>> toolchain, which will pack enum type into small int-type.

>>> In this case, the enum category as below has fewer than 256 entries in

>>> call cases?

>>> enum category {

>>> #include "categories.cat"

>>> };

>>

>> Have a look at the file, it has barely 32 categories, so even a :5

>> would suffice.

>>

>> Thomas, what was the idea here?  11 + 21 = 32, so was it just to

>> fill the struct?

>

> Yes, and to keep the Unicode value right-aligned. But 8/24 will do alike,

> please change it so.

Thanks for clarification, I will prepare a change.

Thanks,
bin
> Thomas

>

>

>>    If so, we may want to redefine the struct, as

>> suggested by Can:

>>

>>> If so, can we change it into below?  If not, is there any macro can be

>>> used to differentiate situations?

>>> struct _category {

>>>    enum category cat: 8;

>>>    unsigned int first: 24;

>>>    unsigned short delta;

>>> } __attribute__((packed));

>>

>> Thanks,

>> Corinna

>>

>




-- 
Regards.
Corinna Vinschen March 13, 2018, 9:10 p.m. | #21
On Mar  8 00:21, Thomas Wolff wrote:
> From 58a9cfcb253165d7073a9ed25e143daa2e979c10 Mon Sep 17 00:00:00 2001

> From: Thomas Wolff <towo@towo.net>

> Date: Sun, 25 Feb 2018 17:22:34 +0100

> Subject: [PATCH 4/6] use generated character data

> 

> ---

>  newlib/libc/ctype/categories.c  |  39 +++

>  newlib/libc/ctype/categories.h  |   7 +

>  newlib/libc/ctype/iswalnum.c    |   2 +-

>  newlib/libc/ctype/iswalnum_l.c  |  19 +-

>  newlib/libc/ctype/iswalpha.c    |  73 ++++++

>  newlib/libc/ctype/iswalpha_l.c  |  17 +-

>  newlib/libc/ctype/iswblank.c    |  19 +-

>  newlib/libc/ctype/iswblank_l.c  |  16 +-

>  newlib/libc/ctype/iswcntrl.c    |  17 +-

>  newlib/libc/ctype/iswcntrl_l.c  |  16 +-

>  newlib/libc/ctype/iswctype_l.c  |  37 ++-

>  newlib/libc/ctype/iswdigit.c    |   3 +-

>  newlib/libc/ctype/iswdigit_l.c  |   2 +-

>  newlib/libc/ctype/iswgraph.c    |   3 +-

>  newlib/libc/ctype/iswgraph_l.c  |  19 +-

>  newlib/libc/ctype/iswlower.c    |   4 +-

>  newlib/libc/ctype/iswlower_l.c  |  16 +-

>  newlib/libc/ctype/iswprint.c    |  72 ++++++

>  newlib/libc/ctype/iswprint_l.c  |  17 +-

>  newlib/libc/ctype/iswpunct.c    |   7 +-

>  newlib/libc/ctype/iswpunct_l.c  |  22 +-

>  newlib/libc/ctype/iswspace.c    |  20 +-

>  newlib/libc/ctype/iswspace_l.c  |  17 +-

>  newlib/libc/ctype/iswupper.c    |   6 +-

>  newlib/libc/ctype/iswupper_l.c  |  16 +-

>  newlib/libc/ctype/iswxdigit.c   |   6 +-

>  newlib/libc/ctype/jp2uc.c       |  51 +++-

>  newlib/libc/ctype/local.h       |  19 +-

>  newlib/libc/ctype/towctrans.c   |  16 +-

>  newlib/libc/ctype/towctrans_l.c |  97 +++++++-

>  newlib/libc/ctype/towlower.c    |  81 +++++++

>  newlib/libc/ctype/towlower_l.c  |   7 +-

>  newlib/libc/ctype/towupper.c    | 515 +---------------------------------------

>  newlib/libc/ctype/towupper_l.c  |   8 +-

>  34 files changed, 650 insertions(+), 639 deletions(-)

>  create mode 100644 newlib/libc/ctype/categories.c

>  create mode 100644 newlib/libc/ctype/categories.h

>  create mode 100644 newlib/libc/ctype/iswalpha.c

>  create mode 100644 newlib/libc/ctype/iswprint.c

>  create mode 100644 newlib/libc/ctype/towlower.c


Looks like I pushed too soon.  After a full rebuild Cygwin didn't work
at all anymore.  After some experimenting it turned out that it depends
on the optimization settings.  If I build with -O2, all is well.  If I
build with just -g and no optimization, Cygwin doesn't run anymore.

Fortunately strace is a native tool, so I could fetch an strace.

What caught my eye was that *all* paths converted to native NT
paths had a Ctrl-A in place of the drive letter 'C', like this:

  \??\^A:\WINDOWS

The culprit was apparently a call to towupper() on the drive letter,
required for case sensitivity.  This in turn led to the towctrans_l
function.

After some head scratching (without functioning debugger...) I realized
that there are cases which neglect to return a value due to a missing `return c'.

Why gcc let this slip through beats me thoroughly.

I pushed a patch.


Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Thomas Wolff March 13, 2018, 9:41 p.m. | #22
Am 13.03.2018 um 22:10 schrieb Corinna Vinschen:
> On Mar  8 00:21, Thomas Wolff wrote:

>>  From 58a9cfcb253165d7073a9ed25e143daa2e979c10 Mon Sep 17 00:00:00 2001

>> From: Thomas Wolff <towo@towo.net>

>> Date: Sun, 25 Feb 2018 17:22:34 +0100

>> Subject: [PATCH 4/6] use generated character data

>>

>> ---

>>   newlib/libc/ctype/categories.c  |  39 +++

>>   newlib/libc/ctype/categories.h  |   7 +

>>   newlib/libc/ctype/iswalnum.c    |   2 +-

>>   newlib/libc/ctype/iswalnum_l.c  |  19 +-

>>   newlib/libc/ctype/iswalpha.c    |  73 ++++++

>>   newlib/libc/ctype/iswalpha_l.c  |  17 +-

>>   newlib/libc/ctype/iswblank.c    |  19 +-

>>   newlib/libc/ctype/iswblank_l.c  |  16 +-

>>   newlib/libc/ctype/iswcntrl.c    |  17 +-

>>   newlib/libc/ctype/iswcntrl_l.c  |  16 +-

>>   newlib/libc/ctype/iswctype_l.c  |  37 ++-

>>   newlib/libc/ctype/iswdigit.c    |   3 +-

>>   newlib/libc/ctype/iswdigit_l.c  |   2 +-

>>   newlib/libc/ctype/iswgraph.c    |   3 +-

>>   newlib/libc/ctype/iswgraph_l.c  |  19 +-

>>   newlib/libc/ctype/iswlower.c    |   4 +-

>>   newlib/libc/ctype/iswlower_l.c  |  16 +-

>>   newlib/libc/ctype/iswprint.c    |  72 ++++++

>>   newlib/libc/ctype/iswprint_l.c  |  17 +-

>>   newlib/libc/ctype/iswpunct.c    |   7 +-

>>   newlib/libc/ctype/iswpunct_l.c  |  22 +-

>>   newlib/libc/ctype/iswspace.c    |  20 +-

>>   newlib/libc/ctype/iswspace_l.c  |  17 +-

>>   newlib/libc/ctype/iswupper.c    |   6 +-

>>   newlib/libc/ctype/iswupper_l.c  |  16 +-

>>   newlib/libc/ctype/iswxdigit.c   |   6 +-

>>   newlib/libc/ctype/jp2uc.c       |  51 +++-

>>   newlib/libc/ctype/local.h       |  19 +-

>>   newlib/libc/ctype/towctrans.c   |  16 +-

>>   newlib/libc/ctype/towctrans_l.c |  97 +++++++-

>>   newlib/libc/ctype/towlower.c    |  81 +++++++

>>   newlib/libc/ctype/towlower_l.c  |   7 +-

>>   newlib/libc/ctype/towupper.c    | 515 +---------------------------------------

>>   newlib/libc/ctype/towupper_l.c  |   8 +-

>>   34 files changed, 650 insertions(+), 639 deletions(-)

>>   create mode 100644 newlib/libc/ctype/categories.c

>>   create mode 100644 newlib/libc/ctype/categories.h

>>   create mode 100644 newlib/libc/ctype/iswalpha.c

>>   create mode 100644 newlib/libc/ctype/iswprint.c

>>   create mode 100644 newlib/libc/ctype/towlower.c

> Looks like I pushed too soon.  After a full rebuild Cygwin didn't work

> at all anymore.  After some experimenting it turned out that it depends

> on the optimization settings.  If I build with -O2, all is well.  If I

> build with just -g and no optimzation, Cygwin doesn't run anymore.

>

> Fortunately strace is a native tool, so I could fetch an strace.

>

> What catched my eye was that *all* paths converted to native NT

> paths had a Ctrl-A in place of the drive letter 'C', like this:

>

>    \??\^A:\WINDOWS

>

> The culprit was apparently a call to towupper() on the drive letter,

> required for case sensitivity.  This in turn led to the towctrans_l

> function.

>

> After some head scratching (without functioning debugger...) I realized

> that there are cases which neglect to return a value due to `return c'.

>

> Why gcc let this slip through beats me thoroughly.

>

> I pushed a patch.

>

>

> Corinna

>

Thanks a lot for hot-fixing this. I'll meditate how this could slip
through...
And I'll also check why this wasn't discovered during my extensive testing.
Thomas


---
Diese E-Mail wurde von Avast Antivirus-Software auf Viren geprüft.
https://www.avast.com/antivirus
Corinna Vinschen March 14, 2018, 9:43 a.m. | #23
On Mar 13 14:05, Can Finner wrote:
> On Tue, Mar 13, 2018 at 1:49 PM, Thomas Wolff <towo@towo.net> wrote:

> > On 13.03.2018 13:49, Corinna Vinschen wrote:

> >> On Mar 13 12:02, Can Finner wrote:

> >>> On Tue, Mar 13, 2018 at 11:21 AM, Can Finner <can.finner@gmail.com>

> >>> wrote:

> >>>> On Tue, Mar 13, 2018 at 10:40 AM, Corinna Vinschen <vinschen@redhat.com>

> >>>> wrote:

> >>>>> On Mar 13 10:33, Can Finner wrote:

> >>>>>> This patch breaks arm-none-eabi cross toolchain build with below error

> >>>>>> message:

> >>>>>>

> >>>>>> [...]

> >>>>>> /data/.../newlib-cygwin/newlib/libc/ctype/categories.c:5:17: error:

> >>>>>> width of 'cat' exceeds its type

> >>>>>>     enum category cat: 11;

> >>>>>>                   ^~~

> >>>>>

> >>>>> I don't understand this error.  Why is an enum < 11 bits?!?

> >>>>

> >>>> To be honest, I don't either.  Will collect more information about this.

> >>>

> >>> So it looks like arm ABI requires -fshort-enums for bare-metal

> >>> toolchain, which will pack enum type into small int-type.

> >>> In this case, the enum category as below has fewer than 256 entries in

> >>> call cases?

> >>> enum category {

> >>> #include "categories.cat"

> >>> };

> >>

> >> Have a look at the file, it has barely 32 categories, so even a :5

> >> would suffice.

> >>

> >> Thomas, what was the idea here?  11 + 21 = 32, so was it just to

> >> fill the struct?

> >

> > Yes, and to keep the Unicode value right-aligned. But 8/24 will do alike,

> > please change it so.

> Thanks for clarification, I will prepare a change.


Never mind, I pushed a patch.  Please give it a try.


Thanks,
Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Thomas Wolff March 23, 2018, 7:28 p.m. | #24
Am 13.03.2018 um 22:41 schrieb Thomas Wolff:
> Am 13.03.2018 um 22:10 schrieb Corinna Vinschen:

>> On Mar  8 00:21, Thomas Wolff wrote:

>>> From 58a9cfcb253165d7073a9ed25e143daa2e979c10 Mon Sep 17 00:00:00 2001

>>> From: Thomas Wolff <towo@towo.net>

>>> Date: Sun, 25 Feb 2018 17:22:34 +0100

>>> Subject: [PATCH 4/6] use generated character data

>>>

>>> ---

>>>   newlib/libc/ctype/towctrans_l.c |  97 +++++++-

>>> ...

>> ...

>>

>> I pushed a patch.

>>

>>

>> Corinna

>>

> Thanks a lot for hot-fixing this. I'll meditate how this could slip 

> through...

> And I'll also check why this wasn't discovered during my extensive 

> testing.

Thanks again for helping to get this accomplished.
While meditating, I noticed that the bit packing of the case conversion 
entries could use some documentation.
The attached patch adds that (and some tweaking for consistent 
indentation); no code changes.
Thomas
From f8f4784437d319ad3ac2e3c629335fd0f50bee69 Mon Sep 17 00:00:00 2001
From: Thomas Wolff <towo@towo.net>
Date: Fri, 23 Mar 2018 20:07:22 +0100
Subject: [PATCH] comments to document struct caseconv_entry

explain design of compact (packed) struct caseconv_entry, 
in case it needs to be modified for future Unicode versions;
indentation tweaks

---
 newlib/libc/ctype/towctrans_l.c | 108 +++++++++++++++++++++++++---------------
 1 file changed, 69 insertions(+), 39 deletions(-)

diff --git a/newlib/libc/ctype/towctrans_l.c b/newlib/libc/ctype/towctrans_l.c
index eaabd8c..0f210e0 100644
--- a/newlib/libc/ctype/towctrans_l.c
+++ b/newlib/libc/ctype/towctrans_l.c
@@ -4,8 +4,36 @@
 //#include <errno.h>
 #include "local.h"
 
-enum {EVENCAP, ODDCAP};
+/*
+   struct caseconv_entry describes the case conversion behaviour
+   of a range of Unicode characters.
+   It was designed to be compact for a minimal table size.
+   The range is first...first + diff.
+   Conversion behaviour for a character c in the respective range:
+     mode == TOLO	towlower (c) = c + delta
+     mode == TOUP	towupper (c) = c + delta
+     mode == TOBOTH	(titling case characters)
+			towlower (c) = c + 1
+			towupper (c) = c - 1
+     mode == TO1	capital/small letters are alternating
+	delta == EVENCAP	even codes are capital
+	delta == ODDCAP		odd codes are capital
+			(this correlates with an even/odd first range value
+			as of Unicode 10.0 but we do not rely on this)
+   As of Unicode 10.0, the following field lengths are sufficient
+	first: 17 bits
+	diff: 8 bits
+	delta: 17 bits
+	mode: 2 bits
+   The reserve of 4 bits (to limit the struct to 6 bytes)
+   is currently added to the 'first' field;
+   should a future Unicode version make it necessary to expand the others,
+   the 'first' field could be reduced as needed, or larger ranges could
+   be split up (reduce limit max=255 e.g. to max=127 or max=63 in 
+   script mkcaseconv, check increasing table size).
+ */
 enum {TO1, TOLO, TOUP, TOBOTH};
+enum {EVENCAP, ODDCAP};
 static struct caseconv_entry {
   unsigned int first: 21;
   unsigned short diff: 8;
@@ -45,32 +73,33 @@ toulower (wint_t c)
 {
   const struct caseconv_entry * cce =
     bisearch(c, caseconv_table,
-             sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
+	     sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
 
   if (cce)
     switch (cce->mode)
       {
-      case TOLO:
-	return c + cce->delta;
-      case TOBOTH:
-	return c + 1;
-      case TO1:
-	switch (cce->delta)
-	  {
-	    case EVENCAP:
-	      if (!(c & 1))
-		return c + 1;
-	      break;
-	    case ODDCAP:
-	      if (c & 1)
-		return c + 1;
-	      break;
-	    default:
-	      break;
-	  }
+	case TOLO:
+	  return c + cce->delta;
+	case TOBOTH:
+	  return c + 1;
+	case TO1:
+	  switch (cce->delta)
+	    {
+	      case EVENCAP:
+		if (!(c & 1))
+		  return c + 1;
+		break;
+	      case ODDCAP:
+		if (c & 1)
+		  return c + 1;
+		break;
+	      default:
+		break;
+	    }
 	default:
 	  break;
       }
+
   return c;
 }
 
@@ -79,32 +108,33 @@ touupper (wint_t c)
 {
   const struct caseconv_entry * cce =
     bisearch(c, caseconv_table,
-             sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
+	     sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);
 
   if (cce)
     switch (cce->mode)
       {
-      case TOUP:
-	return c + cce->delta;
-      case TOBOTH:
-	return c - 1;
-      case TO1:
-	switch (cce->delta)
-	  {
-	  case EVENCAP:
-	    if (c & 1)
-	      return c - 1;
-	    break;
-	  case ODDCAP:
-	    if (!(c & 1))
-	      return c - 1;
-	    break;
-	  default:
-	    break;
-	  }
+	case TOUP:
+	  return c + cce->delta;
+	case TOBOTH:
+	  return c - 1;
+	case TO1:
+	  switch (cce->delta)
+	    {
+	      case EVENCAP:
+		if (c & 1)
+		  return c - 1;
+		break;
+	      case ODDCAP:
+		if (!(c & 1))
+		  return c - 1;
+		break;
+	      default:
+		break;
+	    }
 	default:
 	  break;
       }
+
   return c;
 }
Corinna Vinschen March 23, 2018, 7:43 p.m. | #25
On Mar 23 20:28, Thomas Wolff wrote:
> While meditating, I noticed that the bit packing of the case conversion

> entries could use some documentation.

> The attached patch adds that (and some tweaking for consistent indentation);

> no code changes.

> Thomas


> From f8f4784437d319ad3ac2e3c629335fd0f50bee69 Mon Sep 17 00:00:00 2001

> From: Thomas Wolff <towo@towo.net>

> Date: Fri, 23 Mar 2018 20:07:22 +0100

> Subject: [PATCH] comments to document struct caseconv_entry

> 

> explain design of compact (packed) struct caseconv_entry, 

> in case it needs to be modified for future Unicode versions;

> indentation tweaks

> [...]

>    if (cce)

>      switch (cce->mode)

>        {

> -      case TOLO:

> -	return c + cce->delta;

> -      case TOBOTH:

> -	return c + 1;

> -      case TO1:

> -	switch (cce->delta)

> -	  {

> -	    case EVENCAP:

> -	      if (!(c & 1))

> -		return c + 1;

> -	      break;

> -	    case ODDCAP:

> -	      if (c & 1)

> -		return c + 1;

> -	      break;

> -	    default:

> -	      break;

> -	  }

> +	case TOLO:

> +	  return c + cce->delta;

> +	case TOBOTH:

> +	  return c + 1;

> +	case TO1:

> +	  switch (cce->delta)

> +	    {

> +	      case EVENCAP:

> +		if (!(c & 1))

> +		  return c + 1;

> +		break;

> +	      case ODDCAP:

> +		if (c & 1)

> +		  return c + 1;

> +		break;

> +	      default:

> +		break;

> +	    }


Thanks but the indentation for switch statements is correct as is.
Check and compare with other GNU sources like Cygwin or GDB.  The other
style is with sources taken from BSD as in vfprintf.c, but that's not
the case here.


Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Thomas Wolff March 23, 2018, 9:02 p.m. | #26
Am 23.03.2018 um 20:43 schrieb Corinna Vinschen:
> On Mar 23 20:28, Thomas Wolff wrote:

>> While meditating, I noticed that the bit packing of the case conversion

>> entries could use some documentation.

>> The attached patch adds that (and some tweaking for consistent indentation);

>> no code changes.

>> Thomas

>>  From f8f4784437d319ad3ac2e3c629335fd0f50bee69 Mon Sep 17 00:00:00 2001

>> From: Thomas Wolff <towo@towo.net>

>> Date: Fri, 23 Mar 2018 20:07:22 +0100

>> Subject: [PATCH] comments to document struct caseconv_entry

>>

>> explain design of compact (packed) struct caseconv_entry,

>> in case it needs to be modified for future Unicode versions;

>> indentation tweaks

>> [...]

>>     if (cce)

>>       switch (cce->mode)

>>         {

>> -      case TOLO:

>> -	return c + cce->delta;

>> -      case TOBOTH:

>> -	return c + 1;

>> -      case TO1:

>> -	switch (cce->delta)

>> -	  {

>> -	    case EVENCAP:

>> -	      if (!(c & 1))

>> -		return c + 1;

>> -	      break;

>> -	    case ODDCAP:

>> -	      if (c & 1)

>> -		return c + 1;

>> -	      break;

>> -	    default:

>> -	      break;

>> -	  }

>> +	case TOLO:

>> +	  return c + cce->delta;

>> +	case TOBOTH:

>> +	  return c + 1;

>> +	case TO1:

>> +	  switch (cce->delta)

>> +	    {

>> +	      case EVENCAP:

>> +		if (!(c & 1))

>> +		  return c + 1;

>> +		break;

>> +	      case ODDCAP:

>> +		if (c & 1)

>> +		  return c + 1;

>> +		break;

>> +	      default:

>> +		break;

>> +	    }

> Thanks but the indentation for switch statements is correct as is.

> Check and compare with other GNU sources like Cygwin or GDB.  The other

> style is with sources taken from BSD as in vfprintf.c, but that's not

> the case here.

Whichever the style is, the previous version was inconsistent in itself, 
with "case" sometimes indented from "{" and sometimes not:
     switch (cce->mode)
       {
       case TO1:
         switch (cce->delta)
           {
             case EVENCAP:

But I don't care so much; we can reduce the patch to the documentation,
of course.
Thomas
Thomas Wolff March 25, 2018, 9:02 a.m. | #27
Am 23.03.2018 um 22:02 schrieb Thomas Wolff:
> Am 23.03.2018 um 20:43 schrieb Corinna Vinschen:

>> On Mar 23 20:28, Thomas Wolff wrote:

>>> While meditating, I noticed that the bit packing of the case conversion

>>> entries could use some documentation.

>>> The attached patch adds that (and some tweaking for consistent 

>>> indentation);

>>> no code changes.

>>> Thomas

>>>  From f8f4784437d319ad3ac2e3c629335fd0f50bee69 Mon Sep 17 00:00:00 2001

>>> From: Thomas Wolff <towo@towo.net>

>>> Date: Fri, 23 Mar 2018 20:07:22 +0100

>>> Subject: [PATCH] comments to document struct caseconv_entry

>>>

>>> explain design of compact (packed) struct caseconv_entry,

>>> in case it needs to be modified for future Unicode versions

>> ...

> ... we can reduce the patch to the documentation, of course.

as attached
Thomas
From f8f4784437d319ad3ac2e3c629335fd0f50bee69 Mon Sep 17 00:00:00 2001
From: Thomas Wolff <towo@towo.net>
Date: Fri, 23 Mar 2018 20:07:22 +0100
Subject: [PATCH] comments to document struct caseconv_entry

explain design of compact (packed) struct caseconv_entry, 
in case it needs to be modified for future Unicode versions

---
 newlib/libc/ctype/towctrans_l.c | 108 +++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 1 deletions(-)

diff --git a/newlib/libc/ctype/towctrans_l.c b/newlib/libc/ctype/towctrans_l.c
index eaabd8c..0f210e0 100644
--- a/newlib/libc/ctype/towctrans_l.c
+++ b/newlib/libc/ctype/towctrans_l.c
@@ -4,8 +4,36 @@
 //#include <errno.h>
 #include "local.h"
 
-enum {EVENCAP, ODDCAP};
+/*
+   struct caseconv_entry describes the case conversion behaviour
+   of a range of Unicode characters.
+   It was designed to be compact for a minimal table size.
+   The range is first...first + diff.
+   Conversion behaviour for a character c in the respective range:
+     mode == TOLO	towlower (c) = c + delta
+     mode == TOUP	towupper (c) = c + delta
+     mode == TOBOTH	(titling case characters)
+			towlower (c) = c + 1
+			towupper (c) = c - 1
+     mode == TO1	capital/small letters are alternating
+	delta == EVENCAP	even codes are capital
+	delta == ODDCAP		odd codes are capital
+			(this correlates with an even/odd first range value
+			as of Unicode 10.0 but we do not rely on this)
+   As of Unicode 10.0, the following field lengths are sufficient
+	first: 17 bits
+	diff: 8 bits
+	delta: 17 bits
+	mode: 2 bits
+   The reserve of 4 bits (to limit the struct to 6 bytes)
+   is currently added to the 'first' field;
+   should a future Unicode version make it necessary to expand the others,
+   the 'first' field could be reduced as needed, or larger ranges could
+   be split up (reduce limit max=255 e.g. to max=127 or max=63 in 
+   script mkcaseconv, check increasing table size).
+ */
 enum {TO1, TOLO, TOUP, TOBOTH};
+enum {EVENCAP, ODDCAP};
 static struct caseconv_entry {
   unsigned int first: 21;
   unsigned short diff: 8;
@@ -45,32 +73,33 @@ toulower (wint_t c)
Corinna Vinschen March 26, 2018, 8 a.m. | #28
On Mar 23 22:02, Thomas Wolff wrote:
> Am 23.03.2018 um 20:43 schrieb Corinna Vinschen:

> > On Mar 23 20:28, Thomas Wolff wrote:

> > > While meditating, I noticed that the bit packing of the case conversion

> > > entries could use some documentation.

> > > The attached patch adds that (and some tweaking for consistent indentation);

> > > no code changes.

> > > Thomas

> > >  From f8f4784437d319ad3ac2e3c629335fd0f50bee69 Mon Sep 17 00:00:00 2001

> > > From: Thomas Wolff <towo@towo.net>

> > > Date: Fri, 23 Mar 2018 20:07:22 +0100

> > > Subject: [PATCH] comments to document struct caseconv_entry

> > > 

> > > explain design of compact (packed) struct caseconv_entry,

> > > in case it needs to be modified for future Unicode versions;

> > > indentation tweaks

> > > [...]

> > Thanks but the indentation for switch statements is correct as is.

> > Check and compare with other GNU sources like Cygwin or GDB.  The other

> > style is with sources taken from BSD as in vfprintf.c, but that's not

> > the case here.

> Whichever the style is, the previous version was inconsistent in itself,

> with "case" sometimes indented from "{" and sometimes not:

>     switch (cce->mode)

>       {

>       case TO1:

>         switch (cce->delta)

>           {

>             case EVENCAP:


Oh, you're right.  I missed that.  I fixed that under your authorship.


Thanks,
Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Corinna Vinschen March 26, 2018, 8:01 a.m. | #29
On Mar 25 11:02, Thomas Wolff wrote:
> Am 23.03.2018 um 22:02 schrieb Thomas Wolff:

> > Am 23.03.2018 um 20:43 schrieb Corinna Vinschen:

> > > On Mar 23 20:28, Thomas Wolff wrote:

> > > > While meditating, I noticed that the bit packing of the case conversion

> > > > entries could use some documentation.

> > > > The attached patch adds that (and some tweaking for consistent

> > > > indentation);

> > > > no code changes.

> > > > Thomas

> > > >  From f8f4784437d319ad3ac2e3c629335fd0f50bee69 Mon Sep 17 00:00:00 2001

> > > > From: Thomas Wolff <towo@towo.net>

> > > > Date: Fri, 23 Mar 2018 20:07:22 +0100

> > > > Subject: [PATCH] comments to document struct caseconv_entry

> > > > 

> > > > explain design of compact (packed) struct caseconv_entry,

> > > > in case it needs to be modified for future Unicode versions

> > > ...

> > ... we can reduce the patch to the documentation, of course.

> as attached

> Thomas


Thanks, but the patch is broken.  The last line in the patch is the
start of another patch hunk, which then is missing.  Can you fix that,
please?


Thanks,
Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Thomas Wolff March 26, 2018, 9:53 a.m. | #30
Am 26.03.2018 um 10:01 schrieb Corinna Vinschen:
> On Mar 25 11:02, Thomas Wolff wrote:

>> Am 23.03.2018 um 22:02 schrieb Thomas Wolff:

>>> Am 23.03.2018 um 20:43 schrieb Corinna Vinschen:

>>>> On Mar 23 20:28, Thomas Wolff wrote:

>>>>> While meditating, I noticed that the bit packing of the case conversion

>>>>> entries could use some documentation.

>>>>> The attached patch adds that (and some tweaking for consistent

>>>>> indentation);

>>>>> no code changes.

>>>>> Thomas

>>>>>   From f8f4784437d319ad3ac2e3c629335fd0f50bee69 Mon Sep 17 00:00:00 2001

>>>>> From: Thomas Wolff <towo@towo.net>

>>>>> Date: Fri, 23 Mar 2018 20:07:22 +0100

>>>>> Subject: [PATCH] comments to document struct caseconv_entry

>>>>>

>>>>> explain design of compact (packed) struct caseconv_entry,

>>>>> in case it needs to be modified for future Unicode versions

>>>> ...

>>> ... we can reduce the patch to the documentation, of course.

>> as attached

>> Thomas

> Thanks, but the patch is broken.  The last line in the patch is the

> start of another patch hunk, which then is missing.  Can you fix that, please?

Yeah, I tried to limit git fiddling effort by manually manipulating the 
patch, which failed.
(After I tried to re-sync with the current repository, it would insist 
on some merging, and I do not know how to rectify that;
manual fixing of the file, git pull -f... nothing helped (error: Pulling 
is not possible because you have unmerged files); I know I should 
eventually consult the howto you kindly pointed me to...)
So, based on a fresh git clone, here's an updated patch, also fixing one 
remaining minor layout glitch.
Thomas
From 07fa10556a8fa0ecaf402268244abfdd25f6325c Mon Sep 17 00:00:00 2001
From: Thomas Wolff <towo@towo.net>
Date: Mon, 26 Mar 2018 11:46:40 +0200
Subject: [PATCH] comments to document struct caseconv_entry

explain design of compact (packed) struct caseconv_entry,
in case it needs to be modified for future Unicode versions
---
 newlib/libc/ctype/towctrans_l.c | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/newlib/libc/ctype/towctrans_l.c b/newlib/libc/ctype/towctrans_l.c
index ca7e89f..9759cf7 100644
--- a/newlib/libc/ctype/towctrans_l.c
+++ b/newlib/libc/ctype/towctrans_l.c
@@ -4,8 +4,36 @@
 //#include <errno.h>
 #include "local.h"
 
-enum {EVENCAP, ODDCAP};
+/*
+   struct caseconv_entry describes the case conversion behaviour
+   of a range of Unicode characters.
+   It was designed to be compact for a minimal table size.
+   The range is first...first + diff.
+   Conversion behaviour for a character c in the respective range:
+     mode == TOLO	towlower (c) = c + delta
+     mode == TOUP	towupper (c) = c + delta
+     mode == TOBOTH	(titling case characters)
+			towlower (c) = c + 1
+			towupper (c) = c - 1
+     mode == TO1	capital/small letters are alternating
+	delta == EVENCAP	even codes are capital
+	delta == ODDCAP		odd codes are capital
+			(this correlates with an even/odd first range value
+			as of Unicode 10.0 but we do not rely on this)
+   As of Unicode 10.0, the following field lengths are sufficient
+	first: 17 bits
+	diff: 8 bits
+	delta: 17 bits
+	mode: 2 bits
+   The reserve of 4 bits (to limit the struct to 6 bytes)
+   is currently added to the 'first' field;
+   should a future Unicode version make it necessary to expand the others,
+   the 'first' field could be reduced as needed, or larger ranges could
+   be split up (reduce limit max=255 e.g. to max=127 or max=63 in 
+   script mkcaseconv, check increasing table size).
+ */
 enum {TO1, TOLO, TOUP, TOBOTH};
+enum {EVENCAP, ODDCAP};
 static struct caseconv_entry {
   unsigned int first: 21;
   unsigned short diff: 8;
@@ -71,6 +99,7 @@ toulower (wint_t c)
 	default:
 	  break;
       }
+
   return c;
 }
 
@@ -102,9 +131,10 @@ touupper (wint_t c)
 	  default:
 	    break;
 	  }
-	default:
-	  break;
+      default:
+	break;
       }
+
   return c;
 }
Corinna Vinschen March 26, 2018, 10:30 a.m. | #31
On Mar 26 11:53, Thomas Wolff wrote:
> Am 26.03.2018 um 10:01 schrieb Corinna Vinschen:

> > On Mar 25 11:02, Thomas Wolff wrote:

> > > as attached

> > > Thomas

> > Thanks, but the patch is broken.  The last line in the patch is the

> > start of another patch hunk, which then is missing.  Can you fix that, please?

> Yeah, I tried to limit git fiddling effort by manually manipulating the

> patch, which failed.


Never a good idea.

> (After I tried to re-sync with the current repository, it would insist on

> some merging, and I do not know how to rectify that;


Never do your patches on the master branch.  Create a new branch from
current master and work there:

  git checkout -b fix-towctrans-doc
  [hack, hack, hack]
  git commit
  git format-patch

After the changes have been committed, just remove your hack branch, e.g.:

  git checkout master
  git fetch && git merge (or `git pull')
  git branch -D fix-towctrans-doc

Since branches are local, they are really cheap, very different from
CVS, for instance.

> manual fixing of the file, git pull -f... nothing helped (error: Pulling is

> not possible because you have unmerged files);


Before you try to merge:

If you accidentally hacked on master, *now* create your hack branch

  git branch fix-towctrans-doc master

then reset master to upstream master (in the state known to your local
git repo) and merge the latest from upstream master:

  git reset --hard origin/master
  git fetch && git merge (or `git pull')

Note that you can do the above even after you already accidentally
merged too early.  Your patch will still be available on the
fix-towctrans-doc branch.  Just have a look into the branch and find
your patch:

  git checkout fix-towctrans-doc
  git log --oneline
  [prints all patches including yours with a shortened SHA-1 ID, e.g.]
  [...]
  123abc456def comments to document struct caseconv_entry
  [...]

then cherry-pick your patch on top of master and, if required, fix
conflicts:

  git co master
  git cherry-pick 123abc456def

> I know I should eventually

> consult the howto you kindly pointed me to...)


Definitely.  Because...

> So, based on a fresh git clone, ...


... you're making your life much harder than necessary.  git has a steep
learning curve, I bet everyone on this list will agree on that, but it's
worth the effort (and you will learn something new even after you think
you got a grip on git).

> ... here's an updated patch, also fixing one

> remaining minor layout glitch.

> Thomas


Pushed.


Thanks,
Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Corinna Vinschen March 26, 2018, 10:38 a.m. | #32
On Mar 26 12:30, Corinna Vinschen wrote:
> On Mar 26 11:53, Thomas Wolff wrote:

> > Am 26.03.2018 um 10:01 schrieb Corinna Vinschen:

> > > On Mar 25 11:02, Thomas Wolff wrote:

> > > > as attached

> > > > Thomas

> > > Thanks, but the patch is broken.  The last line in the patch is the

> > > start of another patch hunk, which then is missing.  Can you fix that, please?

> > Yeah, I tried to limit git fiddling effort by manually manipulating the

> > patch, which failed.

> 

> Never a good idea.

> 

> > (After I tried to re-sync with the current repository, it would insist on

> > some merging, and I do not know how to rectify that;

> 

> Never do your patches on the master branch.  Create a new branch from

> current master and work there:

> 

>   git checkout -b fix-towctrans-doc

>   [hack, hack, hack]

>   git commit

>   git format-patch

> 

> After the changes have been commited, just remove your hack branch, e.g.:

> 

>   git checkout master

>   git fetch && git merge (or `git pull)

>   git branch -D fix-towctrans-doc

> 

> Since branches are local, they are really cheap, very different from

> CVS, for instance.


Oh, and you can keep your hack branch in sync with master, like this:

  git checkout master
  git fetch && git merge
  git co fix-towctrans-doc
  git rebase master

> [...]

> then cherry-pick your patch on top of master and, if required, fix

> conflicts:

> 

>   git co master


Yikes, local alias here.  That should have been:

    git checkout master

>   git cherry-pick 123abc456def



Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Thomas Wolff March 26, 2018, 11:31 a.m. | #33
Am 26.03.2018 um 12:38 schrieb Corinna Vinschen:
> On Mar 26 12:30, Corinna Vinschen wrote:

>> On Mar 26 11:53, Thomas Wolff wrote:

>>> Am 26.03.2018 um 10:01 schrieb Corinna Vinschen:

>>>> On Mar 25 11:02, Thomas Wolff wrote:

>>>>> as attached

>>>>> Thomas

>>>> Thanks, but the patch is broken.  The last line in the patch is the

>>>> start of another patch hunk, which then is missing.  Can you fix that, please?

>>> Yeah, I tried to limit git fiddling effort by manually manipulating the

>>> patch, which failed.

>> Never a good idea.

>>

>>> (After I tried to re-sync with the current repository, it would insist on

>>> some merging, and I do not know how to rectify that;

>> Never do your patches on the master branch.  Create a new branch from

>> current master and work there:

>>

>>    git checkout -b fix-towctrans-doc

>>    [hack, hack, hack]

>>    git commit

>>    git format-patch

>>

>> After the changes have been commited, just remove your hack branch, e.g.:

>>

>>    git checkout master

>>    git fetch && git merge (or `git pull)

>>    git branch -D fix-towctrans-doc

>>

>> Since branches are local, they are really cheap, very different from

>> CVS, for instance.

> Oh, and you can keep your hack branch in sync with master, like this:

>

>    git checkout master

>    git fetch && git merge

>    git co fix-towctrans-doc

>    git rebase master

>

>> [...]

>> then cherry-pick your patch on top of master and, if required, fix

>> conflicts:

>>

>>    git co master

> Yikes, local alias here.  That should have been:

>

>      git checkout master

>

>>    git cherry-pick 123abc456def

>

> Corinna

Thanks a lot for these use-case-specific howto snippets. That's very 
useful for my local notes. I wonder how a system that makes simple use 
cases need series of cryptic commands could get so popular...
Thomas
Corinna Vinschen March 26, 2018, 12:45 p.m. | #34
On Mar 26 13:31, Thomas Wolff wrote:
> Am 26.03.2018 um 12:38 schrieb Corinna Vinschen:

> > On Mar 26 12:30, Corinna Vinschen wrote:

> > > On Mar 26 11:53, Thomas Wolff wrote:

> > > > Am 26.03.2018 um 10:01 schrieb Corinna Vinschen:

> > > > > On Mar 25 11:02, Thomas Wolff wrote:

> > > > > > as attached

> > > > > > Thomas

> > > > > Thanks, but the patch is broken.  The last line in the patch is the

> > > > > start of another patch hunk, which then is missing.  Can you fix that, please?

> > > > Yeah, I tried to limit git fiddling effort by manually manipulating the

> > > > patch, which failed.

> > > Never a good idea.

> > > 

> > > > (After I tried to re-sync with the current repository, it would insist on

> > > > some merging, and I do not know how to rectify that;

> > > Never do your patches on the master branch.  Create a new branch from

> > > current master and work there:

> > > 

> > >    git checkout -b fix-towctrans-doc

> > >    [hack, hack, hack]

> > >    git commit

> > >    git format-patch

> > > 

> > > After the changes have been commited, just remove your hack branch, e.g.:

> > > 

> > >    git checkout master

> > >    git fetch && git merge (or `git pull)

> > >    git branch -D fix-towctrans-doc

> > > 

> > > Since branches are local, they are really cheap, very different from

> > > CVS, for instance.

> > Oh, and you can keep your hack branch in sync with master, like this:

> > 

> >    git checkout master

> >    git fetch && git merge

> >    git co fix-towctrans-doc

> >    git rebase master

> > 

> > > [...]

> > > then cherry-pick your patch on top of master and, if required, fix

> > > conflicts:

> > > 

> > >    git co master

> > Yikes, local alias here.  That should have been:

> > 

> >      git checkout master

> > 

> > >    git cherry-pick 123abc456def

> > 

> > Corinna

> Thanks a lot for these use-case-specific howto sniplets. That's very useful

> for my local notes. I wonder how a system that makes simple use cases need

> series of cryptic commands could get so popular...


Not cryptic at all as soon as you get a grip on the methodology.  It's
just different from CVS and svn due to the local copies of the entire
repository.  And the way how you can handle multiple remote repositories
within a single local repository.  It's just immensely more powerful
than other sccs.  There are a few instances where git saved my work,
where I had lost it with CVS.


Corinna,
once a fearful git critic, now a convinced believer

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Thomas Preudhomme March 27, 2018, 10:09 a.m. | #35
Hi Thomas,

This patch appears to regress 3 tests in libstdc++:

22_locale/ctype/is/wchar_t/1.cc execution test
22_locale/ctype/scan/wchar_t/1.cc execution test
28_regex/traits/wchar_t/isctype.cc execution test

I believe the issue is due to ... (scroll down)

On 09/03/18 22:54, Thomas Wolff wrote:

[SNIP]

> diff --git a/newlib/libc/ctype/iswupper_l.c b/newlib/libc/ctype/iswupper_l.c

> index 2555cd0..7ce8b5e

> --- a/newlib/libc/ctype/iswupper_l.c

> +++ b/newlib/libc/ctype/iswupper_l.c

> @@ -1,10 +1,20 @@

> +/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */

>   #include <_ansi.h>

> +#include <ctype.h>

>   #include <wctype.h>

> +#include "local.h"

> +#include "categories.h"

>   

>   int

>   iswupper_l (wint_t c, struct __locale_t *locale)

>   {

> -  /* We're using a locale-independent representation of upper/lower case

> -     based on Unicode data.  Thus, the locale doesn't matter. */

> -  return towlower (c) != c;

> +#ifdef _MB_CAPABLE

> +  c = _jp2uc_l (c, locale);

> +  // The wide-character class "upper" contains at least those characters wc

> +  // which are equal to towupper(wc) and different from towlower(wc).

> +  enum category cat = category (c);

> +  return cat == CAT_Lu || (cat == CAT_LC && towupper (c) == c);

> +#else

> +  return c < 0x100 ? islower (c) : 0;

> +#endif /* _MB_CAPABLE */

>   }


This change. Shouldn't it call isupper instead of islower? Or perhaps !islower? 
I've tried with isupper and it makes the tests mentioned above pass.

Best regards,

Thomas
Corinna Vinschen March 27, 2018, 10:37 a.m. | #36
On Mar 27 11:09, Thomas Preudhomme wrote:
> Hi Thomas,

> 

> This patch appears to regress 3 tests in libstdc++:

> 

> 22_locale/ctype/is/wchar_t/1.cc execution test

> 22_locale/ctype/scan/wchar_t/1.cc execution test

> 28_regex/traits/wchar_t/isctype.cc execution test

> 

> I believe the issue is due to ... (scroll down)

> 

> On 09/03/18 22:54, Thomas Wolff wrote:

> 

> [SNIP]

> 

> > diff --git a/newlib/libc/ctype/iswupper_l.c b/newlib/libc/ctype/iswupper_l.c

> > index 2555cd0..7ce8b5e

> > --- a/newlib/libc/ctype/iswupper_l.c

> > +++ b/newlib/libc/ctype/iswupper_l.c

> > @@ -1,10 +1,20 @@

> > +/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */

> >   #include <_ansi.h>

> > +#include <ctype.h>

> >   #include <wctype.h>

> > +#include "local.h"

> > +#include "categories.h"

> >   int

> >   iswupper_l (wint_t c, struct __locale_t *locale)

> >   {

> > -  /* We're using a locale-independent representation of upper/lower case

> > -     based on Unicode data.  Thus, the locale doesn't matter. */

> > -  return towlower (c) != c;

> > +#ifdef _MB_CAPABLE

> > +  c = _jp2uc_l (c, locale);

> > +  // The wide-character class "upper" contains at least those characters wc

> > +  // which are equal to towupper(wc) and different from towlower(wc).

> > +  enum category cat = category (c);

> > +  return cat == CAT_Lu || (cat == CAT_LC && towupper (c) == c);

> > +#else

> > +  return c < 0x100 ? islower (c) : 0;

> > +#endif /* _MB_CAPABLE */

> >   }

> 

> This change. Shouldn't is call isupper instead of islower? Or perhaps

> !islower?  I've tried with isupper and it makes the tests mentionned above

> pass.


Yeah, calling isupper is correct, just as calling towupper in the
_MB_CAPABLE case.  I pushed a patch.


Thanks,
Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat
Thomas Wolff March 27, 2018, 6:16 p.m. | #37
Am 27.03.2018 um 12:37 schrieb Corinna Vinschen:
> On Mar 27 11:09, Thomas Preudhomme wrote:

>> Hi Thomas,

>>

>> This patch appears to regress 3 tests in libstdc++:

>>

>> 22_locale/ctype/is/wchar_t/1.cc execution test

>> 22_locale/ctype/scan/wchar_t/1.cc execution test

>> 28_regex/traits/wchar_t/isctype.cc execution test

>>

>> I believe the issue is due to ... (scroll down)

>>

>> On 09/03/18 22:54, Thomas Wolff wrote:

>>

>> [SNIP]

>>

>>> diff --git a/newlib/libc/ctype/iswupper_l.c b/newlib/libc/ctype/iswupper_l.c

>>> index 2555cd0..7ce8b5e

>>> --- a/newlib/libc/ctype/iswupper_l.c

>>> +++ b/newlib/libc/ctype/iswupper_l.c

>>> @@ -1,10 +1,20 @@

>>> +/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */

>>>    #include <_ansi.h>

>>> +#include <ctype.h>

>>>    #include <wctype.h>

>>> +#include "local.h"

>>> +#include "categories.h"

>>>    int

>>>    iswupper_l (wint_t c, struct __locale_t *locale)

>>>    {

>>> -  /* We're using a locale-independent representation of upper/lower case

>>> -     based on Unicode data.  Thus, the locale doesn't matter. */

>>> -  return towlower (c) != c;

>>> +#ifdef _MB_CAPABLE

>>> +  c = _jp2uc_l (c, locale);

>>> +  // The wide-character class "upper" contains at least those characters wc

>>> +  // which are equal to towupper(wc) and different from towlower(wc).

>>> +  enum category cat = category (c);

>>> +  return cat == CAT_Lu || (cat == CAT_LC && towupper (c) == c);

>>> +#else

>>> +  return c < 0x100 ? islower (c) : 0;

>>> +#endif /* _MB_CAPABLE */

>>>    }

>> This change. Shouldn't is call isupper instead of islower? Or perhaps !islower?

>> I've tried with isupper and it makes the tests mentionned above pass.

Sure. I didn't mean to change the #ifdef _MB_CAPABLE #else case at all, 
but due to some refactoring and reversed dependencies, it needed to be 
tweaked. This looks like a copy-paste accident, which makes me really 
blush. I've meanwhile compared the sources to look out for further flaws 
but didn't find any. Sorry and thanks for testing.
> Yeah, calling isupper is correct, just as calling towupper in the _MB_CAPABLE case.  I pushed a patch.

Thanks.
Thomas

Patch

diff --git a/newlib/libc/ctype/caseconv.t b/newlib/libc/ctype/caseconv.t
new file mode 100644
index 0000000..5e132d3
--- /dev/null
+++ b/newlib/libc/ctype/caseconv.t
@@ -0,0 +1,304 @@ 
+  {0x0041, 25, TOLO, 32},
+  {0x0061, 25, TOUP, -32},
+  {0x00B5, 0, TOUP, 743},
+  {0x00C0, 22, TOLO, 32},
+  {0x00D8, 6, TOLO, 32},
+  {0x00E0, 22, TOUP, -32},
+  {0x00F8, 6, TOUP, -32},
+  {0x00FF, 0, TOUP, 121},
+  {0x0100, 47, TO1, EVENCAP},
+  {0x0130, 0, TOLO, -199},
+  {0x0131, 0, TOUP, -232},
+  {0x0132, 5, TO1, EVENCAP},
+  {0x0139, 15, TO1, ODDCAP},
+  {0x014A, 45, TO1, EVENCAP},
+  {0x0178, 0, TOLO, -121},
+  {0x0179, 5, TO1, ODDCAP},
+  {0x017F, 0, TOUP, -300},
+  {0x0180, 0, TOUP, 195},
+  {0x0181, 0, TOLO, 210},
+  {0x0182, 3, TO1, EVENCAP},
+  {0x0186, 0, TOLO, 206},
+  {0x0187, 1, TO1, ODDCAP},
+  {0x0189, 1, TOLO, 205},
+  {0x018B, 1, TO1, ODDCAP},
+  {0x018E, 0, TOLO, 79},
+  {0x018F, 0, TOLO, 202},
+  {0x0190, 0, TOLO, 203},
+  {0x0191, 1, TO1, ODDCAP},
+  {0x0193, 0, TOLO, 205},
+  {0x0194, 0, TOLO, 207},
+  {0x0195, 0, TOUP, 97},
+  {0x0196, 0, TOLO, 211},
+  {0x0197, 0, TOLO, 209},
+  {0x0198, 1, TO1, EVENCAP},
+  {0x019A, 0, TOUP, 163},
+  {0x019C, 0, TOLO, 211},
+  {0x019D, 0, TOLO, 213},
+  {0x019E, 0, TOUP, 130},
+  {0x019F, 0, TOLO, 214},
+  {0x01A0, 5, TO1, EVENCAP},
+  {0x01A6, 0, TOLO, 218},
+  {0x01A7, 1, TO1, ODDCAP},
+  {0x01A9, 0, TOLO, 218},
+  {0x01AC, 1, TO1, EVENCAP},
+  {0x01AE, 0, TOLO, 218},
+  {0x01AF, 1, TO1, ODDCAP},
+  {0x01B1, 1, TOLO, 217},
+  {0x01B3, 3, TO1, ODDCAP},
+  {0x01B7, 0, TOLO, 219},
+  {0x01B8, 1, TO1, EVENCAP},
+  {0x01BC, 1, TO1, EVENCAP},
+  {0x01BF, 0, TOUP, 56},
+  {0x01C4, 0, TOLO, 2},
+  {0x01C5, 0, TOBOTH, 0},
+  {0x01C6, 0, TOUP, -2},
+  {0x01C7, 0, TOLO, 2},
+  {0x01C8, 0, TOBOTH, 0},
+  {0x01C9, 0, TOUP, -2},
+  {0x01CA, 0, TOLO, 2},
+  {0x01CB, 0, TOBOTH, 0},
+  {0x01CC, 0, TOUP, -2},
+  {0x01CD, 15, TO1, ODDCAP},
+  {0x01DD, 0, TOUP, -79},
+  {0x01DE, 17, TO1, EVENCAP},
+  {0x01F1, 0, TOLO, 2},
+  {0x01F2, 0, TOBOTH, 0},
+  {0x01F3, 0, TOUP, -2},
+  {0x01F4, 1, TO1, EVENCAP},
+  {0x01F6, 0, TOLO, -97},
+  {0x01F7, 0, TOLO, -56},
+  {0x01F8, 39, TO1, EVENCAP},
+  {0x0220, 0, TOLO, -130},
+  {0x0222, 17, TO1, EVENCAP},
+  {0x023A, 0, TOLO, 10795},
+  {0x023B, 1, TO1, ODDCAP},
+  {0x023D, 0, TOLO, -163},
+  {0x023E, 0, TOLO, 10792},
+  {0x023F, 1, TOUP, 10815},
+  {0x0241, 1, TO1, ODDCAP},
+  {0x0243, 0, TOLO, -195},
+  {0x0244, 0, TOLO, 69},
+  {0x0245, 0, TOLO, 71},
+  {0x0246, 9, TO1, EVENCAP},
+  {0x0250, 0, TOUP, 10783},
+  {0x0251, 0, TOUP, 10780},
+  {0x0252, 0, TOUP, 10782},
+  {0x0253, 0, TOUP, -210},
+  {0x0254, 0, TOUP, -206},
+  {0x0256, 1, TOUP, -205},
+  {0x0259, 0, TOUP, -202},
+  {0x025B, 0, TOUP, -203},
+  {0x025C, 0, TOUP, 42319},
+  {0x0260, 0, TOUP, -205},
+  {0x0261, 0, TOUP, 42315},
+  {0x0263, 0, TOUP, -207},
+  {0x0265, 0, TOUP, 42280},
+  {0x0266, 0, TOUP, 42308},
+  {0x0268, 0, TOUP, -209},
+  {0x0269, 0, TOUP, -211},
+  {0x026A, 0, TOUP, 42308},
+  {0x026B, 0, TOUP, 10743},
+  {0x026C, 0, TOUP, 42305},
+  {0x026F, 0, TOUP, -211},
+  {0x0271, 0, TOUP, 10749},
+  {0x0272, 0, TOUP, -213},
+  {0x0275, 0, TOUP, -214},
+  {0x027D, 0, TOUP, 10727},
+  {0x0280, 0, TOUP, -218},
+  {0x0283, 0, TOUP, -218},
+  {0x0287, 0, TOUP, 42282},
+  {0x0288, 0, TOUP, -218},
+  {0x0289, 0, TOUP, -69},
+  {0x028A, 1, TOUP, -217},
+  {0x028C, 0, TOUP, -71},
+  {0x0292, 0, TOUP, -219},
+  {0x029D, 0, TOUP, 42261},
+  {0x029E, 0, TOUP, 42258},
+  {0x0345, 0, TOUP, 84},
+  {0x0370, 3, TO1, EVENCAP},
+  {0x0376, 1, TO1, EVENCAP},
+  {0x037B, 2, TOUP, 130},
+  {0x037F, 0, TOLO, 116},
+  {0x0386, 0, TOLO, 38},
+  {0x0388, 2, TOLO, 37},
+  {0x038C, 0, TOLO, 64},
+  {0x038E, 1, TOLO, 63},
+  {0x0391, 16, TOLO, 32},
+  {0x03A3, 8, TOLO, 32},
+  {0x03AC, 0, TOUP, -38},
+  {0x03AD, 2, TOUP, -37},
+  {0x03B1, 16, TOUP, -32},
+  {0x03C2, 0, TOUP, -31},
+  {0x03C3, 8, TOUP, -32},
+  {0x03CC, 0, TOUP, -64},
+  {0x03CD, 1, TOUP, -63},
+  {0x03CF, 0, TOLO, 8},
+  {0x03D0, 0, TOUP, -62},
+  {0x03D1, 0, TOUP, -57},
+  {0x03D5, 0, TOUP, -47},
+  {0x03D6, 0, TOUP, -54},
+  {0x03D7, 0, TOUP, -8},
+  {0x03D8, 23, TO1, EVENCAP},
+  {0x03F0, 0, TOUP, -86},
+  {0x03F1, 0, TOUP, -80},
+  {0x03F2, 0, TOUP, 7},
+  {0x03F3, 0, TOUP, -116},
+  {0x03F4, 0, TOLO, -60},
+  {0x03F5, 0, TOUP, -96},
+  {0x03F7, 1, TO1, ODDCAP},
+  {0x03F9, 0, TOLO, -7},
+  {0x03FA, 1, TO1, EVENCAP},
+  {0x03FD, 2, TOLO, -130},
+  {0x0400, 15, TOLO, 80},
+  {0x0410, 31, TOLO, 32},
+  {0x0430, 31, TOUP, -32},
+  {0x0450, 15, TOUP, -80},
+  {0x0460, 33, TO1, EVENCAP},
+  {0x048A, 53, TO1, EVENCAP},
+  {0x04C0, 0, TOLO, 15},
+  {0x04C1, 13, TO1, ODDCAP},
+  {0x04CF, 0, TOUP, -15},
+  {0x04D0, 95, TO1, EVENCAP},
+  {0x0531, 37, TOLO, 48},
+  {0x0561, 37, TOUP, -48},
+  {0x10A0, 37, TOLO, 7264},
+  {0x10C7, 0, TOLO, 7264},
+  {0x10CD, 0, TOLO, 7264},
+  {0x13A0, 79, TOLO, 38864},
+  {0x13F0, 5, TOLO, 8},
+  {0x13F8, 5, TOUP, -8},
+  {0x1C80, 0, TOUP, -6254},
+  {0x1C81, 0, TOUP, -6253},
+  {0x1C82, 0, TOUP, -6244},
+  {0x1C83, 1, TOUP, -6242},
+  {0x1C85, 0, TOUP, -6243},
+  {0x1C86, 0, TOUP, -6236},
+  {0x1C87, 0, TOUP, -6181},
+  {0x1C88, 0, TOUP, 35266},
+  {0x1D79, 0, TOUP, 35332},
+  {0x1D7D, 0, TOUP, 3814},
+  {0x1E00, 149, TO1, EVENCAP},
+  {0x1E9B, 0, TOUP, -59},
+  {0x1E9E, 0, TOLO, -7615},
+  {0x1EA0, 95, TO1, EVENCAP},
+  {0x1F00, 7, TOUP, 8},
+  {0x1F08, 7, TOLO, -8},
+  {0x1F10, 5, TOUP, 8},
+  {0x1F18, 5, TOLO, -8},
+  {0x1F20, 7, TOUP, 8},
+  {0x1F28, 7, TOLO, -8},
+  {0x1F30, 7, TOUP, 8},
+  {0x1F38, 7, TOLO, -8},
+  {0x1F40, 5, TOUP, 8},
+  {0x1F48, 5, TOLO, -8},
+  {0x1F51, 0, TOUP, 8},
+  {0x1F53, 0, TOUP, 8},
+  {0x1F55, 0, TOUP, 8},
+  {0x1F57, 0, TOUP, 8},
+  {0x1F59, 0, TOLO, -8},
+  {0x1F5B, 0, TOLO, -8},
+  {0x1F5D, 0, TOLO, -8},
+  {0x1F5F, 0, TOLO, -8},
+  {0x1F60, 7, TOUP, 8},
+  {0x1F68, 7, TOLO, -8},
+  {0x1F70, 1, TOUP, 74},
+  {0x1F72, 3, TOUP, 86},
+  {0x1F76, 1, TOUP, 100},
+  {0x1F78, 1, TOUP, 128},
+  {0x1F7A, 1, TOUP, 112},
+  {0x1F7C, 1, TOUP, 126},
+  {0x1F80, 7, TOUP, 8},
+  {0x1F88, 7, TOLO, -8},
+  {0x1F90, 7, TOUP, 8},
+  {0x1F98, 7, TOLO, -8},
+  {0x1FA0, 7, TOUP, 8},
+  {0x1FA8, 7, TOLO, -8},
+  {0x1FB0, 1, TOUP, 8},
+  {0x1FB3, 0, TOUP, 9},
+  {0x1FB8, 1, TOLO, -8},
+  {0x1FBA, 1, TOLO, -74},
+  {0x1FBC, 0, TOLO, -9},
+  {0x1FBE, 0, TOUP, -7205},
+  {0x1FC3, 0, TOUP, 9},
+  {0x1FC8, 3, TOLO, -86},
+  {0x1FCC, 0, TOLO, -9},
+  {0x1FD0, 1, TOUP, 8},
+  {0x1FD8, 1, TOLO, -8},
+  {0x1FDA, 1, TOLO, -100},
+  {0x1FE0, 1, TOUP, 8},
+  {0x1FE5, 0, TOUP, 7},
+  {0x1FE8, 1, TOLO, -8},
+  {0x1FEA, 1, TOLO, -112},
+  {0x1FEC, 0, TOLO, -7},
+  {0x1FF3, 0, TOUP, 9},
+  {0x1FF8, 1, TOLO, -128},
+  {0x1FFA, 1, TOLO, -126},
+  {0x1FFC, 0, TOLO, -9},
+  {0x2126, 0, TOLO, -7517},
+  {0x212A, 0, TOLO, -8383},
+  {0x212B, 0, TOLO, -8262},
+  {0x2132, 0, TOLO, 28},
+  {0x214E, 0, TOUP, -28},
+  {0x2160, 15, TOLO, 16},
+  {0x2170, 15, TOUP, -16},
+  {0x2183, 1, TO1, ODDCAP},
+  {0x24B6, 25, TOLO, 26},
+  {0x24D0, 25, TOUP, -26},
+  {0x2C00, 46, TOLO, 48},
+  {0x2C30, 46, TOUP, -48},
+  {0x2C60, 1, TO1, EVENCAP},
+  {0x2C62, 0, TOLO, -10743},
+  {0x2C63, 0, TOLO, -3814},
+  {0x2C64, 0, TOLO, -10727},
+  {0x2C65, 0, TOUP, -10795},
+  {0x2C66, 0, TOUP, -10792},
+  {0x2C67, 5, TO1, ODDCAP},
+  {0x2C6D, 0, TOLO, -10780},
+  {0x2C6E, 0, TOLO, -10749},
+  {0x2C6F, 0, TOLO, -10783},
+  {0x2C70, 0, TOLO, -10782},
+  {0x2C72, 1, TO1, EVENCAP},
+  {0x2C75, 1, TO1, ODDCAP},
+  {0x2C7E, 1, TOLO, -10815},
+  {0x2C80, 99, TO1, EVENCAP},
+  {0x2CEB, 3, TO1, ODDCAP},
+  {0x2CF2, 1, TO1, EVENCAP},
+  {0x2D00, 37, TOUP, -7264},
+  {0x2D27, 0, TOUP, -7264},
+  {0x2D2D, 0, TOUP, -7264},
+  {0xA640, 45, TO1, EVENCAP},
+  {0xA680, 27, TO1, EVENCAP},
+  {0xA722, 13, TO1, EVENCAP},
+  {0xA732, 61, TO1, EVENCAP},
+  {0xA779, 3, TO1, ODDCAP},
+  {0xA77D, 0, TOLO, -35332},
+  {0xA77E, 9, TO1, EVENCAP},
+  {0xA78B, 1, TO1, ODDCAP},
+  {0xA78D, 0, TOLO, -42280},
+  {0xA790, 3, TO1, EVENCAP},
+  {0xA796, 19, TO1, EVENCAP},
+  {0xA7AA, 0, TOLO, -42308},
+  {0xA7AB, 0, TOLO, -42319},
+  {0xA7AC, 0, TOLO, -42315},
+  {0xA7AD, 0, TOLO, -42305},
+  {0xA7AE, 0, TOLO, -42308},
+  {0xA7B0, 0, TOLO, -42258},
+  {0xA7B1, 0, TOLO, -42282},
+  {0xA7B2, 0, TOLO, -42261},
+  {0xA7B3, 0, TOLO, 928},
+  {0xA7B4, 3, TO1, EVENCAP},
+  {0xAB53, 0, TOUP, -928},
+  {0xAB70, 79, TOUP, -38864},
+  {0xFF21, 25, TOLO, 32},
+  {0xFF41, 25, TOUP, -32},
+  {0x10400, 39, TOLO, 40},
+  {0x10428, 39, TOUP, -40},
+  {0x104B0, 35, TOLO, 40},
+  {0x104D8, 35, TOUP, -40},
+  {0x10C80, 50, TOLO, 64},
+  {0x10CC0, 50, TOUP, -64},
+  {0x118A0, 31, TOLO, 32},
+  {0x118C0, 31, TOUP, -32},
+  {0x1E900, 33, TOLO, 34},
+  {0x1E922, 33, TOUP, -34},