tweak build process to update Unicode

Message ID a0a39a78-2b1f-3a01-2aa3-5b6c3f7f2b4d@towo.net
State New
Headers show
Series
  • tweak build process to update Unicode
Related show

Commit Message

Thomas Wolff April 16, 2021, 11:35 p.m.
From 509e3eb2687ad38ef4e2f75d7f311d9f1a742306 Mon Sep 17 00:00:00 2001
From: Thomas Wolff <towo@towo.net>
Date: Sat, 17 Apr 2021 00:00:00 +0200
Subject: [PATCH] fix and amend scripts and makefile rules to generate Unicode
 data

---
 newlib/Makefile.am           |  4 ----
 newlib/Makefile.in           |  4 ----
 newlib/libc/Makefile.unidata | 12 ++++++++++++
 newlib/libc/ctype/mkunidata  |  4 +++-
 newlib/libc/string/mkunidata |  6 ++++--
 newlib/libc/string/uniset    | 29 +++++++++++++++--------------
 6 files changed, 34 insertions(+), 25 deletions(-)
 create mode 100644 newlib/libc/Makefile.unidata

Comments

Corinna Vinschen July 6, 2021, 1:54 p.m. | #1
Hi Thomas,

On Apr 17 01:35, Thomas Wolff wrote:
> From 509e3eb2687ad38ef4e2f75d7f311d9f1a742306 Mon Sep 17 00:00:00 2001
> From: Thomas Wolff <towo@towo.net>
> Date: Sat, 17 Apr 2021 00:00:00 +0200
> Subject: [PATCH] fix and amend scripts and makefile rules to generate Unicode
>  data


Both patches pushed.  Additionally, wouldn't it make sense to remove
the temporarily downloaded files like libc/ctype/UnicodeData.txt, etc.?

Either automatically after creating the generated files or in a
matching MAINTAINERCLEANFILES variable in Makefile.am?

Also, maybe it makes sense to add these temporary files to the .gitignore
file?


Thanks,
Corinna

Patch

diff --git a/newlib/Makefile.am b/newlib/Makefile.am
index 03390cd17..3a8b99fce 100644
--- a/newlib/Makefile.am
+++ b/newlib/Makefile.am
@@ -420,10 +420,6 @@  endif
 
 # Generate Unicode data tables for libc/string/wcwidth and libc/ctype/??w*
 unidata:
-	cd $(srcdir)/libc/string; ./mkunidata
-	cd $(srcdir)/libc/ctype; ./mkunidata
-
-unidate-download:
 	cd $(srcdir)/libc/string; ./mkunidata -u
 	cd $(srcdir)/libc/ctype; ./mkunidata -u
 
diff --git a/newlib/Makefile.in b/newlib/Makefile.in
index fdaf34586..a4eba9120 100644
--- a/newlib/Makefile.in
+++ b/newlib/Makefile.in
@@ -1168,10 +1168,6 @@  install-data-local:	install-toollibLIBRARIES
 
 # Generate Unicode data tables for libc/string/wcwidth and libc/ctype/??w*
 unidata:
-	cd $(srcdir)/libc/string; ./mkunidata
-	cd $(srcdir)/libc/ctype; ./mkunidata
-
-unidate-download:
 	cd $(srcdir)/libc/string; ./mkunidata -u
 	cd $(srcdir)/libc/ctype; ./mkunidata -u
 
diff --git a/newlib/libc/Makefile.unidata b/newlib/libc/Makefile.unidata
new file mode 100644
index 000000000..0f9276193
--- /dev/null
+++ b/newlib/libc/Makefile.unidata
@@ -0,0 +1,12 @@ 
+# Generate Unicode data tables for string/wcwidth and ctype/??w*
+# Download Unicode data files
+unidata:
+	cd string; ./mkunidata -u
+	cd ctype; ./mkunidata -u
+
+# Generate Unicode data tables for string/wcwidth and ctype/??w*
+# Use installed Unicode data files from package unicode-ucd
+unidata-local:
+	cd string; ./mkunidata -i
+	cd ctype; ./mkunidata -i
+
diff --git a/newlib/libc/ctype/mkunidata b/newlib/libc/ctype/mkunidata
index 4bdf3bc81..4adf667ef 100755
--- a/newlib/libc/ctype/mkunidata
+++ b/newlib/libc/ctype/mkunidata
@@ -20,7 +20,9 @@  case "$1" in
 	;;
 -u)
 	wget () {
-		curl -R -O --connect-timeout 55 -z "`basename $1`" "$1"
+		ref=`basename $1`
+		ref=`ls "$ref" 2> /dev/null || echo 01-Jan-1970`
+		curl -R -O --connect-timeout 55 -z "$ref" "$1"
 	}
 
 	echo downloading data from unicode.org
diff --git a/newlib/libc/string/mkunidata b/newlib/libc/string/mkunidata
index 7ebebeb07..7b6135f8d 100755
--- a/newlib/libc/string/mkunidata
+++ b/newlib/libc/string/mkunidata
@@ -21,11 +21,13 @@  case "$1" in
 	;;
 -u)
 	wget () {
-		curl -R -O --connect-timeout 55 -z "`basename $1`" "$1"
+		ref=`basename $1`
+		ref=`ls "$ref" 2> /dev/null || echo 01-Jan-1970`
+		curl -R -O --connect-timeout 55 -z "$ref" "$1"
 	}
 
 	echo downloading uniset tool
-	wget http://www.cl.cam.ac.uk/~mgk25/download/uniset.tar.gz
+	wget https://www.cl.cam.ac.uk/~mgk25/download/uniset.tar.gz
 	gzip -dc uniset.tar.gz | tar xvf - uniset
 
 	echo downloading data from unicode.org
diff --git a/newlib/libc/string/uniset b/newlib/libc/string/uniset
index 85d3b2a6f..b118dd663 100755
--- a/newlib/libc/string/uniset
+++ b/newlib/libc/string/uniset
@@ -2,7 +2,7 @@ 
 # Uniset -- Unicode subset manager -- Markus Kuhn
 # http://www.cl.cam.ac.uk/~mgk25/download/uniset.tar.gz
 
-require 5.008;
+require 5.014;
 use open ':utf8';
 use FindBin qw($RealBin);  # to find directory where this file is located
 
@@ -147,11 +147,12 @@  sub is_unicode {
     return exists $name{$ucs};
 }
 
-my @search_path;
-push @search_path, "$ENV{HOME}/local/share/uniset"
-    if -d "$ENV{HOME}/local/share/uniset";
-push @search_path, "/usr/share/uniset" if -d "/usr/share/uniset";
-push @search_path, $RealBin unless $RealBin =~ m|^/usr/bin|;
+my @search_path = ();
+if ($RealBin =~ m|^(.*)/bin\z| && -d "$1/share/uniset") {
+    push @search_path, "$1/share/uniset";
+} else {
+    push @search_path, $RealBin;
+}
 
 sub search_open {
     my ($mode, $fn) = @_;
@@ -186,7 +187,7 @@  while (<$data>) {
 	$category{$ucs} = $3;
         $comment{$ucs} = $12;
     } else {
-        die("Syntax error in line '$_' in file '$unicodedata'");
+        die("Syntax error in line '$_' in file '$unicodedata'\n");
     }
 }
 close($data);
@@ -209,7 +210,7 @@  while (<$data>) {
     } elsif (/^\s*\#/ || /^\s*$/) {
 	# ignore comments and empty lines
     } else {
-        die("Syntax error in line '$_' in file '$blockdata'");
+        die("Syntax error in line '$_' in file '$blockdata'\n");
     }
 }
 close($data);
@@ -231,16 +232,16 @@  while ($_ = shift(@ARGV)) {
 	$image = 1;
     } elsif (/^template$/) {
 	$template = shift(@ARGV);
-	open(TEMPLATE, $template) || die("Can't open template file '$template': '$!'");
+	open(TEMPLATE, $template) || die("Can't open template file '$template': $!\n");
 	while (<TEMPLATE>) {
 	    if (/^\#\s*include\s+\"([^\"]*)\"\s*$/) {
-		open(INCLUDE, $1) || die("Can't open template include file '$1': '$!'");
+		open(INCLUDE, $1) || die("Can't open template include file '$1': $!\n");
 		while (<INCLUDE>) {
 		    print $_;
 		}
 		close(INCLUDE);
 	    } elsif (/^\#\s*quote\s+\"([^\"]*)\"\s*$/) {
-		open(INCLUDE, $1) || die("Can't open template include file '$1': '$!'");
+		open(INCLUDE, $1) || die("Can't open template include file '$1': $!\n");
 		while (<INCLUDE>) {
 		    s/&/&amp;/g;
 		    s/</&lt;/g;
@@ -275,7 +276,7 @@  while ($_ = shift(@ARGV)) {
 	$setfile = $2;
 	$setfile = shift(@ARGV) if $setfile eq "";
 	push(@SETS, $setfile);
-	open(SET, $setfile) || die("Can't open set file '$setfile': '$!'");
+	open(SET, $setfile) || die("Can't open set file '$setfile': $!\n");
 	$setname = $setfile;
 	while (<SET>) {
 	    while ($_) {
@@ -303,7 +304,7 @@  while ($_ = shift(@ARGV)) {
 	$setfile = shift(@ARGV) if $setfile eq "";
 	push(@SETS, $setfile);
 	my $setf = search_open('<', $setfile);
-	die("Can't open set file '$setfile': '$!'") unless $setf;
+	die("Can't open set file '$setfile': $!\n") unless $setf;
 	$cedf = ($setfile =~ /cedf/); # detect Kosta Kosti's trans CEDF format by path name
 	$setname = $setfile;
 	$setname =~ s/([^.\[\]]*)\..*/$1/;
@@ -691,6 +692,6 @@  while ($_ = shift(@ARGV)) {
 	    delete $used{$i} if is_unicode($i);
 	}
     } else {
-	die("Unknown command line command '$_'");
+	die("Unknown command line command '$_'\n");
     };
 }