[3/3] Remove locale access for _REENT_SMALL

Message ID 20180216184628.8013-3-info@jasoon.nl
State New
Headers show
Series
  • [1/3] Remove direct locale references from stdlib
Related show

Commit Message

Jaap de Wolff Feb. 16, 2018, 6:46 p.m.
---
 newlib/libc/locale/duplocale.c   |  6 ++++++
 newlib/libc/locale/locale.c      | 27 ++++++++++++++++++++++++++-
 newlib/libc/locale/localeconv.c  |  6 ++++++
 newlib/libc/locale/newlocale.c   |  5 +++++
 newlib/libc/locale/nl_langinfo.c |  8 ++++++++
 newlib/libc/locale/setlocale.h   | 34 ++++++++++++++++++++++++++++++----
 newlib/libc/locale/uselocale.c   |  6 ++++++
 7 files changed, 87 insertions(+), 5 deletions(-)

-- 
2.11.0

Comments

Corinna Vinschen Feb. 19, 2018, 12:14 p.m. | #1
Hi Jaap,


Review inline.

On Feb 16 19:46, Jaap de Wolff wrote:
> diff --git a/newlib/libc/locale/duplocale.c b/newlib/libc/locale/duplocale.c

> index d3e7d782e..432c512f1 100644

> --- a/newlib/libc/locale/duplocale.c

> +++ b/newlib/libc/locale/duplocale.c

> @@ -35,6 +35,7 @@ PORTABILITY

>  */

>  

>  #include <newlib.h>

> +#include <errno.h>

>  #include <reent.h>

>  #include <stdlib.h>

>  #include "setlocale.h"

> @@ -42,6 +43,10 @@ PORTABILITY

>  struct __locale_t *

>  _duplocale_r (struct _reent *p, struct __locale_t *locobj)

>  {

> +#ifdef _REENT_SMALL

> +  p->_errno = ENOMEM;


ENOSYS, please.

> @@ -239,6 +239,7 @@ const struct __locale_t __C_locale =

>  };

>  #endif /* _MB_CAPABLE */

>  

> +#ifndef _REENT_SMALL

>  struct __locale_t __global_locale =


Don't you want to skip __C_locale as well for _REENT_SMALL? 

> @@ -288,6 +291,17 @@ static char *currentlocale (void);

>  

>  #endif /* _MB_CAPABLE */

>  

> +#ifdef _REENT_SMALL

> +const char *default_decimal_point = ".";

> +

> +const char *

> +_locale_decimalpoint (void *locale)

> +{

> +  return default_decimal_point;

> +}

> +

> +#endif

> +


Why not in a header as inline or macro, kind of like

  #define _locale_decimalpoint (_dummy) "."

?

>  const char *

>  __locale_ctype_ptr_l (struct __locale_t *locale)

>  {

> +#ifdef _REENT_SMALL

> +  return DEFAULT_CTYPE_PTR;

> +#else

>    return locale->ctype_ptr;

> +#endif

> +

>  }


Inline?  Macro?

>  const char *

>  __locale_ctype_ptr (void)

>  {

> +#ifdef _REENT_SMALL

> +  return DEFAULT_CTYPE_PTR;

> +#else

>    return __get_current_locale ()->ctype_ptr;

> +#endif

>  }


Same here?

>  #include <reent.h>

>  #include "setlocale.h"

>  

> +#ifndef _REENT_SMALL

> +

>  struct lconv *

>  __localeconv_l (struct __locale_t *locale)

>  {

> @@ -65,4 +67,8 @@ localeconv (void)

>  {

>    return __localeconv_l (__get_current_locale ());

>  }

> +#endif /* _REENT_SMALL */


Hang on, that doesn't make sense.  In case of newlocale, duplocale, etc.
you keep the functions and just return an error, while in case of of
localeconv you disable the entire function.  Please note that localeconv
existed for _REENT_SMALL targets before introducing per-thread locales,
check the git history.  The difference way back when was that the
localeconv info resided in a global lconv variable instead.  You may want
to retain the localconv info for small targets.

> diff --git a/newlib/libc/locale/newlocale.c b/newlib/libc/locale/newlocale.c

> index c6c2a9ca9..a696cbc0c 100644

> --- a/newlib/libc/locale/newlocale.c

> +++ b/newlib/libc/locale/newlocale.c

> @@ -86,6 +86,10 @@ struct __locale_t *

>  _newlocale_r (struct _reent *p, int category_mask, const char *locale,

>  	      struct __locale_t *base)

>  {

> +#ifdef _REENT_SMALL

> +  p->_errno = ENOMEM;


ENOSYS

> diff --git a/newlib/libc/locale/nl_langinfo.c b/newlib/libc/locale/nl_langinfo.c

> index eb984912f..882dc77b6 100644

> --- a/newlib/libc/locale/nl_langinfo.c

> +++ b/newlib/libc/locale/nl_langinfo.c

> @@ -331,6 +331,7 @@ do_codeset:

>  		break;

>  	case CRNCYSTR:

>  		ret = "";

> +#ifndef _REENT_SMALL

>  		cs = (char*) __get_monetary_locale (locale)->currency_symbol;

>  		if (*cs != '\0') {

>  			char pos = __localeconv_l (locale)->p_cs_precedes;


See above in terms of localconv info.

>  #endif /* _MB_CAPABLE */

>  

> -extern struct lconv *__localeconv_l (struct __locale_t *locale);

> -

>  extern size_t _wcsnrtombs_l (struct _reent *, char *, const wchar_t **,

>  			     size_t, size_t, mbstate_t *, struct __locale_t *);

>  

> +

> +#ifndef _REENT_SMALL

> +

> +extern struct lconv *__localeconv_l (struct __locale_t *locale);

> +


Same here.

>  #ifdef __HAVE_LOCALE_INFO__

>  _ELIDABLE_INLINE const struct lc_ctype_T *

> @@ -276,14 +280,18 @@ __get_current_ctype_locale (void)

>  _ELIDABLE_INLINE int

>  __locale_mb_cur_max_l (struct __locale_t *locale)

>  {

> +#ifdef _REENT_SMALL

> +	return _MB_LEN_MAX;

> +#else


This is incorrect.  MB_CUR_MAX != MB_LEN_MAX.  You have to return the
max value for the current charset here, and you have to store the info
somewhere.  In the pre per-thread locale days this function returned the
value of the global variable __mb_cur_max which got set in loadlocale
(now __loadlocale) from the setting of the local var mbc_max which still
exists.  Just returning MB_LEN_MAX here is an invalid reduction and may
lead to misbehaving user code.

> +#ifdef _REENT_SMALL

> +const char *_locale_decimalpoint(void *locale);

> +#define __CURRENT_LOCALE (locale_t)0

> +#ifdef __CYGWIN__


__CYGWIN__ is *never* _REENT_SMALL.  Just drop this.

> +#define _locale_wctomb(loc,r,buff,wc,ps) __utf8_wctomb(r,buff,wc,ps)

> +#define _locale_mbtowc(loc,r,buff,wc,ps) __utf8_mbtowc(r,buff,wc,ps)

> +#define __WCTOMB __utf8_wctomb

> +#define __MBTOWC __utf8_mbtowc

> +#else

> +#define _locale_wctomb(loc,r,buff,wc,ps) __ascii_wctomb(r,buff,wc,ps)

> +#define _locale_mbtowc(loc,r,buff,wc,ps) __ascii_mbtowc(r,buff,wc,ps)

> +#define __WCTOMB __ascii_wctomb

> +#define __MBTOWC __ascii_mbtowc


But this is wrong.  You're basically switching off *any* multibyte
charset handling on small targets.  But newlib supports UTF-8, EUCJP,
SJIS on small targets as well and did so since 2000 or so.

I don't think everybody will be happy to get this support disabled.

> +#ifdef _REENT_SMALL

> +  p->_errno = ENOMEM;


ENOSYS

I'd like to ask: Do you *really* want to get rid of __global_locale,
or are you just unhappy in terms of its size?  Note that I didn't 
add any extra locale info to targets not defining __HAVE_LOCALE_INFO__.
The only difference is that everything is now in one place and thus
takes memory even if only parts are used.  Let's take a closer look.
Assuming a 32 bit target:

struct __locale_t
{
  char                   categories[...];	// 224 bytes
  int                   (*wctomb)		//   4 bytes
  int                   (*mbtowc)		//   4 bytes
  int                    cjk_lang;		//   4 bytes
  char                  *ctype_ptr;		//   4 bytes
  struct lconv           lconv;			//  96 bytes
  char                   mb_cur_max[2];		//   2 bytes
  char                   ctype_codeset[];	//  32 bytes
  char                   message_codeset[];	//  32 bytes
};

Again, there's nothing in that struct which wasn't there before,
and especially codeset support was required for all targets.

The difference is that the categories and lconv only existed if
the application called setlocale or localeconv.

So, what about just moving the biggest offenders, categories and lconv,
out of the struct and make sure they only exist on a REENT_SMALL target
if it calls setlocale or localeconv?


Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat

Patch

diff --git a/newlib/libc/locale/duplocale.c b/newlib/libc/locale/duplocale.c
index d3e7d782e..432c512f1 100644
--- a/newlib/libc/locale/duplocale.c
+++ b/newlib/libc/locale/duplocale.c
@@ -35,6 +35,7 @@  PORTABILITY
 */
 
 #include <newlib.h>
+#include <errno.h>
 #include <reent.h>
 #include <stdlib.h>
 #include "setlocale.h"
@@ -42,6 +43,10 @@  PORTABILITY
 struct __locale_t *
 _duplocale_r (struct _reent *p, struct __locale_t *locobj)
 {
+#ifdef _REENT_SMALL
+  p->_errno = ENOMEM;
+  return (locale_t)0;
+#else
   struct __locale_t tmp_locale, *new_locale;
   int i;
 
@@ -91,6 +96,7 @@  error:
 
   return NULL;
 #endif /* _MB_CAPABLE */
+#endif /* _REENT_SMALL */
 }
 
 #ifndef _REENT_ONLY
diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c
index baa5451a6..cb3984dee 100644
--- a/newlib/libc/locale/locale.c
+++ b/newlib/libc/locale/locale.c
@@ -239,6 +239,7 @@  const struct __locale_t __C_locale =
 };
 #endif /* _MB_CAPABLE */
 
+#ifndef _REENT_SMALL
 struct __locale_t __global_locale =
 {
   { "C", "C", DEFAULT_LOCALE, "C", "C", "C", "C", },
@@ -278,6 +279,8 @@  struct __locale_t __global_locale =
   },
 #endif /* __HAVE_LOCALE_INFO__ */
 };
+#endif /* !_REENT_SMALL */
+
 
 #ifdef _MB_CAPABLE
 /* Renamed from current_locale_string to make clear this is only the
@@ -288,6 +291,17 @@  static char *currentlocale (void);
 
 #endif /* _MB_CAPABLE */
 
+#ifdef _REENT_SMALL
+const char *default_decimal_point = ".";
+
+const char *
+_locale_decimalpoint (void *locale)
+{
+  return default_decimal_point;
+}
+
+#endif
+
 char *
 _setlocale_r (struct _reent *p,
        int category,
@@ -879,7 +893,7 @@  restart:
   switch (category)
     {
     case LC_CTYPE:
-#ifndef __HAVE_LOCALE_INFO__
+#if !defined(__HAVE_LOCALE_INFO__) && !defined (_REENT_SMALL)
       strcpy (loc->ctype_codeset, charset);
       loc->mb_cur_max[0] = mbc_max;
 #endif
@@ -964,6 +978,7 @@  __get_locale_env (struct _reent *p, int category)
 }
 #endif /* _MB_CAPABLE */
 
+#ifndef _REENT_SMALL
 int
 __locale_mb_cur_max (void)
 {
@@ -973,17 +988,27 @@  __locale_mb_cur_max (void)
   return __get_current_locale ()->mb_cur_max[0];
 #endif
 }
+#endif /* !_REENT_SMALL */
 
 const char *
 __locale_ctype_ptr_l (struct __locale_t *locale)
 {
+#ifdef _REENT_SMALL
+  return DEFAULT_CTYPE_PTR;
+#else
   return locale->ctype_ptr;
+#endif
+
 }
 
 const char *
 __locale_ctype_ptr (void)
 {
+#ifdef _REENT_SMALL
+  return DEFAULT_CTYPE_PTR;
+#else
   return __get_current_locale ()->ctype_ptr;
+#endif
 }
 
 #ifndef _REENT_ONLY
diff --git a/newlib/libc/locale/localeconv.c b/newlib/libc/locale/localeconv.c
index 5f34a785f..ca132c5cd 100644
--- a/newlib/libc/locale/localeconv.c
+++ b/newlib/libc/locale/localeconv.c
@@ -2,6 +2,8 @@ 
 #include <reent.h>
 #include "setlocale.h"
 
+#ifndef _REENT_SMALL
+
 struct lconv *
 __localeconv_l (struct __locale_t *locale)
 {
@@ -65,4 +67,8 @@  localeconv (void)
 {
   return __localeconv_l (__get_current_locale ());
 }
+#endif /* _REENT_SMALL */
+
 #endif
+
+
diff --git a/newlib/libc/locale/newlocale.c b/newlib/libc/locale/newlocale.c
index c6c2a9ca9..a696cbc0c 100644
--- a/newlib/libc/locale/newlocale.c
+++ b/newlib/libc/locale/newlocale.c
@@ -86,6 +86,10 @@  struct __locale_t *
 _newlocale_r (struct _reent *p, int category_mask, const char *locale,
 	      struct __locale_t *base)
 {
+#ifdef _REENT_SMALL
+  p->_errno = ENOMEM;
+  return (locale_t)0;
+#else
 #ifndef _MB_CAPABLE
   return __get_C_locale ();
 #else /* _MB_CAPABLE */
@@ -213,6 +217,7 @@  error:
 
   return NULL;
 #endif /* _MB_CAPABLE */
+#endif /* _REENT_SMALL */
 }
 
 struct __locale_t *
diff --git a/newlib/libc/locale/nl_langinfo.c b/newlib/libc/locale/nl_langinfo.c
index eb984912f..882dc77b6 100644
--- a/newlib/libc/locale/nl_langinfo.c
+++ b/newlib/libc/locale/nl_langinfo.c
@@ -331,6 +331,7 @@  do_codeset:
 		break;
 	case CRNCYSTR:
 		ret = "";
+#ifndef _REENT_SMALL
 		cs = (char*) __get_monetary_locale (locale)->currency_symbol;
 		if (*cs != '\0') {
 			char pos = __localeconv_l (locale)->p_cs_precedes;
@@ -360,6 +361,7 @@  do_codeset:
 				}
 			}
 		}
+#endif /* !_REENT_SMALL */
 		break;
 	case D_MD_ORDER:        /* local extension */
 		ret = (char *) __get_time_locale (locale)->md_order;
@@ -370,6 +372,7 @@  do_codeset:
 		break;
 #endif
 	default:
+#ifndef _REENT_SMALL
 		/* Relies on the fact that LC_ALL is 0, and all other
 		   LC_ constants are in ascending order. */
 		if (item > NL_LOCALE_NAME(LC_ALL)
@@ -377,6 +380,7 @@  do_codeset:
 			return locale->categories[item
 						  - NL_LOCALE_NAME(LC_ALL)];
 		}
+#endif
 #ifdef __HAVE_LOCALE_INFO_EXTENDED__
 		if (item > _NL_LOCALE_EXTENDED_FIRST_ENTRY
 		    && item < _NL_LOCALE_EXTENDED_LAST_ENTRY) {
@@ -392,5 +396,9 @@  do_codeset:
 
 char *nl_langinfo (nl_item item)
 {
+#ifdef _REENT_SMALL
+  return nl_langinfo_l (item, NULL );
+#else
   return nl_langinfo_l (item, __get_current_locale ());
+#endif
 }
diff --git a/newlib/libc/locale/setlocale.h b/newlib/libc/locale/setlocale.h
index fe212c1ec..f2621b0b1 100644
--- a/newlib/libc/locale/setlocale.h
+++ b/newlib/libc/locale/setlocale.h
@@ -199,11 +199,14 @@  extern char *__loadlocale (struct __locale_t *, int, const char *);
 extern const char *__get_locale_env(struct _reent *, int);
 #endif /* _MB_CAPABLE */
 
-extern struct lconv *__localeconv_l (struct __locale_t *locale);
-
 extern size_t _wcsnrtombs_l (struct _reent *, char *, const wchar_t **,
 			     size_t, size_t, mbstate_t *, struct __locale_t *);
 
+
+#ifndef _REENT_SMALL
+
+extern struct lconv *__localeconv_l (struct __locale_t *locale);
+
 /* In POSIX terms the global locale is the process-wide locale.  Use this
    function to always refer to the global locale. */
 _ELIDABLE_INLINE struct __locale_t *
@@ -257,6 +260,7 @@  __get_current_collate_locale (void)
 	 __get_current_locale ()->lc_cat[LC_COLLATE].ptr;
 }
 #endif
+#endif /* !_REENT_SMALL */
 
 #ifdef __HAVE_LOCALE_INFO__
 _ELIDABLE_INLINE const struct lc_ctype_T *
@@ -276,14 +280,18 @@  __get_current_ctype_locale (void)
 _ELIDABLE_INLINE int
 __locale_mb_cur_max_l (struct __locale_t *locale)
 {
+#ifdef _REENT_SMALL
+	return _MB_LEN_MAX;
+#else
 #ifdef __HAVE_LOCALE_INFO__
   return __get_ctype_locale (locale)->mb_cur_max[0];
 #else
   return locale->mb_cur_max[0];
 #endif
+#endif
 }
 
-#ifdef __HAVE_LOCALE_INFO__
+#if defined (__HAVE_LOCALE_INFO__) && !defined(_REENT_SMALL)
 _ELIDABLE_INLINE const struct lc_monetary_T *
 __get_monetary_locale (struct __locale_t *locale)
 {
@@ -385,6 +393,7 @@  __get_current_messages_locale (void)
 }
 #endif /* !__HAVE_LOCALE_INFO__ */
 
+#ifndef _REENT_SMALL
 _ELIDABLE_INLINE const char *
 __locale_charset (struct __locale_t *locale)
 {
@@ -438,14 +447,31 @@  int __collate_load_locale (struct __locale_t *, const char *, void *,
 extern void __set_charset_from_locale (const char *locale, char *charset);
 extern char *__set_locale_from_locale_alias (const char *, char *);
 #endif
+#endif	/* !_REENT_SMALL */
 
+#ifdef _REENT_SMALL
+const char *_locale_decimalpoint(void *locale);
+#define __CURRENT_LOCALE (locale_t)0
+#ifdef __CYGWIN__
+#define _locale_wctomb(loc,r,buff,wc,ps) __utf8_wctomb(r,buff,wc,ps)
+#define _locale_mbtowc(loc,r,buff,wc,ps) __utf8_mbtowc(r,buff,wc,ps)
+#define __WCTOMB __utf8_wctomb
+#define __MBTOWC __utf8_mbtowc
+#else
+#define _locale_wctomb(loc,r,buff,wc,ps) __ascii_wctomb(r,buff,wc,ps)
+#define _locale_mbtowc(loc,r,buff,wc,ps) __ascii_mbtowc(r,buff,wc,ps)
+#define __WCTOMB __ascii_wctomb
+#define __MBTOWC __ascii_mbtowc
+#endif /* __CYGWIN__ */
+#else /*  _REENT_SMALL */
 #define _locale_decimalpoint(loc) (__localeconv_l(loc))->decimal_point
 #define _locale_wctomb(loc,r,buff,wc,ps) (loc)->wctomb(r,buff,wc,ps)
 #define _locale_mbtowc(loc,r,buff,wc,ps) (loc)->mbtowc(r,buff,wc,ps)
 #define __WCTOMB (__get_current_locale ()->wctomb)
 #define __MBTOWC (__get_current_locale ()->mbtowc)
-
 #define __CURRENT_LOCALE  __get_current_locale()
+#endif /*  _REENT_SMALL */
+
 __END_DECLS
 
 #endif /* !_SETLOCALE_H_ */
diff --git a/newlib/libc/locale/uselocale.c b/newlib/libc/locale/uselocale.c
index 83ebcdd19..a6eb197b5 100644
--- a/newlib/libc/locale/uselocale.c
+++ b/newlib/libc/locale/uselocale.c
@@ -51,6 +51,7 @@  PORTABILITY
 */
 
 #include <newlib.h>
+#include <errno.h>
 #include <reent.h>
 #include <stdlib.h>
 #include "setlocale.h"
@@ -58,6 +59,10 @@  PORTABILITY
 struct __locale_t *
 _uselocale_r (struct _reent *p, struct __locale_t *newloc)
 {
+#ifdef _REENT_SMALL
+  p->_errno = ENOMEM;
+  return (locale_t)0;
+#else
   struct __locale_t *current_locale;
 
   current_locale = __get_locale_r (p);
@@ -68,6 +73,7 @@  _uselocale_r (struct _reent *p, struct __locale_t *newloc)
   else if (newloc)
     p->_locale = newloc;
   return current_locale;
+#endif
 }
 
 #ifndef _REENT_ONLY