Add optional arguments LOCALE and IGNORE-CASE to collation functions.

* fns.c (Fstring_collate_lessp, Fstring_collate_equalp):
Add optional arguments LOCALE and IGNORE-CASE.

* lisp.h (str_collate): Adapt argument list.

* sysdep.c (LC_CTYPE, LC_CTYPE_MASK, towlower_l):
Define substitutes for platforms that lack them.
(str_collate): Add arguments locale and ignore_case.
This commit is contained in:
Michael Albinus 2014-08-29 19:57:36 +02:00
parent 55412cd901
commit b579ae53e4
4 changed files with 110 additions and 37 deletions

View file

@ -1,3 +1,14 @@
2014-08-29 Michael Albinus <michael.albinus@gmx.de>
* sysdep.c (LC_CTYPE, LC_CTYPE_MASK, towlower_l):
Define substitutes for platforms that lack them.
(str_collate): Add arguments locale and ignore_case.
* fns.c (Fstring_collate_lessp, Fstring_collate_equalp):
Add optional arguments LOCALE and IGNORE-CASE.
* lisp.h (str_collate): Adapt argument list.
2014-08-29 Dmitry Antipov <dmantipov@yandex.ru>
Add vectors support to Fsort.

View file

@ -344,25 +344,28 @@ Symbols are also allowed; their print names are used instead. */)
return i1 < SCHARS (s2) ? Qt : Qnil;
}
DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 2, 0,
DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 4, 0,
doc: /* Return t if first arg string is less than second in collation order.
Case is significant. Symbols are also allowed; their print names are
used instead.
Symbols are also allowed; their print names are used instead.
This function obeys the conventions for collation order in your
locale settings. For example, punctuation and whitespace characters
are considered less significant for sorting.
are considered less significant for sorting:
\(sort '\("11" "12" "1 1" "1 2" "1.1" "1.2") 'string-collate-lessp)
=> \("11" "1 1" "1.1" "12" "1 2" "1.2")
If your system does not support a locale environment, this function
behaves like `string-lessp'.
The optional argument LOCALE, a string, overrides the setting of your
current locale identifier for collation. The value is system
dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems,
while it would be \"English_USA.1252\" on MS Windows systems.
If the environment variable \"LC_COLLATE\" is set in `process-environment',
it overrides the setting of your current locale. */)
(Lisp_Object s1, Lisp_Object s2)
If IGNORE-CASE is non-nil, characters are converted to lower-case
before comparing them.
If your system does not support a locale environment, this function
behaves like `string-lessp'. */)
(Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case)
{
#if defined __STDC_ISO_10646__ || defined WINDOWSNT
/* Check parameters. */
@ -372,34 +375,39 @@ it overrides the setting of your current locale. */)
s2 = SYMBOL_NAME (s2);
CHECK_STRING (s1);
CHECK_STRING (s2);
if (!NILP (locale))
CHECK_STRING (locale);
return (str_collate (s1, s2) < 0) ? Qt : Qnil;
return (str_collate (s1, s2, locale, ignore_case) < 0) ? Qt : Qnil;
#else /* !__STDC_ISO_10646__, !WINDOWSNT */
return Fstring_lessp (s1, s2);
#endif /* !__STDC_ISO_10646__, !WINDOWSNT */
}
DEFUN ("string-collate-equalp", Fstring_collate_equalp, Sstring_collate_equalp, 2, 2, 0,
DEFUN ("string-collate-equalp", Fstring_collate_equalp, Sstring_collate_equalp, 2, 4, 0,
doc: /* Return t if two strings have identical contents.
Case is significant. Symbols are also allowed; their print names are
used instead.
Symbols are also allowed; their print names are used instead.
This function obeys the conventions for collation order in your locale
settings. For example, characters with different coding points but
the same meaning are considered as equal, like different grave accent
unicode characters.
unicode characters:
\(string-collate-equalp \(string ?\\uFF40) \(string ?\\u1FEF))
=> t
If your system does not support a locale environment, this function
behaves like `string-equal'.
The optional argument LOCALE, a string, overrides the setting of your
current locale identifier for collation. The value is system
dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems,
while it would be \"English_USA.1252\" on MS Windows systems.
If the environment variable \"LC_COLLATE\" is set in `process-environment',
it overrides the setting of your current locale. */)
(Lisp_Object s1, Lisp_Object s2)
If IGNORE-CASE is non-nil, characters are converted to lower-case
before comparing them.
If your system does not support a locale environment, this function
behaves like `string-equal'. */)
(Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case)
{
#if defined __STDC_ISO_10646__ || defined WINDOWSNT
/* Check parameters. */
@ -409,8 +417,10 @@ it overrides the setting of your current locale. */)
s2 = SYMBOL_NAME (s2);
CHECK_STRING (s1);
CHECK_STRING (s2);
if (!NILP (locale))
CHECK_STRING (locale);
return (str_collate (s1, s2) == 0) ? Qt : Qnil;
return (str_collate (s1, s2, locale, ignore_case) == 0) ? Qt : Qnil;
#else /* !__STDC_ISO_10646__, !WINDOWSNT */
return Fstring_equal (s1, s2);

View file

@ -4301,7 +4301,7 @@ extern void lock_file (Lisp_Object);
extern void unlock_file (Lisp_Object);
extern void unlock_buffer (struct buffer *);
extern void syms_of_filelock (void);
extern int str_collate (Lisp_Object, Lisp_Object);
extern int str_collate (Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object);
/* Defined in sound.c. */
extern void syms_of_sound (void);

View file

@ -3605,6 +3605,7 @@ system_process_attributes (Lisp_Object pid)
#ifdef __STDC_ISO_10646__
# include <wchar.h>
# include <wctype.h>
# if defined HAVE_NEWLOCALE || defined HAVE_SETLOCALE
# include <locale.h>
@ -3615,15 +3616,24 @@ system_process_attributes (Lisp_Object pid)
# ifndef LC_COLLATE_MASK
# define LC_COLLATE_MASK 0
# endif
# ifndef LC_CTYPE
# define LC_CTYPE 0
# endif
# ifndef LC_CTYPE_MASK
# define LC_CTYPE_MASK 0
# endif
# ifndef HAVE_NEWLOCALE
# undef freelocale
# undef locale_t
# undef newlocale
# undef wcscoll_l
# undef towlower_l
# define freelocale emacs_freelocale
# define locale_t emacs_locale_t
# define newlocale emacs_newlocale
# define wcscoll_l emacs_wcscoll_l
# define towlower_l emacs_towlower_l
typedef char const *locale_t;
@ -3683,15 +3693,37 @@ wcscoll_l (wchar_t const *a, wchar_t const *b, locale_t loc)
errno = err;
return result;
}
static wint_t
towlower_l (wint_t wc, locale_t loc)
{
wint_t result = wc;
char *oldloc = emacs_setlocale (LC_CTYPE, NULL);
if (oldloc)
{
USE_SAFE_ALLOCA;
char *oldcopy = SAFE_ALLOCA (strlen (oldloc) + 1);
strcpy (oldcopy, oldloc);
if (emacs_setlocale (LC_CTYPE, loc))
{
result = towlower (wc);
emacs_setlocale (LC_COLLATE, oldcopy);
}
SAFE_FREE ();
}
return result;
}
# endif
int
str_collate (Lisp_Object s1, Lisp_Object s2)
str_collate (Lisp_Object s1, Lisp_Object s2,
Lisp_Object locale, Lisp_Object ignore_case)
{
int res, err;
ptrdiff_t len, i, i_byte;
wchar_t *p1, *p2;
Lisp_Object lc_collate;
USE_SAFE_ALLOCA;
@ -3708,22 +3740,43 @@ str_collate (Lisp_Object s1, Lisp_Object s2)
FETCH_STRING_CHAR_ADVANCE (*(p2+i-1), s2, i, i_byte);
*(p2+len) = 0;
lc_collate =
Fgetenv_internal (build_string ("LC_COLLATE"), Vprocess_environment);
if (STRINGP (lc_collate))
if (STRINGP (locale))
{
locale_t loc = newlocale (LC_COLLATE_MASK, SSDATA (lc_collate), 0);
locale_t loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK,
SSDATA (locale), 0);
if (!loc)
error ("Wrong locale: %s", strerror (errno));
errno = 0;
res = wcscoll_l (p1, p2, loc);
if (! NILP (ignore_case))
for (int i = 1; i < 3; i++)
{
wchar_t *p = (i == 1) ? p1 : p2;
for (; *p; p++)
{
*p = towlower_l (*p, loc);
if (errno)
break;
}
if (errno)
break;
}
if (! errno)
res = wcscoll_l (p1, p2, loc);
err = errno;
freelocale (loc);
}
else
{
errno = 0;
if (! NILP (ignore_case))
for (int i = 1; i < 3; i++)
{
wchar_t *p = (i == 1) ? p1 : p2;
for (; *p; p++)
*p = towlower (*p);
}
res = wcscoll (p1, p2);
err = errno;
}
@ -3733,15 +3786,14 @@ str_collate (Lisp_Object s1, Lisp_Object s2)
SAFE_FREE ();
return res;
}
#endif /* __STDC_ISO_10646__ */
#endif /* __STDC_ISO_10646__ */
#ifdef WINDOWSNT
int
str_collate (Lisp_Object s1, Lisp_Object s2)
{
Lisp_Object lc_collate =
Fgetenv_internal (build_string ("LC_COLLATE"), Vprocess_environment);
char *loc = STRINGP (lc_collate) ? SSDATA (lc_collate) : NULL;
str_collate (Lisp_Object s1, Lisp_Object s2,
{ Lisp_Object locale, Lisp_Object ignore_case)
char *loc = STRINGP (locale) ? SSDATA (locale) : NULL;
return w32_compare_strings (SDATA (s1), SDATA (s2), loc);
}