Implement case-insensitive and Unicode-compliant collation on MS-Windows.
src/fns.c (Fstring_collate_lessp, Fstring_collate_equalp): Doc fix. src/w32proc.c (w32_compare_strings): Accept additional argument IGNORE_CASE. Set up the flags for CompareStringW to ignore case if requested. If w32-collate-ignore-punctuation is non-nil, add NORM_IGNORESYMBOLS to the flags. (LINGUISTIC_IGNORECASE): Define if not already defined. (syms_of_ntproc) <Vw32_collate_ignore_punctuation>: New variable. src/sysdep.c (str_collate) [WINDOWSNT]: Adapt to the interface change. src/w32.h: Adjust prototype of w32_compare_strings. etc/NEWS: Mention w32-collate-ignore-punctuation. Fixes: debbugs:18051
This commit is contained in:
parent
2ae366c73e
commit
21ba51de76
7 changed files with 80 additions and 14 deletions
|
@ -1,3 +1,7 @@
|
|||
2014-08-29 Eli Zaretskii <eliz@gnu.org>
|
||||
|
||||
* NEWS: Mention w32-collate-ignore-punctuation.
|
||||
|
||||
2014-08-29 Dmitry Antipov <dmantipov@yandex.ru>
|
||||
|
||||
* NEWS: Mention that `sort' can handle vectors.
|
||||
|
|
7
etc/NEWS
7
etc/NEWS
|
@ -72,6 +72,13 @@ environment. For the time being this is implemented for modern POSIX
|
|||
systems and for MS-Windows, for other systems they fall back to their
|
||||
counterparts `string-lessp' and `string-equal'.
|
||||
|
||||
*** The MS-Windows specific variable `w32-collate-ignore-punctuation',
|
||||
if set to a non-nil value, causes the above 2 functions to ignore
|
||||
symbol and punctuation characters when collating strings. This
|
||||
emulates the behavior of modern POSIX platforms when the locale's
|
||||
codeset is "UTF-8" (as in "en_US.UTF-8"). This is needed because
|
||||
MS-Windows doesn't support UTF-8 as codeset in its locales.
|
||||
|
||||
|
||||
* Editing Changes in Emacs 24.5
|
||||
|
||||
|
|
|
@ -1,3 +1,17 @@
|
|||
2014-08-29 Eli Zaretskii <eliz@gnu.org>
|
||||
|
||||
* fns.c (Fstring_collate_lessp, Fstring_collate_equalp): Doc fix.
|
||||
|
||||
* w32proc.c (w32_compare_strings): Accept additional argument
|
||||
IGNORE_CASE. Set up the flags for CompareStringW to ignore case
|
||||
if requested. If w32-collate-ignore-punctuation is non-nil, add
|
||||
NORM_IGNORESYMBOLS to the flags.
|
||||
(LINGUISTIC_IGNORECASE): Define if not already defined.
|
||||
(syms_of_ntproc) <Vw32_collate_ignore_punctuation>: New variable.
|
||||
|
||||
* sysdep.c (str_collate) [WINDOWSNT]: Adapt to the interface
|
||||
change.
|
||||
|
||||
2014-08-29 Michael Albinus <michael.albinus@gmx.de>
|
||||
|
||||
* sysdep.c (LC_CTYPE, LC_CTYPE_MASK, towlower_l):
|
||||
|
|
23
src/fns.c
23
src/fns.c
|
@ -350,7 +350,7 @@ Symbols are also allowed; their print names are used instead.
|
|||
|
||||
This function obeys the conventions for collation order in your
|
||||
locale settings. For example, punctuation and whitespace characters
|
||||
are considered less significant for sorting:
|
||||
might be considered less significant for sorting:
|
||||
|
||||
\(sort '\("11" "12" "1 1" "1 2" "1.1" "1.2") 'string-collate-lessp)
|
||||
=> \("11" "1 1" "1.1" "12" "1 2" "1.2")
|
||||
|
@ -358,11 +358,15 @@ are considered less significant for sorting:
|
|||
The optional argument LOCALE, a string, overrides the setting of your
|
||||
current locale identifier for collation. The value is system
|
||||
dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems,
|
||||
while it would be \"English_USA.1252\" on MS Windows systems.
|
||||
while it would be, e.g., \"enu_USA.1252\" on MS-Windows systems.
|
||||
|
||||
If IGNORE-CASE is non-nil, characters are converted to lower-case
|
||||
before comparing them.
|
||||
|
||||
To emulate Unicode-compliant collation on MS-Windows systems,
|
||||
bind `w32-collate-ignore-punctuation' to a non-nil value, since
|
||||
the codeset part of the locale cannot be \"UTF-8\" on MS-Windows.
|
||||
|
||||
If your system does not support a locale environment, this function
|
||||
behaves like `string-lessp'. */)
|
||||
(Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case)
|
||||
|
@ -391,8 +395,8 @@ Symbols are also allowed; their print names are used instead.
|
|||
|
||||
This function obeys the conventions for collation order in your locale
|
||||
settings.  For example, characters with different code points but
|
||||
the same meaning are considered as equal, like different grave accent
|
||||
unicode characters:
|
||||
the same meaning might be considered as equal, like different grave
|
||||
accent Unicode characters:
|
||||
|
||||
\(string-collate-equalp \(string ?\\uFF40) \(string ?\\u1FEF))
|
||||
=> t
|
||||
|
@ -400,13 +404,20 @@ unicode characters:
|
|||
The optional argument LOCALE, a string, overrides the setting of your
|
||||
current locale identifier for collation. The value is system
|
||||
dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems,
|
||||
while it would be \"English_USA.1252\" on MS Windows systems.
|
||||
while it would be, e.g., \"enu_USA.1252\" on MS-Windows systems.
|
||||
|
||||
If IGNORE-CASE is non-nil, characters are converted to lower-case
|
||||
before comparing them.
|
||||
|
||||
To emulate Unicode-compliant collation on MS-Windows systems,
|
||||
bind `w32-collate-ignore-punctuation' to a non-nil value, since
|
||||
the codeset part of the locale cannot be \"UTF-8\" on MS-Windows.
|
||||
|
||||
If your system does not support a locale environment, this function
|
||||
behaves like `string-equal'. */)
|
||||
behaves like `string-equal'.
|
||||
|
||||
Do NOT use this function to compare file names for equality, only
|
||||
for sorting them. */)
|
||||
(Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case)
|
||||
{
|
||||
#if defined __STDC_ISO_10646__ || defined WINDOWSNT
|
||||
|
|
|
@ -3796,6 +3796,6 @@ str_collate (Lisp_Object s1, Lisp_Object s2,
|
|||
|
||||
char *loc = STRINGP (locale) ? SSDATA (locale) : NULL;
|
||||
|
||||
return w32_compare_strings (SDATA (s1), SDATA (s2), loc);
|
||||
return w32_compare_strings (SDATA (s1), SDATA (s2), loc, !NILP (ignore_case));
|
||||
}
|
||||
#endif /* WINDOWSNT */
|
||||
|
|
|
@ -211,7 +211,7 @@ extern int w32_memory_info (unsigned long long *, unsigned long long *,
|
|||
unsigned long long *, unsigned long long *);
|
||||
|
||||
/* Compare 2 UTF-8 strings in locale-dependent fashion. */
|
||||
extern int w32_compare_strings (const char *, const char *, char *);
|
||||
extern int w32_compare_strings (const char *, const char *, char *, int);
|
||||
|
||||
#ifdef HAVE_GNUTLS
|
||||
#include <gnutls/gnutls.h>
|
||||
|
|
|
@ -3213,15 +3213,20 @@ get_lcid (const char *locale_name)
|
|||
#ifndef _NSLCMPERROR
|
||||
# define _NSLCMPERROR INT_MAX
|
||||
#endif
|
||||
#ifndef LINGUISTIC_IGNORECASE
|
||||
# define LINGUISTIC_IGNORECASE 0x00000010
|
||||
#endif
|
||||
|
||||
int
|
||||
w32_compare_strings (const char *s1, const char *s2, char *locname)
|
||||
w32_compare_strings (const char *s1, const char *s2, char *locname,
|
||||
int ignore_case)
|
||||
{
|
||||
LCID lcid = GetThreadLocale ();
|
||||
wchar_t *string1_w, *string2_w;
|
||||
int val, needed;
|
||||
extern BOOL g_b_init_compare_string_w;
|
||||
static int (WINAPI *pCompareStringW)(LCID, DWORD, LPCWSTR, int, LPCWSTR, int);
|
||||
DWORD flags = 0;
|
||||
|
||||
USE_SAFE_ALLOCA;
|
||||
|
||||
|
@ -3284,11 +3289,22 @@ w32_compare_strings (const char *s1, const char *s2, char *locname)
|
|||
lcid = new_lcid;
|
||||
}
|
||||
|
||||
/* FIXME: Need a way to control the FLAGS argument, perhaps via the
|
||||
CODESET part of LOCNAME. In particular, ls-lisp will want
|
||||
NORM_IGNORESYMBOLS and sometimes LINGUISTIC_IGNORECASE or
|
||||
NORM_IGNORECASE. */
|
||||
val = pCompareStringW (lcid, 0, string1_w, -1, string2_w, -1);
|
||||
if (ignore_case)
|
||||
{
|
||||
/* NORM_IGNORECASE ignores any tertiary distinction, not just
|
||||
case variants. LINGUISTIC_IGNORECASE is more selective, and
|
||||
is sensitive to the locale's language, but it is not
|
||||
available before Vista. */
|
||||
if (w32_major_version >= 6)
|
||||
flags |= LINGUISTIC_IGNORECASE;
|
||||
else
|
||||
flags |= NORM_IGNORECASE;
|
||||
}
|
||||
/* This approximates what glibc collation functions do when the
|
||||
locale's codeset is UTF-8. */
|
||||
if (!NILP (Vw32_collate_ignore_punctuation))
|
||||
flags |= NORM_IGNORESYMBOLS;
|
||||
val = pCompareStringW (lcid, flags, string1_w, -1, string2_w, -1);
|
||||
SAFE_FREE ();
|
||||
if (!val)
|
||||
{
|
||||
|
@ -3408,6 +3424,20 @@ Any other non-nil value means do this even on remote and removable drives
|
|||
where the performance impact may be noticeable even on modern hardware. */);
|
||||
Vw32_get_true_file_attributes = Qlocal;
|
||||
|
||||
DEFVAR_LISP ("w32-collate-ignore-punctuation",
|
||||
Vw32_collate_ignore_punctuation,
|
||||
doc: /* Non-nil causes string collation functions to ignore punctuation on MS-Windows.
|
||||
On Posix platforms, `string-collate-lessp' and `string-collate-equalp'
|
||||
ignore punctuation characters when they compare strings, if the
|
||||
locale's codeset is UTF-8, as in \"en_US.UTF-8\". Binding this option
|
||||
to a non-nil value will achieve a similar effect on MS-Windows, where
|
||||
locales with UTF-8 codeset are not supported.
|
||||
|
||||
Note that setting this to non-nil will also ignore blanks and symbols
|
||||
in the strings. So do NOT use this option when comparing file names
|
||||
for equality, only when you need to sort them. */);
|
||||
Vw32_collate_ignore_punctuation = Qnil;
|
||||
|
||||
staticpro (&Vw32_valid_locale_ids);
|
||||
staticpro (&Vw32_valid_codepages);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue