Blame - yocto-poky/meta/recipes-core/glibc/glibc/strcoll-Remove-incorrect-STRDIFF-based-optimization-.patch - openbmc/openbmc

blob: 8ce255f11070ade2d4c97e0ade86f8afa47dd22b [file] [log] [blame]

Patrick Williams	f1e5d69	2016-03-30 15:21:19 -0500	[diff] [blame]	1	Upstream-Status: Backport
				2
				3	Signed-off-by: Li Xin <lixin.fnst@cn.fujitsu.com>
				4
				5	From https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=6c84109cfa26f35c3dfed3acb97d347361bd5849
				6	Author: Carlos O'Donell <carlos@systemhalted.org>
				7	Date: Thu Oct 8 16:34:53 2015 -0400
				8
				9	strcoll: Remove incorrect STRDIFF-based optimization (Bug 18589).
				10
				11	The optimization introduced in commit
				12	f13c2a8dff2329c6692a80176262ceaaf8a6f74e causes regressions in
				13	sorting for languages that have digraphs that change sort order, like
				14	cs_CZ which sorts ch between h and i.
				15
				16	My analysis shows the fast-forwarding optimization in STRCOLL advances
				17	through a digraph while possibly stopping in the middle, which results
				18	in a subsequent skipping of the digraph and incorrect sorting. The
				19	optimization is incorrect as implemented and because of that I'm
				20	removing it for 2.23, and I will also commit this fix for 2.22 where
				21	it was originally introduced.
				22
				23	This patch reverts the optimization and introduces a new bug-strcoll2.c
				24	regression test that tests both cs_CZ.UTF-8 and da_DK.ISO-8859-1 and
				25	ensures they sort one digraph each correctly. The optimization can't be
				26	applied without regressing this test.
				27
				28	Checked on x86_64, bug-strcoll2.c fails without this patch and passes
				29	after. This will also get a fix on 2.22 which has the same bug.
				30
				31	(cherry picked from commit 87701a58e291bd7ac3b407d10a829dac52c9c16e)
				32	---
				33	locale/C-collate.c | 4 +-
				34	locale/categories.def | 1 -
				35	locale/langinfo.h | 1 -
				36	locale/localeinfo.h | 7 ----
				37	locale/programs/ld-collate.c | 9 -----
				38	string/bug-strcoll2.c | 95 ++++++++++++++++++++++++++++++++++++++++++++
				39	string/strcoll_l.c | 39 +-----------------
				40	wcsmbs/wcscoll_l.c | 1 -
				41	8 files changed, 98 insertions(+), 59 deletions(-)
				42	create mode 100644 string/bug-strcoll2.c
				43
				44	diff --git a/locale/C-collate.c b/locale/C-collate.c
				45	index d7f3c55..06dfdfa 100644
				46	--- a/locale/C-collate.c
				47	+++ b/locale/C-collate.c
				48	@@ -144,8 +144,6 @@ const struct __locale_data _nl_C_LC_COLLATE attribute_hidden =
				49	/* _NL_COLLATE_COLLSEQWC */
				50	{ .string = (const char *) collseqwc },
				51	/* _NL_COLLATE_CODESET */
				52	- { .string = _nl_C_codeset },
				53	- /* _NL_COLLATE_ENCODING_TYPE */
				54	- { .word = __cet_8bit }
				55	+ { .string = _nl_C_codeset }
				56	}
				57	};
				58	diff --git a/locale/categories.def b/locale/categories.def
				59	index 045489d..a8dda53 100644
				60	--- a/locale/categories.def
				61	+++ b/locale/categories.def
				62	@@ -58,7 +58,6 @@ DEFINE_CATEGORY
				63	DEFINE_ELEMENT (_NL_COLLATE_COLLSEQMB, "collate-collseqmb", std, wstring)
				64	DEFINE_ELEMENT (_NL_COLLATE_COLLSEQWC, "collate-collseqwc", std, wstring)
				65	DEFINE_ELEMENT (_NL_COLLATE_CODESET, "collate-codeset", std, string)
				66	- DEFINE_ELEMENT (_NL_COLLATE_ENCODING_TYPE, "collate-encoding-type", std, word)
				67	), NO_POSTLOAD)
				68
				69
				70	diff --git a/locale/langinfo.h b/locale/langinfo.h
				71	index ffc5c7f..a565d9d 100644
				72	--- a/locale/langinfo.h
				73	+++ b/locale/langinfo.h
				74	@@ -255,7 +255,6 @@ enum
				75	_NL_COLLATE_COLLSEQMB,
				76	_NL_COLLATE_COLLSEQWC,
				77	_NL_COLLATE_CODESET,
				78	- _NL_COLLATE_ENCODING_TYPE,
				79	_NL_NUM_LC_COLLATE,
				80
				81	/* LC_CTYPE category: character classification.
				82	diff --git a/locale/localeinfo.h b/locale/localeinfo.h
				83	index a7516c0..c076d8e 100644
				84	--- a/locale/localeinfo.h
				85	+++ b/locale/localeinfo.h
				86	@@ -110,13 +110,6 @@ enum coll_sort_rule
				87	sort_mask
				88	};
				89
				90	-/* Collation encoding type. */
				91	-enum collation_encoding_type
				92	-{
				93	- __cet_other,
				94	- __cet_8bit,
				95	- __cet_utf8
				96	-};
				97
				98	/* We can map the types of the entries into a few categories. */
				99	enum value_type
				100	diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
				101	index 16e9039..3c88c6d 100644
				102	--- a/locale/programs/ld-collate.c
				103	+++ b/locale/programs/ld-collate.c
				104	@@ -32,7 +32,6 @@
				105	#include "linereader.h"
				106	#include "locfile.h"
				107	#include "elem-hash.h"
				108	-#include "../localeinfo.h"
				109
				110	/* Uncomment the following line in the production version. */
				111	/* #define NDEBUG 1 */
				112	@@ -2130,8 +2129,6 @@ collate_output (struct localedef_t locale, const struct charmap_t charmap,
				113	/* The words have to be handled specially. */
				114	if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
				115	add_locale_uint32 (&file, 0);
				116	- else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_ENCODING_TYPE))
				117	- add_locale_uint32 (&file, __cet_other);
				118	else
				119	add_locale_empty (&file);
				120	}
				121	@@ -2495,12 +2492,6 @@ collate_output (struct localedef_t locale, const struct charmap_t charmap,
				122	add_locale_raw_data (&file, collate->mbseqorder, 256);
				123	add_locale_collseq_table (&file, &collate->wcseqorder);
				124	add_locale_string (&file, charmap->code_set_name);
				125	- if (strcmp (charmap->code_set_name, "UTF-8") == 0)
				126	- add_locale_uint32 (&file, __cet_utf8);
				127	- else if (charmap->mb_cur_max == 1)
				128	- add_locale_uint32 (&file, __cet_8bit);
				129	- else
				130	- add_locale_uint32 (&file, __cet_other);
				131	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
				132
				133	obstack_free (&weightpool, NULL);
				134	diff --git a/string/bug-strcoll2.c b/string/bug-strcoll2.c
				135	new file mode 100644
				136	index 0000000..950b090
				137	--- /dev/null
				138	+++ b/string/bug-strcoll2.c
				139	@@ -0,0 +1,95 @@
				140	+/* Bug 18589: sort-test.sh fails at random.
				141	+ * Copyright (C) 1998-2015 Free Software Foundation, Inc.
				142	+ * This file is part of the GNU C Library.
				143	+ * Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
				144	+ *
				145	+ * The GNU C Library is free software; you can redistribute it and/or
				146	+ * modify it under the terms of the GNU Lesser General Public
				147	+ * License as published by the Free Software Foundation; either
				148	+ * version 2.1 of the License, or (at your option) any later version.
				149	+ *
				150	+ * The GNU C Library is distributed in the hope that it will be useful,
				151	+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
				152	+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				153	+ * Lesser General Public License for more details.
				154	+ *
				155	+ * You should have received a copy of the GNU Lesser General Public
				156	+ * License along with the GNU C Library; if not, see
				157	+ * <http://www.gnu.org/licenses/>. */
				158	+
				159	+#include <stdio.h>
				160	+#include <string.h>
				161	+#include <locale.h>
				162	+
				163	+/* An incorrect strcoll optimization resulted in incorrect
				164	+ * results from strcoll for cs_CZ and da_DK. */
				165	+
				166	+int
				167	+test_cs_CZ (void)
				168	+{
				169	+ const char t1[] = "config";
				170	+ const char t2[] = "choose";
				171	+ if (setlocale (LC_ALL, "cs_CZ.UTF-8") == NULL)
				172	+ {
				173	+ perror ("setlocale");
				174	+ return 1;
				175	+ }
				176	+ /* In Czech the digraph ch sorts after c, therefore we expect
				177	+ * config to sort before choose. */
				178	+ int a = strcoll (t1, t2);
				179	+ int b = strcoll (t2, t1);
				180	+ printf ("strcoll (\"%s\", \"%s\") = %d\n", t1, t2, a);
				181	+ printf ("strcoll (\"%s\", \"%s\") = %d\n", t2, t1, b);
				182	+ if (a < 0 && b > 0)
				183	+ {
				184	+ puts ("PASS: config < choose");
				185	+ return 0;
				186	+ }
				187	+ else
				188	+ {
				189	+ puts ("FAIL: Wrong sorting in cz_CZ.UTF-8.");
				190	+ return 1;
				191	+ }
				192	+}
				193	+
				194	+int
				195	+test_da_DK (void)
				196	+{
				197	+ const char t1[] = "AS";
				198	+ const char t2[] = "AA";
				199	+ if (setlocale (LC_ALL, "da_DK.ISO-8859-1") == NULL)
				200	+ {
				201	+ perror ("setlocale");
				202	+ return 1;
				203	+ }
				204	+ /* AA should be treated as the last letter of the Danish alphabet,
				205	+ * hence sorting after AS. */
				206	+ int a = strcoll (t1, t2);
				207	+ int b = strcoll (t2, t1);
				208	+ printf ("strcoll (\"%s\", \"%s\") = %d\n", t1, t2, a);
				209	+ printf ("strcoll (\"%s\", \"%s\") = %d\n", t2, t1, b);
				210	+ if (a < 0 && b > 0)
				211	+ {
				212	+ puts ("PASS: AS < AA");
				213	+ return 0;
				214	+ }
				215	+ else
				216	+ {
				217	+ puts ("FAIL: Wrong sorting in da_DK.ISO-8859-1");
				218	+ return 1;
				219	+ }
				220	+}
				221	+
				222	+static int
				223	+do_test (void)
				224	+{
				225	+ int err = 0;
				226	+ err \|= test_cs_CZ ();
				227	+ err \|= test_da_DK ();
				228	+ return err;
				229	+}
				230	+
				231	+#define TEST_FUNCTION do_test ()
				232	+#include "../test-skeleton.c"
				233	+
				234	+
				235	diff --git a/string/strcoll_l.c b/string/strcoll_l.c
				236	index b36b18c..a18b65e 100644
				237	--- a/string/strcoll_l.c
				238	+++ b/string/strcoll_l.c
				239	@@ -30,7 +30,6 @@
				240	# define STRING_TYPE char
				241	# define USTRING_TYPE unsigned char
				242	# define STRCOLL __strcoll_l
				243	-# define STRDIFF __strdiff
				244	# define STRCMP strcmp
				245	# define WEIGHT_H "../locale/weight.h"
				246	# define SUFFIX MB
				247	@@ -43,19 +42,6 @@
				248	#include "../locale/localeinfo.h"
				249	#include WEIGHT_H
				250
				251	-#define MASK_UTF8_7BIT (1 << 7)
				252	-#define MASK_UTF8_START (3 << 6)
				253	-
				254	-size_t
				255	-STRDIFF (const STRING_TYPE s, const STRING_TYPE t)
				256	-{
				257	- size_t n;
				258	-
				259	- for (n = 0; s != '\0' && s++ == *t++; ++n)
				260	- continue;
				261	-
				262	- return n;
				263	-}
				264
				265	/* Track status while looking for sequences in a string. */
				266	typedef struct
				267	@@ -274,29 +260,9 @@ STRCOLL (const STRING_TYPE s1, const STRING_TYPE s2, __locale_t l)
				268	const USTRING_TYPE *extra;
				269	const int32_t *indirect;
				270
				271	- /* In case there is no locale specific sort order (C / POSIX). */
				272	if (nrules == 0)
				273	return STRCMP (s1, s2);
				274
				275	- /* Fast forward to the position of the first difference. Needs to be
				276	- encoding aware as the byte-by-byte comparison can stop in the middle
				277	- of a char sequence for multibyte encodings like UTF-8. */
				278	- uint_fast32_t encoding =
				279	- current->values[_NL_ITEM_INDEX (_NL_COLLATE_ENCODING_TYPE)].word;
				280	- if (encoding != __cet_other)
				281	- {
				282	- size_t diff = STRDIFF (s1, s2);
				283	- if (diff > 0)
				284	- {
				285	- if (encoding == __cet_utf8 && (*(s1 + diff) & MASK_UTF8_7BIT) != 0)
				286	- do
				287	- diff--;
				288	- while (diff > 0 && (*(s1 + diff) & MASK_UTF8_START) != MASK_UTF8_START);
				289	- s1 += diff;
				290	- s2 += diff;
				291	- }
				292	- }
				293	-
				294	/* Catch empty strings. */
				295	if (__glibc_unlikely (s1 == '\0') \|\| __glibc_unlikely (s2 == '\0'))
				296	return (s1 != '\0') - (s2 != '\0');
				297	@@ -363,9 +329,8 @@ STRCOLL (const STRING_TYPE s1, const STRING_TYPE s2, __locale_t l)
				298	byte-level comparison to ensure that we don't waste time
				299	going through multiple passes for totally equal strings
				300	before proceeding to subsequent passes. */
				301	- if (pass == 0 && encoding == __cet_other &&
				302	- STRCMP (s1, s2) == 0)
				303	- return result;
				304	+ if (pass == 0 && STRCMP (s1, s2) == 0)
				305	+ return result;
				306	else
				307	break;
				308	}
				309	diff --git a/wcsmbs/wcscoll_l.c b/wcsmbs/wcscoll_l.c
				310	index 6d9384a..87f240d 100644
				311	--- a/wcsmbs/wcscoll_l.c
				312	+++ b/wcsmbs/wcscoll_l.c
				313	@@ -23,7 +23,6 @@
				314	#define STRING_TYPE wchar_t
				315	#define USTRING_TYPE wint_t
				316	#define STRCOLL __wcscoll_l
				317	-#define STRDIFF __wcsdiff
				318	#define STRCMP __wcscmp
				319	#define WEIGHT_H "../locale/weightwc.h"
				320	#define SUFFIX WC
				321	--
				322	1.8.4.2
				323