aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Don Mullis <don.mullis@gmail.com> 2010-03-05 16:43:15 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-03-06 14:26:35 -0500
commit: 835cc0c8477fdbc59e0217891d6f11061b1ac4e2 (patch)
tree: be179915300e6d6ea61c8458c7e1ccb764065ed4
parent: d6a2eedfddcded92c8f9b0ac022a99c4134696b0 (diff)
lib: more scalable list_sort()
XFS and UBIFS can pass long lists to list_sort(); this alternative
implementation scales better, reaching ~3x performance gain when list
length exceeds the L2 cache size.

Stand-alone program timings were run on a Core 2 duo L1=32KB L2=4MB,
gcc-4.4, with flags extracted from an Ubuntu kernel build. Object size is
581 bytes compared to 455 for Mark J. Roberts' code.

Worst case for either implementation is a list length just over a power of
two, and to roughly the same degree, so here are timing results for a
range of 2^N+1 lengths. List elements were 16 bytes each including malloc
overhead; initial order was random.

                          time (msec)
                          Tatham-Roberts
                          |       generic-Mullis-v2
    loop_count   length   |       |       ratio
       4000000        2     206     294   1.427
       2000000        3     176     227   1.289
       1000000        5     199     172   0.864
        500000        9     235     178   0.757
        250000       17     243     182   0.748
        125000       33     261     196   0.750
         62500       65     277     209   0.754
         31250      129     292     219   0.75
         15625      257     317     235   0.741
          7812      513     340     252   0.741
          3906     1025     362     267   0.737
          1953     2049     388     283   0.729   ~ L1 size
           976     4097     556     323   0.580
           488     8193     678     361   0.532
           244    16385     773     395   0.510
           122    32769     844     418   0.495
            61    65537     917     454   0.495
            30   131073    1128     543   0.481
            15   262145    2355     869   0.369   ~ L2 size
             7   524289    5597    1714   0.306
             3  1048577    6218    2022   0.325

Mark's code does not actually implement the usual or generic mergesort,
but rather a variant from Simon Tatham described here:

    http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Simon's algorithm performs O(log N) passes over the entire input list,
doing merges of sublists that double in size on each pass. The generic
algorithm instead merges pairs of equal length lists as early as possible,
in recursive order. For either algorithm, the elements that extend the
list beyond power-of-two length are a special case, handled as nearly as
possible as a "rounding-up" to a full POT (power of two).
Some intuition for the locality-of-reference implications of merge order
can be gained by watching this animation:

    http://www.sorting-algorithms.com/merge-sort

Simon's algorithm requires only O(1) extra space rather than the generic
algorithm's O(log N), but in my non-recursive implementation the actual
O(log N) data is merely a vector of ~20 pointers, which I've put on the
stack.

Long-running list_sort() calls: If the list passed in may be long, or the
client's cmp() callback function is slow, the client's cmp() may
periodically invoke cond_resched() to voluntarily yield the CPU. All inner
loops of list_sort() call back to cmp().

Stability of the sort: distinct elements that compare equal emerge from
the sort in the same order as with Mark's code, for simple test cases. A
boot-time test is provided to verify this and other correctness
requirements.

A kernel that uses drm.ko appears to run normally with this change; I have
no suitable hardware to similarly test the use by UBIFS.

[akpm@linux-foundation.org: style tweaks, fix comment, make list_sort_test __init]
Signed-off-by: Don Mullis <don.mullis@gmail.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Artem Bityutskiy <dedekind@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- lib/list_sort.c 252
1 files changed, 183 insertions, 69 deletions
diff --git a/lib/list_sort.c b/lib/list_sort.c
index 19d11e0bb958..362c10f1653f 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -4,99 +4,213 @@
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/list.h> 5#include <linux/list.h>
6 6
7#define MAX_LIST_LENGTH_BITS 20
8
9/*
10 * Returns a list organized in an intermediate format suited
11 * to chaining of merge() calls: null-terminated, no reserved or
12 * sentinel head node, "prev" links not maintained.
13 */
14static struct list_head *merge(void *priv,
15 int (*cmp)(void *priv, struct list_head *a,
16 struct list_head *b),
17 struct list_head *a, struct list_head *b)
18{
19 struct list_head head, *tail = &head;
20
21 while (a && b) {
22 /* if equal, take 'a' -- important for sort stability */
23 if ((*cmp)(priv, a, b) <= 0) {
24 tail->next = a;
25 a = a->next;
26 } else {
27 tail->next = b;
28 b = b->next;
29 }
30 tail = tail->next;
31 }
32 tail->next = a?:b;
33 return head.next;
34}
35
36/*
37 * Combine final list merge with restoration of standard doubly-linked
38 * list structure. This approach duplicates code from merge(), but
39 * runs faster than the tidier alternatives of either a separate final
40 * prev-link restoration pass, or maintaining the prev links
41 * throughout.
42 */
43static void merge_and_restore_back_links(void *priv,
44 int (*cmp)(void *priv, struct list_head *a,
45 struct list_head *b),
46 struct list_head *head,
47 struct list_head *a, struct list_head *b)
48{
49 struct list_head *tail = head;
50
51 while (a && b) {
52 /* if equal, take 'a' -- important for sort stability */
53 if ((*cmp)(priv, a, b) <= 0) {
54 tail->next = a;
55 a->prev = tail;
56 a = a->next;
57 } else {
58 tail->next = b;
59 b->prev = tail;
60 b = b->next;
61 }
62 tail = tail->next;
63 }
64 tail->next = a ? : b;
65
66 do {
67 /*
68 * In worst cases this loop may run many iterations.
69 * Continue callbacks to the client even though no
70 * element comparison is needed, so the client's cmp()
71 * routine can invoke cond_resched() periodically.
72 */
73 (*cmp)(priv, tail, tail);
74
75 tail->next->prev = tail;
76 tail = tail->next;
77 } while (tail->next);
78
79 tail->next = head;
80 head->prev = tail;
81}
82
7/** 83/**
8 * list_sort - sort a list. 84 * list_sort - sort a list.
9 * @priv: private data, passed to @cmp 85 * @priv: private data, passed to @cmp
10 * @head: the list to sort 86 * @head: the list to sort
11 * @cmp: the elements comparison function 87 * @cmp: the elements comparison function
12 * 88 *
13 * This function has been implemented by Mark J Roberts <mjr@znex.org>. It 89 * This function implements "merge sort" which has O(nlog(n)) complexity.
14 * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted 90 * The list is sorted in ascending order.
15 * in ascending order.
16 * 91 *
17 * The comparison function @cmp is supposed to return a negative value if @a is 92 * The comparison function @cmp is supposed to return a negative value if @a is
18 * less than @b, and a positive value if @a is greater than @b. If @a and @b 93 * less than @b, and a positive value if @a is greater than @b. If @a and @b
19 * are equivalent, then it does not matter what this function returns. 94 * are equivalent, then it does not matter what this function returns.
20 */ 95 */
21void list_sort(void *priv, struct list_head *head, 96void list_sort(void *priv, struct list_head *head,
22 int (*cmp)(void *priv, struct list_head *a, 97 int (*cmp)(void *priv, struct list_head *a,
23 struct list_head *b)) 98 struct list_head *b))
24{ 99{
25 struct list_head *p, *q, *e, *list, *tail, *oldhead; 100 struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
26 int insize, nmerges, psize, qsize, i; 101 -- last slot is a sentinel */
102 int lev; /* index into part[] */
103 int max_lev = 0;
104 struct list_head *list;
27 105
28 if (list_empty(head)) 106 if (list_empty(head))
29 return; 107 return;
30 108
109 memset(part, 0, sizeof(part));
110
111 head->prev->next = NULL;
31 list = head->next; 112 list = head->next;
32 list_del(head);
33 insize = 1;
34 for (;;) {
35 p = oldhead = list;
36 list = tail = NULL;
37 nmerges = 0;
38
39 while (p) {
40 nmerges++;
41 q = p;
42 psize = 0;
43 for (i = 0; i < insize; i++) {
44 psize++;
45 q = q->next == oldhead ? NULL : q->next;
46 if (!q)
47 break;
48 }
49 113
50 qsize = insize; 114 while (list) {
51 while (psize > 0 || (qsize > 0 && q)) { 115 struct list_head *cur = list;
52 if (!psize) { 116 list = list->next;
53 e = q; 117 cur->next = NULL;
54 q = q->next; 118
55 qsize--; 119 for (lev = 0; part[lev]; lev++) {
56 if (q == oldhead) 120 cur = merge(priv, cmp, part[lev], cur);
57 q = NULL; 121 part[lev] = NULL;
58 } else if (!qsize || !q) { 122 }
59 e = p; 123 if (lev > max_lev) {
60 p = p->next; 124 if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
61 psize--; 125 printk_once(KERN_DEBUG "list passed to"
62 if (p == oldhead) 126 " list_sort() too long for"
63 p = NULL; 127 " efficiency\n");
64 } else if (cmp(priv, p, q) <= 0) { 128 lev--;
65 e = p;
66 p = p->next;
67 psize--;
68 if (p == oldhead)
69 p = NULL;
70 } else {
71 e = q;
72 q = q->next;
73 qsize--;
74 if (q == oldhead)
75 q = NULL;
76 }
77 if (tail)
78 tail->next = e;
79 else
80 list = e;
81 e->prev = tail;
82 tail = e;
83 } 129 }
84 p = q; 130 max_lev = lev;
85 } 131 }
132 part[lev] = cur;
133 }
86 134
87 tail->next = list; 135 for (lev = 0; lev < max_lev; lev++)
88 list->prev = tail; 136 if (part[lev])
137 list = merge(priv, cmp, part[lev], list);
89 138
90 if (nmerges <= 1) 139 merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
91 break; 140}
141EXPORT_SYMBOL(list_sort);
92 142
93 insize *= 2; 143#ifdef DEBUG_LIST_SORT
94 } 144struct debug_el {
145 struct list_head l_h;
146 int value;
147 unsigned serial;
148};
95 149
96 head->next = list; 150static int cmp(void *priv, struct list_head *a, struct list_head *b)
97 head->prev = list->prev; 151{
98 list->prev->next = head; 152 return container_of(a, struct debug_el, l_h)->value
99 list->prev = head; 153 - container_of(b, struct debug_el, l_h)->value;
100} 154}
101 155
102EXPORT_SYMBOL(list_sort); 156/*
157 * The pattern of set bits in the list length determines which cases
158 * are hit in list_sort().
159 */
160#define LIST_SORT_TEST_LENGTH (512+128+2) /* not including head */
161
162static int __init list_sort_test(void)
163{
164 int i, r = 1, count;
165 struct list_head *head = kmalloc(sizeof(*head), GFP_KERNEL);
166 struct list_head *cur;
167
168 printk(KERN_WARNING "testing list_sort()\n");
169
170 cur = head;
171 for (i = 0; i < LIST_SORT_TEST_LENGTH; i++) {
172 struct debug_el *el = kmalloc(sizeof(*el), GFP_KERNEL);
173 BUG_ON(!el);
174 /* force some equivalencies */
175 el->value = (r = (r * 725861) % 6599) % (LIST_SORT_TEST_LENGTH/3);
176 el->serial = i;
177
178 el->l_h.prev = cur;
179 cur->next = &el->l_h;
180 cur = cur->next;
181 }
182 head->prev = cur;
183
184 list_sort(NULL, head, cmp);
185
186 count = 1;
187 for (cur = head->next; cur->next != head; cur = cur->next) {
188 struct debug_el *el = container_of(cur, struct debug_el, l_h);
189 int cmp_result = cmp(NULL, cur, cur->next);
190 if (cur->next->prev != cur) {
191 printk(KERN_EMERG "list_sort() returned "
192 "a corrupted list!\n");
193 return 1;
194 } else if (cmp_result > 0) {
195 printk(KERN_EMERG "list_sort() failed to sort!\n");
196 return 1;
197 } else if (cmp_result == 0 &&
198 el->serial >= container_of(cur->next,
199 struct debug_el, l_h)->serial) {
200 printk(KERN_EMERG "list_sort() failed to preserve order"
201 " of equivalent elements!\n");
202 return 1;
203 }
204 kfree(cur->prev);
205 count++;
206 }
207 kfree(cur);
208 if (count != LIST_SORT_TEST_LENGTH) {
209 printk(KERN_EMERG "list_sort() returned list of"
210 "different length!\n");
211 return 1;
212 }
213 return 0;
214}
215module_init(list_sort_test);
216#endif