aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJesse Gross <jesse@nicira.com>2011-05-26 19:25:02 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-05-26 20:12:33 -0400
commit704f15ddb5fc2a7f25a12eb0913302d8ad9ffab3 (patch)
treeef17a945288c333c345643325783f374e10a4020
parent5bf54a9758c230d9e957e7b4f3a41c226660dd49 (diff)
flex_array: avoid divisions when accessing elements
On most architectures division is an expensive operation and accessing an element currently requires four of them. This performance penalty effectively precludes flex arrays from being used on any kind of fast path. However, two of these divisions can be handled at creation time and the others can be replaced by a reciprocal divide, completely avoiding real divisions on access. [eparis@redhat.com: rebase on top of changes to support 0 len elements] [eparis@redhat.com: initialize part_nr when array fits entirely in base] Signed-off-by: Jesse Gross <jesse@nicira.com> Signed-off-by: Eric Paris <eparis@redhat.com> Cc: Dave Hansen <dave@linux.vnet.ibm.com> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/flex_array.h2
-rw-r--r--lib/flex_array.c51
2 files changed, 31 insertions, 22 deletions
diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h
index ebeb2f3ad068..6843cf193a44 100644
--- a/include/linux/flex_array.h
+++ b/include/linux/flex_array.h
@@ -21,6 +21,8 @@ struct flex_array {
21 struct { 21 struct {
22 int element_size; 22 int element_size;
23 int total_nr_elements; 23 int total_nr_elements;
24 int elems_per_part;
25 u32 reciprocal_elems;
24 struct flex_array_part *parts[]; 26 struct flex_array_part *parts[];
25 }; 27 };
26 /* 28 /*
diff --git a/lib/flex_array.c b/lib/flex_array.c
index cab7621f98aa..9b8b89458c4c 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -24,6 +24,7 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/stddef.h> 25#include <linux/stddef.h>
26#include <linux/module.h> 26#include <linux/module.h>
27#include <linux/reciprocal_div.h>
27 28
28struct flex_array_part { 29struct flex_array_part {
29 char elements[FLEX_ARRAY_PART_SIZE]; 30 char elements[FLEX_ARRAY_PART_SIZE];
@@ -70,15 +71,15 @@ static inline int elements_fit_in_base(struct flex_array *fa)
70 * Element size | Objects | Objects | 71 * Element size | Objects | Objects |
71 * PAGE_SIZE=4k | 32-bit | 64-bit | 72 * PAGE_SIZE=4k | 32-bit | 64-bit |
72 * ---------------------------------| 73 * ---------------------------------|
73 * 1 bytes | 4186112 | 2093056 | 74 * 1 bytes | 4177920 | 2088960 |
74 * 2 bytes | 2093056 | 1046528 | 75 * 2 bytes | 2088960 | 1044480 |
75 * 3 bytes | 1395030 | 697515 | 76 * 3 bytes | 1392300 | 696150 |
76 * 4 bytes | 1046528 | 523264 | 77 * 4 bytes | 1044480 | 522240 |
77 * 32 bytes | 130816 | 65408 | 78 * 32 bytes | 130560 | 65408 |
78 * 33 bytes | 126728 | 63364 | 79 * 33 bytes | 126480 | 63240 |
79 * 2048 bytes | 2044 | 1022 | 80 * 2048 bytes | 2040 | 1020 |
80 * 2049 bytes | 1022 | 511 | 81 * 2049 bytes | 1020 | 510 |
81 * void * | 1046528 | 261632 | 82 * void * | 1044480 | 261120 |
82 * 83 *
83 * Since 64-bit pointers are twice the size, we lose half the 84 * Since 64-bit pointers are twice the size, we lose half the
84 * capacity in the base structure. Also note that no effort is made 85 * capacity in the base structure. Also note that no effort is made
@@ -88,11 +89,15 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
88 gfp_t flags) 89 gfp_t flags)
89{ 90{
90 struct flex_array *ret; 91 struct flex_array *ret;
92 int elems_per_part = 0;
93 int reciprocal_elems = 0;
91 int max_size = 0; 94 int max_size = 0;
92 95
93 if (element_size) 96 if (element_size) {
94 max_size = FLEX_ARRAY_NR_BASE_PTRS * 97 elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
95 FLEX_ARRAY_ELEMENTS_PER_PART(element_size); 98 reciprocal_elems = reciprocal_value(elems_per_part);
99 max_size = FLEX_ARRAY_NR_BASE_PTRS * elems_per_part;
100 }
96 101
97 /* max_size will end up 0 if element_size > PAGE_SIZE */ 102 /* max_size will end up 0 if element_size > PAGE_SIZE */
98 if (total > max_size) 103 if (total > max_size)
@@ -102,6 +107,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
102 return NULL; 107 return NULL;
103 ret->element_size = element_size; 108 ret->element_size = element_size;
104 ret->total_nr_elements = total; 109 ret->total_nr_elements = total;
110 ret->elems_per_part = elems_per_part;
111 ret->reciprocal_elems = reciprocal_elems;
105 if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) 112 if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
106 memset(&ret->parts[0], FLEX_ARRAY_FREE, 113 memset(&ret->parts[0], FLEX_ARRAY_FREE,
107 FLEX_ARRAY_BASE_BYTES_LEFT); 114 FLEX_ARRAY_BASE_BYTES_LEFT);
@@ -112,7 +119,7 @@ EXPORT_SYMBOL(flex_array_alloc);
112static int fa_element_to_part_nr(struct flex_array *fa, 119static int fa_element_to_part_nr(struct flex_array *fa,
113 unsigned int element_nr) 120 unsigned int element_nr)
114{ 121{
115 return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); 122 return reciprocal_divide(element_nr, fa->reciprocal_elems);
116} 123}
117 124
118/** 125/**
@@ -141,12 +148,12 @@ void flex_array_free(struct flex_array *fa)
141EXPORT_SYMBOL(flex_array_free); 148EXPORT_SYMBOL(flex_array_free);
142 149
143static unsigned int index_inside_part(struct flex_array *fa, 150static unsigned int index_inside_part(struct flex_array *fa,
144 unsigned int element_nr) 151 unsigned int element_nr,
152 unsigned int part_nr)
145{ 153{
146 unsigned int part_offset; 154 unsigned int part_offset;
147 155
148 part_offset = element_nr % 156 part_offset = element_nr - part_nr * fa->elems_per_part;
149 FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size);
150 return part_offset * fa->element_size; 157 return part_offset * fa->element_size;
151} 158}
152 159
@@ -186,7 +193,7 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
186int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, 193int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
187 gfp_t flags) 194 gfp_t flags)
188{ 195{
189 int part_nr; 196 int part_nr = 0;
190 struct flex_array_part *part; 197 struct flex_array_part *part;
191 void *dst; 198 void *dst;
192 199
@@ -202,7 +209,7 @@ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
202 if (!part) 209 if (!part)
203 return -ENOMEM; 210 return -ENOMEM;
204 } 211 }
205 dst = &part->elements[index_inside_part(fa, element_nr)]; 212 dst = &part->elements[index_inside_part(fa, element_nr, part_nr)];
206 memcpy(dst, src, fa->element_size); 213 memcpy(dst, src, fa->element_size);
207 return 0; 214 return 0;
208} 215}
@@ -217,7 +224,7 @@ EXPORT_SYMBOL(flex_array_put);
217 */ 224 */
218int flex_array_clear(struct flex_array *fa, unsigned int element_nr) 225int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
219{ 226{
220 int part_nr; 227 int part_nr = 0;
221 struct flex_array_part *part; 228 struct flex_array_part *part;
222 void *dst; 229 void *dst;
223 230
@@ -233,7 +240,7 @@ int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
233 if (!part) 240 if (!part)
234 return -EINVAL; 241 return -EINVAL;
235 } 242 }
236 dst = &part->elements[index_inside_part(fa, element_nr)]; 243 dst = &part->elements[index_inside_part(fa, element_nr, part_nr)];
237 memset(dst, FLEX_ARRAY_FREE, fa->element_size); 244 memset(dst, FLEX_ARRAY_FREE, fa->element_size);
238 return 0; 245 return 0;
239} 246}
@@ -302,7 +309,7 @@ EXPORT_SYMBOL(flex_array_prealloc);
302 */ 309 */
303void *flex_array_get(struct flex_array *fa, unsigned int element_nr) 310void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
304{ 311{
305 int part_nr; 312 int part_nr = 0;
306 struct flex_array_part *part; 313 struct flex_array_part *part;
307 314
308 if (!fa->element_size) 315 if (!fa->element_size)
@@ -317,7 +324,7 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
317 if (!part) 324 if (!part)
318 return NULL; 325 return NULL;
319 } 326 }
320 return &part->elements[index_inside_part(fa, element_nr)]; 327 return &part->elements[index_inside_part(fa, element_nr, part_nr)];
321} 328}
322EXPORT_SYMBOL(flex_array_get); 329EXPORT_SYMBOL(flex_array_get);
323 330