author		Dave Hansen <dave@linux.vnet.ibm.com>	2009-07-29 18:04:18 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-07-29 22:10:36 -0400
commit		534acc057b5a08ec33fa57cdd2f5a09ef124e7f2 (patch)
tree		186e6ff90a7a696a2d15f183871250c9d83f476d /lib
parent		a9e58f25734e153b8c6516d904e2398fb8b0b23d (diff)
lib: flexible array implementation
Once a structure goes over PAGE_SIZE*2, we see occasional allocation failures.  Some people have chosen to switch over to things like vmalloc() that will let them keep array-like access to such large structures.  But, vmalloc() has plenty of downsides.

Here's an alternative.  I think it's what Andrew was suggesting here:

	http://lkml.org/lkml/2009/7/2/518

I call it a flexible array.  It does all of its work in PAGE_SIZE bits, so it never does an order>0 allocation.  The base level has PAGE_SIZE-2*sizeof(int) bytes of storage for pointers to the second level.  So, with a 32-bit arch, you get about 4MB (4183112 bytes) of total storage when the objects pack nicely into a page.  It is half that on 64-bit because the pointers are twice the size.  There's a table detailing this in the code.

There are kerneldocs for the functions, but here's an overview:

flex_array_alloc() - dynamically allocate a base structure
flex_array_free() - free the array and all of the second-level pages
flex_array_free_parts() - free the second-level pages, but not the base
			  (for static bases)
flex_array_put() - copy into the array at the given index
flex_array_get() - copy out of the array at the given index
flex_array_prealloc() - preallocate the second-level pages between the
			given indexes to guarantee no allocs will occur at
			put() time.

We could also potentially just pass the "element_size" into each of the API functions instead of storing it internally.  That would get us one more base pointer on 32-bit.

I've been testing this by running it in userspace.  The header and patch that I've been using are here, as well as the little script I'm using to generate the size table which goes in the kerneldocs.

	http://sr71.net/~dave/linux/flexarray/

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
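[Editorial note: for orientation, a minimal usage sketch of the API described above, not part of the patch; "struct my_obj", its ".val" field, and the element count are made up for illustration, and error handling is abbreviated.]

	static int flex_array_example(void)
	{
		struct my_obj obj = { .val = 1 };	/* hypothetical element type */
		struct my_obj *stored;
		struct flex_array *fa;
		int err;

		/* allocates only the single-page base structure */
		fa = flex_array_alloc(sizeof(struct my_obj), 1000, GFP_KERNEL);
		if (!fa)
			return -ENOMEM;

		/*
		 * optional: allocate the second-level pages up front so the
		 * flex_array_put() below cannot fail with -ENOMEM (useful if
		 * the put happens under a lock or in atomic context)
		 */
		err = flex_array_prealloc(fa, 0, 999, GFP_KERNEL);
		if (err)
			goto out_free;

		err = flex_array_put(fa, 14, &obj, GFP_KERNEL);	/* copies obj in */
		if (err)
			goto out_free;

		stored = flex_array_get(fa, 14);	/* pointer to the stored copy */
		if (!stored)
			err = -EINVAL;
	out_free:
		flex_array_free(fa);	/* frees the second-level pages and the base */
		return err;
	}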
Diffstat (limited to 'lib')
-rw-r--r--	lib/Makefile		2
-rw-r--r--	lib/flex_array.c	269
2 files changed, 270 insertions, 1 deletions
diff --git a/lib/Makefile b/lib/Makefile
index b6d1857bbf08..2e78277eff9d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o prio_heap.o ratelimit.o show_mem.o \
-	 is_single_threaded.o plist.o decompress.o
+	 is_single_threaded.o plist.o decompress.o flex_array.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/flex_array.c b/lib/flex_array.c
new file mode 100644
index 000000000000..0e7894ce8882
--- /dev/null
+++ b/lib/flex_array.c
@@ -0,0 +1,269 @@
/*
 * Flexible array managed in PAGE_SIZE parts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright IBM Corporation, 2009
 *
 * Author: Dave Hansen <dave@linux.vnet.ibm.com>
 */

#include <linux/flex_array.h>
#include <linux/slab.h>
#include <linux/stddef.h>

struct flex_array_part {
	char elements[FLEX_ARRAY_PART_SIZE];
};

static inline int __elements_per_part(int element_size)
{
	return FLEX_ARRAY_PART_SIZE / element_size;
}

static inline int bytes_left_in_base(void)
{
	int element_offset = offsetof(struct flex_array, parts);
	int bytes_left = FLEX_ARRAY_BASE_SIZE - element_offset;
	return bytes_left;
}

static inline int nr_base_part_ptrs(void)
{
	return bytes_left_in_base() / sizeof(struct flex_array_part *);
}

/*
 * If a user requests an allocation which is small
 * enough, we may simply use the space in the
 * flex_array->parts[] array to store the user
 * data.
 */
static inline int elements_fit_in_base(struct flex_array *fa)
{
	int data_size = fa->element_size * fa->total_nr_elements;
	if (data_size <= bytes_left_in_base())
		return 1;
	return 0;
}

/**
 * flex_array_alloc - allocate a new flexible array
 * @element_size: the size of individual elements in the array
 * @total: total number of elements that this should hold
 *
 * Note: all locking must be provided by the caller.
 *
 * @total is used to size internal structures.  If the user ever
 * accesses any array indexes >= @total, it will produce errors.
 *
 * The maximum number of elements is defined as: the number of
 * elements that can be stored in a page times the number of
 * page pointers that we can fit in the base structure or (using
 * integer math):
 *
 *	(PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *)
 *
 * Here's a table showing example capacities.  Note that the maximum
 * index that the get/put() functions can handle is just nr_objects-1.
 * This basically means that you get 4MB of storage on 32-bit and 2MB
 * on 64-bit.
 *
 *
 *	Element size | Objects | Objects |
 *	PAGE_SIZE=4k |  32-bit |  64-bit |
 *	---------------------------------|
 *	     1 bytes | 4186112 | 2093056 |
 *	     2 bytes | 2093056 | 1046528 |
 *	     3 bytes | 1395030 |  697515 |
 *	     4 bytes | 1046528 |  523264 |
 *	    32 bytes |  130816 |   65408 |
 *	    33 bytes |  126728 |   63364 |
 *	  2048 bytes |    2044 |    1022 |
 *	  2049 bytes |    1022 |     511 |
 *	      void * | 1046528 |  261632 |
 *
 * Since 64-bit pointers are twice the size, we lose half the
 * capacity in the base structure.  Also note that no effort is made
 * to efficiently pack objects across page boundaries.
 */
struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags)
{
	struct flex_array *ret;
	int max_size = nr_base_part_ptrs() * __elements_per_part(element_size);

	/* max_size will end up 0 if element_size > PAGE_SIZE */
	if (total > max_size)
		return NULL;
	ret = kzalloc(sizeof(struct flex_array), flags);
	if (!ret)
		return NULL;
	ret->element_size = element_size;
	ret->total_nr_elements = total;
	return ret;
}

static int fa_element_to_part_nr(struct flex_array *fa, int element_nr)
{
	return element_nr / __elements_per_part(fa->element_size);
}

/**
 * flex_array_free_parts - just free the second-level pages
 * @fa: the flex array whose second-level pages should be freed
 *
 * This is to be used in cases where the base 'struct flex_array'
 * has been statically allocated and should not be freed.
 */
void flex_array_free_parts(struct flex_array *fa)
{
	int part_nr;
	int max_part = nr_base_part_ptrs();

	if (elements_fit_in_base(fa))
		return;
	for (part_nr = 0; part_nr < max_part; part_nr++)
		kfree(fa->parts[part_nr]);
}

void flex_array_free(struct flex_array *fa)
{
	flex_array_free_parts(fa);
	kfree(fa);
}

static int fa_index_inside_part(struct flex_array *fa, int element_nr)
{
	return element_nr % __elements_per_part(fa->element_size);
}

static int index_inside_part(struct flex_array *fa, int element_nr)
{
	int part_offset = fa_index_inside_part(fa, element_nr);
	return part_offset * fa->element_size;
}

static struct flex_array_part *
__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
{
	struct flex_array_part *part = fa->parts[part_nr];
	if (!part) {
		/*
		 * This leaves the part pages uninitialized
		 * and with potentially random data, just
		 * as if the user had kmalloc()'d the whole.
		 * __GFP_ZERO can be used to zero it.
		 */
		part = kmalloc(FLEX_ARRAY_PART_SIZE, flags);
		if (!part)
			return NULL;
		fa->parts[part_nr] = part;
	}
	return part;
}

/**
 * flex_array_put - copy data into the array at @element_nr
 * @src: address of data to copy into the array
 * @element_nr: index of the position in which to insert
 *		the new element.
 *
 * Note that this *copies* the contents of @src into
 * the array.  If you are trying to store an array of
 * pointers, make sure to pass in &ptr instead of ptr.
 *
 * Locking must be provided by the caller.
 */
int flex_array_put(struct flex_array *fa, int element_nr, void *src, gfp_t flags)
{
	int part_nr = fa_element_to_part_nr(fa, element_nr);
	struct flex_array_part *part;
	void *dst;

	if (element_nr >= fa->total_nr_elements)
		return -ENOSPC;
	if (elements_fit_in_base(fa))
		part = (struct flex_array_part *)&fa->parts[0];
	else
		part = __fa_get_part(fa, part_nr, flags);
	if (!part)
		return -ENOMEM;
	dst = &part->elements[index_inside_part(fa, element_nr)];
	memcpy(dst, src, fa->element_size);
	return 0;
}

/**
 * flex_array_prealloc - guarantee that array space exists
 * @start: index of first array element for which space is allocated
 * @end: index of last (inclusive) element for which space is allocated
 *
 * This will guarantee that no future calls to flex_array_put()
 * will allocate memory.  It can be used if you are expecting to
 * be holding a lock or in some atomic context while writing
 * data into the array.
 *
 * Locking must be provided by the caller.
 */
int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags)
{
	int start_part;
	int end_part;
	int part_nr;
	struct flex_array_part *part;

	if (start >= fa->total_nr_elements || end >= fa->total_nr_elements)
		return -ENOSPC;
	if (elements_fit_in_base(fa))
		return 0;
	start_part = fa_element_to_part_nr(fa, start);
	end_part = fa_element_to_part_nr(fa, end);
	for (part_nr = start_part; part_nr <= end_part; part_nr++) {
		part = __fa_get_part(fa, part_nr, flags);
		if (!part)
			return -ENOMEM;
	}
	return 0;
}

/**
 * flex_array_get - pull data back out of the array
 * @element_nr: index of the element to fetch from the array
 *
 * Returns a pointer to the data at index @element_nr.  Note
 * that this is a copy of the data that was passed in.  If you
 * are using this to store pointers, you'll get back &ptr.
 *
 * Locking must be provided by the caller.
 */
void *flex_array_get(struct flex_array *fa, int element_nr)
{
	int part_nr = fa_element_to_part_nr(fa, element_nr);
	struct flex_array_part *part;
	int index;

	if (element_nr >= fa->total_nr_elements)
		return NULL;
	if (!fa->parts[part_nr])
		return NULL;
	if (elements_fit_in_base(fa))
		part = (struct flex_array_part *)&fa->parts[0];
	else
		part = fa->parts[part_nr];
	index = index_inside_part(fa, element_nr);
	return &part->elements[index];
}
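
[Editorial note: the commit message mentions a small script used to generate the size table in the flex_array_alloc() kerneldoc. That script is not reproduced here, but a rough userspace stand-in along the following lines, assuming 4k pages and the two-int base header described above, yields the same numbers; build it as a 32-bit or a 64-bit binary to get the respective column.]

	#include <stdio.h>

	#define PAGE_SIZE		4096	/* assumed */
	#define FLEX_ARRAY_BASE_SIZE	PAGE_SIZE
	#define FLEX_ARRAY_PART_SIZE	PAGE_SIZE

	int main(void)
	{
		/* element_size and total_nr_elements precede parts[] in the base */
		int bytes_left = FLEX_ARRAY_BASE_SIZE - 2 * (int)sizeof(int);
		int nr_part_ptrs = bytes_left / (int)sizeof(void *);
		int sizes[] = { 1, 2, 3, 4, 32, 33, 2048, 2049, (int)sizeof(void *) };
		int i;

		for (i = 0; i < (int)(sizeof(sizes) / sizeof(sizes[0])); i++) {
			int per_part = FLEX_ARRAY_PART_SIZE / sizes[i];
			printf("%4d bytes: %d objects\n",
			       sizes[i], per_part * nr_part_ptrs);
		}
		return 0;
	}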