diff options
author | Andy Grover <andy.grover@oracle.com> | 2009-02-24 10:30:26 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-02-27 02:39:28 -0500 |
commit | 7875e18e09961d29f30424c5e2e48e704dc3789b (patch) | |
tree | e0092fa58436f8f221694a94e119642331614c79 /net/rds/page.c | |
parent | 3e5048495c8569bfdd552750e0315973c61e7c93 (diff) |
RDS: Message parsing
Parsing of newly-received RDS message headers (including ext.
headers) and copy-to/from-user routines.
page.c implements a per-cpu page remainder cache, to reduce the
number of allocations needed for small datagrams.
Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/page.c')
-rw-r--r-- | net/rds/page.c | 221 |
1 files changed, 221 insertions, 0 deletions
diff --git a/net/rds/page.c b/net/rds/page.c new file mode 100644 index 000000000000..c460743a89ad --- /dev/null +++ b/net/rds/page.c | |||
@@ -0,0 +1,221 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/highmem.h> | ||
34 | |||
35 | #include "rds.h" | ||
36 | |||
37 | struct rds_page_remainder { | ||
38 | struct page *r_page; | ||
39 | unsigned long r_offset; | ||
40 | }; | ||
41 | |||
42 | DEFINE_PER_CPU(struct rds_page_remainder, rds_page_remainders) ____cacheline_aligned; | ||
43 | |||
44 | /* | ||
45 | * returns 0 on success or -errno on failure. | ||
46 | * | ||
47 | * We don't have to worry about flush_dcache_page() as this only works | ||
48 | * with private pages. If, say, we were to do directed receive to pinned | ||
49 | * user pages we'd have to worry more about cache coherence. (Though | ||
50 | * the flush_dcache_page() in get_user_pages() would probably be enough). | ||
51 | */ | ||
52 | int rds_page_copy_user(struct page *page, unsigned long offset, | ||
53 | void __user *ptr, unsigned long bytes, | ||
54 | int to_user) | ||
55 | { | ||
56 | unsigned long ret; | ||
57 | void *addr; | ||
58 | |||
59 | if (to_user) | ||
60 | rds_stats_add(s_copy_to_user, bytes); | ||
61 | else | ||
62 | rds_stats_add(s_copy_from_user, bytes); | ||
63 | |||
64 | addr = kmap_atomic(page, KM_USER0); | ||
65 | if (to_user) | ||
66 | ret = __copy_to_user_inatomic(ptr, addr + offset, bytes); | ||
67 | else | ||
68 | ret = __copy_from_user_inatomic(addr + offset, ptr, bytes); | ||
69 | kunmap_atomic(addr, KM_USER0); | ||
70 | |||
71 | if (ret) { | ||
72 | addr = kmap(page); | ||
73 | if (to_user) | ||
74 | ret = copy_to_user(ptr, addr + offset, bytes); | ||
75 | else | ||
76 | ret = copy_from_user(addr + offset, ptr, bytes); | ||
77 | kunmap(page); | ||
78 | if (ret) | ||
79 | return -EFAULT; | ||
80 | } | ||
81 | |||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | /* | ||
86 | * Message allocation uses this to build up regions of a message. | ||
87 | * | ||
88 | * @bytes - the number of bytes needed. | ||
89 | * @gfp - the waiting behaviour of the allocation | ||
90 | * | ||
91 | * @gfp is always ored with __GFP_HIGHMEM. Callers must be prepared to | ||
92 | * kmap the pages, etc. | ||
93 | * | ||
94 | * If @bytes is at least a full page then this just returns a page from | ||
95 | * alloc_page(). | ||
96 | * | ||
97 | * If @bytes is a partial page then this stores the unused region of the | ||
98 | * page in a per-cpu structure. Future partial-page allocations may be | ||
99 | * satisfied from that cached region. This lets us waste less memory on | ||
100 | * small allocations with minimal complexity. It works because the transmit | ||
101 | * path passes read-only page regions down to devices. They hold a page | ||
102 | * reference until they are done with the region. | ||
103 | */ | ||
104 | int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, | ||
105 | gfp_t gfp) | ||
106 | { | ||
107 | struct rds_page_remainder *rem; | ||
108 | unsigned long flags; | ||
109 | struct page *page; | ||
110 | int ret; | ||
111 | |||
112 | gfp |= __GFP_HIGHMEM; | ||
113 | |||
114 | /* jump straight to allocation if we're trying for a huge page */ | ||
115 | if (bytes >= PAGE_SIZE) { | ||
116 | page = alloc_page(gfp); | ||
117 | if (page == NULL) { | ||
118 | ret = -ENOMEM; | ||
119 | } else { | ||
120 | sg_set_page(scat, page, PAGE_SIZE, 0); | ||
121 | ret = 0; | ||
122 | } | ||
123 | goto out; | ||
124 | } | ||
125 | |||
126 | rem = &per_cpu(rds_page_remainders, get_cpu()); | ||
127 | local_irq_save(flags); | ||
128 | |||
129 | while (1) { | ||
130 | /* avoid a tiny region getting stuck by tossing it */ | ||
131 | if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) { | ||
132 | rds_stats_inc(s_page_remainder_miss); | ||
133 | __free_page(rem->r_page); | ||
134 | rem->r_page = NULL; | ||
135 | } | ||
136 | |||
137 | /* hand out a fragment from the cached page */ | ||
138 | if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) { | ||
139 | sg_set_page(scat, rem->r_page, bytes, rem->r_offset); | ||
140 | get_page(sg_page(scat)); | ||
141 | |||
142 | if (rem->r_offset != 0) | ||
143 | rds_stats_inc(s_page_remainder_hit); | ||
144 | |||
145 | rem->r_offset += bytes; | ||
146 | if (rem->r_offset == PAGE_SIZE) { | ||
147 | __free_page(rem->r_page); | ||
148 | rem->r_page = NULL; | ||
149 | } | ||
150 | ret = 0; | ||
151 | break; | ||
152 | } | ||
153 | |||
154 | /* alloc if there is nothing for us to use */ | ||
155 | local_irq_restore(flags); | ||
156 | put_cpu(); | ||
157 | |||
158 | page = alloc_page(gfp); | ||
159 | |||
160 | rem = &per_cpu(rds_page_remainders, get_cpu()); | ||
161 | local_irq_save(flags); | ||
162 | |||
163 | if (page == NULL) { | ||
164 | ret = -ENOMEM; | ||
165 | break; | ||
166 | } | ||
167 | |||
168 | /* did someone race to fill the remainder before us? */ | ||
169 | if (rem->r_page) { | ||
170 | __free_page(page); | ||
171 | continue; | ||
172 | } | ||
173 | |||
174 | /* otherwise install our page and loop around to alloc */ | ||
175 | rem->r_page = page; | ||
176 | rem->r_offset = 0; | ||
177 | } | ||
178 | |||
179 | local_irq_restore(flags); | ||
180 | put_cpu(); | ||
181 | out: | ||
182 | rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret, | ||
183 | ret ? NULL : sg_page(scat), ret ? 0 : scat->offset, | ||
184 | ret ? 0 : scat->length); | ||
185 | return ret; | ||
186 | } | ||
187 | |||
188 | static int rds_page_remainder_cpu_notify(struct notifier_block *self, | ||
189 | unsigned long action, void *hcpu) | ||
190 | { | ||
191 | struct rds_page_remainder *rem; | ||
192 | long cpu = (long)hcpu; | ||
193 | |||
194 | rem = &per_cpu(rds_page_remainders, cpu); | ||
195 | |||
196 | rdsdebug("cpu %ld action 0x%lx\n", cpu, action); | ||
197 | |||
198 | switch (action) { | ||
199 | case CPU_DEAD: | ||
200 | if (rem->r_page) | ||
201 | __free_page(rem->r_page); | ||
202 | rem->r_page = NULL; | ||
203 | break; | ||
204 | } | ||
205 | |||
206 | return 0; | ||
207 | } | ||
208 | |||
209 | static struct notifier_block rds_page_remainder_nb = { | ||
210 | .notifier_call = rds_page_remainder_cpu_notify, | ||
211 | }; | ||
212 | |||
213 | void rds_page_exit(void) | ||
214 | { | ||
215 | int i; | ||
216 | |||
217 | for_each_possible_cpu(i) | ||
218 | rds_page_remainder_cpu_notify(&rds_page_remainder_nb, | ||
219 | (unsigned long)CPU_DEAD, | ||
220 | (void *)(long)i); | ||
221 | } | ||