aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Leech <christopher.leech@intel.com> 2006-05-23 20:50:37 -0400
committer David S. Miller <davem@sunset.davemloft.net> 2006-06-18 00:25:46 -0400
commit de5506e155276d385712c2aa1c2d9a27cd4ed947 (patch)
tree 219c30dab27b9aef2597d8735dfc19db8454849e
parent db21733488f84a596faaad0d05430b3f51804692 (diff)
[I/OAT]: Utility functions for offloading sk_buff to iovec copies
Provides for pinning user space pages in memory, copying to iovecs, and copying from sk_buffs including fragmented and chained sk_buffs. Signed-off-by: Chris Leech <christopher.leech@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- drivers/dma/Makefile 3
-rw-r--r-- drivers/dma/iovlock.c 301
-rw-r--r-- include/linux/dmaengine.h 22
-rw-r--r-- include/net/netdma.h 6
-rw-r--r-- net/core/Makefile 1
-rw-r--r-- net/core/user_dma.c 127
6 files changed, 459 insertions, 1 deletions
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index c8a5f5677313..bdcfdbdb1aec 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -1,2 +1,3 @@
1obj-y += dmaengine.o 1obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
2obj-$(CONFIG_NET_DMA) += iovlock.o
2obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o 3obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c
new file mode 100644
index 000000000000..5ed327e453a2
--- /dev/null
+++ b/drivers/dma/iovlock.c
@@ -0,0 +1,301 @@
1/*
2 * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
3 * Portions based on net/core/datagram.c and copyrighted by their authors.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free
7 * Software Foundation; either version 2 of the License, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59
17 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 *
19 * The full GNU General Public License is included in this distribution in the
20 * file called COPYING.
21 */
22
23/*
24 * This code allows the net stack to make use of a DMA engine for
25 * skb to iovec copies.
26 */
27
28#include <linux/dmaengine.h>
29#include <linux/pagemap.h>
30#include <net/tcp.h> /* for memcpy_toiovec */
31#include <asm/io.h>
32#include <asm/uaccess.h>
33
34int num_pages_spanned(struct iovec *iov)
35{
36 return
37 ((PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) -
38 ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT);
39}
40
41/*
42 * Pin down all the iovec pages needed for len bytes.
43 * Return a struct dma_pinned_list to keep track of pages pinned down.
44 *
45 * We are allocating a single chunk of memory, and then carving it up into
46 * 3 sections, the latter 2 whose size depends on the number of iovecs and the
47 * total number of pages, respectively.
48 */
49struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len)
50{
51 struct dma_pinned_list *local_list;
52 struct page **pages;
53 int i;
54 int ret;
55 int nr_iovecs = 0;
56 int iovec_len_used = 0;
57 int iovec_pages_used = 0;
58 long err;
59
60 /* don't pin down non-user-based iovecs */
61 if (segment_eq(get_fs(), KERNEL_DS))
62 return NULL;
63
64 /* determine how many iovecs/pages there are, up front */
65 do {
66 iovec_len_used += iov[nr_iovecs].iov_len;
67 iovec_pages_used += num_pages_spanned(&iov[nr_iovecs]);
68 nr_iovecs++;
69 } while (iovec_len_used < len);
70
71 /* single kmalloc for pinned list, page_list[], and the page arrays */
72 local_list = kmalloc(sizeof(*local_list)
73 + (nr_iovecs * sizeof (struct dma_page_list))
74 + (iovec_pages_used * sizeof (struct page*)), GFP_KERNEL);
75 if (!local_list) {
76 err = -ENOMEM;
77 goto out;
78 }
79
80 /* list of pages starts right after the page list array */
81 pages = (struct page **) &local_list->page_list[nr_iovecs];
82
83 for (i = 0; i < nr_iovecs; i++) {
84 struct dma_page_list *page_list = &local_list->page_list[i];
85
86 len -= iov[i].iov_len;
87
88 if (!access_ok(VERIFY_WRITE, iov[i].iov_base, iov[i].iov_len)) {
89 err = -EFAULT;
90 goto unpin;
91 }
92
93 page_list->nr_pages = num_pages_spanned(&iov[i]);
94 page_list->base_address = iov[i].iov_base;
95
96 page_list->pages = pages;
97 pages += page_list->nr_pages;
98
99 /* pin pages down */
100 down_read(&current->mm->mmap_sem);
101 ret = get_user_pages(
102 current,
103 current->mm,
104 (unsigned long) iov[i].iov_base,
105 page_list->nr_pages,
106 1, /* write */
107 0, /* force */
108 page_list->pages,
109 NULL);
110 up_read(&current->mm->mmap_sem);
111
112 if (ret != page_list->nr_pages) {
113 err = -ENOMEM;
114 goto unpin;
115 }
116
117 local_list->nr_iovecs = i + 1;
118 }
119
120 return local_list;
121
122unpin:
123 dma_unpin_iovec_pages(local_list);
124out:
125 return ERR_PTR(err);
126}
127
128void dma_unpin_iovec_pages(struct dma_pinned_list *pinned_list)
129{
130 int i, j;
131
132 if (!pinned_list)
133 return;
134
135 for (i = 0; i < pinned_list->nr_iovecs; i++) {
136 struct dma_page_list *page_list = &pinned_list->page_list[i];
137 for (j = 0; j < page_list->nr_pages; j++) {
138 set_page_dirty_lock(page_list->pages[j]);
139 page_cache_release(page_list->pages[j]);
140 }
141 }
142
143 kfree(pinned_list);
144}
145
146static dma_cookie_t dma_memcpy_to_kernel_iovec(struct dma_chan *chan, struct
147 iovec *iov, unsigned char *kdata, size_t len)
148{
149 dma_cookie_t dma_cookie = 0;
150
151 while (len > 0) {
152 if (iov->iov_len) {
153 int copy = min_t(unsigned int, iov->iov_len, len);
154 dma_cookie = dma_async_memcpy_buf_to_buf(
155 chan,
156 iov->iov_base,
157 kdata,
158 copy);
159 kdata += copy;
160 len -= copy;
161 iov->iov_len -= copy;
162 iov->iov_base += copy;
163 }
164 iov++;
165 }
166
167 return dma_cookie;
168}
169
170/*
171 * We have already pinned down the pages we will be using in the iovecs.
172 * Each entry in iov array has corresponding entry in pinned_list->page_list.
173 * Using array indexing to keep iov[] and page_list[] in sync.
174 * Initial elements in iov array's iov->iov_len will be 0 if already copied into
175 * by another call.
176 * iov array length remaining guaranteed to be bigger than len.
177 */
178dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
179 struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len)
180{
181 int iov_byte_offset;
182 int copy;
183 dma_cookie_t dma_cookie = 0;
184 int iovec_idx;
185 int page_idx;
186
187 if (!chan)
188 return memcpy_toiovec(iov, kdata, len);
189
190 /* -> kernel copies (e.g. smbfs) */
191 if (!pinned_list)
192 return dma_memcpy_to_kernel_iovec(chan, iov, kdata, len);
193
194 iovec_idx = 0;
195 while (iovec_idx < pinned_list->nr_iovecs) {
196 struct dma_page_list *page_list;
197
198 /* skip already used-up iovecs */
199 while (!iov[iovec_idx].iov_len)
200 iovec_idx++;
201
202 page_list = &pinned_list->page_list[iovec_idx];
203
204 iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
205 page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
206 - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
207
208 /* break up copies to not cross page boundary */
209 while (iov[iovec_idx].iov_len) {
210 copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
211 copy = min_t(int, copy, iov[iovec_idx].iov_len);
212
213 dma_cookie = dma_async_memcpy_buf_to_pg(chan,
214 page_list->pages[page_idx],
215 iov_byte_offset,
216 kdata,
217 copy);
218
219 len -= copy;
220 iov[iovec_idx].iov_len -= copy;
221 iov[iovec_idx].iov_base += copy;
222
223 if (!len)
224 return dma_cookie;
225
226 kdata += copy;
227 iov_byte_offset = 0;
228 page_idx++;
229 }
230 iovec_idx++;
231 }
232
233 /* really bad if we ever run out of iovecs */
234 BUG();
235 return -EFAULT;
236}
237
238dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
239 struct dma_pinned_list *pinned_list, struct page *page,
240 unsigned int offset, size_t len)
241{
242 int iov_byte_offset;
243 int copy;
244 dma_cookie_t dma_cookie = 0;
245 int iovec_idx;
246 int page_idx;
247 int err;
248
249 /* this needs as-yet-unimplemented buf-to-buff, so punt. */
250 /* TODO: use dma for this */
251 if (!chan || !pinned_list) {
252 u8 *vaddr = kmap(page);
253 err = memcpy_toiovec(iov, vaddr + offset, len);
254 kunmap(page);
255 return err;
256 }
257
258 iovec_idx = 0;
259 while (iovec_idx < pinned_list->nr_iovecs) {
260 struct dma_page_list *page_list;
261
262 /* skip already used-up iovecs */
263 while (!iov[iovec_idx].iov_len)
264 iovec_idx++;
265
266 page_list = &pinned_list->page_list[iovec_idx];
267
268 iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
269 page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
270 - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
271
272 /* break up copies to not cross page boundary */
273 while (iov[iovec_idx].iov_len) {
274 copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
275 copy = min_t(int, copy, iov[iovec_idx].iov_len);
276
277 dma_cookie = dma_async_memcpy_pg_to_pg(chan,
278 page_list->pages[page_idx],
279 iov_byte_offset,
280 page,
281 offset,
282 copy);
283
284 len -= copy;
285 iov[iovec_idx].iov_len -= copy;
286 iov[iovec_idx].iov_base += copy;
287
288 if (!len)
289 return dma_cookie;
290
291 offset += copy;
292 iov_byte_offset = 0;
293 page_idx++;
294 }
295 iovec_idx++;
296 }
297
298 /* really bad if we ever run out of iovecs */
299 BUG();
300 return -EFAULT;
301}
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 30781546ac99..78b236ca04f8 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -333,5 +333,27 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie,
333int dma_async_device_register(struct dma_device *device); 333int dma_async_device_register(struct dma_device *device);
334void dma_async_device_unregister(struct dma_device *device); 334void dma_async_device_unregister(struct dma_device *device);
335 335
336/* --- Helper iov-locking functions --- */
337
/*
 * Pinned pages backing one iovec entry.  dma_pin_iovec_pages() fills one of
 * these per iovec.
 */
338struct dma_page_list {
/* iov_base of the iovec at pin time; page indexes are computed relative to it */
339 char *base_address;
/* number of pages the iovec spans (entries in pages[]) */
340 int nr_pages;
/* points into the page-pointer area at the tail of the dma_pinned_list allocation */
341 struct page **pages;
342};
343
/*
 * Bookkeeping for a set of pinned iovecs.  Allocated by dma_pin_iovec_pages()
 * as one chunk: this header, then the page_list[] entries, then the
 * struct page pointer arrays those entries point into.
 */
344struct dma_pinned_list {
/* number of iovecs whose pages are pinned; also the number of valid page_list[] entries */
345 int nr_iovecs;
/* one entry per iovec, indexed in step with the iovec array */
346 struct dma_page_list page_list[0];
347};
348
/*
 * Pin the user pages behind the iovecs needed for len bytes.  Returns NULL
 * when running with KERNEL_DS, an ERR_PTR() value on failure, otherwise a
 * list to be released with dma_unpin_iovec_pages().
 */
349struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len);
/* Dirty and release every pinned page, then free the list; NULL is ignored. */
350void dma_unpin_iovec_pages(struct dma_pinned_list* pinned_list);
351
/*
 * Copy len bytes from kdata into iov.  Uses memcpy_toiovec() when chan is
 * NULL; otherwise issues DMA copies, into the pinned pages when pinned_list
 * is given.  The iovec's iov_base/iov_len are updated as data is copied.
 */
352dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
353 struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len);
/*
 * Copy len bytes starting at offset within page into iov.  Falls back to a
 * kmap() + memcpy_toiovec() copy when chan or pinned_list is NULL.
 */
354dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
355 struct dma_pinned_list *pinned_list, struct page *page,
356 unsigned int offset, size_t len);
357
336#endif /* CONFIG_DMA_ENGINE */ 358#endif /* CONFIG_DMA_ENGINE */
337#endif /* DMAENGINE_H */ 359#endif /* DMAENGINE_H */
diff --git a/include/net/netdma.h b/include/net/netdma.h
index cbfe89d7e5d0..19760eb131aa 100644
--- a/include/net/netdma.h
+++ b/include/net/netdma.h
@@ -23,6 +23,7 @@
23#include <linux/config.h> 23#include <linux/config.h>
24#ifdef CONFIG_NET_DMA 24#ifdef CONFIG_NET_DMA
25#include <linux/dmaengine.h> 25#include <linux/dmaengine.h>
26#include <linux/skbuff.h>
26 27
27static inline struct dma_chan *get_softnet_dma(void) 28static inline struct dma_chan *get_softnet_dma(void)
28{ 29{
@@ -34,5 +35,10 @@ static inline struct dma_chan *get_softnet_dma(void)
34 rcu_read_unlock(); 35 rcu_read_unlock();
35 return chan; 36 return chan;
36} 37}
38
/*
 * Copy len bytes of skb data, starting at offset, into the iovec using the
 * given DMA channel.  skb is not const: the definition in
 * net/core/user_dma.c stores the resulting cookie in skb->dma_cookie.
 */
int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
		struct sk_buff *skb, int offset, struct iovec *to,
		size_t len, struct dma_pinned_list *pinned_list);
42
37#endif /* CONFIG_NET_DMA */ 43#endif /* CONFIG_NET_DMA */
38#endif /* NETDMA_H */ 44#endif /* NETDMA_H */
diff --git a/net/core/Makefile b/net/core/Makefile
index 79fe12cced27..e9bd2467d5a9 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -16,3 +16,4 @@ obj-$(CONFIG_NET_DIVERT) += dv.o
16obj-$(CONFIG_NET_PKTGEN) += pktgen.o 16obj-$(CONFIG_NET_PKTGEN) += pktgen.o
17obj-$(CONFIG_WIRELESS_EXT) += wireless.o 17obj-$(CONFIG_WIRELESS_EXT) += wireless.o
18obj-$(CONFIG_NETPOLL) += netpoll.o 18obj-$(CONFIG_NETPOLL) += netpoll.o
19obj-$(CONFIG_NET_DMA) += user_dma.o
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
new file mode 100644
index 000000000000..9eee91bcbf3f
--- /dev/null
+++ b/net/core/user_dma.c
@@ -0,0 +1,127 @@
1/*
2 * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
3 * Portions based on net/core/datagram.c and copyrighted by their authors.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free
7 * Software Foundation; either version 2 of the License, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59
17 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 *
19 * The full GNU General Public License is included in this distribution in the
20 * file called COPYING.
21 */
22
23/*
24 * This code allows the net stack to make use of a DMA engine for
25 * skb to iovec copies.
26 */
27
28#include <linux/dmaengine.h>
29#include <linux/socket.h>
30#include <linux/rtnetlink.h> /* for BUG_TRAP */
31#include <net/tcp.h>
32
33/**
34 * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
35 * @skb - buffer to copy
36 * @offset - offset in the buffer to start copying from
37 * @iovec - io vector to copy to
38 * @len - amount of data to copy from buffer to iovec
39 * @pinned_list - locked iovec buffer data
40 *
41 * Note: the iovec is modified during the copy.
42 */
43int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
44 struct sk_buff *skb, int offset, struct iovec *to,
45 size_t len, struct dma_pinned_list *pinned_list)
46{
47 int start = skb_headlen(skb);
48 int i, copy = start - offset;
49 dma_cookie_t cookie = 0;
50
51 /* Copy header. */
52 if (copy > 0) {
53 if (copy > len)
54 copy = len;
55 cookie = dma_memcpy_to_iovec(chan, to, pinned_list,
56 skb->data + offset, copy);
57 if (cookie < 0)
58 goto fault;
59 len -= copy;
60 if (len == 0)
61 goto end;
62 offset += copy;
63 }
64
65 /* Copy paged appendix. Hmm... why does this look so complicated? */
66 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
67 int end;
68
69 BUG_TRAP(start <= offset + len);
70
71 end = start + skb_shinfo(skb)->frags[i].size;
72 copy = end - offset;
73 if ((copy = end - offset) > 0) {
74 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
75 struct page *page = frag->page;
76
77 if (copy > len)
78 copy = len;
79
80 cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page,
81 frag->page_offset + offset - start, copy);
82 if (cookie < 0)
83 goto fault;
84 len -= copy;
85 if (len == 0)
86 goto end;
87 offset += copy;
88 }
89 start = end;
90 }
91
92 if (skb_shinfo(skb)->frag_list) {
93 struct sk_buff *list = skb_shinfo(skb)->frag_list;
94
95 for (; list; list = list->next) {
96 int end;
97
98 BUG_TRAP(start <= offset + len);
99
100 end = start + list->len;
101 copy = end - offset;
102 if (copy > 0) {
103 if (copy > len)
104 copy = len;
105 cookie = dma_skb_copy_datagram_iovec(chan, list,
106 offset - start, to, copy,
107 pinned_list);
108 if (cookie < 0)
109 goto fault;
110 len -= copy;
111 if (len == 0)
112 goto end;
113 offset += copy;
114 }
115 start = end;
116 }
117 }
118
119end:
120 if (!len) {
121 skb->dma_cookie = cookie;
122 return cookie;
123 }
124
125fault:
126 return -EFAULT;
127}