aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorJens Axboe <jens.axboe@oracle.com>2007-11-07 02:30:13 -0500
committerDavid S. Miller <davem@davemloft.net>2008-01-28 17:53:31 -0500
commit9c55e01c0cc835818475a6ce8c4d684df9949ac8 (patch)
tree1115311436677f837a4b477e3fd23c5e0ae184ef /net/core
parentbbdfc2f70610bebb841d0874dc901c648308e43a (diff)
[TCP]: Splice receive support.
Support for network splice receive. Signed-off-by: Jens Axboe <jens.axboe@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r--net/core/skbuff.c246
1 files changed, 246 insertions, 0 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b6283779e93d..98420f9c4b6d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -52,6 +52,7 @@
52#endif 52#endif
53#include <linux/string.h> 53#include <linux/string.h>
54#include <linux/skbuff.h> 54#include <linux/skbuff.h>
55#include <linux/splice.h>
55#include <linux/cache.h> 56#include <linux/cache.h>
56#include <linux/rtnetlink.h> 57#include <linux/rtnetlink.h>
57#include <linux/init.h> 58#include <linux/init.h>
@@ -71,6 +72,40 @@
71static struct kmem_cache *skbuff_head_cache __read_mostly; 72static struct kmem_cache *skbuff_head_cache __read_mostly;
72static struct kmem_cache *skbuff_fclone_cache __read_mostly; 73static struct kmem_cache *skbuff_fclone_cache __read_mostly;
73 74
75static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
76 struct pipe_buffer *buf)
77{
78 struct sk_buff *skb = (struct sk_buff *) buf->private;
79
80 kfree_skb(skb);
81}
82
83static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
84 struct pipe_buffer *buf)
85{
86 struct sk_buff *skb = (struct sk_buff *) buf->private;
87
88 skb_get(skb);
89}
90
/*
 * ->steal hook of sock_pipe_buf_ops. Unconditionally returns 1; neither
 * @pipe nor @buf is looked at.
 */
static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
			       struct pipe_buffer *buf)
{
	return 1;
}
96
97
98/* Pipe buffer operations for a socket. */
99static struct pipe_buf_operations sock_pipe_buf_ops = {
100 .can_merge = 0,
101 .map = generic_pipe_buf_map,
102 .unmap = generic_pipe_buf_unmap,
103 .confirm = generic_pipe_buf_confirm,
104 .release = sock_pipe_buf_release,
105 .steal = sock_pipe_buf_steal,
106 .get = sock_pipe_buf_get,
107};
108
74/* 109/*
75 * Keep out-of-line to prevent kernel bloat. 110 * Keep out-of-line to prevent kernel bloat.
76 * __builtin_return_address is not used because it is not always 111 * __builtin_return_address is not used because it is not always
@@ -1122,6 +1157,217 @@ fault:
1122 return -EFAULT; 1157 return -EFAULT;
1123} 1158}
1124 1159
1160/*
1161 * Callback from splice_to_pipe(), if we need to release some pages
1162 * at the end of the spd in case we error'ed out in filling the pipe.
1163 */
1164static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
1165{
1166 struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private;
1167
1168 kfree_skb(skb);
1169}
1170
1171/*
1172 * Fill page/offset/length into spd, if it can hold more pages.
1173 */
1174static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
1175 unsigned int len, unsigned int offset,
1176 struct sk_buff *skb)
1177{
1178 if (unlikely(spd->nr_pages == PIPE_BUFFERS))
1179 return 1;
1180
1181 spd->pages[spd->nr_pages] = page;
1182 spd->partial[spd->nr_pages].len = len;
1183 spd->partial[spd->nr_pages].offset = offset;
1184 spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb);
1185 spd->nr_pages++;
1186 return 0;
1187}
1188
1189/*
1190 * Map linear and fragment data from the skb to spd. Returns number of
1191 * pages mapped.
1192 */
1193static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
1194 unsigned int *total_len,
1195 struct splice_pipe_desc *spd)
1196{
1197 unsigned int nr_pages = spd->nr_pages;
1198 unsigned int poff, plen, len, toff, tlen;
1199 int headlen, seg;
1200
1201 toff = *offset;
1202 tlen = *total_len;
1203 if (!tlen)
1204 goto err;
1205
1206 /*
1207 * if the offset is greater than the linear part, go directly to
1208 * the fragments.
1209 */
1210 headlen = skb_headlen(skb);
1211 if (toff >= headlen) {
1212 toff -= headlen;
1213 goto map_frag;
1214 }
1215
1216 /*
1217 * first map the linear region into the pages/partial map, skipping
1218 * any potential initial offset.
1219 */
1220 len = 0;
1221 while (len < headlen) {
1222 void *p = skb->data + len;
1223
1224 poff = (unsigned long) p & (PAGE_SIZE - 1);
1225 plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff);
1226 len += plen;
1227
1228 if (toff) {
1229 if (plen <= toff) {
1230 toff -= plen;
1231 continue;
1232 }
1233 plen -= toff;
1234 poff += toff;
1235 toff = 0;
1236 }
1237
1238 plen = min(plen, tlen);
1239 if (!plen)
1240 break;
1241
1242 /*
1243 * just jump directly to update and return, no point
1244 * in going over fragments when the output is full.
1245 */
1246 if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb))
1247 goto done;
1248
1249 tlen -= plen;
1250 }
1251
1252 /*
1253 * then map the fragments
1254 */
1255map_frag:
1256 for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
1257 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
1258
1259 plen = f->size;
1260 poff = f->page_offset;
1261
1262 if (toff) {
1263 if (plen <= toff) {
1264 toff -= plen;
1265 continue;
1266 }
1267 plen -= toff;
1268 poff += toff;
1269 toff = 0;
1270 }
1271
1272 plen = min(plen, tlen);
1273 if (!plen)
1274 break;
1275
1276 if (spd_fill_page(spd, f->page, plen, poff, skb))
1277 break;
1278
1279 tlen -= plen;
1280 }
1281
1282done:
1283 if (spd->nr_pages - nr_pages) {
1284 *offset = 0;
1285 *total_len = tlen;
1286 return 0;
1287 }
1288err:
1289 return 1;
1290}
1291
1292/*
1293 * Map data from the skb to a pipe. Should handle both the linear part,
1294 * the fragments, and the frag list. It does NOT handle frag lists within
1295 * the frag list, if such a thing exists. We'd probably need to recurse to
1296 * handle that cleanly.
1297 */
1298int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
1299 struct pipe_inode_info *pipe, unsigned int tlen,
1300 unsigned int flags)
1301{
1302 struct partial_page partial[PIPE_BUFFERS];
1303 struct page *pages[PIPE_BUFFERS];
1304 struct splice_pipe_desc spd = {
1305 .pages = pages,
1306 .partial = partial,
1307 .flags = flags,
1308 .ops = &sock_pipe_buf_ops,
1309 .spd_release = sock_spd_release,
1310 };
1311 struct sk_buff *skb;
1312
1313 /*
1314 * I'd love to avoid the clone here, but tcp_read_sock()
1315 * ignores reference counts and unconditonally kills the sk_buff
1316 * on return from the actor.
1317 */
1318 skb = skb_clone(__skb, GFP_KERNEL);
1319 if (unlikely(!skb))
1320 return -ENOMEM;
1321
1322 /*
1323 * __skb_splice_bits() only fails if the output has no room left,
1324 * so no point in going over the frag_list for the error case.
1325 */
1326 if (__skb_splice_bits(skb, &offset, &tlen, &spd))
1327 goto done;
1328 else if (!tlen)
1329 goto done;
1330
1331 /*
1332 * now see if we have a frag_list to map
1333 */
1334 if (skb_shinfo(skb)->frag_list) {
1335 struct sk_buff *list = skb_shinfo(skb)->frag_list;
1336
1337 for (; list && tlen; list = list->next) {
1338 if (__skb_splice_bits(list, &offset, &tlen, &spd))
1339 break;
1340 }
1341 }
1342
1343done:
1344 /*
1345 * drop our reference to the clone, the pipe consumption will
1346 * drop the rest.
1347 */
1348 kfree_skb(skb);
1349
1350 if (spd.nr_pages) {
1351 int ret;
1352
1353 /*
1354 * Drop the socket lock, otherwise we have reverse
1355 * locking dependencies between sk_lock and i_mutex
1356 * here as compared to sendfile(). We enter here
1357 * with the socket lock held, and splice_to_pipe() will
1358 * grab the pipe inode lock. For sendfile() emulation,
1359 * we call into ->sendpage() with the i_mutex lock held
1360 * and networking will grab the socket lock.
1361 */
1362 release_sock(__skb->sk);
1363 ret = splice_to_pipe(pipe, &spd);
1364 lock_sock(__skb->sk);
1365 return ret;
1366 }
1367
1368 return 0;
1369}
1370
1125/** 1371/**
1126 * skb_store_bits - store bits from kernel buffer to skb 1372 * skb_store_bits - store bits from kernel buffer to skb
1127 * @skb: destination buffer 1373 * @skb: destination buffer