author     Paul Mackerras <paulus@samba.org>    2005-05-12 19:47:12 -0400
committer  Jeff Garzik <jgarzik@pobox.com>      2005-05-12 19:47:12 -0400
commit     516cd15f1c0dd6eada3619915b113b4e5baccc7a
tree       0ca69fcdeff9aa93b3b76722f97b512cbb5e403b
parent     88d7bd8cb9eb8d64bf7997600b0d64f7834047c5
[PATCH] PPP multilink fragmentation improvements
Here's a patch for -mm for now. Not sure whose territory this falls
in, so I'm sending it to everyone I can think of. :)
Some time ago I did some experiments with using PPP multilink over
largish numbers of channels (up to 32). The TCP performance was
woeful due to wildly fluctuating packet latencies, which turned out to
be because we would sometimes split a packet across all 32 channels,
and sometimes we would send a whole packet down a single channel.
This patch fixes those problems by being a bit cleverer about how the
packets are split across the available channels; in particular, it
waits until at least half of the channels can take another fragment
before starting to split up the next packet.
The patch also fixes a buglet in the multilink reconstruction code
where it would discard incoming packets that had just the multilink
header and no data. Such packets are valid and shouldn't be
discarded.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
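For readers who want to see the splitting logic described above in isolation,
here is a small userspace sketch (not kernel code) of the size arithmetic the
patched ppp_mp_explode() performs. ROUNDUP mirrors the macro in ppp_generic.c,
and the half-free-channels check and the nbigger bookkeeping follow the patch;
per-channel MTU clamping, the zero-length keep-alive fragments, locking and
queueing are deliberately left out, and the split() helper and its example
numbers are illustrative only.

/*
 * Standalone illustration of the fragment-size arithmetic in the
 * patched ppp_mp_explode().  Not kernel code: MTU clamping, locking,
 * queueing and zero-length fragments are omitted.
 */
#include <stdio.h>

#define ROUNDUP(n, x) (((n) + (x) - 1) / (x))

static void split(int len, int nfree, int navail)
{
	int fragsize, nbigger, flen;

	/* the patch defers the packet until at least half the channels are free */
	if (nfree == 0 || nfree < navail / 2) {
		printf("len=%d: deferred, only %d of %d channels free\n",
		       len, nfree, navail);
		return;
	}

	fragsize = len;
	if (nfree > 1)
		fragsize = ROUNDUP(fragsize, nfree);
	/* nbigger channels get fragsize bytes, the rest get fragsize-1 */
	nbigger = len % nfree;

	printf("len=%d over %d free channels:", len, nfree);
	while (len > 0) {
		flen = fragsize;
		if (flen > len)
			flen = len;
		printf(" %d", flen);
		len -= flen;
		if (--nbigger == 0 && fragsize > 0)
			--fragsize;
	}
	printf("\n");
}

int main(void)
{
	split(1500, 32, 32);	/* 28 fragments of 47 bytes and 4 of 46 */
	split(1500, 10, 32);	/* fewer than half the channels free: wait */
	return 0;
}

With all 32 channels free, a 1500-byte packet is spread nearly evenly across
them (28 fragments of 47 bytes plus 4 of 46) rather than going down one channel
whole, which is what keeps the per-packet latency stable.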
 drivers/net/ppp_generic.c | 177
 1 file changed, 110 insertions(+), 67 deletions(-)
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 3b377f6cd4a0..ad4b58af6b76 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1217,36 +1217,43 @@ ppp_push(struct ppp *ppp)
  */
 static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 {
-	int nch, len, fragsize;
+	int len, fragsize;
 	int i, bits, hdrlen, mtu;
-	int flen, fnb;
+	int flen;
+	int navail, nfree;
+	int nbigger;
 	unsigned char *p, *q;
 	struct list_head *list;
 	struct channel *pch;
 	struct sk_buff *frag;
 	struct ppp_channel *chan;
 
-	nch = 0;
+	nfree = 0;	/* # channels which have no packet already queued */
+	navail = 0;	/* total # of usable channels (not deregistered) */
 	hdrlen = (ppp->flags & SC_MP_XSHORTSEQ)? MPHDRLEN_SSN: MPHDRLEN;
+	i = 0;
 	list = &ppp->channels;
 	while ((list = list->next) != &ppp->channels) {
 		pch = list_entry(list, struct channel, clist);
-		nch += pch->avail = (skb_queue_len(&pch->file.xq) == 0);
-		/*
-		 * If a channel hasn't had a fragment yet, it has to get
-		 * one before we send any fragments on later channels.
-		 * If it can't take a fragment now, don't give any
-		 * to subsequent channels.
-		 */
-		if (!pch->had_frag && !pch->avail) {
-			while ((list = list->next) != &ppp->channels) {
-				pch = list_entry(list, struct channel, clist);
-				pch->avail = 0;
+		navail += pch->avail = (pch->chan != NULL);
+		if (pch->avail) {
+			if (skb_queue_len(&pch->file.xq) == 0
+			    || !pch->had_frag) {
+				pch->avail = 2;
+				++nfree;
 			}
-			break;
+			if (!pch->had_frag && i < ppp->nxchan)
+				ppp->nxchan = i;
 		}
+		++i;
 	}
-	if (nch == 0)
+
+	/*
+	 * Don't start sending this packet unless at least half of
+	 * the channels are free.  This gives much better TCP
+	 * performance if we have a lot of channels.
+	 */
+	if (nfree == 0 || nfree < navail / 2)
 		return 0;	/* can't take now, leave it in xmit_pending */
 
 	/* Do protocol field compression (XXX this should be optional) */
@@ -1257,14 +1264,19 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 		--len;
 	}
 
-	/* decide on fragment size */
+	/*
+	 * Decide on fragment size.
+	 * We create a fragment for each free channel regardless of
+	 * how small they are (i.e. even 0 length) in order to minimize
+	 * the time that it will take to detect when a channel drops
+	 * a fragment.
+	 */
 	fragsize = len;
-	if (nch > 1) {
-		int maxch = ROUNDUP(len, MIN_FRAG_SIZE);
-		if (nch > maxch)
-			nch = maxch;
-		fragsize = ROUNDUP(fragsize, nch);
-	}
+	if (nfree > 1)
+		fragsize = ROUNDUP(fragsize, nfree);
+	/* nbigger channels get fragsize bytes, the rest get fragsize-1,
+	   except if nbigger==0, then they all get fragsize. */
+	nbigger = len % nfree;
 
 	/* skip to the channel after the one we last used
 	   and start at that one */
@@ -1278,7 +1290,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 
 	/* create a fragment for each channel */
 	bits = B;
-	do {
+	while (nfree > 0 || len > 0) {
 		list = list->next;
 		if (list == &ppp->channels) {
 			i = 0;
@@ -1289,61 +1301,92 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 		if (!pch->avail)
 			continue;
 
+		/*
+		 * Skip this channel if it has a fragment pending already and
+		 * we haven't given a fragment to all of the free channels.
+		 */
+		if (pch->avail == 1) {
+			if (nfree > 0)
+				continue;
+		} else {
+			--nfree;
+			pch->avail = 1;
+		}
+
 		/* check the channel's mtu and whether it is still attached. */
 		spin_lock_bh(&pch->downl);
-		if (pch->chan == 0 || (mtu = pch->chan->mtu) < hdrlen) {
-			/* can't use this channel */
+		if (pch->chan == NULL) {
+			/* can't use this channel, it's being deregistered */
 			spin_unlock_bh(&pch->downl);
 			pch->avail = 0;
-			if (--nch == 0)
+			if (--navail == 0)
 				break;
 			continue;
 		}
 
 		/*
-		 * We have to create multiple fragments for this channel
-		 * if fragsize is greater than the channel's mtu.
+		 * Create a fragment for this channel of
+		 * min(max(mtu+2-hdrlen, 4), fragsize, len) bytes.
+		 * If mtu+2-hdrlen < 4, that is a ridiculously small
+		 * MTU, so we use mtu = 2 + hdrlen.
 		 */
 		if (fragsize > len)
 			fragsize = len;
-		for (flen = fragsize; flen > 0; flen -= fnb) {
-			fnb = flen;
-			if (fnb > mtu + 2 - hdrlen)
-				fnb = mtu + 2 - hdrlen;
-			if (fnb >= len)
-				bits |= E;
-			frag = alloc_skb(fnb + hdrlen, GFP_ATOMIC);
-			if (frag == 0)
-				goto noskb;
-			q = skb_put(frag, fnb + hdrlen);
-			/* make the MP header */
-			q[0] = PPP_MP >> 8;
-			q[1] = PPP_MP;
-			if (ppp->flags & SC_MP_XSHORTSEQ) {
-				q[2] = bits + ((ppp->nxseq >> 8) & 0xf);
-				q[3] = ppp->nxseq;
-			} else {
-				q[2] = bits;
-				q[3] = ppp->nxseq >> 16;
-				q[4] = ppp->nxseq >> 8;
-				q[5] = ppp->nxseq;
-			}
-
-			/* copy the data in */
-			memcpy(q + hdrlen, p, fnb);
-
-			/* try to send it down the channel */
-			chan = pch->chan;
-			if (!chan->ops->start_xmit(chan, frag))
-				skb_queue_tail(&pch->file.xq, frag);
-			pch->had_frag = 1;
-			p += fnb;
-			len -= fnb;
-			++ppp->nxseq;
-			bits = 0;
+		flen = fragsize;
+		mtu = pch->chan->mtu + 2 - hdrlen;
+		if (mtu < 4)
+			mtu = 4;
+		if (flen > mtu)
+			flen = mtu;
+		if (flen == len && nfree == 0)
+			bits |= E;
+		frag = alloc_skb(flen + hdrlen + (flen == 0), GFP_ATOMIC);
+		if (frag == 0)
+			goto noskb;
+		q = skb_put(frag, flen + hdrlen);
+
+		/* make the MP header */
+		q[0] = PPP_MP >> 8;
+		q[1] = PPP_MP;
+		if (ppp->flags & SC_MP_XSHORTSEQ) {
+			q[2] = bits + ((ppp->nxseq >> 8) & 0xf);
+			q[3] = ppp->nxseq;
+		} else {
+			q[2] = bits;
+			q[3] = ppp->nxseq >> 16;
+			q[4] = ppp->nxseq >> 8;
+			q[5] = ppp->nxseq;
 		}
+
+		/*
+		 * Copy the data in.
+		 * Unfortunately there is a bug in older versions of
+		 * the Linux PPP multilink reconstruction code where it
+		 * drops 0-length fragments.  Therefore we make sure the
+		 * fragment has at least one byte of data.  Any bytes
+		 * we add in this situation will end up as padding on the
+		 * end of the reconstructed packet.
+		 */
+		if (flen == 0)
+			*skb_put(frag, 1) = 0;
+		else
+			memcpy(q + hdrlen, p, flen);
+
+		/* try to send it down the channel */
+		chan = pch->chan;
+		if (skb_queue_len(&pch->file.xq)
+		    || !chan->ops->start_xmit(chan, frag))
+			skb_queue_tail(&pch->file.xq, frag);
+		pch->had_frag = 1;
+		p += flen;
+		len -= flen;
+		++ppp->nxseq;
+		bits = 0;
 		spin_unlock_bh(&pch->downl);
-	} while (len > 0);
+
+		if (--nbigger == 0 && fragsize > 0)
+			--fragsize;
+	}
 	ppp->nxchan = i;
 
 	return 1;
@@ -1422,7 +1465,7 @@ ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
 		kfree_skb(skb);
 		return;
 	}
-	
+
 	proto = PPP_PROTO(skb);
 	read_lock_bh(&pch->upl);
 	if (pch->ppp == 0 || proto >= 0xc000 || proto == PPP_CCPFRAG) {
@@ -1691,7 +1734,7 @@ ppp_receive_mp_frame(struct ppp *ppp, struct sk_buff *skb, struct channel *pch)
 	struct list_head *l;
 	int mphdrlen = (ppp->flags & SC_MP_SHORTSEQ)? MPHDRLEN_SSN: MPHDRLEN;
 
-	if (!pskb_may_pull(skb, mphdrlen + 1) || ppp->mrru == 0)
+	if (!pskb_may_pull(skb, mphdrlen) || ppp->mrru == 0)
 		goto err;	/* no good, throw it away */
 
 	/* Decode sequence number and begin/end bits */