author	Gabriele Paoloni <gabriele.paoloni@intel.com>	2009-03-13 19:09:12 -0400
committer	David S. Miller <davem@davemloft.net>	2009-03-13 19:09:12 -0400
commit	9c705260feea6ae329bc6b6d5f6d2ef0227eda0a (patch)
tree	0c8dc286ff35bbd0c75be020bee609b771084ae3 /drivers
parent	a2025b8b1039e5abaa38319b2eaab3b17867479a (diff)
ppp: ppp_mp_explode() redesign
I found that the PPP subsystem does not work properly when connecting channels with different speeds to the same bundle.

Problem Description:

Because the "ppp_mp_explode" function fragments the sk_buff buffer evenly among the PPP channels that are connected to a certain PPP unit to make up a bundle, if we are transmitting using an upper-layer protocol that requires an Ack before sending the next packet (like TCP/IP, for example), we will have a bandwidth bottleneck on the slowest channel of the bundle.

Let's clarify with an example. Consider a scenario where two PPP links make up a bundle: a slow link (10KB/sec) and a fast link (1000KB/sec), both working at full bandwidth. On top we have a TCP/IP stack sending a 1000-byte sk_buff buffer down to the PPP subsystem. The "ppp_mp_explode" function will divide the buffer into two fragments of 500 bytes each (neglecting all the headers, CRC, flags, etc.). Before the TCP/IP stack sends out the next buffer, it has to wait for the ACK response from the remote peer, so it has to wait for both fragments to have been sent over the two PPP links, received by the remote peer, and reconstructed. The resulting behaviour is that, rather than having a bundle working at 1010KB/sec (the sum of the channels' bandwidths), we get a bundle working at 20KB/sec (double the slowest channel's bandwidth).

Problem Solution:

The problem has been solved by redesigning the "ppp_mp_explode" function so that it splits the sk_buff buffer according to the speeds of the underlying PPP channels (the speeds of the serial interfaces respectively attached to the PPP channels). Referring to the above example, the redesigned "ppp_mp_explode" function will now divide the 1000-byte buffer into two fragments whose sizes are set according to the speeds of the channels they are going to be sent on (e.g. 10 bytes on the 10KB/sec channel and 990 bytes on the 1000KB/sec channel). The reworked function delivers the same performance as the original one in optimal working conditions (i.e. a bundle made up of PPP links all working at the same speed), while greatly improving performance on bundles made up of channels working at different speeds.

Signed-off-by: Gabriele Paoloni <gabriele.paoloni@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
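To make the arithmetic concrete, here is a minimal user-space C sketch of the speed-proportional split described above. It is an illustration only, not the kernel code; the channel speeds and buffer size come from the example, and all identifiers are local to the sketch.

	#include <stdio.h>

	int main(void)
	{
		int speed[] = { 10, 1000 };	/* channel speeds in KB/sec */
		int nch = 2;			/* channels in the bundle */
		int len = 1000;			/* bytes handed down by TCP/IP */
		int totspeed = 0, sent = 0, i;

		for (i = 0; i < nch; i++)
			totspeed += speed[i];

		for (i = 0; i < nch; i++) {
			/* each channel gets a share of the buffer in
			   proportion to its speed; the last channel takes
			   the remainder so every byte is accounted for */
			int flen = (i == nch - 1) ? len - sent
						  : len * speed[i] / totspeed;
			sent += flen;
			printf("channel %d (%4d KB/s): %d bytes\n",
			       i, speed[i], flen);
		}
		/* prints roughly 10 and 990 bytes (9 and 991 with plain
		   integer division), matching the example above */
		return 0;
	}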
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/net/ppp_async.c		3
-rw-r--r--	drivers/net/ppp_generic.c	211
-rw-r--r--	drivers/net/ppp_synctty.c	3
3 files changed, 126 insertions(+), 91 deletions(-)
diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c
index 5de6fedd1d76..6de8399d6dd9 100644
--- a/drivers/net/ppp_async.c
+++ b/drivers/net/ppp_async.c
@@ -157,6 +157,7 @@ ppp_asynctty_open(struct tty_struct *tty)
 {
 	struct asyncppp *ap;
 	int err;
+	int speed;
 
 	if (tty->ops->write == NULL)
 		return -EOPNOTSUPP;
@@ -187,6 +188,8 @@ ppp_asynctty_open(struct tty_struct *tty)
 	ap->chan.private = ap;
 	ap->chan.ops = &async_ops;
 	ap->chan.mtu = PPP_MRU;
+	speed = tty_get_baud_rate(tty);
+	ap->chan.speed = speed;
 	err = ppp_register_channel(&ap->chan);
 	if (err)
 		goto out_free;
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 42d455578453..8ee91421db12 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -167,6 +167,7 @@ struct channel {
 	u8		avail;		/* flag used in multilink stuff */
 	u8		had_frag;	/* >= 1 fragments have been sent */
 	u32		lastseq;	/* MP: last sequence # received */
+	int		speed;		/* speed of the corresponding ppp channel */
 #endif /* CONFIG_PPP_MULTILINK */
 };
 
@@ -1307,138 +1308,181 @@ ppp_push(struct ppp *ppp)
  */
 static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 {
-	int len, fragsize;
+	int len, totlen;
 	int i, bits, hdrlen, mtu;
 	int flen;
-	int navail, nfree;
+	int navail, nfree, nzero;
 	int nbigger;
+	int totspeed;
+	int totfree;
 	unsigned char *p, *q;
 	struct list_head *list;
 	struct channel *pch;
 	struct sk_buff *frag;
 	struct ppp_channel *chan;
 
-	nfree = 0;	/* # channels which have no packet already queued */
+	totspeed = 0;	/* total bitrate of the bundle */
+	nfree = 0;	/* # channels which have no packet already queued */
 	navail = 0;	/* total # of usable channels (not deregistered) */
+	nzero = 0;	/* number of channels with zero speed associated */
+	totfree = 0;	/* total # of channels available and
+			 * having no queued packets before
+			 * starting the fragmentation */
+
 	hdrlen = (ppp->flags & SC_MP_XSHORTSEQ)? MPHDRLEN_SSN: MPHDRLEN;
 	i = 0;
 	list_for_each_entry(pch, &ppp->channels, clist) {
 		navail += pch->avail = (pch->chan != NULL);
-		if (pch->avail) {
+		pch->speed = pch->chan->speed;
+		if (pch->avail) {
 			if (skb_queue_empty(&pch->file.xq) ||
 			    !pch->had_frag) {
-				pch->avail = 2;
-				++nfree;
-			}
-			if (!pch->had_frag && i < ppp->nxchan)
-				ppp->nxchan = i;
+				if (pch->speed == 0)
+					nzero++;
+				else
+					totspeed += pch->speed;
+
+				pch->avail = 2;
+				++nfree;
+				++totfree;
+			}
+			if (!pch->had_frag && i < ppp->nxchan)
+				ppp->nxchan = i;
 		}
 		++i;
 	}
-
 	/*
 	 * Don't start sending this packet unless at least half of
 	 * the channels are free.  This gives much better TCP
 	 * performance if we have a lot of channels.
 	 */
 	if (nfree == 0 || nfree < navail / 2)
 		return 0; /* can't take now, leave it in xmit_pending */
 
 	/* Do protocol field compression (XXX this should be optional) */
 	p = skb->data;
 	len = skb->len;
 	if (*p == 0) {
 		++p;
 		--len;
 	}
 
-	/*
-	 * Decide on fragment size.
-	 * We create a fragment for each free channel regardless of
-	 * how small they are (i.e. even 0 length) in order to minimize
-	 * the time that it will take to detect when a channel drops
-	 * a fragment.
-	 */
-	fragsize = len;
-	if (nfree > 1)
-		fragsize = DIV_ROUND_UP(fragsize, nfree);
-	/* nbigger channels get fragsize bytes, the rest get fragsize-1,
-	   except if nbigger==0, then they all get fragsize. */
-	nbigger = len % nfree;
-
-	/* skip to the channel after the one we last used
-	   and start at that one */
+	totlen = len;
+	nbigger = len % nfree;
+
+	/* skip to the channel after the one we last used
+	   and start at that one */
 	list = &ppp->channels;
 	for (i = 0; i < ppp->nxchan; ++i) {
 		list = list->next;
 		if (list == &ppp->channels) {
 			i = 0;
 			break;
 		}
 	}
 
 	/* create a fragment for each channel */
 	bits = B;
-	while (nfree > 0 || len > 0) {
+	while (nfree > 0 && len > 0) {
 		list = list->next;
 		if (list == &ppp->channels) {
 			i = 0;
 			continue;
 		}
 		pch = list_entry(list, struct channel, clist);
 		++i;
 		if (!pch->avail)
 			continue;
 
 		/*
 		 * Skip this channel if it has a fragment pending already and
 		 * we haven't given a fragment to all of the free channels.
 		 */
 		if (pch->avail == 1) {
 			if (nfree > 0)
 				continue;
 		} else {
-			--nfree;
 			pch->avail = 1;
 		}
 
 		/* check the channel's mtu and whether it is still attached. */
 		spin_lock_bh(&pch->downl);
 		if (pch->chan == NULL) {
 			/* can't use this channel, it's being deregistered */
+			if (pch->speed == 0)
+				nzero--;
+			else
+				totspeed -= pch->speed;
+
 			spin_unlock_bh(&pch->downl);
 			pch->avail = 0;
-			if (--navail == 0)
+			totlen = len;
+			totfree--;
+			nfree--;
+			if (--navail == 0)
 				break;
 			continue;
 		}
 
 		/*
-		 * Create a fragment for this channel of
-		 * min(max(mtu+2-hdrlen, 4), fragsize, len) bytes.
-		 * If mtu+2-hdrlen < 4, that is a ridiculously small
-		 * MTU, so we use mtu = 2 + hdrlen.
+		 * if the channel speed is not set, divide
+		 * the packet evenly among the free channels;
+		 * otherwise divide it according to the speed
+		 * of the channel we are going to transmit on
+		 */
+		if (pch->speed == 0) {
+			flen = totlen/nfree;
+			if (nbigger > 0) {
+				flen++;
+				nbigger--;
+			}
+		} else {
+			flen = (((totfree - nzero)*(totlen + hdrlen*totfree)) /
+				((totspeed*totfree)/pch->speed)) - hdrlen;
+			if (nbigger > 0) {
+				flen += ((totfree - nzero)*pch->speed)/totspeed;
+				nbigger -= ((totfree - nzero)*pch->speed)/
+						totspeed;
+			}
+		}
+		nfree--;
+
+		/*
+		 * check if we are on the last channel or
+		 * we exceeded the length of the data to
+		 * fragment
+		 */
+		if ((nfree == 0) || (flen > len))
+			flen = len;
+		/*
+		 * it is not worth it to tx on slow channels:
+		 * in that case the resulting flen according to the
+		 * above formula will be equal to or less than zero.
+		 * Skip the channel in this case
 		 */
-		if (fragsize > len)
-			fragsize = len;
-		flen = fragsize;
-		mtu = pch->chan->mtu + 2 - hdrlen;
-		if (mtu < 4)
-			mtu = 4;
+		if (flen <= 0) {
+			pch->avail = 2;
+			spin_unlock_bh(&pch->downl);
+			continue;
+		}
+
+		mtu = pch->chan->mtu + 2 - hdrlen;
+		if (mtu < 4)
+			mtu = 4;
 		if (flen > mtu)
 			flen = mtu;
-		if (flen == len && nfree == 0)
+		if (flen == len)
 			bits |= E;
 		frag = alloc_skb(flen + hdrlen + (flen == 0), GFP_ATOMIC);
 		if (!frag)
 			goto noskb;
 		q = skb_put(frag, flen + hdrlen);
 
 		/* make the MP header */
 		q[0] = PPP_MP >> 8;
 		q[1] = PPP_MP;
 		if (ppp->flags & SC_MP_XSHORTSEQ) {
 			q[2] = bits + ((ppp->nxseq >> 8) & 0xf);
 			q[3] = ppp->nxseq;
 		} else {
 			q[2] = bits;
@@ -1447,43 +1491,28 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 			q[5] = ppp->nxseq;
 		}
 
-		/*
-		 * Copy the data in.
-		 * Unfortunately there is a bug in older versions of
-		 * the Linux PPP multilink reconstruction code where it
-		 * drops 0-length fragments.  Therefore we make sure the
-		 * fragment has at least one byte of data.  Any bytes
-		 * we add in this situation will end up as padding on the
-		 * end of the reconstructed packet.
-		 */
-		if (flen == 0)
-			*skb_put(frag, 1) = 0;
-		else
-			memcpy(q + hdrlen, p, flen);
+		memcpy(q + hdrlen, p, flen);
 
 		/* try to send it down the channel */
 		chan = pch->chan;
 		if (!skb_queue_empty(&pch->file.xq) ||
 		    !chan->ops->start_xmit(chan, frag))
 			skb_queue_tail(&pch->file.xq, frag);
 		pch->had_frag = 1;
 		p += flen;
 		len -= flen;
 		++ppp->nxseq;
 		bits = 0;
 		spin_unlock_bh(&pch->downl);
-
-		if (--nbigger == 0 && fragsize > 0)
-			--fragsize;
 	}
 	ppp->nxchan = i;
 
 	return 1;
 
  noskb:
 	spin_unlock_bh(&pch->downl);
 	if (ppp->debug & 1)
 		printk(KERN_ERR "PPP: no memory (fragment)\n");
 	++ppp->dev->stats.tx_errors;
 	++ppp->nxseq;
 	return 1;	/* abandon the frame */
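For reference, when every free channel has a known speed (nzero == 0), the new flen formula above reduces to the channel's speed share of the payload plus all fragment headers, minus its own header. A hedged user-space sketch, plugging the 10KB/sec + 1000KB/sec example from the commit message into the formula verbatim (hdrlen = 4 assumes MPHDRLEN_SSN, i.e. short sequence numbers; all identifiers are local to this sketch):

	#include <stdio.h>

	int main(void)
	{
		int totlen = 1000;	/* payload to fragment */
		int hdrlen = 4;		/* MPHDRLEN_SSN (short sequence numbers) */
		int totfree = 2;	/* free channels */
		int nzero = 0;		/* channels with unknown (zero) speed */
		int totspeed = 1010;	/* 10 + 1000 KB/sec */
		int speed[] = { 10, 1000 };
		int i;

		for (i = 0; i < totfree; i++) {
			/* the flen formula from the patch, evaluated
			   per channel */
			int flen = (((totfree - nzero)*(totlen + hdrlen*totfree)) /
				    ((totspeed*totfree)/speed[i])) - hdrlen;
			printf("speed %4d KB/s -> flen %d\n", speed[i], flen);
		}
		/* prints flen 5 for the slow channel and flen 1004 for
		   the fast one; in the driver the (flen > len) check
		   then clamps the fast channel to the bytes remaining */
		return 0;
	}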
diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c
index 3ea791d16b00..d2fa2db13586 100644
--- a/drivers/net/ppp_synctty.c
+++ b/drivers/net/ppp_synctty.c
@@ -206,6 +206,7 @@ ppp_sync_open(struct tty_struct *tty)
 {
 	struct syncppp *ap;
 	int err;
+	int speed;
 
 	if (tty->ops->write == NULL)
 		return -EOPNOTSUPP;
@@ -234,6 +235,8 @@ ppp_sync_open(struct tty_struct *tty)
 	ap->chan.ops = &sync_ops;
 	ap->chan.mtu = PPP_MRU;
 	ap->chan.hdrlen = 2;	/* for A/C bytes */
+	speed = tty_get_baud_rate(tty);
+	ap->chan.speed = speed;
 	err = ppp_register_channel(&ap->chan);
 	if (err)
 		goto out_free;