Diffstat (limited to 'net')
| -rw-r--r-- | net/core/dev.c | 125 |
| -rw-r--r-- | net/core/skbuff.c | 157 |
| -rw-r--r-- | net/core/sysctl_net_core.c | 46 |
| -rw-r--r-- | net/ipv4/tcp.c | 31 |
| -rw-r--r-- | net/ipv4/tcp_cong.c | 46 |
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 |
| -rw-r--r-- | net/ipv6/tcp_ipv6.c | 2 |
| -rw-r--r-- | net/sched/Kconfig | 12 |
| -rw-r--r-- | net/sched/Makefile | 1 |
| -rw-r--r-- | net/sched/em_text.c | 157 |
10 files changed, 420 insertions, 159 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index ab935778ce81..7016e0c36b3d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -115,18 +115,6 @@
| 115 | #endif /* CONFIG_NET_RADIO */ | 115 | #endif /* CONFIG_NET_RADIO */ |
| 116 | #include <asm/current.h> | 116 | #include <asm/current.h> |
| 117 | 117 | ||
| 118 | /* This define, if set, will randomly drop a packet when congestion | ||
| 119 | * is more than moderate. It helps fairness in the multi-interface | ||
| 120 | * case when one of them is a hog, but it kills performance for the | ||
| 121 | * single interface case so it is off now by default. | ||
| 122 | */ | ||
| 123 | #undef RAND_LIE | ||
| 124 | |||
| 125 | /* Setting this will sample the queue lengths and thus congestion | ||
| 126 | * via a timer instead of as each packet is received. | ||
| 127 | */ | ||
| 128 | #undef OFFLINE_SAMPLE | ||
| 129 | |||
| 130 | /* | 118 | /* |
| 131 | * The list of packet types we will receive (as opposed to discard) | 119 | * The list of packet types we will receive (as opposed to discard) |
| 132 | * and the routines to invoke. | 120 | * and the routines to invoke. |
@@ -159,11 +147,6 @@ static DEFINE_SPINLOCK(ptype_lock);
| 159 | static struct list_head ptype_base[16]; /* 16 way hashed list */ | 147 | static struct list_head ptype_base[16]; /* 16 way hashed list */ |
| 160 | static struct list_head ptype_all; /* Taps */ | 148 | static struct list_head ptype_all; /* Taps */ |
| 161 | 149 | ||
| 162 | #ifdef OFFLINE_SAMPLE | ||
| 163 | static void sample_queue(unsigned long dummy); | ||
| 164 | static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0); | ||
| 165 | #endif | ||
| 166 | |||
| 167 | /* | 150 | /* |
| 168 | * The @dev_base list is protected by @dev_base_lock and the rtnl | 151 | * The @dev_base list is protected by @dev_base_lock and the rtnl |
| 169 | * semaphore. | 152 | * semaphore. |
@@ -215,7 +198,7 @@ static struct notifier_block *netdev_chain;
| 215 | * Device drivers call our routines to queue packets here. We empty the | 198 | * Device drivers call our routines to queue packets here. We empty the |
| 216 | * queue in the local softnet handler. | 199 | * queue in the local softnet handler. |
| 217 | */ | 200 | */ |
| 218 | DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, }; | 201 | DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL }; |
| 219 | 202 | ||
| 220 | #ifdef CONFIG_SYSFS | 203 | #ifdef CONFIG_SYSFS |
| 221 | extern int netdev_sysfs_init(void); | 204 | extern int netdev_sysfs_init(void); |
@@ -1363,71 +1346,13 @@ out:
| 1363 | Receiver routines | 1346 | Receiver routines |
| 1364 | =======================================================================*/ | 1347 | =======================================================================*/ |
| 1365 | 1348 | ||
| 1366 | int netdev_max_backlog = 300; | 1349 | int netdev_max_backlog = 1000; |
| 1350 | int netdev_budget = 300; | ||
| 1367 | int weight_p = 64; /* old backlog weight */ | 1351 | int weight_p = 64; /* old backlog weight */ |
| 1368 | /* These numbers are selected based on intuition and some | ||
| 1369 | * experimentation, if you have a more scientific way of doing this | ||
| 1370 | * please go ahead and fix things. | ||
| 1371 | */ | ||
| 1372 | int no_cong_thresh = 10; | ||
| 1373 | int no_cong = 20; | ||
| 1374 | int lo_cong = 100; | ||
| 1375 | int mod_cong = 290; | ||
| 1376 | 1352 | ||
| 1377 | DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; | 1353 | DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; |
| 1378 | 1354 | ||
| 1379 | 1355 | ||
| 1380 | static void get_sample_stats(int cpu) | ||
| 1381 | { | ||
| 1382 | #ifdef RAND_LIE | ||
| 1383 | unsigned long rd; | ||
| 1384 | int rq; | ||
| 1385 | #endif | ||
| 1386 | struct softnet_data *sd = &per_cpu(softnet_data, cpu); | ||
| 1387 | int blog = sd->input_pkt_queue.qlen; | ||
| 1388 | int avg_blog = sd->avg_blog; | ||
| 1389 | |||
| 1390 | avg_blog = (avg_blog >> 1) + (blog >> 1); | ||
| 1391 | |||
| 1392 | if (avg_blog > mod_cong) { | ||
| 1393 | /* Above moderate congestion levels. */ | ||
| 1394 | sd->cng_level = NET_RX_CN_HIGH; | ||
| 1395 | #ifdef RAND_LIE | ||
| 1396 | rd = net_random(); | ||
| 1397 | rq = rd % netdev_max_backlog; | ||
| 1398 | if (rq < avg_blog) /* unlucky bastard */ | ||
| 1399 | sd->cng_level = NET_RX_DROP; | ||
| 1400 | #endif | ||
| 1401 | } else if (avg_blog > lo_cong) { | ||
| 1402 | sd->cng_level = NET_RX_CN_MOD; | ||
| 1403 | #ifdef RAND_LIE | ||
| 1404 | rd = net_random(); | ||
| 1405 | rq = rd % netdev_max_backlog; | ||
| 1406 | if (rq < avg_blog) /* unlucky bastard */ | ||
| 1407 | sd->cng_level = NET_RX_CN_HIGH; | ||
| 1408 | #endif | ||
| 1409 | } else if (avg_blog > no_cong) | ||
| 1410 | sd->cng_level = NET_RX_CN_LOW; | ||
| 1411 | else /* no congestion */ | ||
| 1412 | sd->cng_level = NET_RX_SUCCESS; | ||
| 1413 | |||
| 1414 | sd->avg_blog = avg_blog; | ||
| 1415 | } | ||
| 1416 | |||
| 1417 | #ifdef OFFLINE_SAMPLE | ||
| 1418 | static void sample_queue(unsigned long dummy) | ||
| 1419 | { | ||
| 1420 | /* 10 ms or 1 ms -- i don't care -- JHS */ | ||
| 1421 | int next_tick = 1; | ||
| 1422 | int cpu = smp_processor_id(); | ||
| 1423 | |||
| 1424 | get_sample_stats(cpu); | ||
| 1425 | next_tick += jiffies; | ||
| 1426 | mod_timer(&samp_timer, next_tick); | ||
| 1427 | } | ||
| 1428 | #endif | ||
| 1429 | |||
| 1430 | |||
| 1431 | /** | 1356 | /** |
| 1432 | * netif_rx - post buffer to the network code | 1357 | * netif_rx - post buffer to the network code |
| 1433 | * @skb: buffer to post | 1358 | * @skb: buffer to post |
@@ -1448,7 +1373,6 @@ static void sample_queue(unsigned long dummy)
| 1448 | 1373 | ||
| 1449 | int netif_rx(struct sk_buff *skb) | 1374 | int netif_rx(struct sk_buff *skb) |
| 1450 | { | 1375 | { |
| 1451 | int this_cpu; | ||
| 1452 | struct softnet_data *queue; | 1376 | struct softnet_data *queue; |
| 1453 | unsigned long flags; | 1377 | unsigned long flags; |
| 1454 | 1378 | ||
@@ -1464,38 +1388,22 @@ int netif_rx(struct sk_buff *skb)
| 1464 | * short when CPU is congested, but is still operating. | 1388 | * short when CPU is congested, but is still operating. |
| 1465 | */ | 1389 | */ |
| 1466 | local_irq_save(flags); | 1390 | local_irq_save(flags); |
| 1467 | this_cpu = smp_processor_id(); | ||
| 1468 | queue = &__get_cpu_var(softnet_data); | 1391 | queue = &__get_cpu_var(softnet_data); |
| 1469 | 1392 | ||
| 1470 | __get_cpu_var(netdev_rx_stat).total++; | 1393 | __get_cpu_var(netdev_rx_stat).total++; |
| 1471 | if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { | 1394 | if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { |
| 1472 | if (queue->input_pkt_queue.qlen) { | 1395 | if (queue->input_pkt_queue.qlen) { |
| 1473 | if (queue->throttle) | ||
| 1474 | goto drop; | ||
| 1475 | |||
| 1476 | enqueue: | 1396 | enqueue: |
| 1477 | dev_hold(skb->dev); | 1397 | dev_hold(skb->dev); |
| 1478 | __skb_queue_tail(&queue->input_pkt_queue, skb); | 1398 | __skb_queue_tail(&queue->input_pkt_queue, skb); |
| 1479 | #ifndef OFFLINE_SAMPLE | ||
| 1480 | get_sample_stats(this_cpu); | ||
| 1481 | #endif | ||
| 1482 | local_irq_restore(flags); | 1399 | local_irq_restore(flags); |
| 1483 | return queue->cng_level; | 1400 | return NET_RX_SUCCESS; |
| 1484 | } | 1401 | } |
| 1485 | 1402 | ||
| 1486 | if (queue->throttle) | ||
| 1487 | queue->throttle = 0; | ||
| 1488 | |||
| 1489 | netif_rx_schedule(&queue->backlog_dev); | 1403 | netif_rx_schedule(&queue->backlog_dev); |
| 1490 | goto enqueue; | 1404 | goto enqueue; |
| 1491 | } | 1405 | } |
| 1492 | 1406 | ||
| 1493 | if (!queue->throttle) { | ||
| 1494 | queue->throttle = 1; | ||
| 1495 | __get_cpu_var(netdev_rx_stat).throttled++; | ||
| 1496 | } | ||
| 1497 | |||
| 1498 | drop: | ||
| 1499 | __get_cpu_var(netdev_rx_stat).dropped++; | 1407 | __get_cpu_var(netdev_rx_stat).dropped++; |
| 1500 | local_irq_restore(flags); | 1408 | local_irq_restore(flags); |
| 1501 | 1409 | ||
@@ -1780,8 +1688,6 @@ job_done:
| 1780 | smp_mb__before_clear_bit(); | 1688 | smp_mb__before_clear_bit(); |
| 1781 | netif_poll_enable(backlog_dev); | 1689 | netif_poll_enable(backlog_dev); |
| 1782 | 1690 | ||
| 1783 | if (queue->throttle) | ||
| 1784 | queue->throttle = 0; | ||
| 1785 | local_irq_enable(); | 1691 | local_irq_enable(); |
| 1786 | return 0; | 1692 | return 0; |
| 1787 | } | 1693 | } |
@@ -1790,8 +1696,7 @@ static void net_rx_action(struct softirq_action *h)
| 1790 | { | 1696 | { |
| 1791 | struct softnet_data *queue = &__get_cpu_var(softnet_data); | 1697 | struct softnet_data *queue = &__get_cpu_var(softnet_data); |
| 1792 | unsigned long start_time = jiffies; | 1698 | unsigned long start_time = jiffies; |
| 1793 | int budget = netdev_max_backlog; | 1699 | int budget = netdev_budget; |
| 1794 | |||
| 1795 | 1700 | ||
| 1796 | local_irq_disable(); | 1701 | local_irq_disable(); |
| 1797 | 1702 | ||
@@ -2055,15 +1960,9 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
| 2055 | struct netif_rx_stats *s = v; | 1960 | struct netif_rx_stats *s = v; |
| 2056 | 1961 | ||
| 2057 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", | 1962 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", |
| 2058 | s->total, s->dropped, s->time_squeeze, s->throttled, | 1963 | s->total, s->dropped, s->time_squeeze, 0, |
| 2059 | s->fastroute_hit, s->fastroute_success, s->fastroute_defer, | 1964 | 0, 0, 0, 0, /* was fastroute */ |
| 2060 | s->fastroute_deferred_out, | 1965 | s->cpu_collision ); |
| 2061 | #if 0 | ||
| 2062 | s->fastroute_latency_reduction | ||
| 2063 | #else | ||
| 2064 | s->cpu_collision | ||
| 2065 | #endif | ||
| 2066 | ); | ||
| 2067 | return 0; | 1966 | return 0; |
| 2068 | } | 1967 | } |
| 2069 | 1968 | ||
@@ -3305,9 +3204,6 @@ static int __init net_dev_init(void)
| 3305 | 3204 | ||
| 3306 | queue = &per_cpu(softnet_data, i); | 3205 | queue = &per_cpu(softnet_data, i); |
| 3307 | skb_queue_head_init(&queue->input_pkt_queue); | 3206 | skb_queue_head_init(&queue->input_pkt_queue); |
| 3308 | queue->throttle = 0; | ||
| 3309 | queue->cng_level = 0; | ||
| 3310 | queue->avg_blog = 10; /* arbitrary non-zero */ | ||
| 3311 | queue->completion_queue = NULL; | 3207 | queue->completion_queue = NULL; |
| 3312 | INIT_LIST_HEAD(&queue->poll_list); | 3208 | INIT_LIST_HEAD(&queue->poll_list); |
| 3313 | set_bit(__LINK_STATE_START, &queue->backlog_dev.state); | 3209 | set_bit(__LINK_STATE_START, &queue->backlog_dev.state); |
@@ -3316,11 +3212,6 @@ static int __init net_dev_init(void)
| 3316 | atomic_set(&queue->backlog_dev.refcnt, 1); | 3212 | atomic_set(&queue->backlog_dev.refcnt, 1); |
| 3317 | } | 3213 | } |
| 3318 | 3214 | ||
| 3319 | #ifdef OFFLINE_SAMPLE | ||
| 3320 | samp_timer.expires = jiffies + (10 * HZ); | ||
| 3321 | add_timer(&samp_timer); | ||
| 3322 | #endif | ||
| 3323 | |||
| 3324 | dev_boot_phase = 0; | 3215 | dev_boot_phase = 0; |
| 3325 | 3216 | ||
| 3326 | open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); | 3217 | open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); |
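With the congestion sampling removed, netif_rx() no longer reports the NET_RX_CN_* grades: callers now see only NET_RX_SUCCESS or NET_RX_DROP. A hedged driver-side sketch of what that leaves to handle (my_driver_rx() and struct my_priv are hypothetical; 2.6-era APIs assumed):

#include <linux/etherdevice.h>
#include <linux/jiffies.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical driver-private state; not part of this patch. */
struct my_priv {
	unsigned long rx_dropped;
};

static void my_driver_rx(struct net_device *dev, struct sk_buff *skb)
{
	struct my_priv *priv = dev->priv;	/* 2.6-era private pointer */

	/* eth_type_trans() sets skb->dev and the protocol field. */
	skb->protocol = eth_type_trans(skb, dev);

	/* Only two outcomes remain after this patch. */
	if (netif_rx(skb) == NET_RX_DROP)
		priv->rx_dropped++;

	dev->last_rx = jiffies;
}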
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6d68c03bc051..bb73b2190ec7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1500,6 +1500,159 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
| 1500 | skb_split_no_header(skb, skb1, len, pos); | 1500 | skb_split_no_header(skb, skb1, len, pos); |
| 1501 | } | 1501 | } |
| 1502 | 1502 | ||
| 1503 | /** | ||
| 1504 | * skb_prepare_seq_read - Prepare a sequential read of skb data | ||
| 1505 | * @skb: the buffer to read | ||
| 1506 | * @from: lower offset of data to be read | ||
| 1507 | * @to: upper offset of data to be read | ||
| 1508 | * @st: state variable | ||
| 1509 | * | ||
| 1510 | * Initializes the specified state variable. Must be called before | ||
| 1511 | * invoking skb_seq_read() for the first time. | ||
| 1512 | */ | ||
| 1513 | void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, | ||
| 1514 | unsigned int to, struct skb_seq_state *st) | ||
| 1515 | { | ||
| 1516 | st->lower_offset = from; | ||
| 1517 | st->upper_offset = to; | ||
| 1518 | st->root_skb = st->cur_skb = skb; | ||
| 1519 | st->frag_idx = st->stepped_offset = 0; | ||
| 1520 | st->frag_data = NULL; | ||
| 1521 | } | ||
| 1522 | |||
| 1523 | /** | ||
| 1524 | * skb_seq_read - Sequentially read skb data | ||
| 1525 | * @consumed: number of bytes consumed by the caller so far | ||
| 1526 | * @data: destination pointer for data to be returned | ||
| 1527 | * @st: state variable | ||
| 1528 | * | ||
| 1529 | * Reads a block of skb data at &consumed relative to the | ||
| 1530 | * lower offset specified to skb_prepare_seq_read(). Assigns | ||
| 1531 | * the head of the data block to &data and returns the length | ||
| 1532 | * of the block or 0 if the end of the skb data or the upper | ||
| 1533 | * offset has been reached. | ||
| 1534 | * | ||
| 1535 | * The caller is not required to consume all of the data | ||
| 1536 | * returned, i.e. &consumed is typically set to the number | ||
| 1537 | * of bytes already consumed and the next call to | ||
| 1538 | * skb_seq_read() will return the remaining part of the block. | ||
| 1539 | * | ||
| 1540 | * Note: The size of each block of data returned can be arbitrary; | ||
| 1541 | * this limitation is the cost of zero-copy sequential | ||
| 1542 | * reads of potentially non-linear data. | ||
| 1543 | * | ||
| 1544 | * Note: Fragment lists within fragments are not implemented | ||
| 1545 | * at the moment, state->root_skb could be replaced with | ||
| 1546 | * a stack for this purpose. | ||
| 1547 | */ | ||
| 1548 | unsigned int skb_seq_read(unsigned int consumed, const u8 **data, | ||
| 1549 | struct skb_seq_state *st) | ||
| 1550 | { | ||
| 1551 | unsigned int block_limit, abs_offset = consumed + st->lower_offset; | ||
| 1552 | skb_frag_t *frag; | ||
| 1553 | |||
| 1554 | if (unlikely(abs_offset >= st->upper_offset)) | ||
| 1555 | return 0; | ||
| 1556 | |||
| 1557 | next_skb: | ||
| 1558 | block_limit = skb_headlen(st->cur_skb); | ||
| 1559 | |||
| 1560 | if (abs_offset < block_limit) { | ||
| 1561 | *data = st->cur_skb->data + abs_offset; | ||
| 1562 | return block_limit - abs_offset; | ||
| 1563 | } | ||
| 1564 | |||
| 1565 | if (st->frag_idx == 0 && !st->frag_data) | ||
| 1566 | st->stepped_offset += skb_headlen(st->cur_skb); | ||
| 1567 | |||
| 1568 | while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { | ||
| 1569 | frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; | ||
| 1570 | block_limit = frag->size + st->stepped_offset; | ||
| 1571 | |||
| 1572 | if (abs_offset < block_limit) { | ||
| 1573 | if (!st->frag_data) | ||
| 1574 | st->frag_data = kmap_skb_frag(frag); | ||
| 1575 | |||
| 1576 | *data = (u8 *) st->frag_data + frag->page_offset + | ||
| 1577 | (abs_offset - st->stepped_offset); | ||
| 1578 | |||
| 1579 | return block_limit - abs_offset; | ||
| 1580 | } | ||
| 1581 | |||
| 1582 | if (st->frag_data) { | ||
| 1583 | kunmap_skb_frag(st->frag_data); | ||
| 1584 | st->frag_data = NULL; | ||
| 1585 | } | ||
| 1586 | |||
| 1587 | st->frag_idx++; | ||
| 1588 | st->stepped_offset += frag->size; | ||
| 1589 | } | ||
| 1590 | |||
| 1591 | if (st->cur_skb->next) { | ||
| 1592 | st->cur_skb = st->cur_skb->next; | ||
| 1593 | st->frag_idx = 0; | ||
| 1594 | goto next_skb; | ||
| 1595 | } else if (st->root_skb == st->cur_skb && | ||
| 1596 | skb_shinfo(st->root_skb)->frag_list) { | ||
| 1597 | st->cur_skb = skb_shinfo(st->root_skb)->frag_list; | ||
| 1598 | goto next_skb; | ||
| 1599 | } | ||
| 1600 | |||
| 1601 | return 0; | ||
| 1602 | } | ||
| 1603 | |||
| 1604 | /** | ||
| 1605 | * skb_abort_seq_read - Abort a sequential read of skb data | ||
| 1606 | * @st: state variable | ||
| 1607 | * | ||
| 1608 | * Must be called if the sequential read was aborted, i.e. if | ||
| 1609 | * skb_seq_read() was not called until it returned 0. | ||
| 1610 | */ | ||
| 1611 | void skb_abort_seq_read(struct skb_seq_state *st) | ||
| 1612 | { | ||
| 1613 | if (st->frag_data) | ||
| 1614 | kunmap_skb_frag(st->frag_data); | ||
| 1615 | } | ||
| 1616 | |||
| 1617 | #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) | ||
| 1618 | |||
| 1619 | static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, | ||
| 1620 | struct ts_config *conf, | ||
| 1621 | struct ts_state *state) | ||
| 1622 | { | ||
| 1623 | return skb_seq_read(offset, text, TS_SKB_CB(state)); | ||
| 1624 | } | ||
| 1625 | |||
| 1626 | static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) | ||
| 1627 | { | ||
| 1628 | skb_abort_seq_read(TS_SKB_CB(state)); | ||
| 1629 | } | ||
| 1630 | |||
| 1631 | /** | ||
| 1632 | * skb_find_text - Find a text pattern in skb data | ||
| 1633 | * @skb: the buffer to look in | ||
| 1634 | * @from: search offset | ||
| 1635 | * @to: search limit | ||
| 1636 | * @config: textsearch configuration | ||
| 1637 | * @state: uninitialized textsearch state variable | ||
| 1638 | * | ||
| 1639 | * Finds a pattern in the skb data according to the specified | ||
| 1640 | * textsearch configuration. Use textsearch_next() to retrieve | ||
| 1641 | * subsequent occurrences of the pattern. Returns the offset | ||
| 1642 | * to the first occurrence or UINT_MAX if no match was found. | ||
| 1643 | */ | ||
| 1644 | unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, | ||
| 1645 | unsigned int to, struct ts_config *config, | ||
| 1646 | struct ts_state *state) | ||
| 1647 | { | ||
| 1648 | config->get_next_block = skb_ts_get_next_block; | ||
| 1649 | config->finish = skb_ts_finish; | ||
| 1650 | |||
| 1651 | skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); | ||
| 1652 | |||
| 1653 | return textsearch_find(config, state); | ||
| 1654 | } | ||
| 1655 | |||
| 1503 | void __init skb_init(void) | 1656 | void __init skb_init(void) |
| 1504 | { | 1657 | { |
| 1505 | skbuff_head_cache = kmem_cache_create("skbuff_head_cache", | 1658 | skbuff_head_cache = kmem_cache_create("skbuff_head_cache", |
@@ -1538,3 +1691,7 @@ EXPORT_SYMBOL(skb_queue_tail);
| 1538 | EXPORT_SYMBOL(skb_unlink); | 1691 | EXPORT_SYMBOL(skb_unlink); |
| 1539 | EXPORT_SYMBOL(skb_append); | 1692 | EXPORT_SYMBOL(skb_append); |
| 1540 | EXPORT_SYMBOL(skb_split); | 1693 | EXPORT_SYMBOL(skb_split); |
| 1694 | EXPORT_SYMBOL(skb_prepare_seq_read); | ||
| 1695 | EXPORT_SYMBOL(skb_seq_read); | ||
| 1696 | EXPORT_SYMBOL(skb_abort_seq_read); | ||
| 1697 | EXPORT_SYMBOL(skb_find_text); | ||
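A hedged usage sketch of the new zero-copy reader (count_zero_bytes() is hypothetical; kernel context assumed). It follows the contract documented above: either iterate until skb_seq_read() returns 0, or call skb_abort_seq_read() when stopping early:

#include <linux/skbuff.h>

static unsigned int count_zero_bytes(struct sk_buff *skb,
				     unsigned int from, unsigned int to)
{
	struct skb_seq_state st;
	const u8 *data;
	unsigned int len, i, consumed = 0, zeros = 0;

	skb_prepare_seq_read(skb, from, to, &st);

	/* Each call returns one block: linear head, page fragment or
	 * frag_list member; block sizes are arbitrary by design. */
	while ((len = skb_seq_read(consumed, &data, &st)) != 0) {
		for (i = 0; i < len; i++)
			if (data[i] == 0)
				zeros++;
		consumed += len;
	}

	/* skb_seq_read() ran to completion (returned 0), so
	 * skb_abort_seq_read() is not needed here. */
	return zeros;
}

skb_find_text() is this loop packaged behind the textsearch interface: it points the ts_config at skb_ts_get_next_block()/skb_ts_finish() and delegates to textsearch_find().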
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 880a88815211..8f817ad9f546 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -13,12 +13,8 @@
| 13 | #ifdef CONFIG_SYSCTL | 13 | #ifdef CONFIG_SYSCTL |
| 14 | 14 | ||
| 15 | extern int netdev_max_backlog; | 15 | extern int netdev_max_backlog; |
| 16 | extern int netdev_budget; | ||
| 16 | extern int weight_p; | 17 | extern int weight_p; |
| 17 | extern int no_cong_thresh; | ||
| 18 | extern int no_cong; | ||
| 19 | extern int lo_cong; | ||
| 20 | extern int mod_cong; | ||
| 21 | extern int netdev_fastroute; | ||
| 22 | extern int net_msg_cost; | 18 | extern int net_msg_cost; |
| 23 | extern int net_msg_burst; | 19 | extern int net_msg_burst; |
| 24 | 20 | ||
@@ -86,38 +82,6 @@ ctl_table core_table[] = {
| 86 | .proc_handler = &proc_dointvec | 82 | .proc_handler = &proc_dointvec |
| 87 | }, | 83 | }, |
| 88 | { | 84 | { |
| 89 | .ctl_name = NET_CORE_NO_CONG_THRESH, | ||
| 90 | .procname = "no_cong_thresh", | ||
| 91 | .data = &no_cong_thresh, | ||
| 92 | .maxlen = sizeof(int), | ||
| 93 | .mode = 0644, | ||
| 94 | .proc_handler = &proc_dointvec | ||
| 95 | }, | ||
| 96 | { | ||
| 97 | .ctl_name = NET_CORE_NO_CONG, | ||
| 98 | .procname = "no_cong", | ||
| 99 | .data = &no_cong, | ||
| 100 | .maxlen = sizeof(int), | ||
| 101 | .mode = 0644, | ||
| 102 | .proc_handler = &proc_dointvec | ||
| 103 | }, | ||
| 104 | { | ||
| 105 | .ctl_name = NET_CORE_LO_CONG, | ||
| 106 | .procname = "lo_cong", | ||
| 107 | .data = &lo_cong, | ||
| 108 | .maxlen = sizeof(int), | ||
| 109 | .mode = 0644, | ||
| 110 | .proc_handler = &proc_dointvec | ||
| 111 | }, | ||
| 112 | { | ||
| 113 | .ctl_name = NET_CORE_MOD_CONG, | ||
| 114 | .procname = "mod_cong", | ||
| 115 | .data = &mod_cong, | ||
| 116 | .maxlen = sizeof(int), | ||
| 117 | .mode = 0644, | ||
| 118 | .proc_handler = &proc_dointvec | ||
| 119 | }, | ||
| 120 | { | ||
| 121 | .ctl_name = NET_CORE_MSG_COST, | 85 | .ctl_name = NET_CORE_MSG_COST, |
| 122 | .procname = "message_cost", | 86 | .procname = "message_cost", |
| 123 | .data = &net_msg_cost, | 87 | .data = &net_msg_cost, |
@@ -161,6 +125,14 @@ ctl_table core_table[] = {
| 161 | .mode = 0644, | 125 | .mode = 0644, |
| 162 | .proc_handler = &proc_dointvec | 126 | .proc_handler = &proc_dointvec |
| 163 | }, | 127 | }, |
| 128 | { | ||
| 129 | .ctl_name = NET_CORE_BUDGET, | ||
| 130 | .procname = "netdev_budget", | ||
| 131 | .data = &netdev_budget, | ||
| 132 | .maxlen = sizeof(int), | ||
| 133 | .mode = 0644, | ||
| 134 | .proc_handler = &proc_dointvec | ||
| 135 | }, | ||
| 164 | { .ctl_name = 0 } | 136 | { .ctl_name = 0 } |
| 165 | }; | 137 | }; |
| 166 | 138 | ||
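The new table entry surfaces the softirq budget as /proc/sys/net/core/netdev_budget (ctl name NET_CORE_BUDGET). A minimal userspace sketch for tuning it, assuming procfs is mounted at /proc:

#include <stdio.h>

/* Returns 0 on success, -1 on error (e.g. insufficient privileges). */
static int set_netdev_budget(int packets)
{
	FILE *f = fopen("/proc/sys/net/core/netdev_budget", "w");

	if (f == NULL)
		return -1;
	fprintf(f, "%d\n", packets);
	return fclose(f) ? -1 : 0;
}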
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f3dbc8dc1263..882436da9a3a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1927,6 +1927,25 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
| 1927 | return tp->af_specific->setsockopt(sk, level, optname, | 1927 | return tp->af_specific->setsockopt(sk, level, optname, |
| 1928 | optval, optlen); | 1928 | optval, optlen); |
| 1929 | 1929 | ||
| 1930 | /* This is a string value all the others are int's */ | ||
| 1931 | if (optname == TCP_CONGESTION) { | ||
| 1932 | char name[TCP_CA_NAME_MAX]; | ||
| 1933 | |||
| 1934 | if (optlen < 1) | ||
| 1935 | return -EINVAL; | ||
| 1936 | |||
| 1937 | val = strncpy_from_user(name, optval, | ||
| 1938 | min(TCP_CA_NAME_MAX-1, optlen)); | ||
| 1939 | if (val < 0) | ||
| 1940 | return -EFAULT; | ||
| 1941 | name[val] = 0; | ||
| 1942 | |||
| 1943 | lock_sock(sk); | ||
| 1944 | err = tcp_set_congestion_control(tp, name); | ||
| 1945 | release_sock(sk); | ||
| 1946 | return err; | ||
| 1947 | } | ||
| 1948 | |||
| 1930 | if (optlen < sizeof(int)) | 1949 | if (optlen < sizeof(int)) |
| 1931 | return -EINVAL; | 1950 | return -EINVAL; |
| 1932 | 1951 | ||
@@ -2211,6 +2230,16 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
| 2211 | case TCP_QUICKACK: | 2230 | case TCP_QUICKACK: |
| 2212 | val = !tp->ack.pingpong; | 2231 | val = !tp->ack.pingpong; |
| 2213 | break; | 2232 | break; |
| 2233 | |||
| 2234 | case TCP_CONGESTION: | ||
| 2235 | if (get_user(len, optlen)) | ||
| 2236 | return -EFAULT; | ||
| 2237 | len = min_t(unsigned int, len, TCP_CA_NAME_MAX); | ||
| 2238 | if (put_user(len, optlen)) | ||
| 2239 | return -EFAULT; | ||
| 2240 | if (copy_to_user(optval, tp->ca_ops->name, len)) | ||
| 2241 | return -EFAULT; | ||
| 2242 | return 0; | ||
| 2214 | default: | 2243 | default: |
| 2215 | return -ENOPROTOOPT; | 2244 | return -ENOPROTOOPT; |
| 2216 | }; | 2245 | }; |
@@ -2224,7 +2253,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
| 2224 | 2253 | ||
| 2225 | 2254 | ||
| 2226 | extern void __skb_cb_too_small_for_tcp(int, int); | 2255 | extern void __skb_cb_too_small_for_tcp(int, int); |
| 2227 | extern void tcpdiag_init(void); | 2256 | extern struct tcp_congestion_ops tcp_reno; |
| 2228 | 2257 | ||
| 2229 | static __initdata unsigned long thash_entries; | 2258 | static __initdata unsigned long thash_entries; |
| 2230 | static int __init set_thash_entries(char *str) | 2259 | static int __init set_thash_entries(char *str) |
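From userspace the new option behaves like any other string-valued sockopt. A hedged sketch (assumes headers that define TCP_CONGESTION; the 16-byte buffer mirrors the kernel's TCP_CA_NAME_MAX):

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

int main(void)
{
	char name[16];			/* TCP_CA_NAME_MAX in the kernel */
	socklen_t len = sizeof(name);
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return 1;

	/* Select Reno by name; per tcp_set_congestion_control() below,
	 * unknown names fail with ENOENT, unloadable modules with EBUSY. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION,
		       "reno", strlen("reno")) < 0)
		perror("setsockopt(TCP_CONGESTION)");

	if (getsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, name, &len) == 0)
		printf("congestion control: %.*s\n", (int)len, name);

	return 0;
}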
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 665394a63ae4..4970d10a7785 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -21,7 +21,7 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name)
| 21 | { | 21 | { |
| 22 | struct tcp_congestion_ops *e; | 22 | struct tcp_congestion_ops *e; |
| 23 | 23 | ||
| 24 | list_for_each_entry(e, &tcp_cong_list, list) { | 24 | list_for_each_entry_rcu(e, &tcp_cong_list, list) { |
| 25 | if (strcmp(e->name, name) == 0) | 25 | if (strcmp(e->name, name) == 0) |
| 26 | return e; | 26 | return e; |
| 27 | } | 27 | } |
@@ -77,6 +77,9 @@ void tcp_init_congestion_control(struct tcp_sock *tp)
| 77 | { | 77 | { |
| 78 | struct tcp_congestion_ops *ca; | 78 | struct tcp_congestion_ops *ca; |
| 79 | 79 | ||
| 80 | if (tp->ca_ops != &tcp_init_congestion_ops) | ||
| 81 | return; | ||
| 82 | |||
| 80 | rcu_read_lock(); | 83 | rcu_read_lock(); |
| 81 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { | 84 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { |
| 82 | if (try_module_get(ca->owner)) { | 85 | if (try_module_get(ca->owner)) { |
@@ -139,6 +142,34 @@ void tcp_get_default_congestion_control(char *name)
| 139 | rcu_read_unlock(); | 142 | rcu_read_unlock(); |
| 140 | } | 143 | } |
| 141 | 144 | ||
| 145 | /* Change congestion control for socket */ | ||
| 146 | int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) | ||
| 147 | { | ||
| 148 | struct tcp_congestion_ops *ca; | ||
| 149 | int err = 0; | ||
| 150 | |||
| 151 | rcu_read_lock(); | ||
| 152 | ca = tcp_ca_find(name); | ||
| 153 | if (ca == tp->ca_ops) | ||
| 154 | goto out; | ||
| 155 | |||
| 156 | if (!ca) | ||
| 157 | err = -ENOENT; | ||
| 158 | |||
| 159 | else if (!try_module_get(ca->owner)) | ||
| 160 | err = -EBUSY; | ||
| 161 | |||
| 162 | else { | ||
| 163 | tcp_cleanup_congestion_control(tp); | ||
| 164 | tp->ca_ops = ca; | ||
| 165 | if (tp->ca_ops->init) | ||
| 166 | tp->ca_ops->init(tp); | ||
| 167 | } | ||
| 168 | out: | ||
| 169 | rcu_read_unlock(); | ||
| 170 | return err; | ||
| 171 | } | ||
| 172 | |||
| 142 | /* | 173 | /* |
| 143 | * TCP Reno congestion control | 174 | * TCP Reno congestion control |
| 144 | * This is special case used for fallback as well. | 175 | * This is special case used for fallback as well. |
@@ -192,4 +223,15 @@ struct tcp_congestion_ops tcp_reno = {
| 192 | .min_cwnd = tcp_reno_min_cwnd, | 223 | .min_cwnd = tcp_reno_min_cwnd, |
| 193 | }; | 224 | }; |
| 194 | 225 | ||
| 195 | EXPORT_SYMBOL_GPL(tcp_reno); | 226 | /* Initial congestion control used (until SYN) |
| 227 | * really Reno under another name so we can tell the difference | ||
| 228 | * during tcp_set_default_congestion_control | ||
| 229 | */ | ||
| 230 | struct tcp_congestion_ops tcp_init_congestion_ops = { | ||
| 231 | .name = "", | ||
| 232 | .owner = THIS_MODULE, | ||
| 233 | .ssthresh = tcp_reno_ssthresh, | ||
| 234 | .cong_avoid = tcp_reno_cong_avoid, | ||
| 235 | .min_cwnd = tcp_reno_min_cwnd, | ||
| 236 | }; | ||
| 237 | EXPORT_SYMBOL_GPL(tcp_init_congestion_ops); | ||
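For completeness, a hedged sketch of what a pluggable algorithm looks like against this framework (the "dummy" module is hypothetical; it reuses the exported Reno callbacks and assumes the tcp_register_congestion_control()/tcp_unregister_congestion_control() pair from this file):

#include <linux/module.h>
#include <net/tcp.h>

/* Hypothetical no-op algorithm: behaves exactly like Reno but is
 * registered under its own name, so it can be selected with
 * setsockopt(TCP_CONGESTION, "dummy") or the default-CC sysctl. */
static struct tcp_congestion_ops tcp_dummy = {
	.name		= "dummy",
	.owner		= THIS_MODULE,
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.min_cwnd	= tcp_reno_min_cwnd,
};

static int __init tcp_dummy_init(void)
{
	return tcp_register_congestion_control(&tcp_dummy);
}

static void __exit tcp_dummy_exit(void)
{
	tcp_unregister_congestion_control(&tcp_dummy);
}

module_init(tcp_dummy_init);
module_exit(tcp_dummy_exit);
MODULE_LICENSE("GPL");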
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9122814c13ad..ebf112347a97 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2048,7 +2048,7 @@ static int tcp_v4_init_sock(struct sock *sk)
| 2048 | tp->mss_cache_std = tp->mss_cache = 536; | 2048 | tp->mss_cache_std = tp->mss_cache = 536; |
| 2049 | 2049 | ||
| 2050 | tp->reordering = sysctl_tcp_reordering; | 2050 | tp->reordering = sysctl_tcp_reordering; |
| 2051 | tp->ca_ops = &tcp_reno; | 2051 | tp->ca_ops = &tcp_init_congestion_ops; |
| 2052 | 2052 | ||
| 2053 | sk->sk_state = TCP_CLOSE; | 2053 | sk->sk_state = TCP_CLOSE; |
| 2054 | 2054 | ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fce56039b0e9..9dac7fdf4726 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2025,7 +2025,7 @@ static int tcp_v6_init_sock(struct sock *sk)
| 2025 | sk->sk_state = TCP_CLOSE; | 2025 | sk->sk_state = TCP_CLOSE; |
| 2026 | 2026 | ||
| 2027 | tp->af_specific = &ipv6_specific; | 2027 | tp->af_specific = &ipv6_specific; |
| 2028 | tp->ca_ops = &tcp_reno; | 2028 | tp->ca_ops = &tcp_init_congestion_ops; |
| 2029 | sk->sk_write_space = sk_stream_write_space; | 2029 | sk->sk_write_space = sk_stream_write_space; |
| 2030 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | 2030 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); |
| 2031 | 2031 | ||
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index b22c9beb604d..447b89e556b1 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -449,6 +449,18 @@ config NET_EMATCH_META
| 449 | To compile this code as a module, choose M here: the | 449 | To compile this code as a module, choose M here: the |
| 450 | module will be called em_meta. | 450 | module will be called em_meta. |
| 451 | 451 | ||
| 452 | config NET_EMATCH_TEXT | ||
| 453 | tristate "Textsearch" | ||
| 454 | depends on NET_EMATCH | ||
| 455 | select TEXTSEARCH | ||
| 456 | ---help--- | ||
| 457 | Say Y here if you want to be able to classify packets based on | ||
| 458 | textsearch comparisons. Please select the appropriate textsearch | ||
| 459 | algorithms in the Library section. | ||
| 460 | |||
| 461 | To compile this code as a module, choose M here: the | ||
| 462 | module will be called em_text. | ||
| 463 | |||
| 452 | config NET_CLS_ACT | 464 | config NET_CLS_ACT |
| 453 | bool "Packet ACTION" | 465 | bool "Packet ACTION" |
| 454 | depends on EXPERIMENTAL && NET_CLS && NET_QOS | 466 | depends on EXPERIMENTAL && NET_CLS && NET_QOS |
diff --git a/net/sched/Makefile b/net/sched/Makefile
index eb3fe583eba8..8f58cecd6266 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -40,3 +40,4 @@ obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
| 40 | obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o | 40 | obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o |
| 41 | obj-$(CONFIG_NET_EMATCH_U32) += em_u32.o | 41 | obj-$(CONFIG_NET_EMATCH_U32) += em_u32.o |
| 42 | obj-$(CONFIG_NET_EMATCH_META) += em_meta.o | 42 | obj-$(CONFIG_NET_EMATCH_META) += em_meta.o |
| 43 | obj-$(CONFIG_NET_EMATCH_TEXT) += em_text.o | ||
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
new file mode 100644
index 000000000000..873840d8d072
--- /dev/null
+++ b/net/sched/em_text.c
@@ -0,0 +1,157 @@
| 1 | /* | ||
| 2 | * net/sched/em_text.c Textsearch ematch | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation; either version | ||
| 7 | * 2 of the License, or (at your option) any later version. | ||
| 8 | * | ||
| 9 | * Authors: Thomas Graf <tgraf@suug.ch> | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/config.h> | ||
| 13 | #include <linux/module.h> | ||
| 14 | #include <linux/types.h> | ||
| 15 | #include <linux/kernel.h> | ||
| 16 | #include <linux/sched.h> | ||
| 17 | #include <linux/string.h> | ||
| 18 | #include <linux/skbuff.h> | ||
| 19 | #include <linux/textsearch.h> | ||
| 20 | #include <linux/tc_ematch/tc_em_text.h> | ||
| 21 | #include <net/pkt_cls.h> | ||
| 22 | |||
| 23 | struct text_match | ||
| 24 | { | ||
| 25 | u16 from_offset; | ||
| 26 | u16 to_offset; | ||
| 27 | u8 from_layer; | ||
| 28 | u8 to_layer; | ||
| 29 | struct ts_config *config; | ||
| 30 | }; | ||
| 31 | |||
| 32 | #define EM_TEXT_PRIV(m) ((struct text_match *) (m)->data) | ||
| 33 | |||
| 34 | static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m, | ||
| 35 | struct tcf_pkt_info *info) | ||
| 36 | { | ||
| 37 | struct text_match *tm = EM_TEXT_PRIV(m); | ||
| 38 | int from, to; | ||
| 39 | struct ts_state state; | ||
| 40 | |||
| 41 | from = tcf_get_base_ptr(skb, tm->from_layer) - skb->data; | ||
| 42 | from += tm->from_offset; | ||
| 43 | |||
| 44 | to = tcf_get_base_ptr(skb, tm->to_layer) - skb->data; | ||
| 45 | to += tm->to_offset; | ||
| 46 | |||
| 47 | return skb_find_text(skb, from, to, tm->config, &state) != UINT_MAX; | ||
| 48 | } | ||
| 49 | |||
| 50 | static int em_text_change(struct tcf_proto *tp, void *data, int len, | ||
| 51 | struct tcf_ematch *m) | ||
| 52 | { | ||
| 53 | struct text_match *tm; | ||
| 54 | struct tcf_em_text *conf = data; | ||
| 55 | struct ts_config *ts_conf; | ||
| 56 | int flags = 0; | ||
| 57 | |||
| 58 | printk("Configuring text: %s from %d:%d to %d:%d len %d\n", conf->algo, conf->from_offset, | ||
| 59 | conf->from_layer, conf->to_offset, conf->to_layer, conf->pattern_len); | ||
| 60 | |||
| 61 | if (len < sizeof(*conf) || len < (sizeof(*conf) + conf->pattern_len)) | ||
| 62 | return -EINVAL; | ||
| 63 | |||
| 64 | if (conf->from_layer > conf->to_layer) | ||
| 65 | return -EINVAL; | ||
| 66 | |||
| 67 | if (conf->from_layer == conf->to_layer && | ||
| 68 | conf->from_offset > conf->to_offset) | ||
| 69 | return -EINVAL; | ||
| 70 | |||
| 71 | retry: | ||
| 72 | ts_conf = textsearch_prepare(conf->algo, (u8 *) conf + sizeof(*conf), | ||
| 73 | conf->pattern_len, GFP_KERNEL, flags); | ||
| 74 | |||
| 75 | if (flags & TS_AUTOLOAD) | ||
| 76 | rtnl_lock(); | ||
| 77 | |||
| 78 | if (IS_ERR(ts_conf)) { | ||
| 79 | if (PTR_ERR(ts_conf) == -ENOENT && !(flags & TS_AUTOLOAD)) { | ||
| 80 | rtnl_unlock(); | ||
| 81 | flags |= TS_AUTOLOAD; | ||
| 82 | goto retry; | ||
| 83 | } else | ||
| 84 | return PTR_ERR(ts_conf); | ||
| 85 | } else if (flags & TS_AUTOLOAD) { | ||
| 86 | textsearch_destroy(ts_conf); | ||
| 87 | return -EAGAIN; | ||
| 88 | } | ||
| 89 | |||
| 90 | tm = kmalloc(sizeof(*tm), GFP_KERNEL); | ||
| 91 | if (tm == NULL) { | ||
| 92 | textsearch_destroy(ts_conf); | ||
| 93 | return -ENOBUFS; | ||
| 94 | } | ||
| 95 | |||
| 96 | tm->from_offset = conf->from_offset; | ||
| 97 | tm->to_offset = conf->to_offset; | ||
| 98 | tm->from_layer = conf->from_layer; | ||
| 99 | tm->to_layer = conf->to_layer; | ||
| 100 | tm->config = ts_conf; | ||
| 101 | |||
| 102 | m->datalen = sizeof(*tm); | ||
| 103 | m->data = (unsigned long) tm; | ||
| 104 | |||
| 105 | return 0; | ||
| 106 | } | ||
| 107 | |||
| 108 | static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m) | ||
| 109 | { | ||
| 110 | textsearch_destroy(EM_TEXT_PRIV(m)->config); | ||
| 111 | } | ||
| 112 | |||
| 113 | static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m) | ||
| 114 | { | ||
| 115 | struct text_match *tm = EM_TEXT_PRIV(m); | ||
| 116 | struct tcf_em_text conf; | ||
| 117 | |||
| 118 | strncpy(conf.algo, tm->config->ops->name, sizeof(conf.algo) - 1); | ||
| 119 | conf.from_offset = tm->from_offset; | ||
| 120 | conf.to_offset = tm->to_offset; | ||
| 121 | conf.from_layer = tm->from_layer; | ||
| 122 | conf.to_layer = tm->to_layer; | ||
| 123 | conf.pattern_len = textsearch_get_pattern_len(tm->config); | ||
| 124 | conf.pad = 0; | ||
| 125 | |||
| 126 | RTA_PUT_NOHDR(skb, sizeof(conf), &conf); | ||
| 127 | RTA_APPEND(skb, conf.pattern_len, textsearch_get_pattern(tm->config)); | ||
| 128 | return 0; | ||
| 129 | |||
| 130 | rtattr_failure: | ||
| 131 | return -1; | ||
| 132 | } | ||
| 133 | |||
| 134 | static struct tcf_ematch_ops em_text_ops = { | ||
| 135 | .kind = TCF_EM_TEXT, | ||
| 136 | .change = em_text_change, | ||
| 137 | .match = em_text_match, | ||
| 138 | .destroy = em_text_destroy, | ||
| 139 | .dump = em_text_dump, | ||
| 140 | .owner = THIS_MODULE, | ||
| 141 | .link = LIST_HEAD_INIT(em_text_ops.link) | ||
| 142 | }; | ||
| 143 | |||
| 144 | static int __init init_em_text(void) | ||
| 145 | { | ||
| 146 | return tcf_em_register(&em_text_ops); | ||
| 147 | } | ||
| 148 | |||
| 149 | static void __exit exit_em_text(void) | ||
| 150 | { | ||
| 151 | tcf_em_unregister(&em_text_ops); | ||
| 152 | } | ||
| 153 | |||
| 154 | MODULE_LICENSE("GPL"); | ||
| 155 | |||
| 156 | module_init(init_em_text); | ||
| 157 | module_exit(exit_em_text); | ||
