diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-24 03:31:46 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-24 03:31:46 -0400 |
commit | 59a49e38711a146dc0bef4837c825b5422335460 (patch) | |
tree | 7e6e3d1850159f94e5b05d1c5775bd3cc87c3690 /net | |
parent | 52c1da39534fb382c061de58b65f678ad74b59f5 (diff) | |
parent | f2d368fa3ef90f2159d9e542303901ebf68144dd (diff) |
Merge rsync://rsync.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
Diffstat (limited to 'net')
-rw-r--r-- | net/core/dev.c | 125 | ||||
-rw-r--r-- | net/core/skbuff.c | 157 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 46 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 31 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 46 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 2 | ||||
-rw-r--r-- | net/sched/Kconfig | 12 | ||||
-rw-r--r-- | net/sched/Makefile | 1 | ||||
-rw-r--r-- | net/sched/em_text.c | 157 |
10 files changed, 420 insertions, 159 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index ab935778ce81..7016e0c36b3d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -115,18 +115,6 @@ | |||
115 | #endif /* CONFIG_NET_RADIO */ | 115 | #endif /* CONFIG_NET_RADIO */ |
116 | #include <asm/current.h> | 116 | #include <asm/current.h> |
117 | 117 | ||
118 | /* This define, if set, will randomly drop a packet when congestion | ||
119 | * is more than moderate. It helps fairness in the multi-interface | ||
120 | * case when one of them is a hog, but it kills performance for the | ||
121 | * single interface case so it is off now by default. | ||
122 | */ | ||
123 | #undef RAND_LIE | ||
124 | |||
125 | /* Setting this will sample the queue lengths and thus congestion | ||
126 | * via a timer instead of as each packet is received. | ||
127 | */ | ||
128 | #undef OFFLINE_SAMPLE | ||
129 | |||
130 | /* | 118 | /* |
131 | * The list of packet types we will receive (as opposed to discard) | 119 | * The list of packet types we will receive (as opposed to discard) |
132 | * and the routines to invoke. | 120 | * and the routines to invoke. |
@@ -159,11 +147,6 @@ static DEFINE_SPINLOCK(ptype_lock); | |||
159 | static struct list_head ptype_base[16]; /* 16 way hashed list */ | 147 | static struct list_head ptype_base[16]; /* 16 way hashed list */ |
160 | static struct list_head ptype_all; /* Taps */ | 148 | static struct list_head ptype_all; /* Taps */ |
161 | 149 | ||
162 | #ifdef OFFLINE_SAMPLE | ||
163 | static void sample_queue(unsigned long dummy); | ||
164 | static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0); | ||
165 | #endif | ||
166 | |||
167 | /* | 150 | /* |
168 | * The @dev_base list is protected by @dev_base_lock and the rtnl | 151 | * The @dev_base list is protected by @dev_base_lock and the rtnl |
169 | * semaphore. | 152 | * semaphore. |
@@ -215,7 +198,7 @@ static struct notifier_block *netdev_chain; | |||
215 | * Device drivers call our routines to queue packets here. We empty the | 198 | * Device drivers call our routines to queue packets here. We empty the |
216 | * queue in the local softnet handler. | 199 | * queue in the local softnet handler. |
217 | */ | 200 | */ |
218 | DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, }; | 201 | DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL }; |
219 | 202 | ||
220 | #ifdef CONFIG_SYSFS | 203 | #ifdef CONFIG_SYSFS |
221 | extern int netdev_sysfs_init(void); | 204 | extern int netdev_sysfs_init(void); |
@@ -1363,71 +1346,13 @@ out: | |||
1363 | Receiver routines | 1346 | Receiver routines |
1364 | =======================================================================*/ | 1347 | =======================================================================*/ |
1365 | 1348 | ||
1366 | int netdev_max_backlog = 300; | 1349 | int netdev_max_backlog = 1000; |
1350 | int netdev_budget = 300; | ||
1367 | int weight_p = 64; /* old backlog weight */ | 1351 | int weight_p = 64; /* old backlog weight */ |
1368 | /* These numbers are selected based on intuition and some | ||
1369 | * experimentatiom, if you have more scientific way of doing this | ||
1370 | * please go ahead and fix things. | ||
1371 | */ | ||
1372 | int no_cong_thresh = 10; | ||
1373 | int no_cong = 20; | ||
1374 | int lo_cong = 100; | ||
1375 | int mod_cong = 290; | ||
1376 | 1352 | ||
1377 | DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; | 1353 | DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; |
1378 | 1354 | ||
1379 | 1355 | ||
1380 | static void get_sample_stats(int cpu) | ||
1381 | { | ||
1382 | #ifdef RAND_LIE | ||
1383 | unsigned long rd; | ||
1384 | int rq; | ||
1385 | #endif | ||
1386 | struct softnet_data *sd = &per_cpu(softnet_data, cpu); | ||
1387 | int blog = sd->input_pkt_queue.qlen; | ||
1388 | int avg_blog = sd->avg_blog; | ||
1389 | |||
1390 | avg_blog = (avg_blog >> 1) + (blog >> 1); | ||
1391 | |||
1392 | if (avg_blog > mod_cong) { | ||
1393 | /* Above moderate congestion levels. */ | ||
1394 | sd->cng_level = NET_RX_CN_HIGH; | ||
1395 | #ifdef RAND_LIE | ||
1396 | rd = net_random(); | ||
1397 | rq = rd % netdev_max_backlog; | ||
1398 | if (rq < avg_blog) /* unlucky bastard */ | ||
1399 | sd->cng_level = NET_RX_DROP; | ||
1400 | #endif | ||
1401 | } else if (avg_blog > lo_cong) { | ||
1402 | sd->cng_level = NET_RX_CN_MOD; | ||
1403 | #ifdef RAND_LIE | ||
1404 | rd = net_random(); | ||
1405 | rq = rd % netdev_max_backlog; | ||
1406 | if (rq < avg_blog) /* unlucky bastard */ | ||
1407 | sd->cng_level = NET_RX_CN_HIGH; | ||
1408 | #endif | ||
1409 | } else if (avg_blog > no_cong) | ||
1410 | sd->cng_level = NET_RX_CN_LOW; | ||
1411 | else /* no congestion */ | ||
1412 | sd->cng_level = NET_RX_SUCCESS; | ||
1413 | |||
1414 | sd->avg_blog = avg_blog; | ||
1415 | } | ||
1416 | |||
1417 | #ifdef OFFLINE_SAMPLE | ||
1418 | static void sample_queue(unsigned long dummy) | ||
1419 | { | ||
1420 | /* 10 ms 0r 1ms -- i don't care -- JHS */ | ||
1421 | int next_tick = 1; | ||
1422 | int cpu = smp_processor_id(); | ||
1423 | |||
1424 | get_sample_stats(cpu); | ||
1425 | next_tick += jiffies; | ||
1426 | mod_timer(&samp_timer, next_tick); | ||
1427 | } | ||
1428 | #endif | ||
1429 | |||
1430 | |||
1431 | /** | 1356 | /** |
1432 | * netif_rx - post buffer to the network code | 1357 | * netif_rx - post buffer to the network code |
1433 | * @skb: buffer to post | 1358 | * @skb: buffer to post |
@@ -1448,7 +1373,6 @@ static void sample_queue(unsigned long dummy) | |||
1448 | 1373 | ||
1449 | int netif_rx(struct sk_buff *skb) | 1374 | int netif_rx(struct sk_buff *skb) |
1450 | { | 1375 | { |
1451 | int this_cpu; | ||
1452 | struct softnet_data *queue; | 1376 | struct softnet_data *queue; |
1453 | unsigned long flags; | 1377 | unsigned long flags; |
1454 | 1378 | ||
@@ -1464,38 +1388,22 @@ int netif_rx(struct sk_buff *skb) | |||
1464 | * short when CPU is congested, but is still operating. | 1388 | * short when CPU is congested, but is still operating. |
1465 | */ | 1389 | */ |
1466 | local_irq_save(flags); | 1390 | local_irq_save(flags); |
1467 | this_cpu = smp_processor_id(); | ||
1468 | queue = &__get_cpu_var(softnet_data); | 1391 | queue = &__get_cpu_var(softnet_data); |
1469 | 1392 | ||
1470 | __get_cpu_var(netdev_rx_stat).total++; | 1393 | __get_cpu_var(netdev_rx_stat).total++; |
1471 | if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { | 1394 | if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { |
1472 | if (queue->input_pkt_queue.qlen) { | 1395 | if (queue->input_pkt_queue.qlen) { |
1473 | if (queue->throttle) | ||
1474 | goto drop; | ||
1475 | |||
1476 | enqueue: | 1396 | enqueue: |
1477 | dev_hold(skb->dev); | 1397 | dev_hold(skb->dev); |
1478 | __skb_queue_tail(&queue->input_pkt_queue, skb); | 1398 | __skb_queue_tail(&queue->input_pkt_queue, skb); |
1479 | #ifndef OFFLINE_SAMPLE | ||
1480 | get_sample_stats(this_cpu); | ||
1481 | #endif | ||
1482 | local_irq_restore(flags); | 1399 | local_irq_restore(flags); |
1483 | return queue->cng_level; | 1400 | return NET_RX_SUCCESS; |
1484 | } | 1401 | } |
1485 | 1402 | ||
1486 | if (queue->throttle) | ||
1487 | queue->throttle = 0; | ||
1488 | |||
1489 | netif_rx_schedule(&queue->backlog_dev); | 1403 | netif_rx_schedule(&queue->backlog_dev); |
1490 | goto enqueue; | 1404 | goto enqueue; |
1491 | } | 1405 | } |
1492 | 1406 | ||
1493 | if (!queue->throttle) { | ||
1494 | queue->throttle = 1; | ||
1495 | __get_cpu_var(netdev_rx_stat).throttled++; | ||
1496 | } | ||
1497 | |||
1498 | drop: | ||
1499 | __get_cpu_var(netdev_rx_stat).dropped++; | 1407 | __get_cpu_var(netdev_rx_stat).dropped++; |
1500 | local_irq_restore(flags); | 1408 | local_irq_restore(flags); |
1501 | 1409 | ||
@@ -1780,8 +1688,6 @@ job_done: | |||
1780 | smp_mb__before_clear_bit(); | 1688 | smp_mb__before_clear_bit(); |
1781 | netif_poll_enable(backlog_dev); | 1689 | netif_poll_enable(backlog_dev); |
1782 | 1690 | ||
1783 | if (queue->throttle) | ||
1784 | queue->throttle = 0; | ||
1785 | local_irq_enable(); | 1691 | local_irq_enable(); |
1786 | return 0; | 1692 | return 0; |
1787 | } | 1693 | } |
@@ -1790,8 +1696,7 @@ static void net_rx_action(struct softirq_action *h) | |||
1790 | { | 1696 | { |
1791 | struct softnet_data *queue = &__get_cpu_var(softnet_data); | 1697 | struct softnet_data *queue = &__get_cpu_var(softnet_data); |
1792 | unsigned long start_time = jiffies; | 1698 | unsigned long start_time = jiffies; |
1793 | int budget = netdev_max_backlog; | 1699 | int budget = netdev_budget; |
1794 | |||
1795 | 1700 | ||
1796 | local_irq_disable(); | 1701 | local_irq_disable(); |
1797 | 1702 | ||
@@ -2055,15 +1960,9 @@ static int softnet_seq_show(struct seq_file *seq, void *v) | |||
2055 | struct netif_rx_stats *s = v; | 1960 | struct netif_rx_stats *s = v; |
2056 | 1961 | ||
2057 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", | 1962 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", |
2058 | s->total, s->dropped, s->time_squeeze, s->throttled, | 1963 | s->total, s->dropped, s->time_squeeze, 0, |
2059 | s->fastroute_hit, s->fastroute_success, s->fastroute_defer, | 1964 | 0, 0, 0, 0, /* was fastroute */ |
2060 | s->fastroute_deferred_out, | 1965 | s->cpu_collision ); |
2061 | #if 0 | ||
2062 | s->fastroute_latency_reduction | ||
2063 | #else | ||
2064 | s->cpu_collision | ||
2065 | #endif | ||
2066 | ); | ||
2067 | return 0; | 1966 | return 0; |
2068 | } | 1967 | } |
2069 | 1968 | ||
@@ -3305,9 +3204,6 @@ static int __init net_dev_init(void) | |||
3305 | 3204 | ||
3306 | queue = &per_cpu(softnet_data, i); | 3205 | queue = &per_cpu(softnet_data, i); |
3307 | skb_queue_head_init(&queue->input_pkt_queue); | 3206 | skb_queue_head_init(&queue->input_pkt_queue); |
3308 | queue->throttle = 0; | ||
3309 | queue->cng_level = 0; | ||
3310 | queue->avg_blog = 10; /* arbitrary non-zero */ | ||
3311 | queue->completion_queue = NULL; | 3207 | queue->completion_queue = NULL; |
3312 | INIT_LIST_HEAD(&queue->poll_list); | 3208 | INIT_LIST_HEAD(&queue->poll_list); |
3313 | set_bit(__LINK_STATE_START, &queue->backlog_dev.state); | 3209 | set_bit(__LINK_STATE_START, &queue->backlog_dev.state); |
@@ -3316,11 +3212,6 @@ static int __init net_dev_init(void) | |||
3316 | atomic_set(&queue->backlog_dev.refcnt, 1); | 3212 | atomic_set(&queue->backlog_dev.refcnt, 1); |
3317 | } | 3213 | } |
3318 | 3214 | ||
3319 | #ifdef OFFLINE_SAMPLE | ||
3320 | samp_timer.expires = jiffies + (10 * HZ); | ||
3321 | add_timer(&samp_timer); | ||
3322 | #endif | ||
3323 | |||
3324 | dev_boot_phase = 0; | 3215 | dev_boot_phase = 0; |
3325 | 3216 | ||
3326 | open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); | 3217 | open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6d68c03bc051..bb73b2190ec7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -1500,6 +1500,159 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) | |||
1500 | skb_split_no_header(skb, skb1, len, pos); | 1500 | skb_split_no_header(skb, skb1, len, pos); |
1501 | } | 1501 | } |
1502 | 1502 | ||
1503 | /** | ||
1504 | * skb_prepare_seq_read - Prepare a sequential read of skb data | ||
1505 | * @skb: the buffer to read | ||
1506 | * @from: lower offset of data to be read | ||
1507 | * @to: upper offset of data to be read | ||
1508 | * @st: state variable | ||
1509 | * | ||
1510 | * Initializes the specified state variable. Must be called before | ||
1511 | * invoking skb_seq_read() for the first time. | ||
1512 | */ | ||
1513 | void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, | ||
1514 | unsigned int to, struct skb_seq_state *st) | ||
1515 | { | ||
1516 | st->lower_offset = from; | ||
1517 | st->upper_offset = to; | ||
1518 | st->root_skb = st->cur_skb = skb; | ||
1519 | st->frag_idx = st->stepped_offset = 0; | ||
1520 | st->frag_data = NULL; | ||
1521 | } | ||
1522 | |||
1523 | /** | ||
1524 | * skb_seq_read - Sequentially read skb data | ||
1525 | * @consumed: number of bytes consumed by the caller so far | ||
1526 | * @data: destination pointer for data to be returned | ||
1527 | * @st: state variable | ||
1528 | * | ||
1529 | * Reads a block of skb data at &consumed relative to the | ||
1530 | * lower offset specified to skb_prepare_seq_read(). Assigns | ||
1531 | * the head of the data block to &data and returns the length | ||
1532 | * of the block or 0 if the end of the skb data or the upper | ||
1533 | * offset has been reached. | ||
1534 | * | ||
1535 | * The caller is not required to consume all of the data | ||
1536 | * returned, i.e. &consumed is typically set to the number | ||
1537 | * of bytes already consumed and the next call to | ||
1538 | * skb_seq_read() will return the remaining part of the block. | ||
1539 | * | ||
1540 | * Note: The size of each block of data returned can be arbitrary, | |
1541 | * this limitation is the cost for zerocopy sequential | |
1542 | * reads of potentially non linear data. | ||
1543 | * | ||
1544 | * Note: Fragment lists within fragments are not implemented | ||
1545 | * at the moment, state->root_skb could be replaced with | ||
1546 | * a stack for this purpose. | ||
1547 | */ | ||
1548 | unsigned int skb_seq_read(unsigned int consumed, const u8 **data, | ||
1549 | struct skb_seq_state *st) | ||
1550 | { | ||
1551 | unsigned int block_limit, abs_offset = consumed + st->lower_offset; | ||
1552 | skb_frag_t *frag; | ||
1553 | |||
1554 | if (unlikely(abs_offset >= st->upper_offset)) | ||
1555 | return 0; | ||
1556 | |||
1557 | next_skb: | ||
1558 | block_limit = skb_headlen(st->cur_skb); | ||
1559 | |||
1560 | if (abs_offset < block_limit) { | ||
1561 | *data = st->cur_skb->data + abs_offset; | ||
1562 | return block_limit - abs_offset; | ||
1563 | } | ||
1564 | |||
1565 | if (st->frag_idx == 0 && !st->frag_data) | ||
1566 | st->stepped_offset += skb_headlen(st->cur_skb); | ||
1567 | |||
1568 | while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { | ||
1569 | frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; | ||
1570 | block_limit = frag->size + st->stepped_offset; | ||
1571 | |||
1572 | if (abs_offset < block_limit) { | ||
1573 | if (!st->frag_data) | ||
1574 | st->frag_data = kmap_skb_frag(frag); | ||
1575 | |||
1576 | *data = (u8 *) st->frag_data + frag->page_offset + | ||
1577 | (abs_offset - st->stepped_offset); | ||
1578 | |||
1579 | return block_limit - abs_offset; | ||
1580 | } | ||
1581 | |||
1582 | if (st->frag_data) { | ||
1583 | kunmap_skb_frag(st->frag_data); | ||
1584 | st->frag_data = NULL; | ||
1585 | } | ||
1586 | |||
1587 | st->frag_idx++; | ||
1588 | st->stepped_offset += frag->size; | ||
1589 | } | ||
1590 | |||
1591 | if (st->cur_skb->next) { | ||
1592 | st->cur_skb = st->cur_skb->next; | ||
1593 | st->frag_idx = 0; | ||
1594 | goto next_skb; | ||
1595 | } else if (st->root_skb == st->cur_skb && | ||
1596 | skb_shinfo(st->root_skb)->frag_list) { | ||
1597 | st->cur_skb = skb_shinfo(st->root_skb)->frag_list; | ||
1598 | goto next_skb; | ||
1599 | } | ||
1600 | |||
1601 | return 0; | ||
1602 | } | ||
1603 | |||
1604 | /** | ||
1605 | * skb_abort_seq_read - Abort a sequential read of skb data | ||
1606 | * @st: state variable | ||
1607 | * | ||
1608 | * Must be called if skb_seq_read() was not called until it | ||
1609 | * returned 0. | ||
1610 | */ | ||
1611 | void skb_abort_seq_read(struct skb_seq_state *st) | ||
1612 | { | ||
1613 | if (st->frag_data) | ||
1614 | kunmap_skb_frag(st->frag_data); | ||
1615 | } | ||
1616 | |||
1617 | #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) | ||
1618 | |||
1619 | static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, | ||
1620 | struct ts_config *conf, | ||
1621 | struct ts_state *state) | ||
1622 | { | ||
1623 | return skb_seq_read(offset, text, TS_SKB_CB(state)); | ||
1624 | } | ||
1625 | |||
1626 | static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) | ||
1627 | { | ||
1628 | skb_abort_seq_read(TS_SKB_CB(state)); | ||
1629 | } | ||
1630 | |||
1631 | /** | ||
1632 | * skb_find_text - Find a text pattern in skb data | ||
1633 | * @skb: the buffer to look in | ||
1634 | * @from: search offset | ||
1635 | * @to: search limit | ||
1636 | * @config: textsearch configuration | ||
1637 | * @state: uninitialized textsearch state variable | ||
1638 | * | ||
1639 | * Finds a pattern in the skb data according to the specified | ||
1640 | * textsearch configuration. Use textsearch_next() to retrieve | ||
1641 | * subsequent occurrences of the pattern. Returns the offset | ||
1642 | * to the first occurrence or UINT_MAX if no match was found. | ||
1643 | */ | ||
1644 | unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, | ||
1645 | unsigned int to, struct ts_config *config, | ||
1646 | struct ts_state *state) | ||
1647 | { | ||
1648 | config->get_next_block = skb_ts_get_next_block; | ||
1649 | config->finish = skb_ts_finish; | ||
1650 | |||
1651 | skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); | ||
1652 | |||
1653 | return textsearch_find(config, state); | ||
1654 | } | ||
1655 | |||
1503 | void __init skb_init(void) | 1656 | void __init skb_init(void) |
1504 | { | 1657 | { |
1505 | skbuff_head_cache = kmem_cache_create("skbuff_head_cache", | 1658 | skbuff_head_cache = kmem_cache_create("skbuff_head_cache", |
@@ -1538,3 +1691,7 @@ EXPORT_SYMBOL(skb_queue_tail); | |||
1538 | EXPORT_SYMBOL(skb_unlink); | 1691 | EXPORT_SYMBOL(skb_unlink); |
1539 | EXPORT_SYMBOL(skb_append); | 1692 | EXPORT_SYMBOL(skb_append); |
1540 | EXPORT_SYMBOL(skb_split); | 1693 | EXPORT_SYMBOL(skb_split); |
1694 | EXPORT_SYMBOL(skb_prepare_seq_read); | ||
1695 | EXPORT_SYMBOL(skb_seq_read); | ||
1696 | EXPORT_SYMBOL(skb_abort_seq_read); | ||
1697 | EXPORT_SYMBOL(skb_find_text); | ||
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 880a88815211..8f817ad9f546 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
@@ -13,12 +13,8 @@ | |||
13 | #ifdef CONFIG_SYSCTL | 13 | #ifdef CONFIG_SYSCTL |
14 | 14 | ||
15 | extern int netdev_max_backlog; | 15 | extern int netdev_max_backlog; |
16 | extern int netdev_budget; | ||
16 | extern int weight_p; | 17 | extern int weight_p; |
17 | extern int no_cong_thresh; | ||
18 | extern int no_cong; | ||
19 | extern int lo_cong; | ||
20 | extern int mod_cong; | ||
21 | extern int netdev_fastroute; | ||
22 | extern int net_msg_cost; | 18 | extern int net_msg_cost; |
23 | extern int net_msg_burst; | 19 | extern int net_msg_burst; |
24 | 20 | ||
@@ -86,38 +82,6 @@ ctl_table core_table[] = { | |||
86 | .proc_handler = &proc_dointvec | 82 | .proc_handler = &proc_dointvec |
87 | }, | 83 | }, |
88 | { | 84 | { |
89 | .ctl_name = NET_CORE_NO_CONG_THRESH, | ||
90 | .procname = "no_cong_thresh", | ||
91 | .data = &no_cong_thresh, | ||
92 | .maxlen = sizeof(int), | ||
93 | .mode = 0644, | ||
94 | .proc_handler = &proc_dointvec | ||
95 | }, | ||
96 | { | ||
97 | .ctl_name = NET_CORE_NO_CONG, | ||
98 | .procname = "no_cong", | ||
99 | .data = &no_cong, | ||
100 | .maxlen = sizeof(int), | ||
101 | .mode = 0644, | ||
102 | .proc_handler = &proc_dointvec | ||
103 | }, | ||
104 | { | ||
105 | .ctl_name = NET_CORE_LO_CONG, | ||
106 | .procname = "lo_cong", | ||
107 | .data = &lo_cong, | ||
108 | .maxlen = sizeof(int), | ||
109 | .mode = 0644, | ||
110 | .proc_handler = &proc_dointvec | ||
111 | }, | ||
112 | { | ||
113 | .ctl_name = NET_CORE_MOD_CONG, | ||
114 | .procname = "mod_cong", | ||
115 | .data = &mod_cong, | ||
116 | .maxlen = sizeof(int), | ||
117 | .mode = 0644, | ||
118 | .proc_handler = &proc_dointvec | ||
119 | }, | ||
120 | { | ||
121 | .ctl_name = NET_CORE_MSG_COST, | 85 | .ctl_name = NET_CORE_MSG_COST, |
122 | .procname = "message_cost", | 86 | .procname = "message_cost", |
123 | .data = &net_msg_cost, | 87 | .data = &net_msg_cost, |
@@ -161,6 +125,14 @@ ctl_table core_table[] = { | |||
161 | .mode = 0644, | 125 | .mode = 0644, |
162 | .proc_handler = &proc_dointvec | 126 | .proc_handler = &proc_dointvec |
163 | }, | 127 | }, |
128 | { | ||
129 | .ctl_name = NET_CORE_BUDGET, | ||
130 | .procname = "netdev_budget", | ||
131 | .data = &netdev_budget, | ||
132 | .maxlen = sizeof(int), | ||
133 | .mode = 0644, | ||
134 | .proc_handler = &proc_dointvec | ||
135 | }, | ||
164 | { .ctl_name = 0 } | 136 | { .ctl_name = 0 } |
165 | }; | 137 | }; |
166 | 138 | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f3dbc8dc1263..882436da9a3a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -1927,6 +1927,25 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
1927 | return tp->af_specific->setsockopt(sk, level, optname, | 1927 | return tp->af_specific->setsockopt(sk, level, optname, |
1928 | optval, optlen); | 1928 | optval, optlen); |
1929 | 1929 | ||
1930 | /* This is a string value; all the others are ints */ | |
1931 | if (optname == TCP_CONGESTION) { | ||
1932 | char name[TCP_CA_NAME_MAX]; | ||
1933 | |||
1934 | if (optlen < 1) | ||
1935 | return -EINVAL; | ||
1936 | |||
1937 | val = strncpy_from_user(name, optval, | ||
1938 | min(TCP_CA_NAME_MAX-1, optlen)); | ||
1939 | if (val < 0) | ||
1940 | return -EFAULT; | ||
1941 | name[val] = 0; | ||
1942 | |||
1943 | lock_sock(sk); | ||
1944 | err = tcp_set_congestion_control(tp, name); | ||
1945 | release_sock(sk); | ||
1946 | return err; | ||
1947 | } | ||
1948 | |||
1930 | if (optlen < sizeof(int)) | 1949 | if (optlen < sizeof(int)) |
1931 | return -EINVAL; | 1950 | return -EINVAL; |
1932 | 1951 | ||
@@ -2211,6 +2230,16 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2211 | case TCP_QUICKACK: | 2230 | case TCP_QUICKACK: |
2212 | val = !tp->ack.pingpong; | 2231 | val = !tp->ack.pingpong; |
2213 | break; | 2232 | break; |
2233 | |||
2234 | case TCP_CONGESTION: | ||
2235 | if (get_user(len, optlen)) | ||
2236 | return -EFAULT; | ||
2237 | len = min_t(unsigned int, len, TCP_CA_NAME_MAX); | ||
2238 | if (put_user(len, optlen)) | ||
2239 | return -EFAULT; | ||
2240 | if (copy_to_user(optval, tp->ca_ops->name, len)) | ||
2241 | return -EFAULT; | ||
2242 | return 0; | ||
2214 | default: | 2243 | default: |
2215 | return -ENOPROTOOPT; | 2244 | return -ENOPROTOOPT; |
2216 | }; | 2245 | }; |
@@ -2224,7 +2253,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2224 | 2253 | ||
2225 | 2254 | ||
2226 | extern void __skb_cb_too_small_for_tcp(int, int); | 2255 | extern void __skb_cb_too_small_for_tcp(int, int); |
2227 | extern void tcpdiag_init(void); | 2256 | extern struct tcp_congestion_ops tcp_reno; |
2228 | 2257 | ||
2229 | static __initdata unsigned long thash_entries; | 2258 | static __initdata unsigned long thash_entries; |
2230 | static int __init set_thash_entries(char *str) | 2259 | static int __init set_thash_entries(char *str) |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 665394a63ae4..4970d10a7785 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -21,7 +21,7 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) | |||
21 | { | 21 | { |
22 | struct tcp_congestion_ops *e; | 22 | struct tcp_congestion_ops *e; |
23 | 23 | ||
24 | list_for_each_entry(e, &tcp_cong_list, list) { | 24 | list_for_each_entry_rcu(e, &tcp_cong_list, list) { |
25 | if (strcmp(e->name, name) == 0) | 25 | if (strcmp(e->name, name) == 0) |
26 | return e; | 26 | return e; |
27 | } | 27 | } |
@@ -77,6 +77,9 @@ void tcp_init_congestion_control(struct tcp_sock *tp) | |||
77 | { | 77 | { |
78 | struct tcp_congestion_ops *ca; | 78 | struct tcp_congestion_ops *ca; |
79 | 79 | ||
80 | if (tp->ca_ops != &tcp_init_congestion_ops) | ||
81 | return; | ||
82 | |||
80 | rcu_read_lock(); | 83 | rcu_read_lock(); |
81 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { | 84 | list_for_each_entry_rcu(ca, &tcp_cong_list, list) { |
82 | if (try_module_get(ca->owner)) { | 85 | if (try_module_get(ca->owner)) { |
@@ -139,6 +142,34 @@ void tcp_get_default_congestion_control(char *name) | |||
139 | rcu_read_unlock(); | 142 | rcu_read_unlock(); |
140 | } | 143 | } |
141 | 144 | ||
145 | /* Change congestion control for socket */ | ||
146 | int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) | ||
147 | { | ||
148 | struct tcp_congestion_ops *ca; | ||
149 | int err = 0; | ||
150 | |||
151 | rcu_read_lock(); | ||
152 | ca = tcp_ca_find(name); | ||
153 | if (ca == tp->ca_ops) | ||
154 | goto out; | ||
155 | |||
156 | if (!ca) | ||
157 | err = -ENOENT; | ||
158 | |||
159 | else if (!try_module_get(ca->owner)) | ||
160 | err = -EBUSY; | ||
161 | |||
162 | else { | ||
163 | tcp_cleanup_congestion_control(tp); | ||
164 | tp->ca_ops = ca; | ||
165 | if (tp->ca_ops->init) | ||
166 | tp->ca_ops->init(tp); | ||
167 | } | ||
168 | out: | ||
169 | rcu_read_unlock(); | ||
170 | return err; | ||
171 | } | ||
172 | |||
142 | /* | 173 | /* |
143 | * TCP Reno congestion control | 174 | * TCP Reno congestion control |
144 | * This is special case used for fallback as well. | 175 | * This is special case used for fallback as well. |
@@ -192,4 +223,15 @@ struct tcp_congestion_ops tcp_reno = { | |||
192 | .min_cwnd = tcp_reno_min_cwnd, | 223 | .min_cwnd = tcp_reno_min_cwnd, |
193 | }; | 224 | }; |
194 | 225 | ||
195 | EXPORT_SYMBOL_GPL(tcp_reno); | 226 | /* Initial congestion control used (until SYN) |
227 | * really reno under another name so we can tell difference | ||
228 | * during tcp_set_default_congestion_control | ||
229 | */ | ||
230 | struct tcp_congestion_ops tcp_init_congestion_ops = { | ||
231 | .name = "", | ||
232 | .owner = THIS_MODULE, | ||
233 | .ssthresh = tcp_reno_ssthresh, | ||
234 | .cong_avoid = tcp_reno_cong_avoid, | ||
235 | .min_cwnd = tcp_reno_min_cwnd, | ||
236 | }; | ||
237 | EXPORT_SYMBOL_GPL(tcp_init_congestion_ops); | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9122814c13ad..ebf112347a97 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -2048,7 +2048,7 @@ static int tcp_v4_init_sock(struct sock *sk) | |||
2048 | tp->mss_cache_std = tp->mss_cache = 536; | 2048 | tp->mss_cache_std = tp->mss_cache = 536; |
2049 | 2049 | ||
2050 | tp->reordering = sysctl_tcp_reordering; | 2050 | tp->reordering = sysctl_tcp_reordering; |
2051 | tp->ca_ops = &tcp_reno; | 2051 | tp->ca_ops = &tcp_init_congestion_ops; |
2052 | 2052 | ||
2053 | sk->sk_state = TCP_CLOSE; | 2053 | sk->sk_state = TCP_CLOSE; |
2054 | 2054 | ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fce56039b0e9..9dac7fdf4726 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -2025,7 +2025,7 @@ static int tcp_v6_init_sock(struct sock *sk) | |||
2025 | sk->sk_state = TCP_CLOSE; | 2025 | sk->sk_state = TCP_CLOSE; |
2026 | 2026 | ||
2027 | tp->af_specific = &ipv6_specific; | 2027 | tp->af_specific = &ipv6_specific; |
2028 | tp->ca_ops = &tcp_reno; | 2028 | tp->ca_ops = &tcp_init_congestion_ops; |
2029 | sk->sk_write_space = sk_stream_write_space; | 2029 | sk->sk_write_space = sk_stream_write_space; |
2030 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | 2030 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); |
2031 | 2031 | ||
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index b22c9beb604d..447b89e556b1 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig | |||
@@ -449,6 +449,18 @@ config NET_EMATCH_META | |||
449 | To compile this code as a module, choose M here: the | 449 | To compile this code as a module, choose M here: the |
450 | module will be called em_meta. | 450 | module will be called em_meta. |
451 | 451 | ||
452 | config NET_EMATCH_TEXT | ||
453 | tristate "Textsearch" | ||
454 | depends on NET_EMATCH | ||
455 | select TEXTSEARCH | ||
456 | ---help--- | ||
457 | Say Y here if you want to be able to classify packets based on | |
458 | textsearch comparisons. Please select the appropriate textsearch | ||
459 | algorithms in the Library section. | ||
460 | |||
461 | To compile this code as a module, choose M here: the | ||
462 | module will be called em_text. | ||
463 | |||
452 | config NET_CLS_ACT | 464 | config NET_CLS_ACT |
453 | bool "Packet ACTION" | 465 | bool "Packet ACTION" |
454 | depends on EXPERIMENTAL && NET_CLS && NET_QOS | 466 | depends on EXPERIMENTAL && NET_CLS && NET_QOS |
diff --git a/net/sched/Makefile b/net/sched/Makefile index eb3fe583eba8..8f58cecd6266 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile | |||
@@ -40,3 +40,4 @@ obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o | |||
40 | obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o | 40 | obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o |
41 | obj-$(CONFIG_NET_EMATCH_U32) += em_u32.o | 41 | obj-$(CONFIG_NET_EMATCH_U32) += em_u32.o |
42 | obj-$(CONFIG_NET_EMATCH_META) += em_meta.o | 42 | obj-$(CONFIG_NET_EMATCH_META) += em_meta.o |
43 | obj-$(CONFIG_NET_EMATCH_TEXT) += em_text.o | ||
diff --git a/net/sched/em_text.c b/net/sched/em_text.c new file mode 100644 index 000000000000..873840d8d072 --- /dev/null +++ b/net/sched/em_text.c | |||
@@ -0,0 +1,157 @@ | |||
1 | /* | ||
2 | * net/sched/em_text.c Textsearch ematch | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Authors: Thomas Graf <tgraf@suug.ch> | ||
10 | */ | ||
11 | |||
12 | #include <linux/config.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/string.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | #include <linux/textsearch.h> | ||
20 | #include <linux/tc_ematch/tc_em_text.h> | ||
21 | #include <net/pkt_cls.h> | ||
22 | |||
23 | struct text_match | ||
24 | { | ||
25 | u16 from_offset; | ||
26 | u16 to_offset; | ||
27 | u8 from_layer; | ||
28 | u8 to_layer; | ||
29 | struct ts_config *config; | ||
30 | }; | ||
31 | |||
32 | #define EM_TEXT_PRIV(m) ((struct text_match *) (m)->data) | ||
33 | |||
34 | static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m, | ||
35 | struct tcf_pkt_info *info) | ||
36 | { | ||
37 | struct text_match *tm = EM_TEXT_PRIV(m); | ||
38 | int from, to; | ||
39 | struct ts_state state; | ||
40 | |||
41 | from = tcf_get_base_ptr(skb, tm->from_layer) - skb->data; | ||
42 | from += tm->from_offset; | ||
43 | |||
44 | to = tcf_get_base_ptr(skb, tm->to_layer) - skb->data; | ||
45 | to += tm->to_offset; | ||
46 | |||
47 | return skb_find_text(skb, from, to, tm->config, &state) != UINT_MAX; | ||
48 | } | ||
49 | |||
50 | static int em_text_change(struct tcf_proto *tp, void *data, int len, | ||
51 | struct tcf_ematch *m) | ||
52 | { | ||
53 | struct text_match *tm; | ||
54 | struct tcf_em_text *conf = data; | ||
55 | struct ts_config *ts_conf; | ||
56 | int flags = 0; | ||
57 | |||
58 | printk("Configuring text: %s from %d:%d to %d:%d len %d\n", conf->algo, conf->from_offset, | ||
59 | conf->from_layer, conf->to_offset, conf->to_layer, conf->pattern_len); | ||
60 | |||
61 | if (len < sizeof(*conf) || len < (sizeof(*conf) + conf->pattern_len)) | ||
62 | return -EINVAL; | ||
63 | |||
64 | if (conf->from_layer > conf->to_layer) | ||
65 | return -EINVAL; | ||
66 | |||
67 | if (conf->from_layer == conf->to_layer && | ||
68 | conf->from_offset > conf->to_offset) | ||
69 | return -EINVAL; | ||
70 | |||
71 | retry: | ||
72 | ts_conf = textsearch_prepare(conf->algo, (u8 *) conf + sizeof(*conf), | ||
73 | conf->pattern_len, GFP_KERNEL, flags); | ||
74 | |||
75 | if (flags & TS_AUTOLOAD) | ||
76 | rtnl_lock(); | ||
77 | |||
78 | if (IS_ERR(ts_conf)) { | ||
79 | if (PTR_ERR(ts_conf) == -ENOENT && !(flags & TS_AUTOLOAD)) { | ||
80 | rtnl_unlock(); | ||
81 | flags |= TS_AUTOLOAD; | ||
82 | goto retry; | ||
83 | } else | ||
84 | return PTR_ERR(ts_conf); | ||
85 | } else if (flags & TS_AUTOLOAD) { | ||
86 | textsearch_destroy(ts_conf); | ||
87 | return -EAGAIN; | ||
88 | } | ||
89 | |||
90 | tm = kmalloc(sizeof(*tm), GFP_KERNEL); | ||
91 | if (tm == NULL) { | ||
92 | textsearch_destroy(ts_conf); | ||
93 | return -ENOBUFS; | ||
94 | } | ||
95 | |||
96 | tm->from_offset = conf->from_offset; | ||
97 | tm->to_offset = conf->to_offset; | ||
98 | tm->from_layer = conf->from_layer; | ||
99 | tm->to_layer = conf->to_layer; | ||
100 | tm->config = ts_conf; | ||
101 | |||
102 | m->datalen = sizeof(*tm); | ||
103 | m->data = (unsigned long) tm; | ||
104 | |||
105 | return 0; | ||
106 | } | ||
107 | |||
108 | static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m) | ||
109 | { | ||
110 | textsearch_destroy(EM_TEXT_PRIV(m)->config); | ||
111 | } | ||
112 | |||
113 | static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m) | ||
114 | { | ||
115 | struct text_match *tm = EM_TEXT_PRIV(m); | ||
116 | struct tcf_em_text conf; | ||
117 | |||
118 | strncpy(conf.algo, tm->config->ops->name, sizeof(conf.algo) - 1); | ||
119 | conf.from_offset = tm->from_offset; | ||
120 | conf.to_offset = tm->to_offset; | ||
121 | conf.from_layer = tm->from_layer; | ||
122 | conf.to_layer = tm->to_layer; | ||
123 | conf.pattern_len = textsearch_get_pattern_len(tm->config); | ||
124 | conf.pad = 0; | ||
125 | |||
126 | RTA_PUT_NOHDR(skb, sizeof(conf), &conf); | ||
127 | RTA_APPEND(skb, conf.pattern_len, textsearch_get_pattern(tm->config)); | ||
128 | return 0; | ||
129 | |||
130 | rtattr_failure: | ||
131 | return -1; | ||
132 | } | ||
133 | |||
134 | static struct tcf_ematch_ops em_text_ops = { | ||
135 | .kind = TCF_EM_TEXT, | ||
136 | .change = em_text_change, | ||
137 | .match = em_text_match, | ||
138 | .destroy = em_text_destroy, | ||
139 | .dump = em_text_dump, | ||
140 | .owner = THIS_MODULE, | ||
141 | .link = LIST_HEAD_INIT(em_text_ops.link) | ||
142 | }; | ||
143 | |||
144 | static int __init init_em_text(void) | ||
145 | { | ||
146 | return tcf_em_register(&em_text_ops); | ||
147 | } | ||
148 | |||
149 | static void __exit exit_em_text(void) | ||
150 | { | ||
151 | tcf_em_unregister(&em_text_ops); | ||
152 | } | ||
153 | |||
154 | MODULE_LICENSE("GPL"); | ||
155 | |||
156 | module_init(init_em_text); | ||
157 | module_exit(exit_em_text); | ||