aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-24 03:31:46 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-24 03:31:46 -0400
commit59a49e38711a146dc0bef4837c825b5422335460 (patch)
tree7e6e3d1850159f94e5b05d1c5775bd3cc87c3690 /net
parent52c1da39534fb382c061de58b65f678ad74b59f5 (diff)
parentf2d368fa3ef90f2159d9e542303901ebf68144dd (diff)
Merge rsync://rsync.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c125
-rw-r--r--net/core/skbuff.c157
-rw-r--r--net/core/sysctl_net_core.c46
-rw-r--r--net/ipv4/tcp.c31
-rw-r--r--net/ipv4/tcp_cong.c46
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/sched/Kconfig12
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/em_text.c157
10 files changed, 420 insertions, 159 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index ab935778ce8..7016e0c36b3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -115,18 +115,6 @@
115#endif /* CONFIG_NET_RADIO */ 115#endif /* CONFIG_NET_RADIO */
116#include <asm/current.h> 116#include <asm/current.h>
117 117
118/* This define, if set, will randomly drop a packet when congestion
119 * is more than moderate. It helps fairness in the multi-interface
120 * case when one of them is a hog, but it kills performance for the
121 * single interface case so it is off now by default.
122 */
123#undef RAND_LIE
124
125/* Setting this will sample the queue lengths and thus congestion
126 * via a timer instead of as each packet is received.
127 */
128#undef OFFLINE_SAMPLE
129
130/* 118/*
131 * The list of packet types we will receive (as opposed to discard) 119 * The list of packet types we will receive (as opposed to discard)
132 * and the routines to invoke. 120 * and the routines to invoke.
@@ -159,11 +147,6 @@ static DEFINE_SPINLOCK(ptype_lock);
159static struct list_head ptype_base[16]; /* 16 way hashed list */ 147static struct list_head ptype_base[16]; /* 16 way hashed list */
160static struct list_head ptype_all; /* Taps */ 148static struct list_head ptype_all; /* Taps */
161 149
162#ifdef OFFLINE_SAMPLE
163static void sample_queue(unsigned long dummy);
164static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
165#endif
166
167/* 150/*
168 * The @dev_base list is protected by @dev_base_lock and the rtnl 151 * The @dev_base list is protected by @dev_base_lock and the rtnl
169 * semaphore. 152 * semaphore.
@@ -215,7 +198,7 @@ static struct notifier_block *netdev_chain;
215 * Device drivers call our routines to queue packets here. We empty the 198 * Device drivers call our routines to queue packets here. We empty the
216 * queue in the local softnet handler. 199 * queue in the local softnet handler.
217 */ 200 */
218DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, }; 201DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
219 202
220#ifdef CONFIG_SYSFS 203#ifdef CONFIG_SYSFS
221extern int netdev_sysfs_init(void); 204extern int netdev_sysfs_init(void);
@@ -1363,71 +1346,13 @@ out:
1363 Receiver routines 1346 Receiver routines
1364 =======================================================================*/ 1347 =======================================================================*/
1365 1348
1366int netdev_max_backlog = 300; 1349int netdev_max_backlog = 1000;
1350int netdev_budget = 300;
1367int weight_p = 64; /* old backlog weight */ 1351int weight_p = 64; /* old backlog weight */
1368/* These numbers are selected based on intuition and some
1369 * experimentatiom, if you have more scientific way of doing this
1370 * please go ahead and fix things.
1371 */
1372int no_cong_thresh = 10;
1373int no_cong = 20;
1374int lo_cong = 100;
1375int mod_cong = 290;
1376 1352
1377DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; 1353DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1378 1354
1379 1355
1380static void get_sample_stats(int cpu)
1381{
1382#ifdef RAND_LIE
1383 unsigned long rd;
1384 int rq;
1385#endif
1386 struct softnet_data *sd = &per_cpu(softnet_data, cpu);
1387 int blog = sd->input_pkt_queue.qlen;
1388 int avg_blog = sd->avg_blog;
1389
1390 avg_blog = (avg_blog >> 1) + (blog >> 1);
1391
1392 if (avg_blog > mod_cong) {
1393 /* Above moderate congestion levels. */
1394 sd->cng_level = NET_RX_CN_HIGH;
1395#ifdef RAND_LIE
1396 rd = net_random();
1397 rq = rd % netdev_max_backlog;
1398 if (rq < avg_blog) /* unlucky bastard */
1399 sd->cng_level = NET_RX_DROP;
1400#endif
1401 } else if (avg_blog > lo_cong) {
1402 sd->cng_level = NET_RX_CN_MOD;
1403#ifdef RAND_LIE
1404 rd = net_random();
1405 rq = rd % netdev_max_backlog;
1406 if (rq < avg_blog) /* unlucky bastard */
1407 sd->cng_level = NET_RX_CN_HIGH;
1408#endif
1409 } else if (avg_blog > no_cong)
1410 sd->cng_level = NET_RX_CN_LOW;
1411 else /* no congestion */
1412 sd->cng_level = NET_RX_SUCCESS;
1413
1414 sd->avg_blog = avg_blog;
1415}
1416
1417#ifdef OFFLINE_SAMPLE
1418static void sample_queue(unsigned long dummy)
1419{
1420/* 10 ms 0r 1ms -- i don't care -- JHS */
1421 int next_tick = 1;
1422 int cpu = smp_processor_id();
1423
1424 get_sample_stats(cpu);
1425 next_tick += jiffies;
1426 mod_timer(&samp_timer, next_tick);
1427}
1428#endif
1429
1430
1431/** 1356/**
1432 * netif_rx - post buffer to the network code 1357 * netif_rx - post buffer to the network code
1433 * @skb: buffer to post 1358 * @skb: buffer to post
@@ -1448,7 +1373,6 @@ static void sample_queue(unsigned long dummy)
1448 1373
1449int netif_rx(struct sk_buff *skb) 1374int netif_rx(struct sk_buff *skb)
1450{ 1375{
1451 int this_cpu;
1452 struct softnet_data *queue; 1376 struct softnet_data *queue;
1453 unsigned long flags; 1377 unsigned long flags;
1454 1378
@@ -1464,38 +1388,22 @@ int netif_rx(struct sk_buff *skb)
1464 * short when CPU is congested, but is still operating. 1388 * short when CPU is congested, but is still operating.
1465 */ 1389 */
1466 local_irq_save(flags); 1390 local_irq_save(flags);
1467 this_cpu = smp_processor_id();
1468 queue = &__get_cpu_var(softnet_data); 1391 queue = &__get_cpu_var(softnet_data);
1469 1392
1470 __get_cpu_var(netdev_rx_stat).total++; 1393 __get_cpu_var(netdev_rx_stat).total++;
1471 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { 1394 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1472 if (queue->input_pkt_queue.qlen) { 1395 if (queue->input_pkt_queue.qlen) {
1473 if (queue->throttle)
1474 goto drop;
1475
1476enqueue: 1396enqueue:
1477 dev_hold(skb->dev); 1397 dev_hold(skb->dev);
1478 __skb_queue_tail(&queue->input_pkt_queue, skb); 1398 __skb_queue_tail(&queue->input_pkt_queue, skb);
1479#ifndef OFFLINE_SAMPLE
1480 get_sample_stats(this_cpu);
1481#endif
1482 local_irq_restore(flags); 1399 local_irq_restore(flags);
1483 return queue->cng_level; 1400 return NET_RX_SUCCESS;
1484 } 1401 }
1485 1402
1486 if (queue->throttle)
1487 queue->throttle = 0;
1488
1489 netif_rx_schedule(&queue->backlog_dev); 1403 netif_rx_schedule(&queue->backlog_dev);
1490 goto enqueue; 1404 goto enqueue;
1491 } 1405 }
1492 1406
1493 if (!queue->throttle) {
1494 queue->throttle = 1;
1495 __get_cpu_var(netdev_rx_stat).throttled++;
1496 }
1497
1498drop:
1499 __get_cpu_var(netdev_rx_stat).dropped++; 1407 __get_cpu_var(netdev_rx_stat).dropped++;
1500 local_irq_restore(flags); 1408 local_irq_restore(flags);
1501 1409
@@ -1780,8 +1688,6 @@ job_done:
1780 smp_mb__before_clear_bit(); 1688 smp_mb__before_clear_bit();
1781 netif_poll_enable(backlog_dev); 1689 netif_poll_enable(backlog_dev);
1782 1690
1783 if (queue->throttle)
1784 queue->throttle = 0;
1785 local_irq_enable(); 1691 local_irq_enable();
1786 return 0; 1692 return 0;
1787} 1693}
@@ -1790,8 +1696,7 @@ static void net_rx_action(struct softirq_action *h)
1790{ 1696{
1791 struct softnet_data *queue = &__get_cpu_var(softnet_data); 1697 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1792 unsigned long start_time = jiffies; 1698 unsigned long start_time = jiffies;
1793 int budget = netdev_max_backlog; 1699 int budget = netdev_budget;
1794
1795 1700
1796 local_irq_disable(); 1701 local_irq_disable();
1797 1702
@@ -2055,15 +1960,9 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
2055 struct netif_rx_stats *s = v; 1960 struct netif_rx_stats *s = v;
2056 1961
2057 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 1962 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2058 s->total, s->dropped, s->time_squeeze, s->throttled, 1963 s->total, s->dropped, s->time_squeeze, 0,
2059 s->fastroute_hit, s->fastroute_success, s->fastroute_defer, 1964 0, 0, 0, 0, /* was fastroute */
2060 s->fastroute_deferred_out, 1965 s->cpu_collision );
2061#if 0
2062 s->fastroute_latency_reduction
2063#else
2064 s->cpu_collision
2065#endif
2066 );
2067 return 0; 1966 return 0;
2068} 1967}
2069 1968
@@ -3305,9 +3204,6 @@ static int __init net_dev_init(void)
3305 3204
3306 queue = &per_cpu(softnet_data, i); 3205 queue = &per_cpu(softnet_data, i);
3307 skb_queue_head_init(&queue->input_pkt_queue); 3206 skb_queue_head_init(&queue->input_pkt_queue);
3308 queue->throttle = 0;
3309 queue->cng_level = 0;
3310 queue->avg_blog = 10; /* arbitrary non-zero */
3311 queue->completion_queue = NULL; 3207 queue->completion_queue = NULL;
3312 INIT_LIST_HEAD(&queue->poll_list); 3208 INIT_LIST_HEAD(&queue->poll_list);
3313 set_bit(__LINK_STATE_START, &queue->backlog_dev.state); 3209 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
@@ -3316,11 +3212,6 @@ static int __init net_dev_init(void)
3316 atomic_set(&queue->backlog_dev.refcnt, 1); 3212 atomic_set(&queue->backlog_dev.refcnt, 1);
3317 } 3213 }
3318 3214
3319#ifdef OFFLINE_SAMPLE
3320 samp_timer.expires = jiffies + (10 * HZ);
3321 add_timer(&samp_timer);
3322#endif
3323
3324 dev_boot_phase = 0; 3215 dev_boot_phase = 0;
3325 3216
3326 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); 3217 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6d68c03bc05..bb73b2190ec 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1500,6 +1500,159 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
1500 skb_split_no_header(skb, skb1, len, pos); 1500 skb_split_no_header(skb, skb1, len, pos);
1501} 1501}
1502 1502
1503/**
1504 * skb_prepare_seq_read - Prepare a sequential read of skb data
1505 * @skb: the buffer to read
1506 * @from: lower offset of data to be read
1507 * @to: upper offset of data to be read
1508 * @st: state variable
1509 *
1510 * Initializes the specified state variable. Must be called before
1511 * invoking skb_seq_read() for the first time.
1512 */
1513void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
1514 unsigned int to, struct skb_seq_state *st)
1515{
1516 st->lower_offset = from;
1517 st->upper_offset = to;
1518 st->root_skb = st->cur_skb = skb;
1519 st->frag_idx = st->stepped_offset = 0;
1520 st->frag_data = NULL;
1521}
1522
1523/**
1524 * skb_seq_read - Sequentially read skb data
1525 * @consumed: number of bytes consumed by the caller so far
1526 * @data: destination pointer for data to be returned
1527 * @st: state variable
1528 *
1529 * Reads a block of skb data at &consumed relative to the
1530 * lower offset specified to skb_prepare_seq_read(). Assigns
1531 * the head of the data block to &data and returns the length
1532 * of the block or 0 if the end of the skb data or the upper
1533 * offset has been reached.
1534 *
1535 * The caller is not required to consume all of the data
1536 * returned, i.e. &consumed is typically set to the number
1537 * of bytes already consumed and the next call to
1538 * skb_seq_read() will return the remaining part of the block.
1539 *
1540 * Note: The size of each block of data returned can be arbitrary,
1541 * this limitation is the cost for zerocopy sequential
1542 * reads of potentially non linear data.
1543 *
1544 * Note: Fragment lists within fragments are not implemented
1545 * at the moment, state->root_skb could be replaced with
1546 * a stack for this purpose.
1547 */
1548unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
1549 struct skb_seq_state *st)
1550{
1551 unsigned int block_limit, abs_offset = consumed + st->lower_offset;
1552 skb_frag_t *frag;
1553
1554 if (unlikely(abs_offset >= st->upper_offset))
1555 return 0;
1556
1557next_skb:
1558 block_limit = skb_headlen(st->cur_skb);
1559
1560 if (abs_offset < block_limit) {
1561 *data = st->cur_skb->data + abs_offset;
1562 return block_limit - abs_offset;
1563 }
1564
1565 if (st->frag_idx == 0 && !st->frag_data)
1566 st->stepped_offset += skb_headlen(st->cur_skb);
1567
1568 while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
1569 frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
1570 block_limit = frag->size + st->stepped_offset;
1571
1572 if (abs_offset < block_limit) {
1573 if (!st->frag_data)
1574 st->frag_data = kmap_skb_frag(frag);
1575
1576 *data = (u8 *) st->frag_data + frag->page_offset +
1577 (abs_offset - st->stepped_offset);
1578
1579 return block_limit - abs_offset;
1580 }
1581
1582 if (st->frag_data) {
1583 kunmap_skb_frag(st->frag_data);
1584 st->frag_data = NULL;
1585 }
1586
1587 st->frag_idx++;
1588 st->stepped_offset += frag->size;
1589 }
1590
1591 if (st->cur_skb->next) {
1592 st->cur_skb = st->cur_skb->next;
1593 st->frag_idx = 0;
1594 goto next_skb;
1595 } else if (st->root_skb == st->cur_skb &&
1596 skb_shinfo(st->root_skb)->frag_list) {
1597 st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
1598 goto next_skb;
1599 }
1600
1601 return 0;
1602}
1603
1604/**
1605 * skb_abort_seq_read - Abort a sequential read of skb data
1606 * @st: state variable
1607 *
1608 * Must be called if skb_seq_read() was not called until it
1609 * returned 0.
1610 */
1611void skb_abort_seq_read(struct skb_seq_state *st)
1612{
1613 if (st->frag_data)
1614 kunmap_skb_frag(st->frag_data);
1615}
1616
1617#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
1618
1619static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
1620 struct ts_config *conf,
1621 struct ts_state *state)
1622{
1623 return skb_seq_read(offset, text, TS_SKB_CB(state));
1624}
1625
1626static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
1627{
1628 skb_abort_seq_read(TS_SKB_CB(state));
1629}
1630
1631/**
1632 * skb_find_text - Find a text pattern in skb data
1633 * @skb: the buffer to look in
1634 * @from: search offset
1635 * @to: search limit
1636 * @config: textsearch configuration
1637 * @state: uninitialized textsearch state variable
1638 *
1639 * Finds a pattern in the skb data according to the specified
1640 * textsearch configuration. Use textsearch_next() to retrieve
1641 * subsequent occurrences of the pattern. Returns the offset
1642 * to the first occurrence or UINT_MAX if no match was found.
1643 */
1644unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
1645 unsigned int to, struct ts_config *config,
1646 struct ts_state *state)
1647{
1648 config->get_next_block = skb_ts_get_next_block;
1649 config->finish = skb_ts_finish;
1650
1651 skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
1652
1653 return textsearch_find(config, state);
1654}
1655
1503void __init skb_init(void) 1656void __init skb_init(void)
1504{ 1657{
1505 skbuff_head_cache = kmem_cache_create("skbuff_head_cache", 1658 skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
@@ -1538,3 +1691,7 @@ EXPORT_SYMBOL(skb_queue_tail);
1538EXPORT_SYMBOL(skb_unlink); 1691EXPORT_SYMBOL(skb_unlink);
1539EXPORT_SYMBOL(skb_append); 1692EXPORT_SYMBOL(skb_append);
1540EXPORT_SYMBOL(skb_split); 1693EXPORT_SYMBOL(skb_split);
1694EXPORT_SYMBOL(skb_prepare_seq_read);
1695EXPORT_SYMBOL(skb_seq_read);
1696EXPORT_SYMBOL(skb_abort_seq_read);
1697EXPORT_SYMBOL(skb_find_text);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 880a8881521..8f817ad9f54 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -13,12 +13,8 @@
13#ifdef CONFIG_SYSCTL 13#ifdef CONFIG_SYSCTL
14 14
15extern int netdev_max_backlog; 15extern int netdev_max_backlog;
16extern int netdev_budget;
16extern int weight_p; 17extern int weight_p;
17extern int no_cong_thresh;
18extern int no_cong;
19extern int lo_cong;
20extern int mod_cong;
21extern int netdev_fastroute;
22extern int net_msg_cost; 18extern int net_msg_cost;
23extern int net_msg_burst; 19extern int net_msg_burst;
24 20
@@ -86,38 +82,6 @@ ctl_table core_table[] = {
86 .proc_handler = &proc_dointvec 82 .proc_handler = &proc_dointvec
87 }, 83 },
88 { 84 {
89 .ctl_name = NET_CORE_NO_CONG_THRESH,
90 .procname = "no_cong_thresh",
91 .data = &no_cong_thresh,
92 .maxlen = sizeof(int),
93 .mode = 0644,
94 .proc_handler = &proc_dointvec
95 },
96 {
97 .ctl_name = NET_CORE_NO_CONG,
98 .procname = "no_cong",
99 .data = &no_cong,
100 .maxlen = sizeof(int),
101 .mode = 0644,
102 .proc_handler = &proc_dointvec
103 },
104 {
105 .ctl_name = NET_CORE_LO_CONG,
106 .procname = "lo_cong",
107 .data = &lo_cong,
108 .maxlen = sizeof(int),
109 .mode = 0644,
110 .proc_handler = &proc_dointvec
111 },
112 {
113 .ctl_name = NET_CORE_MOD_CONG,
114 .procname = "mod_cong",
115 .data = &mod_cong,
116 .maxlen = sizeof(int),
117 .mode = 0644,
118 .proc_handler = &proc_dointvec
119 },
120 {
121 .ctl_name = NET_CORE_MSG_COST, 85 .ctl_name = NET_CORE_MSG_COST,
122 .procname = "message_cost", 86 .procname = "message_cost",
123 .data = &net_msg_cost, 87 .data = &net_msg_cost,
@@ -161,6 +125,14 @@ ctl_table core_table[] = {
161 .mode = 0644, 125 .mode = 0644,
162 .proc_handler = &proc_dointvec 126 .proc_handler = &proc_dointvec
163 }, 127 },
128 {
129 .ctl_name = NET_CORE_BUDGET,
130 .procname = "netdev_budget",
131 .data = &netdev_budget,
132 .maxlen = sizeof(int),
133 .mode = 0644,
134 .proc_handler = &proc_dointvec
135 },
164 { .ctl_name = 0 } 136 { .ctl_name = 0 }
165}; 137};
166 138
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f3dbc8dc126..882436da9a3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1927,6 +1927,25 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
1927 return tp->af_specific->setsockopt(sk, level, optname, 1927 return tp->af_specific->setsockopt(sk, level, optname,
1928 optval, optlen); 1928 optval, optlen);
1929 1929
1930 /* This is a string value; all the others are ints */
1931 if (optname == TCP_CONGESTION) {
1932 char name[TCP_CA_NAME_MAX];
1933
1934 if (optlen < 1)
1935 return -EINVAL;
1936
1937 val = strncpy_from_user(name, optval,
1938 min(TCP_CA_NAME_MAX-1, optlen));
1939 if (val < 0)
1940 return -EFAULT;
1941 name[val] = 0;
1942
1943 lock_sock(sk);
1944 err = tcp_set_congestion_control(tp, name);
1945 release_sock(sk);
1946 return err;
1947 }
1948
1930 if (optlen < sizeof(int)) 1949 if (optlen < sizeof(int))
1931 return -EINVAL; 1950 return -EINVAL;
1932 1951
@@ -2211,6 +2230,16 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2211 case TCP_QUICKACK: 2230 case TCP_QUICKACK:
2212 val = !tp->ack.pingpong; 2231 val = !tp->ack.pingpong;
2213 break; 2232 break;
2233
2234 case TCP_CONGESTION:
2235 if (get_user(len, optlen))
2236 return -EFAULT;
2237 len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
2238 if (put_user(len, optlen))
2239 return -EFAULT;
2240 if (copy_to_user(optval, tp->ca_ops->name, len))
2241 return -EFAULT;
2242 return 0;
2214 default: 2243 default:
2215 return -ENOPROTOOPT; 2244 return -ENOPROTOOPT;
2216 }; 2245 };
@@ -2224,7 +2253,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2224 2253
2225 2254
2226extern void __skb_cb_too_small_for_tcp(int, int); 2255extern void __skb_cb_too_small_for_tcp(int, int);
2227extern void tcpdiag_init(void); 2256extern struct tcp_congestion_ops tcp_reno;
2228 2257
2229static __initdata unsigned long thash_entries; 2258static __initdata unsigned long thash_entries;
2230static int __init set_thash_entries(char *str) 2259static int __init set_thash_entries(char *str)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 665394a63ae..4970d10a778 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -21,7 +21,7 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name)
21{ 21{
22 struct tcp_congestion_ops *e; 22 struct tcp_congestion_ops *e;
23 23
24 list_for_each_entry(e, &tcp_cong_list, list) { 24 list_for_each_entry_rcu(e, &tcp_cong_list, list) {
25 if (strcmp(e->name, name) == 0) 25 if (strcmp(e->name, name) == 0)
26 return e; 26 return e;
27 } 27 }
@@ -77,6 +77,9 @@ void tcp_init_congestion_control(struct tcp_sock *tp)
77{ 77{
78 struct tcp_congestion_ops *ca; 78 struct tcp_congestion_ops *ca;
79 79
80 if (tp->ca_ops != &tcp_init_congestion_ops)
81 return;
82
80 rcu_read_lock(); 83 rcu_read_lock();
81 list_for_each_entry_rcu(ca, &tcp_cong_list, list) { 84 list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
82 if (try_module_get(ca->owner)) { 85 if (try_module_get(ca->owner)) {
@@ -139,6 +142,34 @@ void tcp_get_default_congestion_control(char *name)
139 rcu_read_unlock(); 142 rcu_read_unlock();
140} 143}
141 144
145/* Change congestion control for socket */
146int tcp_set_congestion_control(struct tcp_sock *tp, const char *name)
147{
148 struct tcp_congestion_ops *ca;
149 int err = 0;
150
151 rcu_read_lock();
152 ca = tcp_ca_find(name);
153 if (ca == tp->ca_ops)
154 goto out;
155
156 if (!ca)
157 err = -ENOENT;
158
159 else if (!try_module_get(ca->owner))
160 err = -EBUSY;
161
162 else {
163 tcp_cleanup_congestion_control(tp);
164 tp->ca_ops = ca;
165 if (tp->ca_ops->init)
166 tp->ca_ops->init(tp);
167 }
168 out:
169 rcu_read_unlock();
170 return err;
171}
172
142/* 173/*
143 * TCP Reno congestion control 174 * TCP Reno congestion control
144 * This is special case used for fallback as well. 175 * This is special case used for fallback as well.
@@ -192,4 +223,15 @@ struct tcp_congestion_ops tcp_reno = {
192 .min_cwnd = tcp_reno_min_cwnd, 223 .min_cwnd = tcp_reno_min_cwnd,
193}; 224};
194 225
195EXPORT_SYMBOL_GPL(tcp_reno); 226/* Initial congestion control used (until SYN)
227 * really reno under another name so we can tell difference
228 * during tcp_set_default_congestion_control
229 */
230struct tcp_congestion_ops tcp_init_congestion_ops = {
231 .name = "",
232 .owner = THIS_MODULE,
233 .ssthresh = tcp_reno_ssthresh,
234 .cong_avoid = tcp_reno_cong_avoid,
235 .min_cwnd = tcp_reno_min_cwnd,
236};
237EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9122814c13a..ebf112347a9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2048,7 +2048,7 @@ static int tcp_v4_init_sock(struct sock *sk)
2048 tp->mss_cache_std = tp->mss_cache = 536; 2048 tp->mss_cache_std = tp->mss_cache = 536;
2049 2049
2050 tp->reordering = sysctl_tcp_reordering; 2050 tp->reordering = sysctl_tcp_reordering;
2051 tp->ca_ops = &tcp_reno; 2051 tp->ca_ops = &tcp_init_congestion_ops;
2052 2052
2053 sk->sk_state = TCP_CLOSE; 2053 sk->sk_state = TCP_CLOSE;
2054 2054
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fce56039b0e..9dac7fdf472 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2025,7 +2025,7 @@ static int tcp_v6_init_sock(struct sock *sk)
2025 sk->sk_state = TCP_CLOSE; 2025 sk->sk_state = TCP_CLOSE;
2026 2026
2027 tp->af_specific = &ipv6_specific; 2027 tp->af_specific = &ipv6_specific;
2028 tp->ca_ops = &tcp_reno; 2028 tp->ca_ops = &tcp_init_congestion_ops;
2029 sk->sk_write_space = sk_stream_write_space; 2029 sk->sk_write_space = sk_stream_write_space;
2030 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 2030 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2031 2031
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index b22c9beb604..447b89e556b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -449,6 +449,18 @@ config NET_EMATCH_META
449 To compile this code as a module, choose M here: the 449 To compile this code as a module, choose M here: the
450 module will be called em_meta. 450 module will be called em_meta.
451 451
452config NET_EMATCH_TEXT
453 tristate "Textsearch"
454 depends on NET_EMATCH
455 select TEXTSEARCH
456 ---help---
457 Say Y here if you want to be able to classify packets based on
458 textsearch comparisons. Please select the appropriate textsearch
459 algorithms in the Library section.
460
461 To compile this code as a module, choose M here: the
462 module will be called em_text.
463
452config NET_CLS_ACT 464config NET_CLS_ACT
453 bool "Packet ACTION" 465 bool "Packet ACTION"
454 depends on EXPERIMENTAL && NET_CLS && NET_QOS 466 depends on EXPERIMENTAL && NET_CLS && NET_QOS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index eb3fe583eba..8f58cecd626 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -40,3 +40,4 @@ obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
40obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o 40obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
41obj-$(CONFIG_NET_EMATCH_U32) += em_u32.o 41obj-$(CONFIG_NET_EMATCH_U32) += em_u32.o
42obj-$(CONFIG_NET_EMATCH_META) += em_meta.o 42obj-$(CONFIG_NET_EMATCH_META) += em_meta.o
43obj-$(CONFIG_NET_EMATCH_TEXT) += em_text.o
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
new file mode 100644
index 00000000000..873840d8d07
--- /dev/null
+++ b/net/sched/em_text.c
@@ -0,0 +1,157 @@
1/*
2 * net/sched/em_text.c Textsearch ematch
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Thomas Graf <tgraf@suug.ch>
10 */
11
12#include <linux/config.h>
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/kernel.h>
16#include <linux/sched.h>
17#include <linux/string.h>
18#include <linux/skbuff.h>
19#include <linux/textsearch.h>
20#include <linux/tc_ematch/tc_em_text.h>
21#include <net/pkt_cls.h>
22
23struct text_match
24{
25 u16 from_offset;
26 u16 to_offset;
27 u8 from_layer;
28 u8 to_layer;
29 struct ts_config *config;
30};
31
32#define EM_TEXT_PRIV(m) ((struct text_match *) (m)->data)
33
34static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m,
35 struct tcf_pkt_info *info)
36{
37 struct text_match *tm = EM_TEXT_PRIV(m);
38 int from, to;
39 struct ts_state state;
40
41 from = tcf_get_base_ptr(skb, tm->from_layer) - skb->data;
42 from += tm->from_offset;
43
44 to = tcf_get_base_ptr(skb, tm->to_layer) - skb->data;
45 to += tm->to_offset;
46
47 return skb_find_text(skb, from, to, tm->config, &state) != UINT_MAX;
48}
49
50static int em_text_change(struct tcf_proto *tp, void *data, int len,
51 struct tcf_ematch *m)
52{
53 struct text_match *tm;
54 struct tcf_em_text *conf = data;
55 struct ts_config *ts_conf;
56 int flags = 0;
57
58 printk("Configuring text: %s from %d:%d to %d:%d len %d\n", conf->algo, conf->from_offset,
59 conf->from_layer, conf->to_offset, conf->to_layer, conf->pattern_len);
60
61 if (len < sizeof(*conf) || len < (sizeof(*conf) + conf->pattern_len))
62 return -EINVAL;
63
64 if (conf->from_layer > conf->to_layer)
65 return -EINVAL;
66
67 if (conf->from_layer == conf->to_layer &&
68 conf->from_offset > conf->to_offset)
69 return -EINVAL;
70
71retry:
72 ts_conf = textsearch_prepare(conf->algo, (u8 *) conf + sizeof(*conf),
73 conf->pattern_len, GFP_KERNEL, flags);
74
75 if (flags & TS_AUTOLOAD)
76 rtnl_lock();
77
78 if (IS_ERR(ts_conf)) {
79 if (PTR_ERR(ts_conf) == -ENOENT && !(flags & TS_AUTOLOAD)) {
80 rtnl_unlock();
81 flags |= TS_AUTOLOAD;
82 goto retry;
83 } else
84 return PTR_ERR(ts_conf);
85 } else if (flags & TS_AUTOLOAD) {
86 textsearch_destroy(ts_conf);
87 return -EAGAIN;
88 }
89
90 tm = kmalloc(sizeof(*tm), GFP_KERNEL);
91 if (tm == NULL) {
92 textsearch_destroy(ts_conf);
93 return -ENOBUFS;
94 }
95
96 tm->from_offset = conf->from_offset;
97 tm->to_offset = conf->to_offset;
98 tm->from_layer = conf->from_layer;
99 tm->to_layer = conf->to_layer;
100 tm->config = ts_conf;
101
102 m->datalen = sizeof(*tm);
103 m->data = (unsigned long) tm;
104
105 return 0;
106}
107
108static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
109{
110 textsearch_destroy(EM_TEXT_PRIV(m)->config);
111}
112
113static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
114{
115 struct text_match *tm = EM_TEXT_PRIV(m);
116 struct tcf_em_text conf;
117
118 strncpy(conf.algo, tm->config->ops->name, sizeof(conf.algo) - 1);
119 conf.from_offset = tm->from_offset;
120 conf.to_offset = tm->to_offset;
121 conf.from_layer = tm->from_layer;
122 conf.to_layer = tm->to_layer;
123 conf.pattern_len = textsearch_get_pattern_len(tm->config);
124 conf.pad = 0;
125
126 RTA_PUT_NOHDR(skb, sizeof(conf), &conf);
127 RTA_APPEND(skb, conf.pattern_len, textsearch_get_pattern(tm->config));
128 return 0;
129
130rtattr_failure:
131 return -1;
132}
133
134static struct tcf_ematch_ops em_text_ops = {
135 .kind = TCF_EM_TEXT,
136 .change = em_text_change,
137 .match = em_text_match,
138 .destroy = em_text_destroy,
139 .dump = em_text_dump,
140 .owner = THIS_MODULE,
141 .link = LIST_HEAD_INIT(em_text_ops.link)
142};
143
144static int __init init_em_text(void)
145{
146 return tcf_em_register(&em_text_ops);
147}
148
149static void __exit exit_em_text(void)
150{
151 tcf_em_unregister(&em_text_ops);
152}
153
154MODULE_LICENSE("GPL");
155
156module_init(init_em_text);
157module_exit(exit_em_text);