-rw-r--r--  drivers/net/bnx2.c               |   2
-rw-r--r--  drivers/net/bnx2x/bnx2x_link.c   |   3
-rw-r--r--  drivers/net/sfc/efx.c            |  24
-rw-r--r--  drivers/net/sfc/efx.h            |   2
-rw-r--r--  drivers/net/sfc/net_driver.h     |  13
-rw-r--r--  drivers/net/sfc/tx.c             | 111
-rw-r--r--  drivers/net/stmmac/stmmac_main.c |  50
-rw-r--r--  drivers/net/sundance.c           |  23
-rw-r--r--  include/net/tcp.h                |   3
-rw-r--r--  net/core/dev.c                   |   9
-rw-r--r--  net/core/filter.c                |  72
-rw-r--r--  net/ipv4/tcp_output.c            |  11
-rw-r--r--  net/sched/sch_sfq.c              | 260
13 files changed, 299 insertions(+), 284 deletions(-)
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 819b55cbd49..6fa798468ad 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -8395,8 +8395,6 @@ bnx2_remove_one(struct pci_dev *pdev)
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct bnx2 *bp = netdev_priv(dev);
 
-	cancel_work_sync(&bp->reset_task);
-
 	unregister_netdev(dev);
 
 	if (bp->mips_firmware)
diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c
index 97cbee2927f..43b0de24f39 100644
--- a/drivers/net/bnx2x/bnx2x_link.c
+++ b/drivers/net/bnx2x/bnx2x_link.c
@@ -354,9 +354,6 @@ u8 bnx2x_ets_strict(const struct link_params *params, const u8 strict_cos)
 	struct bnx2x *bp = params->bp;
 	u32 val	= 0;
 
-	if ((1 < strict_cos) && (NULL == params))
-		return -EINVAL;
-
 	DP(NETIF_MSG_LINK, "ETS enabled strict configuration\n");
 	/**
 	 * Bitmap of 5bits length. Each bit specifies whether the entry behaves
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index 2166c1d0a53..711449c6e67 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -461,9 +461,6 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
 		}
 	}
 
-	spin_lock_init(&channel->tx_stop_lock);
-	atomic_set(&channel->tx_stop_count, 1);
-
 	rx_queue = &channel->rx_queue;
 	rx_queue->efx = efx;
 	setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
@@ -1406,11 +1403,11 @@ static void efx_start_all(struct efx_nic *efx)
 	 * restart the transmit interface early so the watchdog timer stops */
 	efx_start_port(efx);
 
-	efx_for_each_channel(channel, efx) {
-		if (efx_dev_registered(efx))
-			efx_wake_queue(channel);
+	if (efx_dev_registered(efx))
+		netif_tx_wake_all_queues(efx->net_dev);
+
+	efx_for_each_channel(channel, efx)
 		efx_start_channel(channel);
-	}
 
 	if (efx->legacy_irq)
 		efx->legacy_irq_enabled = true;
@@ -1498,9 +1495,7 @@ static void efx_stop_all(struct efx_nic *efx)
 	/* Stop the kernel transmit interface late, so the watchdog
 	 * timer isn't ticking over the flush */
 	if (efx_dev_registered(efx)) {
-		struct efx_channel *channel;
-		efx_for_each_channel(channel, efx)
-			efx_stop_queue(channel);
+		netif_tx_stop_all_queues(efx->net_dev);
 		netif_tx_lock_bh(efx->net_dev);
 		netif_tx_unlock_bh(efx->net_dev);
 	}
@@ -1896,6 +1891,7 @@ static DEVICE_ATTR(phy_type, 0644, show_phy_type, NULL);
 static int efx_register_netdev(struct efx_nic *efx)
 {
 	struct net_device *net_dev = efx->net_dev;
+	struct efx_channel *channel;
 	int rc;
 
 	net_dev->watchdog_timeo = 5 * HZ;
@@ -1918,6 +1914,14 @@ static int efx_register_netdev(struct efx_nic *efx)
 	if (rc)
 		goto fail_locked;
 
+	efx_for_each_channel(channel, efx) {
+		struct efx_tx_queue *tx_queue;
+		efx_for_each_channel_tx_queue(tx_queue, channel) {
+			tx_queue->core_txq = netdev_get_tx_queue(
+				efx->net_dev, tx_queue->queue / EFX_TXQ_TYPES);
+		}
+	}
+
 	/* Always start with carrier off; PHY events will detect the link */
 	netif_carrier_off(efx->net_dev);
 
diff --git a/drivers/net/sfc/efx.h b/drivers/net/sfc/efx.h
index 003fdb35b4b..d43a7e5212b 100644
--- a/drivers/net/sfc/efx.h
+++ b/drivers/net/sfc/efx.h
@@ -36,8 +36,6 @@ efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev);
 extern netdev_tx_t
 efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
 extern void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
-extern void efx_stop_queue(struct efx_channel *channel);
-extern void efx_wake_queue(struct efx_channel *channel);
 
 /* RX */
 extern int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h
index 76f2fb197f0..bdce66ddf93 100644
--- a/drivers/net/sfc/net_driver.h
+++ b/drivers/net/sfc/net_driver.h
@@ -136,6 +136,7 @@ struct efx_tx_buffer {
  * @efx: The associated Efx NIC
  * @queue: DMA queue number
  * @channel: The associated channel
+ * @core_txq: The networking core TX queue structure
  * @buffer: The software buffer ring
  * @txd: The hardware descriptor ring
  * @ptr_mask: The size of the ring minus 1.
@@ -148,8 +149,6 @@ struct efx_tx_buffer {
  *	variable indicates that the queue is empty. This is to
  *	avoid cache-line ping-pong between the xmit path and the
  *	completion path.
- * @stopped: Stopped count.
- *	Set if this TX queue is currently stopping its port.
  * @insert_count: Current insert pointer
  *	This is the number of buffers that have been added to the
  *	software ring.
@@ -179,7 +178,7 @@ struct efx_tx_queue {
 	struct efx_nic *efx ____cacheline_aligned_in_smp;
 	unsigned queue;
 	struct efx_channel *channel;
-	struct efx_nic *nic;
+	struct netdev_queue *core_txq;
 	struct efx_tx_buffer *buffer;
 	struct efx_special_buffer txd;
 	unsigned int ptr_mask;
@@ -188,7 +187,6 @@ struct efx_tx_queue {
 	/* Members used mainly on the completion path */
 	unsigned int read_count ____cacheline_aligned_in_smp;
 	unsigned int old_write_count;
-	int stopped;
 
 	/* Members used only on the xmit path */
 	unsigned int insert_count ____cacheline_aligned_in_smp;
@@ -321,7 +319,6 @@ enum efx_rx_alloc_method {
  * @irq_moderation: IRQ moderation value (in hardware ticks)
  * @napi_dev: Net device used with NAPI
  * @napi_str: NAPI control structure
- * @reset_work: Scheduled reset work thread
  * @work_pending: Is work pending via NAPI?
  * @eventq: Event queue buffer
  * @eventq_mask: Event queue pointer mask
@@ -342,8 +339,6 @@ enum efx_rx_alloc_method {
  * @n_rx_overlength: Count of RX_OVERLENGTH errors
  * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun
  * @rx_queue: RX queue for this channel
- * @tx_stop_count: Core TX queue stop count
- * @tx_stop_lock: Core TX queue stop lock
  * @tx_queue: TX queues for this channel
  */
 struct efx_channel {
@@ -382,10 +377,6 @@ struct efx_channel {
 	bool rx_pkt_csummed;
 
 	struct efx_rx_queue rx_queue;
-
-	atomic_t tx_stop_count;
-	spinlock_t tx_stop_lock;
-
 	struct efx_tx_queue tx_queue[2];
 };
 
diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c
index bdb92b4af68..2f5e9da657b 100644
--- a/drivers/net/sfc/tx.c
+++ b/drivers/net/sfc/tx.c
@@ -30,50 +30,6 @@
  */
 #define EFX_TXQ_THRESHOLD(_efx) ((_efx)->txq_entries / 2u)
 
-/* We need to be able to nest calls to netif_tx_stop_queue(), partly
- * because of the 2 hardware queues associated with each core queue,
- * but also so that we can inhibit TX for reasons other than a full
- * hardware queue. */
-void efx_stop_queue(struct efx_channel *channel)
-{
-	struct efx_nic *efx = channel->efx;
-	struct efx_tx_queue *tx_queue = efx_channel_get_tx_queue(channel, 0);
-
-	if (!tx_queue)
-		return;
-
-	spin_lock_bh(&channel->tx_stop_lock);
-	netif_vdbg(efx, tx_queued, efx->net_dev, "stop TX queue\n");
-
-	atomic_inc(&channel->tx_stop_count);
-	netif_tx_stop_queue(
-		netdev_get_tx_queue(efx->net_dev,
-				    tx_queue->queue / EFX_TXQ_TYPES));
-
-	spin_unlock_bh(&channel->tx_stop_lock);
-}
-
-/* Decrement core TX queue stop count and wake it if the count is 0 */
-void efx_wake_queue(struct efx_channel *channel)
-{
-	struct efx_nic *efx = channel->efx;
-	struct efx_tx_queue *tx_queue = efx_channel_get_tx_queue(channel, 0);
-
-	if (!tx_queue)
-		return;
-
-	local_bh_disable();
-	if (atomic_dec_and_lock(&channel->tx_stop_count,
-				&channel->tx_stop_lock)) {
-		netif_vdbg(efx, tx_queued, efx->net_dev, "waking TX queue\n");
-		netif_tx_wake_queue(
-			netdev_get_tx_queue(efx->net_dev,
-					    tx_queue->queue / EFX_TXQ_TYPES));
-		spin_unlock(&channel->tx_stop_lock);
-	}
-	local_bh_enable();
-}
-
 static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 			       struct efx_tx_buffer *buffer)
 {
@@ -234,9 +190,9 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 				 * checked. Update the xmit path's
 				 * copy of read_count.
 				 */
-				++tx_queue->stopped;
+				netif_tx_stop_queue(tx_queue->core_txq);
 				/* This memory barrier protects the
-				 * change of stopped from the access
+				 * change of queue state from the access
 				 * of read_count. */
 				smp_mb();
 				tx_queue->old_read_count =
@@ -244,10 +200,12 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 				fill_level = (tx_queue->insert_count
 					      - tx_queue->old_read_count);
 				q_space = efx->txq_entries - 1 - fill_level;
-				if (unlikely(q_space-- <= 0))
-					goto stop;
+				if (unlikely(q_space-- <= 0)) {
+					rc = NETDEV_TX_BUSY;
+					goto unwind;
+				}
 				smp_mb();
-				--tx_queue->stopped;
+				netif_tx_start_queue(tx_queue->core_txq);
 			}
 
 		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
@@ -307,13 +265,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 
 	/* Mark the packet as transmitted, and free the SKB ourselves */
 	dev_kfree_skb_any(skb);
-	goto unwind;
-
- stop:
-	rc = NETDEV_TX_BUSY;
-
-	if (tx_queue->stopped == 1)
-		efx_stop_queue(tx_queue->channel);
 
  unwind:
 	/* Work backwards until we hit the original insert pointer value */
@@ -400,32 +351,21 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 {
 	unsigned fill_level;
 	struct efx_nic *efx = tx_queue->efx;
-	struct netdev_queue *queue;
 
 	EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask);
 
 	efx_dequeue_buffers(tx_queue, index);
 
 	/* See if we need to restart the netif queue. This barrier
-	 * separates the update of read_count from the test of
-	 * stopped. */
+	 * separates the update of read_count from the test of the
+	 * queue state. */
 	smp_mb();
-	if (unlikely(tx_queue->stopped) && likely(efx->port_enabled)) {
+	if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
+	    likely(efx->port_enabled)) {
 		fill_level = tx_queue->insert_count - tx_queue->read_count;
 		if (fill_level < EFX_TXQ_THRESHOLD(efx)) {
 			EFX_BUG_ON_PARANOID(!efx_dev_registered(efx));
-
-			/* Do this under netif_tx_lock(), to avoid racing
-			 * with efx_xmit(). */
-			queue = netdev_get_tx_queue(
-				efx->net_dev,
-				tx_queue->queue / EFX_TXQ_TYPES);
-			__netif_tx_lock(queue, smp_processor_id());
-			if (tx_queue->stopped) {
-				tx_queue->stopped = 0;
-				efx_wake_queue(tx_queue->channel);
-			}
-			__netif_tx_unlock(queue);
+			netif_tx_wake_queue(tx_queue->core_txq);
 		}
 	}
 
@@ -487,7 +427,6 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
 	tx_queue->read_count = 0;
 	tx_queue->old_read_count = 0;
 	tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
-	BUG_ON(tx_queue->stopped);
 
 	/* Set up TX descriptor ring */
 	efx_nic_init_tx(tx_queue);
@@ -523,12 +462,6 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
 
 	/* Free up TSO header cache */
 	efx_fini_tso(tx_queue);
-
-	/* Release queue's stop on port, if any */
-	if (tx_queue->stopped) {
-		tx_queue->stopped = 0;
-		efx_wake_queue(tx_queue->channel);
-	}
 }
 
 void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
@@ -770,9 +703,9 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 			 * since the xmit path last checked. Update
 			 * the xmit path's copy of read_count.
 			 */
-			++tx_queue->stopped;
+			netif_tx_stop_queue(tx_queue->core_txq);
 			/* This memory barrier protects the change of
-			 * stopped from the access of read_count. */
+			 * queue state from the access of read_count. */
 			smp_mb();
 			tx_queue->old_read_count =
 				ACCESS_ONCE(tx_queue->read_count);
@@ -784,7 +717,7 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 			return 1;
 		}
 		smp_mb();
-		--tx_queue->stopped;
+		netif_tx_start_queue(tx_queue->core_txq);
 	}
 
 	insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
@@ -1124,8 +1057,10 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 
 	while (1) {
 		rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
-		if (unlikely(rc))
-			goto stop;
+		if (unlikely(rc)) {
+			rc2 = NETDEV_TX_BUSY;
+			goto unwind;
+		}
 
 		/* Move onto the next fragment? */
 		if (state.in_len == 0) {
@@ -1154,14 +1089,6 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	netif_err(efx, tx_err, efx->net_dev,
 		  "Out of memory for TSO headers, or PCI mapping error\n");
 	dev_kfree_skb_any(skb);
-	goto unwind;
-
- stop:
-	rc2 = NETDEV_TX_BUSY;
-
-	/* Stop the queue if it wasn't stopped before. */
-	if (tx_queue->stopped == 1)
-		efx_stop_queue(tx_queue->channel);
 
  unwind:
 	/* Free the DMA mapping we were in the process of writing out */
diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c
index 20f803df868..34a0af3837f 100644
--- a/drivers/net/stmmac/stmmac_main.c
+++ b/drivers/net/stmmac/stmmac_main.c
@@ -1647,10 +1647,8 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 
 	pr_info("STMMAC driver:\n\tplatform registration... ");
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		ret = -ENODEV;
-		goto out;
-	}
+	if (!res)
+		return -ENODEV;
 	pr_info("\tdone!\n");
 
 	if (!request_mem_region(res->start, resource_size(res),
@@ -1658,22 +1656,21 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 		pr_err("%s: ERROR: memory allocation failed"
 		       "cannot get the I/O addr 0x%x\n",
 		       __func__, (unsigned int)res->start);
-		ret = -EBUSY;
-		goto out;
+		return -EBUSY;
 	}
 
 	addr = ioremap(res->start, resource_size(res));
 	if (!addr) {
 		pr_err("%s: ERROR: memory mapping failed\n", __func__);
 		ret = -ENOMEM;
-		goto out;
+		goto out_release_region;
 	}
 
 	ndev = alloc_etherdev(sizeof(struct stmmac_priv));
 	if (!ndev) {
 		pr_err("%s: ERROR: allocating the device\n", __func__);
 		ret = -ENOMEM;
-		goto out;
+		goto out_unmap;
 	}
 
 	SET_NETDEV_DEV(ndev, &pdev->dev);
@@ -1683,8 +1680,8 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 	if (ndev->irq == -ENXIO) {
 		pr_err("%s: ERROR: MAC IRQ configuration "
 		       "information not found\n", __func__);
-		ret = -ENODEV;
-		goto out;
+		ret = -ENXIO;
+		goto out_free_ndev;
 	}
 
 	priv = netdev_priv(ndev);
@@ -1711,18 +1708,18 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 	if (priv->plat->init) {
 		ret = priv->plat->init(pdev);
 		if (unlikely(ret))
-			goto out;
+			goto out_free_ndev;
 	}
 
 	/* MAC HW revice detection */
 	ret = stmmac_mac_device_setup(ndev);
 	if (ret < 0)
-		goto out;
+		goto out_plat_exit;
 
 	/* Network Device Registration */
 	ret = stmmac_probe(ndev);
 	if (ret < 0)
-		goto out;
+		goto out_plat_exit;
 
 	/* associate a PHY - it is provided by another platform bus */
 	if (!driver_for_each_device
@@ -1730,7 +1727,7 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 	    stmmac_associate_phy)) {
 		pr_err("No PHY device is associated with this MAC!\n");
 		ret = -ENODEV;
-		goto out;
+		goto out_unregister;
 	}
 
 	pr_info("\t%s - (dev. name: %s - id: %d, IRQ #%d\n"
@@ -1741,19 +1738,22 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 	pr_debug("\tMDIO bus (id: %d)...", priv->plat->bus_id);
 	ret = stmmac_mdio_register(ndev);
 	if (ret < 0)
-		goto out;
+		goto out_unregister;
 	pr_debug("registered!\n");
+	return 0;
 
-out:
-	if (ret < 0) {
-		if (priv->plat->exit)
-			priv->plat->exit(pdev);
-
-		platform_set_drvdata(pdev, NULL);
-		release_mem_region(res->start, resource_size(res));
-		if (addr != NULL)
-			iounmap(addr);
-	}
+out_unregister:
+	unregister_netdev(ndev);
+out_plat_exit:
+	if (priv->plat->exit)
+		priv->plat->exit(pdev);
+out_free_ndev:
+	free_netdev(ndev);
+	platform_set_drvdata(pdev, NULL);
+out_unmap:
+	iounmap(addr);
+out_release_region:
+	release_mem_region(res->start, resource_size(res));
 
 	return ret;
 }
diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c
index 3ed2a67bd6d..e5662962c7b 100644
--- a/drivers/net/sundance.c
+++ b/drivers/net/sundance.c
@@ -294,6 +294,9 @@ enum alta_offsets {
 	/* Aliased and bogus values! */
 	RxStatus = 0x0c,
 };
+
+#define ASIC_HI_WORD(x)	((x) + 2)
+
 enum ASICCtrl_HiWord_bit {
 	GlobalReset = 0x0001,
 	RxReset = 0x0002,
@@ -431,6 +434,7 @@ static void netdev_error(struct net_device *dev, int intr_status);
 static void netdev_error(struct net_device *dev, int intr_status);
 static void set_rx_mode(struct net_device *dev);
 static int __set_mac_addr(struct net_device *dev);
+static int sundance_set_mac_addr(struct net_device *dev, void *data);
 static struct net_device_stats *get_stats(struct net_device *dev);
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static int netdev_close(struct net_device *dev);
@@ -464,7 +468,7 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_do_ioctl		= netdev_ioctl,
 	.ndo_tx_timeout		= tx_timeout,
 	.ndo_change_mtu		= change_mtu,
-	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_set_mac_address	= sundance_set_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
 
@@ -1592,6 +1596,19 @@ static int __set_mac_addr(struct net_device *dev)
 	return 0;
 }
 
+/* Invoked with rtnl_lock held */
+static int sundance_set_mac_addr(struct net_device *dev, void *data)
+{
+	const struct sockaddr *addr = data;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EINVAL;
+	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+	__set_mac_addr(dev);
+
+	return 0;
+}
+
 static const struct {
 	const char name[ETH_GSTRING_LEN];
 } sundance_stats[] = {
@@ -1772,10 +1789,10 @@ static int netdev_close(struct net_device *dev)
 	}
 
 	iowrite16(GlobalReset | DMAReset | FIFOReset | NetworkReset,
-			ioaddr +ASICCtrl + 2);
+			ioaddr + ASIC_HI_WORD(ASICCtrl));
 
 	for (i = 2000; i > 0; i--) {
-		if ((ioread16(ioaddr + ASICCtrl +2) & ResetBusy) == 0)
+		if ((ioread16(ioaddr + ASIC_HI_WORD(ASICCtrl)) & ResetBusy) == 0)
 			break;
 		mdelay(1);
 	}
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b4480300cad..38509f04738 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -60,6 +60,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
  */
 #define MAX_TCP_WINDOW		32767U
 
+/* Offer an initial receive window of 10 mss. */
+#define TCP_DEFAULT_INIT_RCVWND	10
+
 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
 #define TCP_MIN_MSS		88U
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 59877290bca..a215269d2e3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1547,13 +1547,6 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	struct sk_buff *skb2 = NULL;
 	struct packet_type *pt_prev = NULL;
 
-#ifdef CONFIG_NET_CLS_ACT
-	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
-		net_timestamp_set(skb);
-#else
-	net_timestamp_set(skb);
-#endif
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
 		/* Never send packets back to the socket
@@ -1572,6 +1565,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 			if (!skb2)
 				break;
 
+			net_timestamp_set(skb2);
+
 			/* skb->nh should be correctly
 			   set by sender, so that the second statement is
 			   just protection against buggy protocols.
diff --git a/net/core/filter.c b/net/core/filter.c
index e8a6ac411ff..2b27d4efdd4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -85,6 +85,17 @@ enum {
 	BPF_S_JMP_JGT_X,
 	BPF_S_JMP_JSET_K,
 	BPF_S_JMP_JSET_X,
+	/* Ancillary data */
+	BPF_S_ANC_PROTOCOL,
+	BPF_S_ANC_PKTTYPE,
+	BPF_S_ANC_IFINDEX,
+	BPF_S_ANC_NLATTR,
+	BPF_S_ANC_NLATTR_NEST,
+	BPF_S_ANC_MARK,
+	BPF_S_ANC_QUEUE,
+	BPF_S_ANC_HATYPE,
+	BPF_S_ANC_RXHASH,
+	BPF_S_ANC_CPU,
 };
 
 /* No hurry in this branch */
@@ -107,11 +118,7 @@ static inline void *load_pointer(const struct sk_buff *skb, int k,
 {
 	if (k >= 0)
 		return skb_header_pointer(skb, k, size, buffer);
-	else {
-		if (k >= SKF_AD_OFF)
-			return NULL;
-		return __load_pointer(skb, k, size);
-	}
+	return __load_pointer(skb, k, size);
 }
 
 /**
@@ -269,7 +276,7 @@ load_w:
 				A = get_unaligned_be32(ptr);
 				continue;
 			}
-			break;
+			return 0;
 		case BPF_S_LD_H_ABS:
 			k = K;
 load_h:
@@ -278,7 +285,7 @@ load_h:
 				A = get_unaligned_be16(ptr);
 				continue;
 			}
-			break;
+			return 0;
 		case BPF_S_LD_B_ABS:
 			k = K;
 load_b:
@@ -287,7 +294,7 @@ load_b:
 				A = *(u8 *)ptr;
 				continue;
 			}
-			break;
+			return 0;
 		case BPF_S_LD_W_LEN:
 			A = skb->len;
 			continue;
@@ -338,45 +345,35 @@ load_b:
 		case BPF_S_STX:
 			mem[K] = X;
 			continue;
-		default:
-			WARN_ON(1);
-			return 0;
-		}
-
-		/*
-		 * Handle ancillary data, which are impossible
-		 * (or very difficult) to get parsing packet contents.
-		 */
-		switch (k-SKF_AD_OFF) {
-		case SKF_AD_PROTOCOL:
+		case BPF_S_ANC_PROTOCOL:
 			A = ntohs(skb->protocol);
 			continue;
-		case SKF_AD_PKTTYPE:
+		case BPF_S_ANC_PKTTYPE:
 			A = skb->pkt_type;
 			continue;
-		case SKF_AD_IFINDEX:
+		case BPF_S_ANC_IFINDEX:
 			if (!skb->dev)
 				return 0;
 			A = skb->dev->ifindex;
 			continue;
-		case SKF_AD_MARK:
+		case BPF_S_ANC_MARK:
 			A = skb->mark;
 			continue;
-		case SKF_AD_QUEUE:
+		case BPF_S_ANC_QUEUE:
 			A = skb->queue_mapping;
 			continue;
-		case SKF_AD_HATYPE:
+		case BPF_S_ANC_HATYPE:
 			if (!skb->dev)
 				return 0;
 			A = skb->dev->type;
 			continue;
-		case SKF_AD_RXHASH:
+		case BPF_S_ANC_RXHASH:
 			A = skb->rxhash;
 			continue;
-		case SKF_AD_CPU:
+		case BPF_S_ANC_CPU:
 			A = raw_smp_processor_id();
 			continue;
-		case SKF_AD_NLATTR: {
+		case BPF_S_ANC_NLATTR: {
 			struct nlattr *nla;
 
 			if (skb_is_nonlinear(skb))
@@ -392,7 +389,7 @@ load_b:
 			A = 0;
 			continue;
 		}
-		case SKF_AD_NLATTR_NEST: {
+		case BPF_S_ANC_NLATTR_NEST: {
 			struct nlattr *nla;
 
 			if (skb_is_nonlinear(skb))
@@ -412,6 +409,7 @@ load_b:
 			continue;
 		}
 		default:
+			WARN_ON(1);
 			return 0;
 		}
 	}
@@ -600,6 +598,24 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 			    pc + ftest->jf + 1 >= flen)
 				return -EINVAL;
 			break;
+		case BPF_S_LD_W_ABS:
+		case BPF_S_LD_H_ABS:
+		case BPF_S_LD_B_ABS:
+#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
+				code = BPF_S_ANC_##CODE;	\
+				break
+			switch (ftest->k) {
+			ANCILLARY(PROTOCOL);
+			ANCILLARY(PKTTYPE);
+			ANCILLARY(IFINDEX);
+			ANCILLARY(NLATTR);
+			ANCILLARY(NLATTR_NEST);
+			ANCILLARY(MARK);
+			ANCILLARY(QUEUE);
+			ANCILLARY(HATYPE);
+			ANCILLARY(RXHASH);
+			ANCILLARY(CPU);
+			}
 		}
 		ftest->code = code;
 	}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2d390669d40..dc7c096ddfe 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -228,10 +228,15 @@ void tcp_select_initial_window(int __space, __u32 mss,
 		}
 	}
 
-	/* Set initial window to value enough for senders, following RFC5681. */
+	/* Set initial window to a value enough for senders starting with
+	 * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place
+	 * a limit on the initial window when mss is larger than 1460.
+	 */
 	if (mss > (1 << *rcv_wscale)) {
-		int init_cwnd = rfc3390_bytes_to_packets(mss);
-
+		int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
+		if (mss > 1460)
+			init_cwnd =
+			max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
 		/* when initializing use the value from init_rcv_wnd
 		 * rather than the default from above
 		 */
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 42396c965dd..13322e8a045 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -67,27 +67,42 @@
 
 	IMPLEMENTATION:
 	This implementation limits maximal queue length to 128;
-	maximal mtu to 2^15-1; number of hash buckets to 1024.
+	maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
 	The only goal of this restrictions was that all data
-	fit into one 4K page :-). Struct sfq_sched_data is
-	organized in anti-cache manner: all the data for a bucket
-	are scattered over different locations. This is not good,
-	but it allowed me to put it into 4K.
+	fit into one 4K page on 32bit arches.
 
 	It is easy to increase these values, but not in flight.  */
 
-#define SFQ_DEPTH		128
+#define SFQ_DEPTH		128 /* max number of packets per flow */
+#define SFQ_SLOTS		128 /* max number of flows */
+#define SFQ_EMPTY_SLOT		255
 #define SFQ_HASH_DIVISOR	1024
 
-/* This type should contain at least SFQ_DEPTH*2 values */
+/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
 typedef unsigned char sfq_index;
 
+/*
+ * We dont use pointers to save space.
+ * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
+ * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
+ * are 'pointers' to dep[] array
+ */
 struct sfq_head
 {
 	sfq_index	next;
 	sfq_index	prev;
 };
 
+struct sfq_slot {
+	struct sk_buff	*skblist_next;	/* next skb in slot queue */
+	struct sk_buff	*skblist_prev;	/* prev skb in slot queue */
+	sfq_index	qlen;		/* number of skbs in skblist */
+	sfq_index	next;		/* next slot in sfq chain */
+	struct sfq_head	dep;		/* anchor in dep[] chains */
+	unsigned short	hash;		/* hash value (index in ht[]) */
+	short		allot;		/* credit for this slot */
+};
+
 struct sfq_sched_data
 {
 /* Parameters */
@@ -99,17 +114,24 @@ struct sfq_sched_data
 	struct tcf_proto *filter_list;
 	struct timer_list perturb_timer;
 	u32		perturbation;
-	sfq_index	tail;		/* Index of current slot in round */
-	sfq_index	max_depth;	/* Maximal depth */
+	sfq_index	cur_depth;	/* depth of longest slot */
 
+	struct sfq_slot *tail;		/* current slot in round */
 	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
-	sfq_index	next[SFQ_DEPTH];	/* Active slots link */
-	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
-	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
-	struct sk_buff_head	qs[SFQ_DEPTH];		/* Slot queue */
-	struct sfq_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
+	struct sfq_slot	slots[SFQ_SLOTS];
+	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
 };
 
+/*
+ * sfq_head are either in a sfq_slot or in dep[] array
+ */
+static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
+{
+	if (val < SFQ_SLOTS)
+		return &q->slots[val].dep;
+	return &q->dep[val - SFQ_SLOTS];
+}
+
 static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
 {
 	return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
@@ -200,30 +222,41 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 	return 0;
 }
 
+/*
+ * x : slot number [0 .. SFQ_SLOTS - 1]
+ */
 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
-	int d = q->qs[x].qlen + SFQ_DEPTH;
+	int qlen = q->slots[x].qlen;
+
+	p = qlen + SFQ_SLOTS;
+	n = q->dep[qlen].next;
 
-	p = d;
-	n = q->dep[d].next;
-	q->dep[x].next = n;
-	q->dep[x].prev = p;
-	q->dep[p].next = q->dep[n].prev = x;
+	q->slots[x].dep.next = n;
+	q->slots[x].dep.prev = p;
+
+	q->dep[qlen].next = x;		/* sfq_dep_head(q, p)->next = x */
+	sfq_dep_head(q, n)->prev = x;
 }
 
+#define sfq_unlink(q, x, n, p)			\
+	n = q->slots[x].dep.next;		\
+	p = q->slots[x].dep.prev;		\
+	sfq_dep_head(q, p)->next = n;		\
+	sfq_dep_head(q, n)->prev = p
+
+
 static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
+	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
-
-	if (n == p && q->max_depth == q->qs[x].qlen + 1)
-		q->max_depth--;
+	sfq_unlink(q, x, n, p);
 
+	d = q->slots[x].qlen--;
+	if (n == p && q->cur_depth == d)
+		q->cur_depth--;
 	sfq_link(q, x);
 }
 
@@ -232,34 +265,72 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
 	sfq_index p, n;
 	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
-	d = q->qs[x].qlen;
-	if (q->max_depth < d)
-		q->max_depth = d;
+	sfq_unlink(q, x, n, p);
 
+	d = ++q->slots[x].qlen;
+	if (q->cur_depth < d)
+		q->cur_depth = d;
 	sfq_link(q, x);
 }
 
+/* helper functions : might be changed when/if skb use a standard list_head */
+
+/* remove one skb from tail of slot queue */
+static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
+{
+	struct sk_buff *skb = slot->skblist_prev;
+
+	slot->skblist_prev = skb->prev;
+	skb->next = skb->prev = NULL;
+	return skb;
+}
+
+/* remove one skb from head of slot queue */
+static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
+{
+	struct sk_buff *skb = slot->skblist_next;
+
+	slot->skblist_next = skb->next;
+	skb->next = skb->prev = NULL;
+	return skb;
+}
+
+static inline void slot_queue_init(struct sfq_slot *slot)
+{
+	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
+}
+
+/* add skb to slot queue (tail add) */
+static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
+{
+	skb->prev = slot->skblist_prev;
+	skb->next = (struct sk_buff *)slot;
+	slot->skblist_prev->next = skb;
+	slot->skblist_prev = skb;
+}
+
+#define slot_queue_walk(slot, skb)		\
+	for (skb = slot->skblist_next;		\
+	     skb != (struct sk_buff *)slot;	\
+	     skb = skb->next)
+
 static unsigned int sfq_drop(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index d = q->max_depth;
+	sfq_index x, d = q->cur_depth;
 	struct sk_buff *skb;
 	unsigned int len;
+	struct sfq_slot *slot;
 
-	/* Queue is full! Find the longest slot and
-	   drop a packet from it */
-
+	/* Queue is full! Find the longest slot and drop tail packet from it */
 	if (d > 1) {
-		sfq_index x = q->dep[d + SFQ_DEPTH].next;
-		skb = q->qs[x].prev;
+		x = q->dep[d].next;
+		slot = &q->slots[x];
+drop:
+		skb = slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[x]);
-		kfree_skb(skb);
 		sfq_dec(q, x);
+		kfree_skb(skb);
 		sch->q.qlen--;
 		sch->qstats.drops++;
 		sch->qstats.backlog -= len;
@@ -268,18 +339,11 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 
 	if (d == 1) {
 		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
-		d = q->next[q->tail];
-		q->next[q->tail] = q->next[d];
-		skb = q->qs[d].prev;
-		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[d]);
-		kfree_skb(skb);
-		sfq_dec(q, d);
-		sch->q.qlen--;
-		q->ht[q->hash[d]] = SFQ_DEPTH;
-		sch->qstats.drops++;
-		sch->qstats.backlog -= len;
-		return len;
+		x = q->tail->next;
+		slot = &q->slots[x];
+		q->tail->next = slot->next;
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+		goto drop;
 	}
 
 	return 0;
@@ -291,6 +355,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned int hash;
 	sfq_index x;
+	struct sfq_slot *slot;
 	int uninitialized_var(ret);
 
 	hash = sfq_classify(skb, sch, &ret);
@@ -303,30 +368,33 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	hash--;
 
 	x = q->ht[hash];
-	if (x == SFQ_DEPTH) {
-		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
-		q->hash[x] = hash;
+	slot = &q->slots[x];
+	if (x == SFQ_EMPTY_SLOT) {
+		x = q->dep[0].next; /* get a free slot */
+		q->ht[hash] = x;
+		slot = &q->slots[x];
+		slot->hash = hash;
+		slot_queue_init(slot);
 	}
 
-	/* If selected queue has length q->limit, this means that
-	 * all another queues are empty and that we do simple tail drop,
+	/* If selected queue has length q->limit, do simple tail drop,
 	 * i.e. drop _this_ packet.
 	 */
-	if (q->qs[x].qlen >= q->limit)
+	if (slot->qlen >= q->limit)
 		return qdisc_drop(skb, sch);
 
 	sch->qstats.backlog += qdisc_pkt_len(skb);
-	__skb_queue_tail(&q->qs[x], skb);
+	slot_queue_add(slot, skb);
 	sfq_inc(q, x);
-	if (q->qs[x].qlen == 1) {		/* The flow is new */
-		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
-			q->next[x] = x;
+	if (slot->qlen == 1) {		/* The flow is new */
+		if (q->tail == NULL) {	/* It is the first flow */
+			slot->next = x;
 		} else {
-			q->next[x] = q->next[q->tail];
-			q->next[q->tail] = x;
+			slot->next = q->tail->next;
+			q->tail->next = x;
 		}
-		q->tail = x;
-		q->allot[x] = q->quantum;
+		q->tail = slot;
+		slot->allot = q->quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
 		sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -342,14 +410,12 @@ static struct sk_buff *
 sfq_peek(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index a;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == NULL)
 		return NULL;
 
-	a = q->next[q->tail];
-	return skb_peek(&q->qs[a]);
+	return q->slots[q->tail->next].skblist_next;
 }
 
 static struct sk_buff *
@@ -358,31 +424,31 @@ sfq_dequeue(struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 	sfq_index a, next_a;
+	struct sfq_slot *slot;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == NULL)
 		return NULL;
 
-	a = q->next[q->tail];
-
-	/* Grab packet */
-	skb = __skb_dequeue(&q->qs[a]);
+	a = q->tail->next;
+	slot = &q->slots[a];
+	skb = slot_dequeue_head(slot);
 	sfq_dec(q, a);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
 
 	/* Is the slot empty? */
-	if (q->qs[a].qlen == 0) {
-		q->ht[q->hash[a]] = SFQ_DEPTH;
-		next_a = q->next[a];
+	if (slot->qlen == 0) {
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+		next_a = slot->next;
 		if (a == next_a) {
-			q->tail = SFQ_DEPTH;
+			q->tail = NULL; /* no more active slots */
 			return skb;
 		}
-		q->next[q->tail] = next_a;
-	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
-		q->allot[a] += q->quantum;
-		q->tail = a;
+		q->tail->next = next_a;
+	} else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
+		q->tail = slot;
+		slot->allot += q->quantum;
 	}
 	return skb;
 }
@@ -446,17 +512,16 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	init_timer_deferrable(&q->perturb_timer);
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
-		q->ht[i] = SFQ_DEPTH;
+		q->ht[i] = SFQ_EMPTY_SLOT;
 
 	for (i = 0; i < SFQ_DEPTH; i++) {
-		skb_queue_head_init(&q->qs[i]);
-		q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
-		q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
+		q->dep[i].next = i + SFQ_SLOTS;
+		q->dep[i].prev = i + SFQ_SLOTS;
 	}
 
 	q->limit = SFQ_DEPTH - 1;
-	q->max_depth = 0;
-	q->tail = SFQ_DEPTH;
+	q->cur_depth = 0;
+	q->tail = NULL;
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
 		q->perturb_period = 0;
@@ -467,7 +532,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 		return err;
 	}
 
-	for (i = 0; i < SFQ_DEPTH; i++)
+	for (i = 0; i < SFQ_SLOTS; i++)
 		sfq_link(q, i);
 	return 0;
 }
@@ -543,13 +608,12 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 			       struct gnet_dump *d)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index idx = q->ht[cl-1];
-	struct sk_buff_head *list = &q->qs[idx];
-	struct gnet_stats_queue qs = { .qlen = list->qlen };
-	struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
+	const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]];
+	struct gnet_stats_queue qs = { .qlen = slot->qlen };
+	struct tc_sfq_xstats xstats = { .allot = slot->allot };
 	struct sk_buff *skb;
 
-	skb_queue_walk(list, skb)
+	slot_queue_walk(slot, skb)
 		qs.backlog += qdisc_pkt_len(skb);
 
 	if (gnet_stats_copy_queue(d, &qs) < 0)
@@ -566,7 +630,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 		return;
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
-		if (q->ht[i] == SFQ_DEPTH ||
+		if (q->ht[i] == SFQ_EMPTY_SLOT ||
 		    arg->count < arg->skip) {
 			arg->count++;
 			continue;