author    David Daney <ddaney@caviumnetworks.com>    2009-06-23 19:20:56 -0400
committer Ralf Baechle <ralf@linux-mips.org>         2009-06-24 13:34:41 -0400
commit    a620c1632629b42369e78448acc7b384fe1faf48
tree      3318683c03abb4ca45307c7df0019f74bcba3b13
parent    f696a10838ffab85e5bc07e7cff0d0e1870a30d7
Staging: octeon-ethernet: Fix race freeing transmit buffers.
The existing code had the following race:

Thread-1                       Thread-2

inc/read in_use
                               inc/read in_use
inc tx_free_list[qos].len
                               inc tx_free_list[qos].len

The actual in_use value was incremented twice, but thread-1 is going
to free memory based on its stale value, and will free one too many
times.  The result is that memory is freed back to the kernel while
its packet is still in the transmit buffer.  If the memory is
overwritten before it is transmitted, the hardware will put a valid
checksum on it and send it out (just like it does with good packets).
If by chance the TCP flags are clobbered but not the addresses or
ports, the result can be a broken TCP stream.

The fix is to track the number of freed packets in a single location
(a Fetch-and-Add Unit register).  That way it can never get out of
sync with itself.

We try to free up to MAX_SKB_TO_FREE (currently 10) buffers at a
time.  If fewer are available we adjust the free count with the
difference.  The action of claiming buffers to free is atomic so two
threads cannot claim the same buffers.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
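[Editor's note] The claim/undo arithmetic in this patch is compact, so a
worked model may help. Below is a minimal, self-contained user-space
sketch of the scheme, using a C11 atomic in place of the Octeon
Fetch-and-Add Unit register; the names counter and claim_skbs_to_free
are hypothetical stand-ins, and only the arithmetic mirrors the patch.

#include <stdatomic.h>
#include <stdio.h>

#define MAX_SKB_TO_FREE 10

/*
 * Stand-in for the FAU register at priv->fau + qos * 4: it is kept at
 * or below zero, and its magnitude is the number of skbs that are no
 * longer in use by the hardware and may be freed.
 */
static atomic_int counter;

/* Atomically claim up to MAX_SKB_TO_FREE buffers; return how many to free. */
static int claim_skbs_to_free(void)
{
	/* fetch_add returns the old value, like cvmx_fau_fetch_and_add32(). */
	int skb_to_free = atomic_fetch_add(&counter, MAX_SKB_TO_FREE);
	int undo = skb_to_free > 0 ?
		MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;

	/* Give back any claim in excess of what was actually available. */
	if (undo > 0)
		atomic_fetch_add(&counter, -undo);

	/* A non-positive result means there is nothing to free. */
	return -skb_to_free > MAX_SKB_TO_FREE ?
		MAX_SKB_TO_FREE : -skb_to_free;
}

int main(void)
{
	atomic_store(&counter, -15);		/* 15 buffers are free-able */
	printf("%d\n", claim_skbs_to_free());	/* 10 */
	printf("%d\n", claim_skbs_to_free());	/*  5 */
	printf("%d\n", claim_skbs_to_free());	/*  0 */
	return 0;
}

Running this prints 10, 5, 0: the first claim takes ten buffers, the
second takes the remaining five, the third gets none. Because each
claim is a single fetch-and-add, two threads can never claim the same
buffer, which is exactly the property the old in_use scheme lacked.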
-rw-r--r--  drivers/staging/octeon/ethernet-defines.h |  2
-rw-r--r--  drivers/staging/octeon/ethernet-tx.c      | 56
-rw-r--r--  drivers/staging/octeon/ethernet-tx.h      | 25
-rw-r--r--  drivers/staging/octeon/ethernet.c         | 89
4 files changed, 106 insertions(+), 66 deletions(-)
diff --git a/drivers/staging/octeon/ethernet-defines.h b/drivers/staging/octeon/ethernet-defines.h
index 8f7374e7664c..f13131b03c33 100644
--- a/drivers/staging/octeon/ethernet-defines.h
+++ b/drivers/staging/octeon/ethernet-defines.h
@@ -117,6 +117,8 @@
 
 /* Maximum number of packets to process per interrupt. */
 #define MAX_RX_PACKETS 120
+/* Maximum number of SKBs to try to free per xmit packet. */
+#define MAX_SKB_TO_FREE 10
 #define MAX_OUT_QUEUE_DEPTH 1000
 
 #ifndef CONFIG_SMP
diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c
index bfd3dd2fcef8..81a851390f1b 100644
--- a/drivers/staging/octeon/ethernet-tx.c
+++ b/drivers/staging/octeon/ethernet-tx.c
@@ -47,6 +47,7 @@
 
 #include "ethernet-defines.h"
 #include "octeon-ethernet.h"
+#include "ethernet-tx.h"
 #include "ethernet-util.h"
 
 #include "cvmx-wqe.h"
@@ -82,8 +83,10 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 	uint64_t old_scratch2;
 	int dropped;
 	int qos;
+	int queue_it_up;
 	struct octeon_ethernet *priv = netdev_priv(dev);
-	int32_t in_use;
+	int32_t skb_to_free;
+	int32_t undo;
 	int32_t buffers_to_free;
 #if REUSE_SKBUFFS_WITHOUT_FREE
 	unsigned char *fpa_head;
@@ -120,15 +123,15 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 		old_scratch2 = cvmx_scratch_read64(CVMX_SCR_SCRATCH + 8);
 
 		/*
-		 * Assume we're going to be able t osend this
-		 * packet. Fetch and increment the number of pending
-		 * packets for output.
+		 * Fetch and increment the number of packets to be
+		 * freed.
 		 */
 		cvmx_fau_async_fetch_and_add32(CVMX_SCR_SCRATCH + 8,
 					       FAU_NUM_PACKET_BUFFERS_TO_FREE,
 					       0);
 		cvmx_fau_async_fetch_and_add32(CVMX_SCR_SCRATCH,
-					       priv->fau + qos * 4, 1);
+					       priv->fau + qos * 4,
+					       MAX_SKB_TO_FREE);
 	}
 
 	/*
@@ -286,16 +289,30 @@ dont_put_skbuff_in_hw:
 	if (USE_ASYNC_IOBDMA) {
 		/* Get the number of skbuffs in use by the hardware */
 		CVMX_SYNCIOBDMA;
-		in_use = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
+		skb_to_free = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
 		buffers_to_free = cvmx_scratch_read64(CVMX_SCR_SCRATCH + 8);
 	} else {
 		/* Get the number of skbuffs in use by the hardware */
-		in_use = cvmx_fau_fetch_and_add32(priv->fau + qos * 4, 1);
+		skb_to_free = cvmx_fau_fetch_and_add32(priv->fau + qos * 4,
+						       MAX_SKB_TO_FREE);
 		buffers_to_free =
 		    cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);
 	}
 
 	/*
+	 * We try to claim MAX_SKB_TO_FREE buffers.  If there were not
+	 * that many available, we have to un-claim (undo) any that
+	 * were in excess.  If skb_to_free is positive we will free
+	 * that many buffers.
+	 */
+	undo = skb_to_free > 0 ?
+		MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
+	if (undo > 0)
+		cvmx_fau_atomic_add32(priv->fau+qos*4, -undo);
+	skb_to_free = -skb_to_free > MAX_SKB_TO_FREE ?
+		MAX_SKB_TO_FREE : -skb_to_free;
+
+	/*
 	 * If we're sending faster than the receive can free them then
 	 * don't do the HW free.
 	 */
@@ -330,38 +347,31 @@ dont_put_skbuff_in_hw:
 		cvmx_scratch_write64(CVMX_SCR_SCRATCH + 8, old_scratch2);
 	}
 
+	queue_it_up = 0;
 	if (unlikely(dropped)) {
 		dev_kfree_skb_any(skb);
-		cvmx_fau_atomic_add32(priv->fau + qos * 4, -1);
 		priv->stats.tx_dropped++;
 	} else {
 		if (USE_SKBUFFS_IN_HW) {
 			/* Put this packet on the queue to be freed later */
 			if (pko_command.s.dontfree)
-				skb_queue_tail(&priv->tx_free_list[qos], skb);
-			else {
+				queue_it_up = 1;
+			else
 				cvmx_fau_atomic_add32
-				    (FAU_NUM_PACKET_BUFFERS_TO_FREE, -1);
-				cvmx_fau_atomic_add32(priv->fau + qos * 4, -1);
-			}
+					(FAU_NUM_PACKET_BUFFERS_TO_FREE, -1);
 		} else {
 			/* Put this packet on the queue to be freed later */
-			skb_queue_tail(&priv->tx_free_list[qos], skb);
+			queue_it_up = 1;
 		}
 	}
 
-	/* Free skbuffs not in use by the hardware, possibly two at a time */
-	if (skb_queue_len(&priv->tx_free_list[qos]) > in_use) {
+	if (queue_it_up) {
 		spin_lock(&priv->tx_free_list[qos].lock);
-		/*
-		 * Check again now that we have the lock. It might
-		 * have changed.
-		 */
-		if (skb_queue_len(&priv->tx_free_list[qos]) > in_use)
-			dev_kfree_skb(__skb_dequeue(&priv->tx_free_list[qos]));
-		if (skb_queue_len(&priv->tx_free_list[qos]) > in_use)
-			dev_kfree_skb(__skb_dequeue(&priv->tx_free_list[qos]));
+		__skb_queue_tail(&priv->tx_free_list[qos], skb);
+		cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 0);
 		spin_unlock(&priv->tx_free_list[qos].lock);
+	} else {
+		cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 1);
 	}
 
 	return 0;
diff --git a/drivers/staging/octeon/ethernet-tx.h b/drivers/staging/octeon/ethernet-tx.h
index 5106236fe981..c0bebf750bc0 100644
--- a/drivers/staging/octeon/ethernet-tx.h
+++ b/drivers/staging/octeon/ethernet-tx.h
@@ -30,3 +30,28 @@ int cvm_oct_xmit_pow(struct sk_buff *skb, struct net_device *dev);
 int cvm_oct_transmit_qos(struct net_device *dev, void *work_queue_entry,
 			 int do_free, int qos);
 void cvm_oct_tx_shutdown(struct net_device *dev);
+
+/**
+ * Free dead transmit skbs.
+ *
+ * @priv:		The driver data
+ * @skb_to_free:	The number of SKBs to free (free none if negative).
+ * @qos:		The queue to free from.
+ * @take_lock:		If true, acquire the skb list lock.
+ */
+static inline void cvm_oct_free_tx_skbs(struct octeon_ethernet *priv,
+					int skb_to_free,
+					int qos, int take_lock)
+{
+	/* Free skbuffs not in use by the hardware. */
+	if (skb_to_free > 0) {
+		if (take_lock)
+			spin_lock(&priv->tx_free_list[qos].lock);
+		while (skb_to_free > 0) {
+			dev_kfree_skb(__skb_dequeue(&priv->tx_free_list[qos]));
+			skb_to_free--;
+		}
+		if (take_lock)
+			spin_unlock(&priv->tx_free_list[qos].lock);
+	}
+}
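[Editor's note] The take_lock parameter exists because the transmit path
already holds tx_free_list[qos].lock when it queues the outgoing skb, so
it drains with take_lock == 0, while the timer path takes the lock itself
with take_lock == 1. The following user-space model of that convention is
a sketch only: a pthread mutex and a plain list stand in for the kernel's
locked skb queue, and every name in it is hypothetical.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; };

static struct node *free_list;
static pthread_mutex_t free_list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Models cvm_oct_free_tx_skbs(): drain up to to_free entries. */
static void free_tx_nodes(int to_free, int take_lock)
{
	if (to_free <= 0)
		return;
	if (take_lock)
		pthread_mutex_lock(&free_list_lock);
	/* Unlike the kernel helper, also stop on an empty list, since
	 * this model's count is not backed by an FAU register. */
	while (to_free-- > 0 && free_list != NULL) {
		struct node *n = free_list;
		free_list = n->next;
		free(n);
	}
	if (take_lock)
		pthread_mutex_unlock(&free_list_lock);
}

int main(void)
{
	int i;

	/* Queue three dummy "skbs". */
	for (i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));
		n->next = free_list;
		free_list = n;
	}

	/*
	 * Transmit path: the lock is already held to queue the new
	 * skb, so drain without re-taking it (take_lock == 0).
	 */
	pthread_mutex_lock(&free_list_lock);
	free_tx_nodes(2, 0);
	pthread_mutex_unlock(&free_list_lock);

	/* Timer path: no lock held yet, so pass take_lock == 1. */
	free_tx_nodes(1, 1);

	printf("drained: %s\n", free_list == NULL ? "yes" : "no");
	return 0;
}

Folding the queue-and-drain into one critical section is what lets the
new code replace the old double-checked dequeue loop in cvm_oct_xmit().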
diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index 2d9356dfbca6..b8479517dce2 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c
@@ -37,13 +37,14 @@
 #include <asm/octeon/octeon.h>
 
 #include "ethernet-defines.h"
+#include "octeon-ethernet.h"
 #include "ethernet-mem.h"
 #include "ethernet-rx.h"
 #include "ethernet-tx.h"
 #include "ethernet-mdio.h"
 #include "ethernet-util.h"
 #include "ethernet-proc.h"
-#include "octeon-ethernet.h"
+
 
 #include "cvmx-pip.h"
 #include "cvmx-pko.h"
@@ -130,53 +131,55 @@ extern struct semaphore mdio_sem;
  */
 static void cvm_do_timer(unsigned long arg)
 {
+	int32_t skb_to_free, undo;
+	int queues_per_port;
+	int qos;
+	struct octeon_ethernet *priv;
 	static int port;
-	if (port < CVMX_PIP_NUM_INPUT_PORTS) {
-		if (cvm_oct_device[port]) {
-			int queues_per_port;
-			int qos;
-			struct octeon_ethernet *priv =
-				netdev_priv(cvm_oct_device[port]);
-			if (priv->poll) {
-				/* skip polling if we don't get the lock */
-				if (!down_trylock(&mdio_sem)) {
-					priv->poll(cvm_oct_device[port]);
-					up(&mdio_sem);
-				}
-			}
 
-			queues_per_port = cvmx_pko_get_num_queues(port);
-			/* Drain any pending packets in the free list */
-			for (qos = 0; qos < queues_per_port; qos++) {
-				if (skb_queue_len(&priv->tx_free_list[qos])) {
-					spin_lock(&priv->tx_free_list[qos].
-						  lock);
-					while (skb_queue_len
-					       (&priv->tx_free_list[qos]) >
-					       cvmx_fau_fetch_and_add32(priv->
-									fau +
-									qos * 4,
-									0))
-						dev_kfree_skb(__skb_dequeue
-							      (&priv->
-							       tx_free_list
-							       [qos]));
-					spin_unlock(&priv->tx_free_list[qos].
-						    lock);
-				}
-			}
-			cvm_oct_device[port]->netdev_ops->ndo_get_stats(cvm_oct_device[port]);
-		}
-		port++;
-		/* Poll the next port in a 50th of a second.
-		   This spreads the polling of ports out a little bit */
-		mod_timer(&cvm_oct_poll_timer, jiffies + HZ / 50);
-	} else {
+	if (port >= CVMX_PIP_NUM_INPUT_PORTS) {
+		/*
+		 * All ports have been polled. Start the next
+		 * iteration through the ports in one second.
+		 */
 		port = 0;
-		/* All ports have been polled. Start the next iteration through
-		   the ports in one second */
 		mod_timer(&cvm_oct_poll_timer, jiffies + HZ);
+		return;
+	}
+	if (!cvm_oct_device[port])
+		goto out;
+
+	priv = netdev_priv(cvm_oct_device[port]);
+	if (priv->poll) {
+		/* skip polling if we don't get the lock */
+		if (!down_trylock(&mdio_sem)) {
+			priv->poll(cvm_oct_device[port]);
+			up(&mdio_sem);
+		}
+	}
+
+	queues_per_port = cvmx_pko_get_num_queues(port);
+	/* Drain any pending packets in the free list */
+	for (qos = 0; qos < queues_per_port; qos++) {
+		if (skb_queue_len(&priv->tx_free_list[qos]) == 0)
+			continue;
+		skb_to_free = cvmx_fau_fetch_and_add32(priv->fau + qos * 4,
+						       MAX_SKB_TO_FREE);
+		undo = skb_to_free > 0 ?
+			MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
+		if (undo > 0)
+			cvmx_fau_atomic_add32(priv->fau+qos*4, -undo);
+		skb_to_free = -skb_to_free > MAX_SKB_TO_FREE ?
+			MAX_SKB_TO_FREE : -skb_to_free;
+		cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 1);
 	}
+	cvm_oct_device[port]->netdev_ops->ndo_get_stats(cvm_oct_device[port]);
+
+out:
+	port++;
+	/* Poll the next port in a 50th of a second.
+	   This spreads the polling of ports out a little bit */
+	mod_timer(&cvm_oct_poll_timer, jiffies + HZ / 50);
 }
 
 /**