 drivers/staging/octeon/ethernet-defines.h |  18
 drivers/staging/octeon/ethernet-rx.c      | 300
 drivers/staging/octeon/ethernet-rx.h      |  25
 drivers/staging/octeon/ethernet.c         |  52
 drivers/staging/octeon/octeon-ethernet.h  |   3
 5 files changed, 235 insertions(+), 163 deletions(-)
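The change below converts the driver's receive path from per-CPU tasklets driven by a throttled POW interrupt to the kernel's NAPI polling model. As orientation before reading the patch, here is a minimal sketch of the generic NAPI flow it adopts: the hard IRQ handler only masks its source and schedules polling, the poll callback consumes at most `budget` packets, and completion re-arms the interrupt. All names in the sketch (`my_dev`, `my_irq_handler`, `my_poll`, `my_rx_one_packet`) are illustrative assumptions, not part of the Octeon driver.

#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>

struct my_dev {
        struct napi_struct napi;        /* one polling context */
        struct net_device *netdev;
        int irq;
};

/* Hypothetical helper: pull one frame off the hardware and hand it to the stack. */
static bool my_rx_one_packet(struct my_dev *p);

/* Hard IRQ: mask the source and defer all work to the poll callback. */
static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
        struct my_dev *p = dev_id;

        disable_irq_nosync(p->irq);
        napi_schedule(&p->napi);
        return IRQ_HANDLED;
}

/* NAPI poll: process at most @budget packets, then yield. */
static int my_poll(struct napi_struct *napi, int budget)
{
        struct my_dev *p = container_of(napi, struct my_dev, napi);
        int rx_count = 0;

        while (rx_count < budget && my_rx_one_packet(p))
                rx_count++;

        if (rx_count < budget) {
                /* No more work: stop polling and unmask the interrupt. */
                napi_complete(napi);
                enable_irq(p->irq);
        }
        return rx_count;
}

Registration pairs such a poll function with netif_napi_add(netdev, &p->napi, my_poll, weight) and napi_enable(), which is what the new cvm_oct_rx_initialize() in the patch does once per possible CPU.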
diff --git a/drivers/staging/octeon/ethernet-defines.h b/drivers/staging/octeon/ethernet-defines.h
index 6b8065f594bf..9c4910e45d28 100644
--- a/drivers/staging/octeon/ethernet-defines.h
+++ b/drivers/staging/octeon/ethernet-defines.h
@@ -45,10 +45,6 @@
  *   Controls if the Octeon TCP/UDP checksum engine is used for packet
  *   output. If this is zero, the kernel will perform the checksum in
  *   software.
- * USE_MULTICORE_RECEIVE
- *   Process receive interrupts on multiple cores. This spreads the network
- *   load across the first 8 processors. If ths is zero, only one core
- *   processes incomming packets.
  * USE_ASYNC_IOBDMA
  *   Use asynchronous IO access to hardware. This uses Octeon's asynchronous
  *   IOBDMAs to issue IO accesses without stalling. Set this to zero
@@ -79,15 +75,8 @@
 #define REUSE_SKBUFFS_WITHOUT_FREE 1
 #endif
 
-/* Max interrupts per second per core */
-#define INTERRUPT_LIMIT 10000
-
-/* Don't limit the number of interrupts */
-/*#define INTERRUPT_LIMIT 0 */
 #define USE_HW_TCPUDP_CHECKSUM 1
 
-#define USE_MULTICORE_RECEIVE 1
-
 /* Enable Random Early Dropping under load */
 #define USE_RED 1
 #define USE_ASYNC_IOBDMA (CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0)
@@ -105,17 +94,10 @@
 /* Use this to not have FPA frees control L2 */
 /*#define DONT_WRITEBACK(x) 0 */
 
-/* Maximum number of packets to process per interrupt. */
-#define MAX_RX_PACKETS 120
 /* Maximum number of SKBs to try to free per xmit packet. */
 #define MAX_SKB_TO_FREE 10
 #define MAX_OUT_QUEUE_DEPTH 1000
 
-#ifndef CONFIG_SMP
-#undef USE_MULTICORE_RECEIVE
-#define USE_MULTICORE_RECEIVE 0
-#endif
-
 #define IP_PROTOCOL_TCP 6
 #define IP_PROTOCOL_UDP 0x11
 
diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c
index f63459a96dad..b2e6ab6a3349 100644
--- a/drivers/staging/octeon/ethernet-rx.c
+++ b/drivers/staging/octeon/ethernet-rx.c
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2007 Cavium Networks
+ * Copyright (c) 2003-2010 Cavium Networks
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -27,12 +27,14 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/cache.h>
+#include <linux/cpumask.h>
 #include <linux/netdevice.h>
 #include <linux/init.h>
 #include <linux/etherdevice.h>
 #include <linux/ip.h>
 #include <linux/string.h>
 #include <linux/prefetch.h>
+#include <linux/smp.h>
 #include <net/dst.h>
 #ifdef CONFIG_XFRM
 #include <linux/xfrm.h>
@@ -44,8 +46,9 @@
 #include <asm/octeon/octeon.h>
 
 #include "ethernet-defines.h"
-#include "octeon-ethernet.h"
 #include "ethernet-mem.h"
+#include "ethernet-rx.h"
+#include "octeon-ethernet.h"
 #include "ethernet-util.h"
 
 #include "cvmx-helper.h"
@@ -57,56 +60,82 @@
 
 #include "cvmx-gmxx-defs.h"
 
-struct cvm_tasklet_wrapper {
-        struct tasklet_struct t;
-};
+struct cvm_napi_wrapper {
+        struct napi_struct napi;
+} ____cacheline_aligned_in_smp;
 
-/*
- * Aligning the tasklet_struct on cachline boundries seems to decrease
- * throughput even though in theory it would reduce contantion on the
- * cache lines containing the locks.
- */
+static struct cvm_napi_wrapper cvm_oct_napi[NR_CPUS] __cacheline_aligned_in_smp;
 
-static struct cvm_tasklet_wrapper cvm_oct_tasklet[NR_CPUS];
+struct cvm_oct_core_state {
+        int baseline_cores;
+        /*
+         * The number of additional cores that could be processing
+         * input packtes.
+         */
+        atomic_t available_cores;
+        cpumask_t cpu_state;
+} ____cacheline_aligned_in_smp;
 
-/**
- * Interrupt handler. The interrupt occurs whenever the POW
- * transitions from 0->1 packets in our group.
- *
- * @cpl:
- * @dev_id:
- * @regs:
- * Returns
- */
-irqreturn_t cvm_oct_do_interrupt(int cpl, void *dev_id)
+static struct cvm_oct_core_state core_state __cacheline_aligned_in_smp;
+
+static void cvm_oct_enable_napi(void *_)
 {
-        /* Acknowledge the interrupt */
-        if (INTERRUPT_LIMIT)
-                cvmx_write_csr(CVMX_POW_WQ_INT, 1 << pow_receive_group);
-        else
-                cvmx_write_csr(CVMX_POW_WQ_INT, 0x10001 << pow_receive_group);
-        preempt_disable();
-        tasklet_schedule(&cvm_oct_tasklet[smp_processor_id()].t);
-        preempt_enable();
-        return IRQ_HANDLED;
+        int cpu = smp_processor_id();
+        napi_schedule(&cvm_oct_napi[cpu].napi);
+}
+
+static void cvm_oct_enable_one_cpu(void)
+{
+        int v;
+        int cpu;
+
+        /* Check to see if more CPUs are available for receive processing... */
+        v = atomic_sub_if_positive(1, &core_state.available_cores);
+        if (v < 0)
+                return;
+
+        /* ... if a CPU is available, Turn on NAPI polling for that CPU. */
+        for_each_online_cpu(cpu) {
+                if (!cpu_test_and_set(cpu, core_state.cpu_state)) {
+                        v = smp_call_function_single(cpu, cvm_oct_enable_napi,
+                                                     NULL, 0);
+                        if (v)
+                                panic("Can't enable NAPI.");
+                        break;
+                }
+        }
+}
+
+static void cvm_oct_no_more_work(void)
+{
+        int cpu = smp_processor_id();
+
+        /*
+         * CPU zero is special. It always has the irq enabled when
+         * waiting for incoming packets.
+         */
+        if (cpu == 0) {
+                enable_irq(OCTEON_IRQ_WORKQ0 + pow_receive_group);
+                return;
+        }
+
+        cpu_clear(cpu, core_state.cpu_state);
+        atomic_add(1, &core_state.available_cores);
 }
 
-#ifdef CONFIG_NET_POLL_CONTROLLER
 /**
- * This is called when the kernel needs to manually poll the
- * device. For Octeon, this is simply calling the interrupt
- * handler. We actually poll all the devices, not just the
- * one supplied.
+ * Interrupt handler. The interrupt occurs whenever the POW
+ * has packets in our group.
  *
- * @dev: Device to poll. Unused
  */
-void cvm_oct_poll_controller(struct net_device *dev)
+static irqreturn_t cvm_oct_do_interrupt(int cpl, void *dev_id)
 {
-        preempt_disable();
-        tasklet_schedule(&cvm_oct_tasklet[smp_processor_id()].t);
-        preempt_enable();
+        /* Disable the IRQ and start napi_poll. */
+        disable_irq_nosync(OCTEON_IRQ_WORKQ0 + pow_receive_group);
+        cvm_oct_enable_napi(NULL);
+
+        return IRQ_HANDLED;
 }
-#endif
 
 /**
  * This is called on receive errors, and determines if the packet
@@ -195,19 +224,19 @@ static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work)
 }
 
 /**
- * Tasklet function that is scheduled on a core when an interrupt occurs.
+ * The NAPI poll function.
  *
- * @unused:
+ * @napi: The NAPI instance, or null if called from cvm_oct_poll_controller
+ * @budget: Maximum number of packets to receive.
  */
-void cvm_oct_tasklet_rx(unsigned long unused)
+static int cvm_oct_napi_poll(struct napi_struct *napi, int budget)
 {
         const int coreid = cvmx_get_core_num();
         uint64_t old_group_mask;
         uint64_t old_scratch;
         int rx_count = 0;
-        int number_to_free;
-        int num_freed;
-        int packet_not_copied;
+        int did_work_request = 0;
+        int packet_not_copied;
 
         /* Prefetch cvm_oct_device since we know we need it soon */
         prefetch(cvm_oct_device);
@@ -223,59 +252,63 @@ void cvm_oct_tasklet_rx(unsigned long unused)
         cvmx_write_csr(CVMX_POW_PP_GRP_MSKX(coreid),
                        (old_group_mask & ~0xFFFFull) | 1 << pow_receive_group);
 
-        if (USE_ASYNC_IOBDMA)
+        if (USE_ASYNC_IOBDMA) {
                 cvmx_pow_work_request_async(CVMX_SCR_SCRATCH, CVMX_POW_NO_WAIT);
+                did_work_request = 1;
+        }
 
-        while (1) {
+        while (rx_count < budget) {
                 struct sk_buff *skb = NULL;
+                struct sk_buff **pskb = NULL;
                 int skb_in_hw;
                 cvmx_wqe_t *work;
 
-                if (USE_ASYNC_IOBDMA) {
+                if (USE_ASYNC_IOBDMA && did_work_request)
                         work = cvmx_pow_work_response_async(CVMX_SCR_SCRATCH);
-                } else {
-                        if ((INTERRUPT_LIMIT == 0)
-                            || likely(rx_count < MAX_RX_PACKETS))
-                                work =
-                                    cvmx_pow_work_request_sync
-                                    (CVMX_POW_NO_WAIT);
-                        else
-                                work = NULL;
-                }
+                else
+                        work = cvmx_pow_work_request_sync(CVMX_POW_NO_WAIT);
+
                 prefetch(work);
-                if (work == NULL)
+                did_work_request = 0;
+                if (work == NULL) {
+                        union cvmx_pow_wq_int wq_int;
+                        wq_int.u64 = 0;
+                        wq_int.s.iq_dis = 1 << pow_receive_group;
+                        wq_int.s.wq_int = 1 << pow_receive_group;
+                        cvmx_write_csr(CVMX_POW_WQ_INT, wq_int.u64);
                         break;
+                }
+                pskb = (struct sk_buff **)(cvm_oct_get_buffer_ptr(work->packet_ptr) - sizeof(void *));
+                prefetch(pskb);
 
-                /*
-                 * Limit each core to processing MAX_RX_PACKETS
-                 * packets without a break. This way the RX can't
-                 * starve the TX task.
-                 */
-                if (USE_ASYNC_IOBDMA) {
-
-                        if ((INTERRUPT_LIMIT == 0)
-                            || likely(rx_count < MAX_RX_PACKETS))
-                                cvmx_pow_work_request_async_nocheck
-                                    (CVMX_SCR_SCRATCH, CVMX_POW_NO_WAIT);
-                        else {
-                                cvmx_scratch_write64(CVMX_SCR_SCRATCH,
-                                                     0x8000000000000000ull);
-                                cvmx_pow_tag_sw_null_nocheck();
-                        }
+                if (USE_ASYNC_IOBDMA && rx_count < (budget - 1)) {
+                        cvmx_pow_work_request_async_nocheck(CVMX_SCR_SCRATCH, CVMX_POW_NO_WAIT);
+                        did_work_request = 1;
+                }
+
+                if (rx_count == 0) {
+                        /*
+                         * First time through, see if there is enough
+                         * work waiting to merit waking another
+                         * CPU.
+                         */
+                        union cvmx_pow_wq_int_cntx counts;
+                        int backlog;
+                        int cores_in_use = core_state.baseline_cores - atomic_read(&core_state.available_cores);
+                        counts.u64 = cvmx_read_csr(CVMX_POW_WQ_INT_CNTX(pow_receive_group));
+                        backlog = counts.s.iq_cnt + counts.s.ds_cnt;
+                        if (backlog > budget * cores_in_use && napi != NULL)
+                                cvm_oct_enable_one_cpu();
                 }
 
                 skb_in_hw = USE_SKBUFFS_IN_HW && work->word2.s.bufs == 1;
                 if (likely(skb_in_hw)) {
-                        skb =
-                            *(struct sk_buff
-                              **)(cvm_oct_get_buffer_ptr(work->packet_ptr) -
-                                  sizeof(void *));
+                        skb = *pskb;
                         prefetch(&skb->head);
                         prefetch(&skb->len);
                 }
                 prefetch(cvm_oct_device[work->ipprt]);
 
-                rx_count++;
                 /* Immediately throw away all packets with receive errors */
                 if (unlikely(work->word2.snoip.rcv_error)) {
                         if (cvm_oct_check_rcv_error(work))
@@ -391,6 +424,7 @@ void cvm_oct_tasklet_rx(unsigned long unused)
 #endif
                         }
                         netif_receive_skb(skb);
+                        rx_count++;
                 } else {
                         /* Drop any packet received for a device that isn't up */
                         /*
@@ -432,47 +466,93 @@ void cvm_oct_tasklet_rx(unsigned long unused)
                         cvm_oct_free_work(work);
                 }
         }
-
         /* Restore the original POW group mask */
         cvmx_write_csr(CVMX_POW_PP_GRP_MSKX(coreid), old_group_mask);
         if (USE_ASYNC_IOBDMA) {
                 /* Restore the scratch area */
                 cvmx_scratch_write64(CVMX_SCR_SCRATCH, old_scratch);
         }
+        cvm_oct_rx_refill_pool(0);
 
-        if (USE_SKBUFFS_IN_HW) {
-                /* Refill the packet buffer pool */
-                number_to_free =
-                    cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);
-
-                if (number_to_free > 0) {
-                        cvmx_fau_atomic_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE,
-                                              -number_to_free);
-                        num_freed =
-                            cvm_oct_mem_fill_fpa(CVMX_FPA_PACKET_POOL,
-                                                 CVMX_FPA_PACKET_POOL_SIZE,
-                                                 number_to_free);
-                        if (num_freed != number_to_free) {
-                                cvmx_fau_atomic_add32
-                                    (FAU_NUM_PACKET_BUFFERS_TO_FREE,
-                                     number_to_free - num_freed);
-                        }
-                }
+        if (rx_count < budget && napi != NULL) {
+                /* No more work */
+                napi_complete(napi);
+                cvm_oct_no_more_work();
         }
+        return rx_count;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/**
+ * This is called when the kernel needs to manually poll the
+ * device.
+ *
+ * @dev: Device to poll. Unused
+ */
+void cvm_oct_poll_controller(struct net_device *dev)
+{
+        cvm_oct_napi_poll(NULL, 16);
 }
+#endif
 
 void cvm_oct_rx_initialize(void)
 {
         int i;
-        /* Initialize all of the tasklets */
-        for (i = 0; i < NR_CPUS; i++)
-                tasklet_init(&cvm_oct_tasklet[i].t, cvm_oct_tasklet_rx, 0);
+        struct net_device *dev_for_napi = NULL;
+        union cvmx_pow_wq_int_thrx int_thr;
+        union cvmx_pow_wq_int_pc int_pc;
+
+        for (i = 0; i < TOTAL_NUMBER_OF_PORTS; i++) {
+                if (cvm_oct_device[i]) {
+                        dev_for_napi = cvm_oct_device[i];
+                        break;
+                }
+        }
+
+        if (NULL == dev_for_napi)
+                panic("No net_devices were allocated.");
+
+        if (max_rx_cpus > 1 && max_rx_cpus < num_online_cpus())
+                atomic_set(&core_state.available_cores, max_rx_cpus);
+        else
+                atomic_set(&core_state.available_cores, num_online_cpus());
+        core_state.baseline_cores = atomic_read(&core_state.available_cores);
+
+        core_state.cpu_state = CPU_MASK_NONE;
+        for_each_possible_cpu(i) {
+                netif_napi_add(dev_for_napi, &cvm_oct_napi[i].napi,
+                               cvm_oct_napi_poll, rx_napi_weight);
+                napi_enable(&cvm_oct_napi[i].napi);
+        }
+        /* Register an IRQ hander for to receive POW interrupts */
+        i = request_irq(OCTEON_IRQ_WORKQ0 + pow_receive_group,
+                        cvm_oct_do_interrupt, 0, "Ethernet", cvm_oct_device);
+
+        if (i)
+                panic("Could not acquire Ethernet IRQ %d\n",
+                      OCTEON_IRQ_WORKQ0 + pow_receive_group);
+
+        disable_irq_nosync(OCTEON_IRQ_WORKQ0 + pow_receive_group);
+
+        int_thr.u64 = 0;
+        int_thr.s.tc_en = 1;
+        int_thr.s.tc_thr = 1;
+        /* Enable POW interrupt when our port has at least one packet */
+        cvmx_write_csr(CVMX_POW_WQ_INT_THRX(pow_receive_group), int_thr.u64);
+
+        int_pc.u64 = 0;
+        int_pc.s.pc_thr = 5;
+        cvmx_write_csr(CVMX_POW_WQ_INT_PC, int_pc.u64);
+
+
+        /* Scheduld NAPI now. This will indirectly enable interrupts. */
+        cvm_oct_enable_one_cpu();
 }
 
 void cvm_oct_rx_shutdown(void)
 {
         int i;
-        /* Shutdown all of the tasklets */
-        for (i = 0; i < NR_CPUS; i++)
-                tasklet_kill(&cvm_oct_tasklet[i].t);
+        /* Shutdown all of the NAPIs */
+        for_each_possible_cpu(i)
+                netif_napi_del(&cvm_oct_napi[i].napi);
 }
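What distinguishes this conversion from a typical single-queue NAPI driver is the per-CPU napi_struct array plus on-demand recruitment of extra CPUs when the POW backlog exceeds what the active pollers can drain in one budget. Condensed to its core, and using assumed names (`per_cpu_napi`, `spare_pollers`, `polling_cpus`, `kick_one_more_cpu`) rather than the driver's own, the mechanism looks roughly like this sketch:

#include <linux/atomic.h>
#include <linux/cpumask.h>
#include <linux/netdevice.h>
#include <linux/smp.h>

static struct napi_struct per_cpu_napi[NR_CPUS];  /* assumed per-CPU NAPI contexts */
static atomic_t spare_pollers;    /* CPUs allowed to poll but currently idle */
static cpumask_t polling_cpus;    /* CPUs that currently own a scheduled NAPI */

/* Runs on the target CPU (via IPI) and starts its NAPI instance. */
static void schedule_local_napi(void *unused)
{
        napi_schedule(&per_cpu_napi[smp_processor_id()]);
}

/* Called from the poll loop when backlog > budget * active pollers. */
static void kick_one_more_cpu(void)
{
        int cpu;

        /* Claim a spare slot; the driver itself uses the MIPS atomic_sub_if_positive() here. */
        if (!atomic_add_unless(&spare_pollers, -1, 0))
                return;

        for_each_online_cpu(cpu) {
                if (!cpumask_test_and_set_cpu(cpu, &polling_cpus)) {
                        smp_call_function_single(cpu, schedule_local_napi,
                                                 NULL, 0);
                        break;
                }
        }
}

The patch's cvm_oct_napi_poll() applies the same idea: on the first packet of a poll it compares the backlog reported by CVMX_POW_WQ_INT_CNTX against budget * cores_in_use and, if behind, wakes one more CPU, while cvm_oct_no_more_work() returns a CPU to the spare pool once its poll runs dry.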
diff --git a/drivers/staging/octeon/ethernet-rx.h b/drivers/staging/octeon/ethernet-rx.h
index a9b72b87a7a6..a0743b85d54e 100644
--- a/drivers/staging/octeon/ethernet-rx.h
+++ b/drivers/staging/octeon/ethernet-rx.h
@@ -24,10 +24,29 @@
  * This file may also be available under a different license from Cavium.
  * Contact Cavium Networks for more information
 *********************************************************************/
+#include "cvmx-fau.h"
 
-irqreturn_t cvm_oct_do_interrupt(int cpl, void *dev_id);
 void cvm_oct_poll_controller(struct net_device *dev);
-void cvm_oct_tasklet_rx(unsigned long unused);
-
 void cvm_oct_rx_initialize(void);
 void cvm_oct_rx_shutdown(void);
+
+static inline void cvm_oct_rx_refill_pool(int fill_threshold)
+{
+        int number_to_free;
+        int num_freed;
+        /* Refill the packet buffer pool */
+        number_to_free =
+                cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);
+
+        if (number_to_free > fill_threshold) {
+                cvmx_fau_atomic_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE,
+                                      -number_to_free);
+                num_freed = cvm_oct_mem_fill_fpa(CVMX_FPA_PACKET_POOL,
+                                                 CVMX_FPA_PACKET_POOL_SIZE,
+                                                 number_to_free);
+                if (num_freed != number_to_free) {
+                        cvmx_fau_atomic_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE,
+                                              number_to_free - num_freed);
+                }
+        }
+}
diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index 973178a80c93..9f5b7419e777 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c
@@ -104,6 +104,16 @@ MODULE_PARM_DESC(pow_send_list, "\n"
         "\t\"eth2,spi3,spi7\" would cause these three devices to transmit\n"
         "\tusing the pow_send_group.");
 
+int max_rx_cpus = -1;
+module_param(max_rx_cpus, int, 0444);
+MODULE_PARM_DESC(max_rx_cpus, "\n"
+        "\t\tThe maximum number of CPUs to use for packet reception.\n"
+        "\t\tUse -1 to use all available CPUs.");
+
+int rx_napi_weight = 32;
+module_param(rx_napi_weight, int, 0444);
+MODULE_PARM_DESC(rx_napi_weight, "The NAPI WEIGHT parameter.");
+
 /*
  * The offset from mac_addr_base that should be used for the next port
  * that is configured. By convention, if any mgmt ports exist on the
@@ -149,6 +159,15 @@ static void cvm_do_timer(unsigned long arg)
         } else {
                 port = 0;
                 /*
+                 * FPA 0 may have been drained, try to refill it if we
+                 * need more than num_packet_buffers / 2, otherwise
+                 * normal receive processing will refill it. If it
+                 * were drained, no packets could be received so
+                 * cvm_oct_napi_poll would never be invoked to do the
+                 * refill.
+                 */
+                cvm_oct_rx_refill_pool(num_packet_buffers / 2);
+                /*
                  * All ports have been polled. Start the next iteration through
                  * the ports in one second.
                  */
@@ -161,7 +180,6 @@
  */
 static __init void cvm_oct_configure_common_hw(void)
 {
-        int r;
         /* Setup the FPA */
         cvmx_fpa_enable();
         cvm_oct_mem_fill_fpa(CVMX_FPA_PACKET_POOL, CVMX_FPA_PACKET_POOL_SIZE,
@@ -176,17 +194,6 @@ static __init void cvm_oct_configure_common_hw(void)
         cvmx_helper_setup_red(num_packet_buffers / 4,
                               num_packet_buffers / 8);
 
-        /* Register an IRQ hander for to receive POW interrupts */
-        r = request_irq(OCTEON_IRQ_WORKQ0 + pow_receive_group,
-                        cvm_oct_do_interrupt, IRQF_SHARED, "Ethernet",
-                        cvm_oct_device);
-
-#if defined(CONFIG_SMP) && 0
-        if (USE_MULTICORE_RECEIVE) {
-                irq_set_affinity(OCTEON_IRQ_WORKQ0 + pow_receive_group,
-                                 cpu_online_mask);
-        }
-#endif
 }
 
 /**
@@ -616,7 +623,6 @@ static int __init cvm_oct_init_module(void)
         cvm_oct_mac_addr_offset = 0;
 
         cvm_oct_proc_initialize();
-        cvm_oct_rx_initialize();
         cvm_oct_configure_common_hw();
 
         cvmx_helper_initialize_packet_io_global();
@@ -781,25 +787,7 @@ static int __init cvm_oct_init_module(void)
                 }
         }
 
-        if (INTERRUPT_LIMIT) {
-                /*
-                 * Set the POW timer rate to give an interrupt at most
-                 * INTERRUPT_LIMIT times per second.
-                 */
-                cvmx_write_csr(CVMX_POW_WQ_INT_PC,
-                               octeon_bootinfo->eclock_hz / (INTERRUPT_LIMIT *
-                                                             16 * 256) << 8);
-
-                /*
-                 * Enable POW timer interrupt. It will count when
-                 * there are packets available.
-                 */
-                cvmx_write_csr(CVMX_POW_WQ_INT_THRX(pow_receive_group),
-                               0x1ful << 24);
-        } else {
-                /* Enable POW interrupt when our port has at least one packet */
-                cvmx_write_csr(CVMX_POW_WQ_INT_THRX(pow_receive_group), 0x1001);
-        }
+        cvm_oct_rx_initialize();
 
         /* Enable the poll timer for checking RGMII status */
         init_timer(&cvm_oct_poll_timer);
diff --git a/drivers/staging/octeon/octeon-ethernet.h b/drivers/staging/octeon/octeon-ethernet.h
index 203c6a920af5..40b695615431 100644
--- a/drivers/staging/octeon/octeon-ethernet.h
+++ b/drivers/staging/octeon/octeon-ethernet.h
@@ -98,4 +98,7 @@ extern int pow_receive_group;
 extern char pow_send_list[];
 extern struct net_device *cvm_oct_device[];
 
+extern int max_rx_cpus;
+extern int rx_napi_weight;
+
 #endif