diff options
-rw-r--r-- | MAINTAINERS | 1 | ||||
-rw-r--r-- | arch/tile/include/asm/cacheflush.h | 52 | ||||
-rw-r--r-- | arch/tile/include/asm/processor.h | 10 | ||||
-rw-r--r-- | arch/tile/include/hv/drv_xgbe_impl.h | 300 | ||||
-rw-r--r-- | arch/tile/include/hv/drv_xgbe_intf.h | 615 | ||||
-rw-r--r-- | arch/tile/include/hv/netio_errors.h | 122 | ||||
-rw-r--r-- | arch/tile/include/hv/netio_intf.h | 2975 | ||||
-rw-r--r-- | arch/tile/mm/init.c | 8 | ||||
-rw-r--r-- | drivers/net/Kconfig | 12 | ||||
-rw-r--r-- | drivers/net/Makefile | 1 | ||||
-rw-r--r-- | drivers/net/tile/Makefile | 10 | ||||
-rw-r--r-- | drivers/net/tile/tilepro.c | 2406 |
12 files changed, 6510 insertions, 2 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 2525b04f2e25..70b3820aa58e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -5828,6 +5828,7 @@ W: http://www.tilera.com/scm/ | |||
5828 | S: Supported | 5828 | S: Supported |
5829 | F: arch/tile/ | 5829 | F: arch/tile/ |
5830 | F: drivers/char/hvc_tile.c | 5830 | F: drivers/char/hvc_tile.c |
5831 | F: drivers/net/tile/ | ||
5831 | 5832 | ||
5832 | TLAN NETWORK DRIVER | 5833 | TLAN NETWORK DRIVER |
5833 | M: Samuel Chessman <chessman@tux.org> | 5834 | M: Samuel Chessman <chessman@tux.org> |
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h index c5741da4eeac..14a3f8556ace 100644 --- a/arch/tile/include/asm/cacheflush.h +++ b/arch/tile/include/asm/cacheflush.h | |||
@@ -137,4 +137,56 @@ static inline void finv_buffer(void *buffer, size_t size) | |||
137 | mb_incoherent(); | 137 | mb_incoherent(); |
138 | } | 138 | } |
139 | 139 | ||
140 | /* | ||
141 | * Flush & invalidate a VA range that is homed remotely on a single core, | ||
142 | * waiting until the memory controller holds the flushed values. | ||
143 | */ | ||
144 | static inline void finv_buffer_remote(void *buffer, size_t size) | ||
145 | { | ||
146 | char *p; | ||
147 | int i; | ||
148 | |||
149 | /* | ||
150 | * Flush and invalidate the buffer out of the local L1/L2 | ||
151 | * and request the home cache to flush and invalidate as well. | ||
152 | */ | ||
153 | __finv_buffer(buffer, size); | ||
154 | |||
155 | /* | ||
156 | * Wait for the home cache to acknowledge that it has processed | ||
157 | * all the flush-and-invalidate requests. This does not mean | ||
158 | * that the flushed data has reached the memory controller yet, | ||
159 | * but it does mean the home cache is processing the flushes. | ||
160 | */ | ||
161 | __insn_mf(); | ||
162 | |||
163 | /* | ||
164 | * Issue a load to the last cache line, which can't complete | ||
165 | * until all the previously-issued flushes to the same memory | ||
166 | * controller have also completed. If we weren't striping | ||
167 | * memory, that one load would be sufficient, but since we may | ||
168 | * be, we also need to back up to the last load issued to | ||
169 | * another memory controller, which would be the point where | ||
170 | * we crossed an 8KB boundary (the granularity of striping | ||
171 | * across memory controllers). Keep backing up and doing this | ||
172 | * until we are before the beginning of the buffer, or have | ||
173 | * hit all the controllers. | ||
174 | */ | ||
175 | for (i = 0, p = (char *)buffer + size - 1; | ||
176 | i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer; | ||
177 | ++i) { | ||
178 | const unsigned long STRIPE_WIDTH = 8192; | ||
179 | |||
180 | /* Force a load instruction to issue. */ | ||
181 | *(volatile char *)p; | ||
182 | |||
183 | /* Jump to end of previous stripe. */ | ||
184 | p -= STRIPE_WIDTH; | ||
185 | p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1)); | ||
186 | } | ||
187 | |||
188 | /* Wait for the loads (and thus flushes) to have completed. */ | ||
189 | __insn_mf(); | ||
190 | } | ||
191 | |||
140 | #endif /* _ASM_TILE_CACHEFLUSH_H */ | 192 | #endif /* _ASM_TILE_CACHEFLUSH_H */ |
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 1747ff3946b2..a9e7c8760334 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h | |||
@@ -292,8 +292,18 @@ extern int kstack_hash; | |||
292 | /* Are we using huge pages in the TLB for kernel data? */ | 292 | /* Are we using huge pages in the TLB for kernel data? */ |
293 | extern int kdata_huge; | 293 | extern int kdata_huge; |
294 | 294 | ||
295 | /* Support standard Linux prefetching. */ | ||
296 | #define ARCH_HAS_PREFETCH | ||
297 | #define prefetch(x) __builtin_prefetch(x) | ||
295 | #define PREFETCH_STRIDE CHIP_L2_LINE_SIZE() | 298 | #define PREFETCH_STRIDE CHIP_L2_LINE_SIZE() |
296 | 299 | ||
300 | /* Bring a value into the L1D, faulting the TLB if necessary. */ | ||
301 | #ifdef __tilegx__ | ||
302 | #define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x)) | ||
303 | #else | ||
304 | #define prefetch_L1(x) __insn_prefetch_L1((void *)(x)) | ||
305 | #endif | ||
306 | |||
297 | #else /* __ASSEMBLY__ */ | 307 | #else /* __ASSEMBLY__ */ |
298 | 308 | ||
299 | /* Do some slow action (e.g. read a slow SPR). */ | 309 | /* Do some slow action (e.g. read a slow SPR). */ |
diff --git a/arch/tile/include/hv/drv_xgbe_impl.h b/arch/tile/include/hv/drv_xgbe_impl.h new file mode 100644 index 000000000000..3a73b2b44913 --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_impl.h | |||
@@ -0,0 +1,300 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | /** | ||
16 | * @file drivers/xgbe/impl.h | ||
17 | * Implementation details for the NetIO library. | ||
18 | */ | ||
19 | |||
20 | #ifndef __DRV_XGBE_IMPL_H__ | ||
21 | #define __DRV_XGBE_IMPL_H__ | ||
22 | |||
23 | #include <hv/netio_errors.h> | ||
24 | #include <hv/netio_intf.h> | ||
25 | #include <hv/drv_xgbe_intf.h> | ||
26 | |||
27 | |||
28 | /** How many groups we have (log2). */ | ||
29 | #define LOG2_NUM_GROUPS (12) | ||
30 | /** How many groups we have. */ | ||
31 | #define NUM_GROUPS (1 << LOG2_NUM_GROUPS) | ||
32 | |||
33 | /** Number of output requests we'll buffer per tile. */ | ||
34 | #define EPP_REQS_PER_TILE (32) | ||
35 | |||
36 | /** Words used in an eDMA command without checksum acceleration. */ | ||
37 | #define EDMA_WDS_NO_CSUM 8 | ||
38 | /** Words used in an eDMA command with checksum acceleration. */ | ||
39 | #define EDMA_WDS_CSUM 10 | ||
40 | /** Total available words in the eDMA command FIFO. */ | ||
41 | #define EDMA_WDS_TOTAL 128 | ||
42 | |||
43 | |||
44 | /* | ||
45 | * FIXME: These definitions are internal and should have underscores! | ||
46 | * NOTE: The actual numeric values here are intentional and allow us to | ||
47 | * optimize the concept "if small ... else if large ... else ...", by | ||
48 | * checking for the low bit being set, and then for non-zero. | ||
49 | * These are used as array indices, so they must have the values (0, 1, 2) | ||
50 | * in some order. | ||
51 | */ | ||
52 | #define SIZE_SMALL (1) /**< Small packet queue. */ | ||
53 | #define SIZE_LARGE (2) /**< Large packet queue. */ | ||
54 | #define SIZE_JUMBO (0) /**< Jumbo packet queue. */ | ||
55 | |||
56 | /** The number of "SIZE_xxx" values. */ | ||
57 | #define NETIO_NUM_SIZES 3 | ||
58 | |||
59 | |||
60 | /* | ||
61 | * Default numbers of packets for IPP drivers. These values are chosen | ||
62 | * such that CIPP1 will not overflow its L2 cache. | ||
63 | */ | ||
64 | |||
65 | /** The default number of small packets. */ | ||
66 | #define NETIO_DEFAULT_SMALL_PACKETS 2750 | ||
67 | /** The default number of large packets. */ | ||
68 | #define NETIO_DEFAULT_LARGE_PACKETS 2500 | ||
69 | /** The default number of jumbo packets. */ | ||
70 | #define NETIO_DEFAULT_JUMBO_PACKETS 250 | ||
71 | |||
72 | |||
73 | /** Log2 of the size of a memory arena. */ | ||
74 | #define NETIO_ARENA_SHIFT 24 /* 16 MB */ | ||
75 | /** Size of a memory arena. */ | ||
76 | #define NETIO_ARENA_SIZE (1 << NETIO_ARENA_SHIFT) | ||
77 | |||
78 | |||
79 | /** A queue of packets. | ||
80 | * | ||
81 | * This structure partially defines a queue of packets waiting to be | ||
82 | * processed. The queue as a whole is written to by an interrupt handler and | ||
83 | * read by non-interrupt code; this data structure is what's touched by the | ||
84 | * interrupt handler. The other part of the queue state, the read offset, is | ||
85 | * kept in user space, not in hypervisor space, so it is in a separate data | ||
86 | * structure. | ||
87 | * | ||
88 | * The read offset (__packet_receive_read in the user part of the queue | ||
89 | * structure) points to the next packet to be read. When the read offset is | ||
90 | * equal to the write offset, the queue is empty; therefore the queue must | ||
91 | * contain one more slot than the required maximum queue size. | ||
92 | * | ||
93 | * Here's an example of all 3 state variables and what they mean. All | ||
94 | * pointers move left to right. | ||
95 | * | ||
96 | * @code | ||
97 | * I I V V V V I I I I | ||
98 | * 0 1 2 3 4 5 6 7 8 9 10 | ||
99 | * ^ ^ ^ ^ | ||
100 | * | | | | ||
101 | * | | __last_packet_plus_one | ||
102 | * | __buffer_write | ||
103 | * __packet_receive_read | ||
104 | * @endcode | ||
105 | * | ||
106 | * This queue has 10 slots, and thus can hold 9 packets (_last_packet_plus_one | ||
107 | * = 10). The read pointer is at 2, and the write pointer is at 6; thus, | ||
108 | * there are valid, unread packets in slots 2, 3, 4, and 5. The remaining | ||
109 | * slots are invalid (do not contain a packet). | ||
110 | */ | ||
111 | typedef struct { | ||
112 | /** Byte offset of the next notify packet to be written: zero for the first | ||
113 | * packet on the queue, sizeof (netio_pkt_t) for the second packet on the | ||
114 | * queue, etc. */ | ||
115 | volatile uint32_t __packet_write; | ||
116 | |||
117 | /** Offset of the packet after the last valid packet (i.e., when any | ||
118 | * pointer is incremented to this value, it wraps back to zero). */ | ||
119 | uint32_t __last_packet_plus_one; | ||
120 | } | ||
121 | __netio_packet_queue_t; | ||
122 | |||
123 | |||
124 | /** A queue of buffers. | ||
125 | * | ||
126 | * This structure partially defines a queue of empty buffers which have been | ||
127 | * obtained via requests to the IPP. (The elements of the queue are packet | ||
128 | * handles, which are transformed into a full netio_pkt_t when the buffer is | ||
129 | * retrieved.) The queue as a whole is written to by an interrupt handler and | ||
130 | * read by non-interrupt code; this data structure is what's touched by the | ||
131 | * interrupt handler. The other parts of the queue state, the read offset and | ||
132 | * requested write offset, are kept in user space, not in hypervisor space, so | ||
133 | * they are in a separate data structure. | ||
134 | * | ||
135 | * The read offset (__buffer_read in the user part of the queue structure) | ||
136 | * points to the next buffer to be read. When the read offset is equal to the | ||
137 | * write offset, the queue is empty; therefore the queue must contain one more | ||
138 | * slot than the required maximum queue size. | ||
139 | * | ||
140 | * The requested write offset (__buffer_requested_write in the user part of | ||
141 | * the queue structure) points to the slot which will hold the next buffer we | ||
142 | * request from the IPP, once we get around to sending such a request. When | ||
143 | * the requested write offset is equal to the write offset, no requests for | ||
144 | * new buffers are outstanding; when the requested write offset is one greater | ||
145 | * than the read offset, no more requests may be sent. | ||
146 | * | ||
147 | * Note that, unlike the packet_queue, the buffer_queue places incoming | ||
148 | * buffers at decreasing addresses. This makes the check for "is it time to | ||
149 | * wrap the buffer pointer" cheaper in the assembly code which receives new | ||
150 | * buffers, and means that the value which defines the queue size, | ||
151 | * __last_buffer, is different than in the packet queue. Also, the offset | ||
152 | * used in the packet_queue is already scaled by the size of a packet; here we | ||
153 | * use unscaled slot indices for the offsets. (These differences are | ||
154 | * historical, and in the future it's possible that the packet_queue will look | ||
155 | * more like this queue.) | ||
156 | * | ||
157 | * @code | ||
158 | * Here's an example of all 4 state variables and what they mean. Remember: | ||
159 | * all pointers move right to left. | ||
160 | * | ||
161 | * V V V I I R R V V V | ||
162 | * 0 1 2 3 4 5 6 7 8 9 | ||
163 | * ^ ^ ^ ^ | ||
164 | * | | | | | ||
165 | * | | | __last_buffer | ||
166 | * | | __buffer_write | ||
167 | * | __buffer_requested_write | ||
168 | * __buffer_read | ||
169 | * @endcode | ||
170 | * | ||
171 | * This queue has 10 slots, and thus can hold 9 buffers (_last_buffer = 9). | ||
172 | * The read pointer is at 2, and the write pointer is at 6; thus, there are | ||
173 | * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7. The requested write | ||
174 | * pointer is at 4; thus, requests have been made to the IPP for buffers which | ||
175 | * will be placed in slots 6 and 5 when they arrive. Finally, the remaining | ||
176 | * slots are invalid (do not contain a buffer). | ||
177 | */ | ||
178 | typedef struct | ||
179 | { | ||
180 | /** Ordinal number of the next buffer to be written: 0 for the first slot in | ||
181 | * the queue, 1 for the second slot in the queue, etc. */ | ||
182 | volatile uint32_t __buffer_write; | ||
183 | |||
184 | /** Ordinal number of the last buffer (i.e., when any pointer is decremented | ||
185 | * below zero, it is reloaded with this value). */ | ||
186 | uint32_t __last_buffer; | ||
187 | } | ||
188 | __netio_buffer_queue_t; | ||
189 | |||
190 | |||
191 | /** | ||
192 | * An object for providing Ethernet packets to a process. | ||
193 | */ | ||
194 | typedef struct __netio_queue_impl_t | ||
195 | { | ||
196 | /** The queue of packets waiting to be received. */ | ||
197 | __netio_packet_queue_t __packet_receive_queue; | ||
198 | /** The intr bit mask that IDs this device. */ | ||
199 | unsigned int __intr_id; | ||
200 | /** Offset to queues of empty buffers, one per size. */ | ||
201 | uint32_t __buffer_queue[NETIO_NUM_SIZES]; | ||
202 | /** The address of the first EPP tile, or -1 if no EPP. */ | ||
203 | /* ISSUE: Actually this is always "0" or "~0". */ | ||
204 | uint32_t __epp_location; | ||
205 | /** The queue ID that this queue represents. */ | ||
206 | unsigned int __queue_id; | ||
207 | /** Number of acknowledgements received. */ | ||
208 | volatile uint32_t __acks_received; | ||
209 | /** Last completion number received for packet_sendv. */ | ||
210 | volatile uint32_t __last_completion_rcv; | ||
211 | /** Number of packets allowed to be outstanding. */ | ||
212 | uint32_t __max_outstanding; | ||
213 | /** First VA available for packets. */ | ||
214 | void* __va_0; | ||
215 | /** First VA in second range available for packets. */ | ||
216 | void* __va_1; | ||
217 | /** Padding to align the "__packets" field to the size of a netio_pkt_t. */ | ||
218 | uint32_t __padding[3]; | ||
219 | /** The packets themselves. */ | ||
220 | netio_pkt_t __packets[0]; | ||
221 | } | ||
222 | netio_queue_impl_t; | ||
223 | |||
224 | |||
225 | /** | ||
226 | * An object for managing the user end of a NetIO queue. | ||
227 | */ | ||
228 | typedef struct __netio_queue_user_impl_t | ||
229 | { | ||
230 | /** The next incoming packet to be read. */ | ||
231 | uint32_t __packet_receive_read; | ||
232 | /** The next empty buffers to be read, one index per size. */ | ||
233 | uint8_t __buffer_read[NETIO_NUM_SIZES]; | ||
234 | /** Where the empty buffer we next request from the IPP will go, one index | ||
235 | * per size. */ | ||
236 | uint8_t __buffer_requested_write[NETIO_NUM_SIZES]; | ||
237 | /** PCIe interface flag. */ | ||
238 | uint8_t __pcie; | ||
239 | /** Number of packets left to be received before we send a credit update. */ | ||
240 | uint32_t __receive_credit_remaining; | ||
241 | /** Value placed in __receive_credit_remaining when it reaches zero. */ | ||
242 | uint32_t __receive_credit_interval; | ||
243 | /** First fast I/O routine index. */ | ||
244 | uint32_t __fastio_index; | ||
245 | /** Number of acknowledgements expected. */ | ||
246 | uint32_t __acks_outstanding; | ||
247 | /** Last completion number requested. */ | ||
248 | uint32_t __last_completion_req; | ||
249 | /** File descriptor for driver. */ | ||
250 | int __fd; | ||
251 | } | ||
252 | netio_queue_user_impl_t; | ||
253 | |||
254 | |||
255 | #define NETIO_GROUP_CHUNK_SIZE 64 /**< Max # groups in one IPP request */ | ||
256 | #define NETIO_BUCKET_CHUNK_SIZE 64 /**< Max # buckets in one IPP request */ | ||
257 | |||
258 | |||
259 | /** Internal structure used to convey packet send information to the | ||
260 | * hypervisor. FIXME: Actually, it's not used for that anymore, but | ||
261 | * netio_packet_send() still uses it internally. | ||
262 | */ | ||
263 | typedef struct | ||
264 | { | ||
265 | uint16_t flags; /**< Packet flags (__NETIO_SEND_FLG_xxx) */ | ||
266 | uint16_t transfer_size; /**< Size of packet */ | ||
267 | uint32_t va; /**< VA of start of packet */ | ||
268 | __netio_pkt_handle_t handle; /**< Packet handle */ | ||
269 | uint32_t csum0; /**< First checksum word */ | ||
270 | uint32_t csum1; /**< Second checksum word */ | ||
271 | } | ||
272 | __netio_send_cmd_t; | ||
273 | |||
274 | |||
275 | /** Flags used in two contexts: | ||
276 | * - As the "flags" member in the __netio_send_cmd_t, above; used only | ||
277 | * for netio_pkt_send_{prepare,commit}. | ||
278 | * - As part of the flags passed to the various send packet fast I/O calls. | ||
279 | */ | ||
280 | |||
281 | /** Need acknowledgement on this packet. Note that some code in the | ||
282 | * normal send_pkt fast I/O handler assumes that this is equal to 1. */ | ||
283 | #define __NETIO_SEND_FLG_ACK 0x1 | ||
284 | |||
285 | /** Do checksum on this packet. (Only used with the __netio_send_cmd_t; | ||
286 | * normal packet sends use a special fast I/O index to denote checksumming, | ||
287 | * and multi-segment sends test the checksum descriptor.) */ | ||
288 | #define __NETIO_SEND_FLG_CSUM 0x2 | ||
289 | |||
290 | /** Get a completion on this packet. Only used with multi-segment sends. */ | ||
291 | #define __NETIO_SEND_FLG_COMPLETION 0x4 | ||
292 | |||
293 | /** Position of the number-of-extra-segments value in the flags word. | ||
294 | Only used with multi-segment sends. */ | ||
295 | #define __NETIO_SEND_FLG_XSEG_SHIFT 3 | ||
296 | |||
297 | /** Width of the number-of-extra-segments value in the flags word. */ | ||
298 | #define __NETIO_SEND_FLG_XSEG_WIDTH 2 | ||
299 | |||
300 | #endif /* __DRV_XGBE_IMPL_H__ */ | ||
diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h new file mode 100644 index 000000000000..146e47d5334b --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_intf.h | |||
@@ -0,0 +1,615 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | /** | ||
16 | * @file drv_xgbe_intf.h | ||
17 | * Interface to the hypervisor XGBE driver. | ||
18 | */ | ||
19 | |||
20 | #ifndef __DRV_XGBE_INTF_H__ | ||
21 | #define __DRV_XGBE_INTF_H__ | ||
22 | |||
23 | /** | ||
24 | * An object for forwarding VAs and PAs to the hypervisor. | ||
25 | * @ingroup types | ||
26 | * | ||
27 | * This allows the supervisor to specify a number of areas of memory to | ||
28 | * store packet buffers. | ||
29 | */ | ||
30 | typedef struct | ||
31 | { | ||
32 | /** The physical address of the memory. */ | ||
33 | HV_PhysAddr pa; | ||
34 | /** Page table entry for the memory. This is only used to derive the | ||
35 | * memory's caching mode; the PA bits are ignored. */ | ||
36 | HV_PTE pte; | ||
37 | /** The virtual address of the memory. */ | ||
38 | HV_VirtAddr va; | ||
39 | /** Size (in bytes) of the memory area. */ | ||
40 | int size; | ||
41 | |||
42 | } | ||
43 | netio_ipp_address_t; | ||
44 | |||
45 | /** The various pread/pwrite offsets into the hypervisor-level driver. | ||
46 | * @ingroup types | ||
47 | */ | ||
48 | typedef enum | ||
49 | { | ||
50 | /** Inform the Linux driver of the address of the NetIO arena memory. | ||
51 | * This offset is actually only used to convey information from netio | ||
52 | * to the Linux driver; it never makes it from there to the hypervisor. | ||
53 | * Write-only; takes a uint32_t specifying the VA address. */ | ||
54 | NETIO_FIXED_ADDR = 0x5000000000000000ULL, | ||
55 | |||
56 | /** Inform the Linux driver of the size of the NetIO arena memory. | ||
57 | * This offset is actually only used to convey information from netio | ||
58 | * to the Linux driver; it never makes it from there to the hypervisor. | ||
59 | * Write-only; takes a uint32_t specifying the VA size. */ | ||
60 | NETIO_FIXED_SIZE = 0x5100000000000000ULL, | ||
61 | |||
62 | /** Register current tile with IPP. Write then read: write, takes a | ||
63 | * netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */ | ||
64 | NETIO_IPP_INPUT_REGISTER_OFF = 0x6000000000000000ULL, | ||
65 | |||
66 | /** Unregister current tile from IPP. Write-only, takes a dummy argument. */ | ||
67 | NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL, | ||
68 | |||
69 | /** Start packets flowing. Write-only, takes a dummy argument. */ | ||
70 | NETIO_IPP_INPUT_INIT_OFF = 0x6200000000000000ULL, | ||
71 | |||
72 | /** Stop packets flowing. Write-only, takes a dummy argument. */ | ||
73 | NETIO_IPP_INPUT_UNINIT_OFF = 0x6300000000000000ULL, | ||
74 | |||
75 | /** Configure group (typically we group on VLAN). Write-only: takes an | ||
76 | * array of netio_group_t's, low 24 bits of the offset is the base group | ||
77 | * number times the size of a netio_group_t. */ | ||
78 | NETIO_IPP_INPUT_GROUP_CFG_OFF = 0x6400000000000000ULL, | ||
79 | |||
80 | /** Configure bucket. Write-only: takes an array of netio_bucket_t's, low | ||
81 | * 24 bits of the offset is the base bucket number times the size of a | ||
82 | * netio_bucket_t. */ | ||
83 | NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL, | ||
84 | |||
85 | /** Get/set a parameter. Read or write: read or write data is the parameter | ||
86 | * value, low 32 bits of the offset is a __netio_getset_offset_t. */ | ||
87 | NETIO_IPP_PARAM_OFF = 0x6600000000000000ULL, | ||
88 | |||
89 | /** Get fast I/O index. Read-only; returns a 4-byte base index value. */ | ||
90 | NETIO_IPP_GET_FASTIO_OFF = 0x6700000000000000ULL, | ||
91 | |||
92 | /** Configure hijack IP address. Packets with this IPv4 dest address | ||
93 | * go to bucket NETIO_NUM_BUCKETS - 1. Write-only: takes an IP address | ||
94 | * in some standard form. FIXME: Define the form! */ | ||
95 | NETIO_IPP_INPUT_HIJACK_CFG_OFF = 0x6800000000000000ULL, | ||
96 | |||
97 | /** | ||
98 | * Offsets beyond this point are reserved for the supervisor (although that | ||
99 | * enforcement must be done by the supervisor driver itself). | ||
100 | */ | ||
101 | NETIO_IPP_USER_MAX_OFF = 0x6FFFFFFFFFFFFFFFULL, | ||
102 | |||
103 | /** Register I/O memory. Write-only, takes a netio_ipp_address_t. */ | ||
104 | NETIO_IPP_IOMEM_REGISTER_OFF = 0x7000000000000000ULL, | ||
105 | |||
106 | /** Unregister I/O memory. Write-only, takes a netio_ipp_address_t. */ | ||
107 | NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL, | ||
108 | |||
109 | /* Offsets greater than 0x7FFFFFFF can't be used directly from Linux | ||
110 | * userspace code due to limitations in the pread/pwrite syscalls. */ | ||
111 | |||
112 | /** Drain LIPP buffers. */ | ||
113 | NETIO_IPP_DRAIN_OFF = 0xFA00000000000000ULL, | ||
114 | |||
115 | /** Supply a netio_ipp_address_t to be used as shared memory for the | ||
116 | * LEPP command queue. */ | ||
117 | NETIO_EPP_SHM_OFF = 0xFB00000000000000ULL, | ||
118 | |||
119 | /* 0xFC... is currently unused. */ | ||
120 | |||
121 | /** Stop IPP/EPP tiles. Write-only, takes a dummy argument. */ | ||
122 | NETIO_IPP_STOP_SHIM_OFF = 0xFD00000000000000ULL, | ||
123 | |||
124 | /** Start IPP/EPP tiles. Write-only, takes a dummy argument. */ | ||
125 | NETIO_IPP_START_SHIM_OFF = 0xFE00000000000000ULL, | ||
126 | |||
127 | /** Supply packet arena. Write-only, takes an array of | ||
128 | * netio_ipp_address_t values. */ | ||
129 | NETIO_IPP_ADDRESS_OFF = 0xFF00000000000000ULL, | ||
130 | } netio_hv_offset_t; | ||
131 | |||
132 | /** Extract the base offset from an offset */ | ||
133 | #define NETIO_BASE_OFFSET(off) ((off) & 0xFF00000000000000ULL) | ||
134 | /** Extract the local offset from an offset */ | ||
135 | #define NETIO_LOCAL_OFFSET(off) ((off) & 0x00FFFFFFFFFFFFFFULL) | ||
136 | |||
137 | |||
138 | /** | ||
139 | * Get/set offset. | ||
140 | */ | ||
141 | typedef union | ||
142 | { | ||
143 | struct | ||
144 | { | ||
145 | uint64_t addr:48; /**< Class-specific address */ | ||
146 | unsigned int class:8; /**< Class (e.g., NETIO_PARAM) */ | ||
147 | unsigned int opcode:8; /**< High 8 bits of NETIO_IPP_PARAM_OFF */ | ||
148 | } | ||
149 | bits; /**< Bitfields */ | ||
150 | uint64_t word; /**< Aggregated value to use as the offset */ | ||
151 | } | ||
152 | __netio_getset_offset_t; | ||
153 | |||
154 | /** | ||
155 | * Fast I/O index offsets (must be contiguous). | ||
156 | */ | ||
157 | typedef enum | ||
158 | { | ||
159 | NETIO_FASTIO_ALLOCATE = 0, /**< Get empty packet buffer */ | ||
160 | NETIO_FASTIO_FREE_BUFFER = 1, /**< Give buffer back to IPP */ | ||
161 | NETIO_FASTIO_RETURN_CREDITS = 2, /**< Give credits to IPP */ | ||
162 | NETIO_FASTIO_SEND_PKT_NOCK = 3, /**< Send a packet, no checksum */ | ||
163 | NETIO_FASTIO_SEND_PKT_CK = 4, /**< Send a packet, with checksum */ | ||
164 | NETIO_FASTIO_SEND_PKT_VEC = 5, /**< Send a vector of packets */ | ||
165 | NETIO_FASTIO_SENDV_PKT = 6, /**< Sendv one packet */ | ||
166 | NETIO_FASTIO_NUM_INDEX = 7, /**< Total number of fast I/O indices */ | ||
167 | } netio_fastio_index_t; | ||
168 | |||
169 | /** 3-word return type for Fast I/O call. */ | ||
170 | typedef struct | ||
171 | { | ||
172 | int err; /**< Error code. */ | ||
173 | uint32_t val0; /**< Value. Meaning depends upon the specific call. */ | ||
174 | uint32_t val1; /**< Value. Meaning depends upon the specific call. */ | ||
175 | } netio_fastio_rv3_t; | ||
176 | |||
177 | /** 0-argument fast I/O call */ | ||
178 | int __netio_fastio0(uint32_t fastio_index); | ||
179 | /** 1-argument fast I/O call */ | ||
180 | int __netio_fastio1(uint32_t fastio_index, uint32_t arg0); | ||
181 | /** 3-argument fast I/O call, 2-word return value */ | ||
182 | netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0, | ||
183 | uint32_t arg1, uint32_t arg2); | ||
184 | /** 4-argument fast I/O call */ | ||
185 | int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, | ||
186 | uint32_t arg2, uint32_t arg3); | ||
187 | /** 6-argument fast I/O call */ | ||
188 | int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, | ||
189 | uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5); | ||
190 | /** 9-argument fast I/O call */ | ||
191 | int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, | ||
192 | uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5, | ||
193 | uint32_t arg6, uint32_t arg7, uint32_t arg8); | ||
194 | |||
195 | /** Allocate an empty packet. | ||
196 | * @param fastio_index Fast I/O index. | ||
197 | * @param size Size of the packet to allocate. | ||
198 | */ | ||
199 | #define __netio_fastio_allocate(fastio_index, size) \ | ||
200 | __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size) | ||
201 | |||
202 | /** Free a buffer. | ||
203 | * @param fastio_index Fast I/O index. | ||
204 | * @param handle Handle for the packet to free. | ||
205 | */ | ||
206 | #define __netio_fastio_free_buffer(fastio_index, handle) \ | ||
207 | __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle) | ||
208 | |||
209 | /** Increment our receive credits. | ||
210 | * @param fastio_index Fast I/O index. | ||
211 | * @param credits Number of credits to add. | ||
212 | */ | ||
213 | #define __netio_fastio_return_credits(fastio_index, credits) \ | ||
214 | __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits) | ||
215 | |||
216 | /** Send packet, no checksum. | ||
217 | * @param fastio_index Fast I/O index. | ||
218 | * @param ackflag Nonzero if we want an ack. | ||
219 | * @param size Size of the packet. | ||
220 | * @param va Virtual address of start of packet. | ||
221 | * @param handle Packet handle. | ||
222 | */ | ||
223 | #define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \ | ||
224 | __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \ | ||
225 | size, va, handle) | ||
226 | |||
227 | /** Send packet, calculate checksum. | ||
228 | * @param fastio_index Fast I/O index. | ||
229 | * @param ackflag Nonzero if we want an ack. | ||
230 | * @param size Size of the packet. | ||
231 | * @param va Virtual address of start of packet. | ||
232 | * @param handle Packet handle. | ||
233 | * @param csum0 Shim checksum header. | ||
234 | * @param csum1 Checksum seed. | ||
235 | */ | ||
236 | #define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \ | ||
237 | csum0, csum1) \ | ||
238 | __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \ | ||
239 | size, va, handle, csum0, csum1) | ||
240 | |||
241 | |||
242 | /** Format for the "csum0" argument to the __netio_fastio_send routines | ||
243 | * and LEPP. Note that this is currently exactly identical to the | ||
244 | * ShimProtocolOffloadHeader. | ||
245 | */ | ||
246 | typedef union | ||
247 | { | ||
248 | struct | ||
249 | { | ||
250 | unsigned int start_byte:7; /**< The first byte to be checksummed */ | ||
251 | unsigned int count:14; /**< Number of bytes to be checksummed. */ | ||
252 | unsigned int destination_byte:7; /**< The byte to write the checksum to. */ | ||
253 | unsigned int reserved:4; /**< Reserved. */ | ||
254 | } bits; /**< Decomposed method of access. */ | ||
255 | unsigned int word; /**< To send out the IDN. */ | ||
256 | } __netio_checksum_header_t; | ||
257 | |||
258 | |||
259 | /** Sendv packet with 1 or 2 segments. | ||
260 | * @param fastio_index Fast I/O index. | ||
261 | * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus | ||
262 | * 1 in next 2 bits; expected checksum in high 16 bits. | ||
263 | * @param confno Confirmation number to request, if notify flag set. | ||
264 | * @param csum0 Checksum descriptor; if zero, no checksum. | ||
265 | * @param va_F Virtual address of first segment. | ||
266 | * @param va_L Virtual address of last segment, if 2 segments. | ||
267 | * @param len_F_L Length of first segment in low 16 bits; length of last | ||
268 | * segment, if 2 segments, in high 16 bits. | ||
269 | */ | ||
270 | #define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \ | ||
271 | va_F, va_L, len_F_L) \ | ||
272 | __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ | ||
273 | csum0, va_F, va_L, len_F_L) | ||
274 | |||
275 | /** Send packet on PCIe interface. | ||
276 | * @param fastio_index Fast I/O index. | ||
277 | * @param flags Ack/csum/notify flags in low 3 bits. | ||
278 | * @param confno Confirmation number to request, if notify flag set. | ||
279 | * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe. | ||
280 | * @param va_F Virtual address of the packet buffer. | ||
281 | * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0. | ||
282 | * @param len_F_L Length of the packet buffer in low 16 bits. | ||
283 | */ | ||
284 | #define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \ | ||
285 | va_F, va_L, len_F_L) \ | ||
286 | __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \ | ||
287 | csum0, va_F, va_L, len_F_L) | ||
288 | |||
289 | /** Sendv packet with 3 or 4 segments. | ||
290 | * @param fastio_index Fast I/O index. | ||
291 | * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus | ||
292 | * 1 in next 2 bits; expected checksum in high 16 bits. | ||
293 | * @param confno Confirmation number to request, if notify flag set. | ||
294 | * @param csum0 Checksum descriptor; if zero, no checksum. | ||
295 | * @param va_F Virtual address of first segment. | ||
296 | * @param va_L Virtual address of last segment (third segment if 3 segments, | ||
297 | * fourth segment if 4 segments). | ||
298 | * @param len_F_L Length of first segment in low 16 bits; length of last | ||
299 | * segment in high 16 bits. | ||
300 | * @param va_M0 Virtual address of "middle 0" segment; this segment is sent | ||
301 | * second when there are three segments, and third if there are four. | ||
302 | * @param va_M1 Virtual address of "middle 1" segment; this segment is sent | ||
303 | * second when there are four segments. | ||
304 | * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle | ||
305 | * 1 segment, if 4 segments, in high 16 bits. | ||
306 | */ | ||
307 | #define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \ | ||
308 | va_L, len_F_L, va_M0, va_M1, len_M0_M1) \ | ||
309 | __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ | ||
310 | csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1) | ||
311 | |||
312 | /** Send vector of packets. | ||
313 | * @param fastio_index Fast I/O index. | ||
314 | * @param seqno Number of packets transmitted so far on this interface; | ||
315 | * used to decide which packets should be acknowledged. | ||
316 | * @param nentries Number of entries in vector. | ||
317 | * @param va Virtual address of start of vector entry array. | ||
318 | * @return 3-word netio_fastio_rv3_t structure. The structure's err member | ||
319 | * is an error code, or zero if no error. The val0 member is the | ||
320 | * updated value of seqno; it has been incremented by 1 for each | ||
321 | * packet sent. That increment may be less than nentries if an | ||
322 | * error occured, or if some of the entries in the vector contain | ||
323 | * handles equal to NETIO_PKT_HANDLE_NONE. The val1 member is the | ||
324 | * updated value of nentries; it has been decremented by 1 for each | ||
325 | * vector entry processed. Again, that decrement may be less than | ||
326 | * nentries (leaving the returned value positive) if an error | ||
327 | * occurred. | ||
328 | */ | ||
329 | #define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \ | ||
330 | __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \ | ||
331 | nentries, va) | ||
332 | |||
333 | |||
334 | /** An egress DMA command for LEPP. */ | ||
335 | typedef struct | ||
336 | { | ||
337 | /** Is this a TSO transfer? | ||
338 | * | ||
339 | * NOTE: This field is always 0, to distinguish it from | ||
340 | * lepp_tso_cmd_t. It must come first! | ||
341 | */ | ||
342 | uint8_t tso : 1; | ||
343 | |||
344 | /** Unused padding bits. */ | ||
345 | uint8_t _unused : 3; | ||
346 | |||
347 | /** Should this packet be sent directly from caches instead of DRAM, | ||
348 | * using hash-for-home to locate the packet data? | ||
349 | */ | ||
350 | uint8_t hash_for_home : 1; | ||
351 | |||
352 | /** Should we compute a checksum? */ | ||
353 | uint8_t compute_checksum : 1; | ||
354 | |||
355 | /** Is this the final buffer for this packet? | ||
356 | * | ||
357 | * A single packet can be split over several input buffers (a "gather" | ||
358 | * operation). This flag indicates that this is the last buffer | ||
359 | * in a packet. | ||
360 | */ | ||
361 | uint8_t end_of_packet : 1; | ||
362 | |||
363 | /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */ | ||
364 | uint8_t send_completion : 1; | ||
365 | |||
366 | /** High bits of Client Physical Address of the start of the buffer | ||
367 | * to be egressed. | ||
368 | * | ||
369 | * NOTE: Only 6 bits are actually needed here, as CPAs are | ||
370 | * currently 38 bits. So two bits could be scavenged from this. | ||
371 | */ | ||
372 | uint8_t cpa_hi; | ||
373 | |||
374 | /** The number of bytes to be egressed. */ | ||
375 | uint16_t length; | ||
376 | |||
377 | /** Low 32 bits of Client Physical Address of the start of the buffer | ||
378 | * to be egressed. | ||
379 | */ | ||
380 | uint32_t cpa_lo; | ||
381 | |||
382 | /** Checksum information (only used if 'compute_checksum'). */ | ||
383 | __netio_checksum_header_t checksum_data; | ||
384 | |||
385 | } lepp_cmd_t; | ||
386 | |||
387 | |||
388 | /** A chunk of physical memory for a TSO egress. */ | ||
389 | typedef struct | ||
390 | { | ||
391 | /** The low bits of the CPA. */ | ||
392 | uint32_t cpa_lo; | ||
393 | /** The high bits of the CPA. */ | ||
394 | uint16_t cpa_hi : 15; | ||
395 | /** Should this packet be sent directly from caches instead of DRAM, | ||
396 | * using hash-for-home to locate the packet data? | ||
397 | */ | ||
398 | uint16_t hash_for_home : 1; | ||
399 | /** The length in bytes. */ | ||
400 | uint16_t length; | ||
401 | } lepp_frag_t; | ||
402 | |||
403 | |||
404 | /** An LEPP command that handles TSO. */ | ||
405 | typedef struct | ||
406 | { | ||
407 | /** Is this a TSO transfer? | ||
408 | * | ||
409 | * NOTE: This field is always 1, to distinguish it from | ||
410 | * lepp_cmd_t. It must come first! | ||
411 | */ | ||
412 | uint8_t tso : 1; | ||
413 | |||
414 | /** Unused padding bits. */ | ||
415 | uint8_t _unused : 7; | ||
416 | |||
417 | /** Size of the header[] array in bytes. It must be in the range | ||
418 | * [40, 127], which are the smallest header for a TCP packet over | ||
419 | * Ethernet and the maximum possible prepend size supported by | ||
420 | * hardware, respectively. Note that the array storage must be | ||
421 | * padded out to a multiple of four bytes so that the following | ||
422 | * LEPP command is aligned properly. | ||
423 | */ | ||
424 | uint8_t header_size; | ||
425 | |||
426 | /** Byte offset of the IP header in header[]. */ | ||
427 | uint8_t ip_offset; | ||
428 | |||
429 | /** Byte offset of the TCP header in header[]. */ | ||
430 | uint8_t tcp_offset; | ||
431 | |||
432 | /** The number of bytes to use for the payload of each packet, | ||
433 | * except of course the last one, which may not have enough bytes. | ||
434 | * This means that each Ethernet packet except the last will have a | ||
435 | * size of header_size + payload_size. | ||
436 | */ | ||
437 | uint16_t payload_size; | ||
438 | |||
439 | /** The length of the 'frags' array that follows this struct. */ | ||
440 | uint16_t num_frags; | ||
441 | |||
442 | /** The actual frags. */ | ||
443 | lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */]; | ||
444 | |||
445 | /* | ||
446 | * The packet header template logically follows frags[], | ||
447 | * but you can't declare that in C. | ||
448 | * | ||
449 | * uint32_t header[header_size_in_words_rounded_up]; | ||
450 | */ | ||
451 | |||
452 | } lepp_tso_cmd_t; | ||
453 | |||
454 | |||
455 | /** An LEPP completion ring entry. */ | ||
456 | typedef void* lepp_comp_t; | ||
457 | |||
458 | |||
459 | /** Maximum number of frags for one TSO command. This is adapted from | ||
460 | * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for | ||
461 | * our page size of exactly 65536. We add one for a "body" fragment. | ||
462 | */ | ||
463 | #define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1) | ||
464 | |||
465 | /** Total number of bytes needed for an lepp_tso_cmd_t. */ | ||
466 | #define LEPP_TSO_CMD_SIZE(num_frags, header_size) \ | ||
467 | (sizeof(lepp_tso_cmd_t) + \ | ||
468 | (num_frags) * sizeof(lepp_frag_t) + \ | ||
469 | (((header_size) + 3) & -4)) | ||
470 | |||
471 | /** The size of the lepp "cmd" queue. */ | ||
472 | #define LEPP_CMD_QUEUE_BYTES \ | ||
473 | (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \ | ||
474 | (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t)) | ||
475 | |||
476 | /** The largest possible command that can go in lepp_queue_t::cmds[]. */ | ||
477 | #define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128) | ||
478 | |||
479 | /** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive). | ||
480 | */ | ||
481 | #define LEPP_CMD_LIMIT \ | ||
482 | (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE) | ||
483 | |||
484 | /** The maximum number of completions in an LEPP queue. */ | ||
485 | #define LEPP_COMP_QUEUE_SIZE \ | ||
486 | ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t)) | ||
487 | |||
488 | /** Increment an index modulo the queue size. */ | ||
489 | #define LEPP_QINC(var) \ | ||
490 | (var = __insn_mnz(var - (LEPP_COMP_QUEUE_SIZE - 1), var + 1)) | ||
491 | |||
492 | /** A queue used to convey egress commands from the client to LEPP. */ | ||
493 | typedef struct | ||
494 | { | ||
495 | /** Index of first completion not yet processed by user code. | ||
496 | * If this is equal to comp_busy, there are no such completions. | ||
497 | * | ||
498 | * NOTE: This is only read/written by the user. | ||
499 | */ | ||
500 | unsigned int comp_head; | ||
501 | |||
502 | /** Index of first completion record not yet completed. | ||
503 | * If this is equal to comp_tail, there are no such completions. | ||
504 | * This index gets advanced (modulo LEPP_QUEUE_SIZE) whenever | ||
505 | * a command with the 'completion' bit set is finished. | ||
506 | * | ||
507 | * NOTE: This is only written by LEPP, only read by the user. | ||
508 | */ | ||
509 | volatile unsigned int comp_busy; | ||
510 | |||
511 | /** Index of the first empty slot in the completion ring. | ||
512 | * Entries from this up to but not including comp_head (in ring order) | ||
513 | * can be filled in with completion data. | ||
514 | * | ||
515 | * NOTE: This is only read/written by the user. | ||
516 | */ | ||
517 | unsigned int comp_tail; | ||
518 | |||
519 | /** Byte index of first command enqueued for LEPP but not yet processed. | ||
520 | * | ||
521 | * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. | ||
522 | * | ||
523 | * NOTE: LEPP advances this counter as soon as it no longer needs | ||
524 | * the cmds[] storage for this entry, but the transfer is not actually | ||
525 | * complete (i.e. the buffer pointed to by the command is no longer | ||
526 | * needed) until comp_busy advances. | ||
527 | * | ||
528 | * If this is equal to cmd_tail, the ring is empty. | ||
529 | * | ||
530 | * NOTE: This is only written by LEPP, only read by the user. | ||
531 | */ | ||
532 | volatile unsigned int cmd_head; | ||
533 | |||
534 | /** Byte index of first empty slot in the command ring. This field can | ||
535 | * be incremented up to but not equal to cmd_head (because that would | ||
536 | * mean the ring is empty). | ||
537 | * | ||
538 | * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. | ||
539 | * | ||
540 | * NOTE: This is read/written by the user, only read by LEPP. | ||
541 | */ | ||
542 | volatile unsigned int cmd_tail; | ||
543 | |||
544 | /** A ring of variable-sized egress DMA commands. | ||
545 | * | ||
546 | * NOTE: Only written by the user, only read by LEPP. | ||
547 | */ | ||
548 | char cmds[LEPP_CMD_QUEUE_BYTES] | ||
549 | __attribute__((aligned(CHIP_L2_LINE_SIZE()))); | ||
550 | |||
551 | /** A ring of user completion data. | ||
552 | * NOTE: Only read/written by the user. | ||
553 | */ | ||
554 | lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE] | ||
555 | __attribute__((aligned(CHIP_L2_LINE_SIZE()))); | ||
556 | } lepp_queue_t; | ||
557 | |||
558 | |||
559 | /** An internal helper function for determining the number of entries | ||
560 | * available in a ring buffer, given that there is one sentinel. | ||
561 | */ | ||
562 | static inline unsigned int | ||
563 | _lepp_num_free_slots(unsigned int head, unsigned int tail) | ||
564 | { | ||
565 | /* | ||
566 | * One entry is reserved for use as a sentinel, to distinguish | ||
567 | * "empty" from "full". So we compute | ||
568 | * (head - tail - 1) % LEPP_QUEUE_SIZE, but without using a slow % operation. | ||
569 | */ | ||
570 | return (head - tail - 1) + ((head <= tail) ? LEPP_COMP_QUEUE_SIZE : 0); | ||
571 | } | ||
572 | |||
573 | |||
574 | /** Returns how many new comp entries can be enqueued. */ | ||
575 | static inline unsigned int | ||
576 | lepp_num_free_comp_slots(const lepp_queue_t* q) | ||
577 | { | ||
578 | return _lepp_num_free_slots(q->comp_head, q->comp_tail); | ||
579 | } | ||
580 | |||
581 | static inline int | ||
582 | lepp_qsub(int v1, int v2) | ||
583 | { | ||
584 | int delta = v1 - v2; | ||
585 | return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE); | ||
586 | } | ||
587 | |||
588 | |||
589 | /** FIXME: Check this from linux, via a new "pwrite()" call. */ | ||
590 | #define LIPP_VERSION 1 | ||
591 | |||
592 | |||
593 | /** We use exactly two bytes of alignment padding. */ | ||
594 | #define LIPP_PACKET_PADDING 2 | ||
595 | |||
596 | /** The minimum size of a "small" buffer (including the padding). */ | ||
597 | #define LIPP_SMALL_PACKET_SIZE 128 | ||
598 | |||
599 | /* | ||
600 | * NOTE: The following two values should total to less than around | ||
601 | * 13582, to keep the total size used for "lipp_state_t" below 64K. | ||
602 | */ | ||
603 | |||
604 | /** The maximum number of "small" buffers. | ||
605 | * This is enough for 53 network cpus with 128 credits. Note that | ||
606 | * if these are exhausted, we will fall back to using large buffers. | ||
607 | */ | ||
608 | #define LIPP_SMALL_BUFFERS 6785 | ||
609 | |||
610 | /** The maximum number of "large" buffers. | ||
611 | * This is enough for 53 network cpus with 128 credits. | ||
612 | */ | ||
613 | #define LIPP_LARGE_BUFFERS 6785 | ||
614 | |||
615 | #endif /* __DRV_XGBE_INTF_H__ */ | ||
diff --git a/arch/tile/include/hv/netio_errors.h b/arch/tile/include/hv/netio_errors.h new file mode 100644 index 000000000000..e1591bff61b5 --- /dev/null +++ b/arch/tile/include/hv/netio_errors.h | |||
@@ -0,0 +1,122 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | /** | ||
16 | * Error codes returned from NetIO routines. | ||
17 | */ | ||
18 | |||
19 | #ifndef __NETIO_ERRORS_H__ | ||
20 | #define __NETIO_ERRORS_H__ | ||
21 | |||
22 | /** | ||
23 | * @addtogroup error | ||
24 | * | ||
25 | * @brief The error codes returned by NetIO functions. | ||
26 | * | ||
27 | * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and | ||
28 | * a negative value if an error occurs. | ||
29 | * | ||
30 | * In cases where a NetIO function failed due to a error reported by | ||
31 | * system libraries, the error code will be the negation of the | ||
32 | * system errno at the time of failure. The @ref netio_strerror() | ||
33 | * function will deliver error strings for both NetIO and system error | ||
34 | * codes. | ||
35 | * | ||
36 | * @{ | ||
37 | */ | ||
38 | |||
39 | /** The set of all NetIO errors. */ | ||
40 | typedef enum | ||
41 | { | ||
42 | /** Operation successfully completed. */ | ||
43 | NETIO_NO_ERROR = 0, | ||
44 | |||
45 | /** A packet was successfully retrieved from an input queue. */ | ||
46 | NETIO_PKT = 0, | ||
47 | |||
48 | /** Largest NetIO error number. */ | ||
49 | NETIO_ERR_MAX = -701, | ||
50 | |||
51 | /** The tile is not registered with the IPP. */ | ||
52 | NETIO_NOT_REGISTERED = -701, | ||
53 | |||
54 | /** No packet was available to retrieve from the input queue. */ | ||
55 | NETIO_NOPKT = -702, | ||
56 | |||
57 | /** The requested function is not implemented. */ | ||
58 | NETIO_NOT_IMPLEMENTED = -703, | ||
59 | |||
60 | /** On a registration operation, the target queue already has the maximum | ||
61 | * number of tiles registered for it, and no more may be added. On a | ||
62 | * packet send operation, the output queue is full and nothing more can | ||
63 | * be queued until some of the queued packets are actually transmitted. */ | ||
64 | NETIO_QUEUE_FULL = -704, | ||
65 | |||
66 | /** The calling process or thread is not bound to exactly one CPU. */ | ||
67 | NETIO_BAD_AFFINITY = -705, | ||
68 | |||
69 | /** Cannot allocate memory on requested controllers. */ | ||
70 | NETIO_CANNOT_HOME = -706, | ||
71 | |||
72 | /** On a registration operation, the IPP specified is not configured | ||
73 | * to support the options requested; for instance, the application | ||
74 | * wants a specific type of tagged headers which the configured IPP | ||
75 | * doesn't support. Or, the supplied configuration information is | ||
76 | * not self-consistent, or is out of range; for instance, specifying | ||
77 | * both NETIO_RECV and NETIO_NO_RECV, or asking for more than | ||
78 | * NETIO_MAX_SEND_BUFFERS to be preallocated. On a VLAN or bucket | ||
79 | * configure operation, the number of items, or the base item, was | ||
80 | * out of range. | ||
81 | */ | ||
82 | NETIO_BAD_CONFIG = -707, | ||
83 | |||
84 | /** Too many tiles have registered to transmit packets. */ | ||
85 | NETIO_TOOMANY_XMIT = -708, | ||
86 | |||
87 | /** Packet transmission was attempted on a queue which was registered | ||
88 | with transmit disabled. */ | ||
89 | NETIO_UNREG_XMIT = -709, | ||
90 | |||
91 | /** This tile is already registered with the IPP. */ | ||
92 | NETIO_ALREADY_REGISTERED = -710, | ||
93 | |||
94 | /** The Ethernet link is down. The application should try again later. */ | ||
95 | NETIO_LINK_DOWN = -711, | ||
96 | |||
97 | /** An invalid memory buffer has been specified. This may be an unmapped | ||
98 | * virtual address, or one which does not meet alignment requirements. | ||
99 | * For netio_input_register(), this error may be returned when multiple | ||
100 | * processes specify different memory regions to be used for NetIO | ||
101 | * buffers. That can happen if these processes specify explicit memory | ||
102 | * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init() | ||
103 | * has not been called by a common ancestor of the processes. | ||
104 | */ | ||
105 | NETIO_FAULT = -712, | ||
106 | |||
107 | /** Cannot combine user-managed shared memory and cache coherence. */ | ||
108 | NETIO_BAD_CACHE_CONFIG = -713, | ||
109 | |||
110 | /** Smallest NetIO error number. */ | ||
111 | NETIO_ERR_MIN = -713, | ||
112 | |||
113 | #ifndef __DOXYGEN__ | ||
114 | /** Used internally to mean that no response is needed; never returned to | ||
115 | * an application. */ | ||
116 | NETIO_NO_RESPONSE = 1 | ||
117 | #endif | ||
118 | } netio_error_t; | ||
119 | |||
120 | /** @} */ | ||
121 | |||
122 | #endif /* __NETIO_ERRORS_H__ */ | ||
diff --git a/arch/tile/include/hv/netio_intf.h b/arch/tile/include/hv/netio_intf.h new file mode 100644 index 000000000000..8d20972aba2c --- /dev/null +++ b/arch/tile/include/hv/netio_intf.h | |||
@@ -0,0 +1,2975 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | /** | ||
16 | * NetIO interface structures and macros. | ||
17 | */ | ||
18 | |||
19 | #ifndef __NETIO_INTF_H__ | ||
20 | #define __NETIO_INTF_H__ | ||
21 | |||
22 | #include <hv/netio_errors.h> | ||
23 | |||
24 | #ifdef __KERNEL__ | ||
25 | #include <linux/types.h> | ||
26 | #else | ||
27 | #include <stdint.h> | ||
28 | #endif | ||
29 | |||
30 | #if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) | ||
31 | #include <assert.h> | ||
32 | #define netio_assert assert /**< Enable assertions from macros */ | ||
33 | #else | ||
34 | #define netio_assert(...) ((void)(0)) /**< Disable assertions from macros */ | ||
35 | #endif | ||
36 | |||
37 | /* | ||
38 | * If none of these symbols are defined, we're building libnetio in an | ||
39 | * environment where we have pthreads, so we'll enable locking. | ||
40 | */ | ||
41 | #if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \ | ||
42 | !defined(__NEWLIB__) | ||
43 | #define _NETIO_PTHREAD /**< Include a mutex in netio_queue_t below */ | ||
44 | |||
45 | /* | ||
46 | * If NETIO_UNLOCKED is defined, we don't do use per-cpu locks on | ||
47 | * per-packet NetIO operations. We still do pthread locking on things | ||
48 | * like netio_input_register, though. This is used for building | ||
49 | * libnetio_unlocked. | ||
50 | */ | ||
51 | #ifndef NETIO_UNLOCKED | ||
52 | |||
53 | /* Avoid PLT overhead by using our own inlined per-cpu lock. */ | ||
54 | #include <sched.h> | ||
55 | typedef int _netio_percpu_mutex_t; | ||
56 | |||
57 | static __inline int | ||
58 | _netio_percpu_mutex_init(_netio_percpu_mutex_t* lock) | ||
59 | { | ||
60 | *lock = 0; | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | static __inline int | ||
65 | _netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock) | ||
66 | { | ||
67 | while (__builtin_expect(__insn_tns(lock), 0)) | ||
68 | sched_yield(); | ||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | static __inline int | ||
73 | _netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock) | ||
74 | { | ||
75 | *lock = 0; | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | #else /* NETIO_UNLOCKED */ | ||
80 | |||
81 | /* Don't do any locking for per-packet NetIO operations. */ | ||
82 | typedef int _netio_percpu_mutex_t; | ||
83 | #define _netio_percpu_mutex_init(L) | ||
84 | #define _netio_percpu_mutex_lock(L) | ||
85 | #define _netio_percpu_mutex_unlock(L) | ||
86 | |||
87 | #endif /* NETIO_UNLOCKED */ | ||
88 | #endif /* !__HV__, !__BOGUX, !__KERNEL__, !__NEWLIB__ */ | ||
89 | |||
90 | /** How many tiles can register for a given queue. | ||
91 | * @ingroup setup */ | ||
92 | #define NETIO_MAX_TILES_PER_QUEUE 64 | ||
93 | |||
94 | |||
95 | /** Largest permissible queue identifier. | ||
96 | * @ingroup setup */ | ||
97 | #define NETIO_MAX_QUEUE_ID 255 | ||
98 | |||
99 | |||
100 | #ifndef __DOXYGEN__ | ||
101 | |||
102 | /* Metadata packet checksum/ethertype flags. */ | ||
103 | |||
104 | /** The L4 checksum has not been calculated. */ | ||
105 | #define _NETIO_PKT_NO_L4_CSUM_SHIFT 0 | ||
106 | #define _NETIO_PKT_NO_L4_CSUM_RMASK 1 | ||
107 | #define _NETIO_PKT_NO_L4_CSUM_MASK \ | ||
108 | (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT) | ||
109 | |||
110 | /** The L3 checksum has not been calculated. */ | ||
111 | #define _NETIO_PKT_NO_L3_CSUM_SHIFT 1 | ||
112 | #define _NETIO_PKT_NO_L3_CSUM_RMASK 1 | ||
113 | #define _NETIO_PKT_NO_L3_CSUM_MASK \ | ||
114 | (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT) | ||
115 | |||
116 | /** The L3 checksum is incorrect (or perhaps has not been calculated). */ | ||
117 | #define _NETIO_PKT_BAD_L3_CSUM_SHIFT 2 | ||
118 | #define _NETIO_PKT_BAD_L3_CSUM_RMASK 1 | ||
119 | #define _NETIO_PKT_BAD_L3_CSUM_MASK \ | ||
120 | (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT) | ||
121 | |||
122 | /** The Ethernet packet type is unrecognized. */ | ||
123 | #define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT 3 | ||
124 | #define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK 1 | ||
125 | #define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \ | ||
126 | (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \ | ||
127 | _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT) | ||
128 | |||
129 | /* Metadata packet type flags. */ | ||
130 | |||
131 | /** Where the packet type bits are; this field is the index into | ||
132 | * _netio_pkt_info. */ | ||
133 | #define _NETIO_PKT_TYPE_SHIFT 4 | ||
134 | #define _NETIO_PKT_TYPE_RMASK 0x3F | ||
135 | |||
136 | /** How many VLAN tags the packet has, and, if we have two, which one we | ||
137 | * actually grouped on. A VLAN within a proprietary (Marvell or Broadcom) | ||
138 | * tag is counted here. */ | ||
139 | #define _NETIO_PKT_VLAN_SHIFT 4 | ||
140 | #define _NETIO_PKT_VLAN_RMASK 0x3 | ||
141 | #define _NETIO_PKT_VLAN_MASK \ | ||
142 | (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT) | ||
143 | #define _NETIO_PKT_VLAN_NONE 0 /* No VLAN tag. */ | ||
144 | #define _NETIO_PKT_VLAN_ONE 1 /* One VLAN tag. */ | ||
145 | #define _NETIO_PKT_VLAN_TWO_OUTER 2 /* Two VLAN tags, outer one used. */ | ||
146 | #define _NETIO_PKT_VLAN_TWO_INNER 3 /* Two VLAN tags, inner one used. */ | ||
147 | |||
148 | /** Which proprietary tags the packet has. */ | ||
149 | #define _NETIO_PKT_TAG_SHIFT 6 | ||
150 | #define _NETIO_PKT_TAG_RMASK 0x3 | ||
151 | #define _NETIO_PKT_TAG_MASK \ | ||
152 | (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT) | ||
153 | #define _NETIO_PKT_TAG_NONE 0 /* No proprietary tags. */ | ||
154 | #define _NETIO_PKT_TAG_MRVL 1 /* Marvell HyperG.Stack tags. */ | ||
155 | #define _NETIO_PKT_TAG_MRVL_EXT 2 /* HyperG.Stack extended tags. */ | ||
156 | #define _NETIO_PKT_TAG_BRCM 3 /* Broadcom HiGig tags. */ | ||
157 | |||
158 | /** Whether a packet has an LLC + SNAP header. */ | ||
159 | #define _NETIO_PKT_SNAP_SHIFT 8 | ||
160 | #define _NETIO_PKT_SNAP_RMASK 0x1 | ||
161 | #define _NETIO_PKT_SNAP_MASK \ | ||
162 | (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT) | ||
163 | |||
164 | /* NOTE: Bits 9 and 10 are unused. */ | ||
165 | |||
166 | /** Length of any custom data before the L2 header, in words. */ | ||
167 | #define _NETIO_PKT_CUSTOM_LEN_SHIFT 11 | ||
168 | #define _NETIO_PKT_CUSTOM_LEN_RMASK 0x1F | ||
169 | #define _NETIO_PKT_CUSTOM_LEN_MASK \ | ||
170 | (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT) | ||
171 | |||
172 | /** The L4 checksum is incorrect (or perhaps has not been calculated). */ | ||
173 | #define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16 | ||
174 | #define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1 | ||
175 | #define _NETIO_PKT_BAD_L4_CSUM_MASK \ | ||
176 | (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT) | ||
177 | |||
178 | /** Length of the L2 header, in words. */ | ||
179 | #define _NETIO_PKT_L2_LEN_SHIFT 17 | ||
180 | #define _NETIO_PKT_L2_LEN_RMASK 0x1F | ||
181 | #define _NETIO_PKT_L2_LEN_MASK \ | ||
182 | (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT) | ||
183 | |||
184 | |||
185 | /* Flags in minimal packet metadata. */ | ||
186 | |||
187 | /** We need an eDMA checksum on this packet. */ | ||
188 | #define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT 0 | ||
189 | #define _NETIO_PKT_NEED_EDMA_CSUM_RMASK 1 | ||
190 | #define _NETIO_PKT_NEED_EDMA_CSUM_MASK \ | ||
191 | (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT) | ||
192 | |||
193 | /* Data within the packet information table. */ | ||
194 | |||
195 | /* Note that, for efficiency, code which uses these fields assumes that none | ||
196 | * of the shift values below are zero. See uses below for an explanation. */ | ||
197 | |||
198 | /** Offset within the L2 header of the innermost ethertype (in halfwords). */ | ||
199 | #define _NETIO_PKT_INFO_ETYPE_SHIFT 6 | ||
200 | #define _NETIO_PKT_INFO_ETYPE_RMASK 0x1F | ||
201 | |||
202 | /** Offset within the L2 header of the VLAN tag (in halfwords). */ | ||
203 | #define _NETIO_PKT_INFO_VLAN_SHIFT 11 | ||
204 | #define _NETIO_PKT_INFO_VLAN_RMASK 0x1F | ||
205 | |||
206 | #endif | ||
207 | |||
208 | |||
209 | /** The size of a memory buffer representing a small packet. | ||
210 | * @ingroup egress */ | ||
211 | #define SMALL_PACKET_SIZE 256 | ||
212 | |||
213 | /** The size of a memory buffer representing a large packet. | ||
214 | * @ingroup egress */ | ||
215 | #define LARGE_PACKET_SIZE 2048 | ||
216 | |||
217 | /** The size of a memory buffer representing a jumbo packet. | ||
218 | * @ingroup egress */ | ||
219 | #define JUMBO_PACKET_SIZE (12 * 1024) | ||
220 | |||
221 | |||
222 | /* Common ethertypes. | ||
223 | * @ingroup ingress */ | ||
224 | /** @{ */ | ||
225 | /** The ethertype of IPv4. */ | ||
226 | #define ETHERTYPE_IPv4 (0x0800) | ||
227 | /** The ethertype of ARP. */ | ||
228 | #define ETHERTYPE_ARP (0x0806) | ||
229 | /** The ethertype of VLANs. */ | ||
230 | #define ETHERTYPE_VLAN (0x8100) | ||
231 | /** The ethertype of a Q-in-Q header. */ | ||
232 | #define ETHERTYPE_Q_IN_Q (0x9100) | ||
233 | /** The ethertype of IPv6. */ | ||
234 | #define ETHERTYPE_IPv6 (0x86DD) | ||
235 | /** The ethertype of MPLS. */ | ||
236 | #define ETHERTYPE_MPLS (0x8847) | ||
237 | /** @} */ | ||
238 | |||
239 | |||
240 | /** The possible return values of NETIO_PKT_STATUS. | ||
241 | * @ingroup ingress | ||
242 | */ | ||
243 | typedef enum | ||
244 | { | ||
245 | /** No problems were detected with this packet. */ | ||
246 | NETIO_PKT_STATUS_OK, | ||
247 | /** The packet is undersized; this is expected behavior if the packet's | ||
248 | * ethertype is unrecognized, but otherwise the packet is likely corrupt. */ | ||
249 | NETIO_PKT_STATUS_UNDERSIZE, | ||
250 | /** The packet is oversized and some trailing bytes have been discarded. | ||
251 | This is expected behavior for short packets, since it's impossible to | ||
252 | precisely determine the amount of padding which may have been added to | ||
253 | them to make them meet the minimum Ethernet packet size. */ | ||
254 | NETIO_PKT_STATUS_OVERSIZE, | ||
255 | /** The packet was judged to be corrupt by hardware (for instance, it had | ||
256 | a bad CRC, or part of it was discarded due to lack of buffer space in | ||
257 | the I/O shim) and should be discarded. */ | ||
258 | NETIO_PKT_STATUS_BAD | ||
259 | } netio_pkt_status_t; | ||
260 | |||
261 | |||
262 | /** Log2 of how many buckets we have. */ | ||
263 | #define NETIO_LOG2_NUM_BUCKETS (10) | ||
264 | |||
265 | /** How many buckets we have. | ||
266 | * @ingroup ingress */ | ||
267 | #define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS) | ||
268 | |||
269 | |||
270 | /** | ||
271 | * @brief A group-to-bucket identifier. | ||
272 | * | ||
273 | * @ingroup setup | ||
274 | * | ||
275 | * This tells us what to do with a given group. | ||
276 | */ | ||
277 | typedef union { | ||
278 | /** The header broken down into bits. */ | ||
279 | struct { | ||
280 | /** Whether we should balance on L4, if available */ | ||
281 | unsigned int __balance_on_l4:1; | ||
282 | /** Whether we should balance on L3, if available */ | ||
283 | unsigned int __balance_on_l3:1; | ||
284 | /** Whether we should balance on L2, if available */ | ||
285 | unsigned int __balance_on_l2:1; | ||
286 | /** Reserved for future use */ | ||
287 | unsigned int __reserved:1; | ||
288 | /** The base bucket to use to send traffic */ | ||
289 | unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS; | ||
290 | /** The mask to apply to the balancing value. This must be one less | ||
291 | * than a power of two, e.g. 0x3 or 0xFF. | ||
292 | */ | ||
293 | unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS; | ||
294 | /** Pad to 32 bits */ | ||
295 | unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS); | ||
296 | } bits; | ||
297 | /** To send out the IDN. */ | ||
298 | unsigned int word; | ||
299 | } | ||
300 | netio_group_t; | ||
301 | |||
302 | |||
303 | /** | ||
304 | * @brief A VLAN-to-bucket identifier. | ||
305 | * | ||
306 | * @ingroup setup | ||
307 | * | ||
308 | * This tells us what to do with a given VLAN. | ||
309 | */ | ||
310 | typedef netio_group_t netio_vlan_t; | ||
311 | |||
312 | |||
313 | /** | ||
314 | * A bucket-to-queue mapping. | ||
315 | * @ingroup setup | ||
316 | */ | ||
317 | typedef unsigned char netio_bucket_t; | ||
318 | |||
319 | |||
320 | /** | ||
321 | * A packet size can always fit in a netio_size_t. | ||
322 | * @ingroup setup | ||
323 | */ | ||
324 | typedef unsigned int netio_size_t; | ||
325 | |||
326 | |||
327 | /** | ||
328 | * @brief Ethernet standard (ingress) packet metadata. | ||
329 | * | ||
330 | * @ingroup ingress | ||
331 | * | ||
332 | * This is additional data associated with each packet. | ||
333 | * This structure is opaque and accessed through the @ref ingress. | ||
334 | * | ||
335 | * Also, the buffer population operation currently assumes that standard | ||
336 | * metadata is at least as large as minimal metadata, and will need to be | ||
337 | * modified if that is no longer the case. | ||
338 | */ | ||
339 | typedef struct | ||
340 | { | ||
341 | #ifdef __DOXYGEN__ | ||
342 | /** This structure is opaque. */ | ||
343 | unsigned char opaque[24]; | ||
344 | #else | ||
345 | /** The overall ordinal of the packet */ | ||
346 | unsigned int __packet_ordinal; | ||
347 | /** The ordinal of the packet within the group */ | ||
348 | unsigned int __group_ordinal; | ||
349 | /** The best flow hash IPP could compute. */ | ||
350 | unsigned int __flow_hash; | ||
351 | /** Flags pertaining to checksum calculation, packet type, etc. */ | ||
352 | unsigned int __flags; | ||
353 | /** The first word of "user data". */ | ||
354 | unsigned int __user_data_0; | ||
355 | /** The second word of "user data". */ | ||
356 | unsigned int __user_data_1; | ||
357 | #endif | ||
358 | } | ||
359 | netio_pkt_metadata_t; | ||
360 | |||
361 | |||
362 | /** To ensure that the L3 header is aligned mod 4, the L2 header should be | ||
363 | * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes | ||
364 | * long. The standard way to do this is to simply add 2 bytes of padding | ||
365 | * before the L2 header. | ||
366 | */ | ||
367 | #define NETIO_PACKET_PADDING 2 | ||
368 | |||
369 | |||
370 | |||
371 | /** | ||
372 | * @brief Ethernet minimal (egress) packet metadata. | ||
373 | * | ||
374 | * @ingroup egress | ||
375 | * | ||
376 | * This structure represents information about packets which have | ||
377 | * been processed by @ref netio_populate_buffer() or | ||
378 | * @ref netio_populate_prepend_buffer(). This structure is opaque | ||
379 | * and accessed through the @ref egress. | ||
380 | * | ||
381 | * @internal This structure is actually copied into the memory used by | ||
382 | * standard metadata, which is assumed to be large enough. | ||
383 | */ | ||
384 | typedef struct | ||
385 | { | ||
386 | #ifdef __DOXYGEN__ | ||
387 | /** This structure is opaque. */ | ||
388 | unsigned char opaque[14]; | ||
389 | #else | ||
390 | /** The offset of the L2 header from the start of the packet data. */ | ||
391 | unsigned short l2_offset; | ||
392 | /** The offset of the L3 header from the start of the packet data. */ | ||
393 | unsigned short l3_offset; | ||
394 | /** Where to write the checksum. */ | ||
395 | unsigned char csum_location; | ||
396 | /** Where to start checksumming from. */ | ||
397 | unsigned char csum_start; | ||
398 | /** Flags pertaining to checksum calculation etc. */ | ||
399 | unsigned short flags; | ||
400 | /** The L2 length of the packet. */ | ||
401 | unsigned short l2_length; | ||
402 | /** The checksum with which to seed the checksum generator. */ | ||
403 | unsigned short csum_seed; | ||
404 | /** How much to checksum. */ | ||
405 | unsigned short csum_length; | ||
406 | #endif | ||
407 | } | ||
408 | netio_pkt_minimal_metadata_t; | ||
409 | |||
410 | |||
411 | #ifndef __DOXYGEN__ | ||
412 | |||
413 | /** | ||
414 | * @brief An I/O notification header. | ||
415 | * | ||
416 | * This is the first word of data received from an I/O shim in a notification | ||
417 | * packet. It contains framing and status information. | ||
418 | */ | ||
419 | typedef union | ||
420 | { | ||
421 | unsigned int word; /**< The whole word. */ | ||
422 | /** The various fields. */ | ||
423 | struct | ||
424 | { | ||
425 | unsigned int __channel:7; /**< Resource channel. */ | ||
426 | unsigned int __type:4; /**< Type. */ | ||
427 | unsigned int __ack:1; /**< Whether an acknowledgement is needed. */ | ||
428 | unsigned int __reserved:1; /**< Reserved. */ | ||
429 | unsigned int __protocol:1; /**< A protocol-specific word is added. */ | ||
430 | unsigned int __status:2; /**< Status of the transfer. */ | ||
431 | unsigned int __framing:2; /**< Framing of the transfer. */ | ||
432 | unsigned int __transfer_size:14; /**< Transfer size in bytes (total). */ | ||
433 | } bits; | ||
434 | } | ||
435 | __netio_pkt_notif_t; | ||
436 | |||
437 | |||
438 | /** | ||
439 | * Returns the base address of the packet. | ||
440 | */ | ||
441 | #define _NETIO_PKT_HANDLE_BASE(p) \ | ||
442 | ((unsigned char*)((p).word & 0xFFFFFFC0)) | ||
443 | |||
444 | /** | ||
445 | * Returns the base address of the packet. | ||
446 | */ | ||
447 | #define _NETIO_PKT_BASE(p) \ | ||
448 | _NETIO_PKT_HANDLE_BASE(p->__packet) | ||
449 | |||
450 | /** | ||
451 | * @brief An I/O notification packet (second word) | ||
452 | * | ||
453 | * This is the second word of data received from an I/O shim in a notification | ||
454 | * packet. This is the virtual address of the packet buffer, plus some flag | ||
455 | * bits. (The virtual address of the packet is always 256-byte aligned so we | ||
456 | * have room for 8 bits' worth of flags in the low 8 bits.) | ||
457 | * | ||
458 | * @internal | ||
459 | * NOTE: The low two bits must contain "__queue", so the "packet size" | ||
460 | * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly. | ||
461 | * | ||
462 | * If __addr or __offset are moved, _NETIO_PKT_BASE | ||
463 | * (defined right below this) must be changed. | ||
464 | */ | ||
465 | typedef union | ||
466 | { | ||
467 | unsigned int word; /**< The whole word. */ | ||
468 | /** The various fields. */ | ||
469 | struct | ||
470 | { | ||
471 | /** Which queue the packet will be returned to once it is sent back to | ||
472 | the IPP. This is one of the SIZE_xxx values. */ | ||
473 | unsigned int __queue:2; | ||
474 | |||
475 | /** The IPP handle of the sending IPP. */ | ||
476 | unsigned int __ipp_handle:2; | ||
477 | |||
478 | /** Reserved for future use. */ | ||
479 | unsigned int __reserved:1; | ||
480 | |||
481 | /** If 1, this packet has minimal (egress) metadata; otherwise, it | ||
482 | has standard (ingress) metadata. */ | ||
483 | unsigned int __minimal:1; | ||
484 | |||
485 | /** Offset of the metadata within the packet. This value is multiplied | ||
486 | * by 64 and added to the base packet address to get the metadata | ||
487 | * address. Note that this field is aligned within the word such that | ||
488 | * you can easily extract the metadata address with a 26-bit mask. */ | ||
489 | unsigned int __offset:2; | ||
490 | |||
491 | /** The top 24 bits of the packet's virtual address. */ | ||
492 | unsigned int __addr:24; | ||
493 | } bits; | ||
494 | } | ||
495 | __netio_pkt_handle_t; | ||
496 | |||
497 | #endif /* !__DOXYGEN__ */ | ||
498 | |||
499 | |||
500 | /** | ||
501 | * @brief A handle for an I/O packet's storage. | ||
502 | * @ingroup ingress | ||
503 | * | ||
504 | * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its | ||
505 | * packet metadata removed. It is a much smaller type that exists to | ||
506 | * facilitate applications where the full ::netio_pkt_t type is too | ||
507 | * large, such as those that cache enormous numbers of packets or wish | ||
508 | * to transmit packet descriptors over the UDN. | ||
509 | * | ||
510 | * Because there is no metadata, most ::netio_pkt_t operations cannot be | ||
511 | * performed on a netio_pkt_handle_t. It supports only | ||
512 | * netio_free_handle() (to free the buffer) and | ||
513 | * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents). | ||
514 | * The application must acquire any additional metadata it wants from the | ||
515 | * original ::netio_pkt_t and record it separately. | ||
516 | * | ||
517 | * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling | ||
518 | * NETIO_PKT_HANDLE(). An invalid handle (analogous to NULL) can be | ||
519 | * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can | ||
520 | * be tested for validity with NETIO_PKT_HANDLE_IS_VALID(). | ||
521 | */ | ||
522 | typedef struct | ||
523 | { | ||
524 | unsigned int word; /**< Opaque bits. */ | ||
525 | } netio_pkt_handle_t; | ||
526 | |||
527 | /** | ||
528 | * @brief A packet descriptor. | ||
529 | * | ||
530 | * @ingroup ingress | ||
531 | * @ingroup egress | ||
532 | * | ||
533 | * This data structure represents a packet. The structure is manipulated | ||
534 | * through the @ref ingress and the @ref egress. | ||
535 | * | ||
536 | * While the contents of a netio_pkt_t are opaque, the structure itself is | ||
537 | * portable. This means that it may be shared between all tiles which have | ||
538 | * done a netio_input_register() call for the interface on which the pkt_t | ||
539 | * was initially received (via netio_get_packet()) or retrieved (via | ||
540 | * netio_get_buffer()). The contents of a netio_pkt_t can be transmitted to | ||
541 | * another tile via shared memory, or via a UDN message, or by other means. | ||
542 | * The destination tile may then use the pkt_t as if it had originally been | ||
543 | * received locally; it may read or write the packet's data, read its | ||
544 | * metadata, free the packet, send the packet, transfer the netio_pkt_t to | ||
545 | * yet another tile, and so forth. | ||
546 | * | ||
547 | * Once a netio_pkt_t has been transferred to a second tile, the first tile | ||
548 | * should not reference the original copy; in particular, if more than one | ||
549 | * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will | ||
550 | * become corrupted. Note also that each tile which reads or modifies | ||
551 | * packet data must obey the memory coherency rules outlined in @ref input. | ||
552 | */ | ||
553 | typedef struct | ||
554 | { | ||
555 | #ifdef __DOXYGEN__ | ||
556 | /** This structure is opaque. */ | ||
557 | unsigned char opaque[32]; | ||
558 | #else | ||
559 | /** For an ingress packet (one with standard metadata), this is the | ||
560 | * notification header we got from the I/O shim. For an egress packet | ||
561 | * (one with minimal metadata), this word is zero if the packet has not | ||
562 | * been populated, and nonzero if it has. */ | ||
563 | __netio_pkt_notif_t __notif_header; | ||
564 | |||
565 | /** Virtual address of the packet buffer, plus state flags. */ | ||
566 | __netio_pkt_handle_t __packet; | ||
567 | |||
568 | /** Metadata associated with the packet. */ | ||
569 | netio_pkt_metadata_t __metadata; | ||
570 | #endif | ||
571 | } | ||
572 | netio_pkt_t; | ||
573 | |||
574 | |||
575 | #ifndef __DOXYGEN__ | ||
576 | |||
577 | #define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header) | ||
578 | #define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle) | ||
579 | #define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue) | ||
580 | #define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header) | ||
581 | #define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle) | ||
582 | #define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal) | ||
583 | #define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue) | ||
584 | #define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags) | ||
585 | |||
586 | /* Packet information table, used by the attribute access functions below. */ | ||
587 | extern const uint16_t _netio_pkt_info[]; | ||
588 | |||
589 | #endif /* __DOXYGEN__ */ | ||
590 | |||
591 | |||
592 | #ifndef __DOXYGEN__ | ||
593 | /* These macros are deprecated and will disappear in a future MDE release. */ | ||
594 | #define NETIO_PKT_GOOD_CHECKSUM(pkt) \ | ||
595 | NETIO_PKT_L4_CSUM_CORRECT(pkt) | ||
596 | #define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \ | ||
597 | NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt) | ||
598 | #endif /* __DOXYGEN__ */ | ||
599 | |||
600 | |||
601 | /* Packet attribute access functions. */ | ||
602 | |||
603 | /** Return a pointer to the metadata for a packet. | ||
604 | * @ingroup ingress | ||
605 | * | ||
606 | * Calling this function once and passing the result to other retrieval | ||
607 | * functions with a "_M" suffix usually improves performance. This | ||
608 | * function must be called on an 'ingress' packet (i.e. one retrieved | ||
609 | * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or | ||
610 | * @ref netio_populate_prepend_buffer have not been called). Use of this | ||
611 | * function on an 'egress' packet will cause an assertion failure. | ||
612 | * | ||
613 | * @param[in] pkt Packet on which to operate. | ||
614 | * @return A pointer to the packet's standard metadata. | ||
615 | */ | ||
616 | static __inline netio_pkt_metadata_t* | ||
617 | NETIO_PKT_METADATA(netio_pkt_t* pkt) | ||
618 | { | ||
619 | netio_assert(!pkt->__packet.bits.__minimal); | ||
620 | return &pkt->__metadata; | ||
621 | } | ||
622 | |||
623 | |||
624 | /** Return a pointer to the minimal metadata for a packet. | ||
625 | * @ingroup egress | ||
626 | * | ||
627 | * Calling this function once and passing the result to other retrieval | ||
628 | * functions with a "_MM" suffix usually improves performance. This | ||
629 | * function must be called on an 'egress' packet (i.e. one on which | ||
630 | * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer() | ||
631 | * have been called, or one retrieved by @ref netio_get_buffer()). Use of | ||
632 | * this function on an 'ingress' packet will cause an assertion failure. | ||
633 | * | ||
634 | * @param[in] pkt Packet on which to operate. | ||
635 | * @return A pointer to the packet's standard metadata. | ||
636 | */ | ||
637 | static __inline netio_pkt_minimal_metadata_t* | ||
638 | NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt) | ||
639 | { | ||
640 | netio_assert(pkt->__packet.bits.__minimal); | ||
641 | return (netio_pkt_minimal_metadata_t*) &pkt->__metadata; | ||
642 | } | ||
643 | |||
644 | |||
645 | /** Determine whether a packet has 'minimal' metadata. | ||
646 | * @ingroup pktfuncs | ||
647 | * | ||
648 | * This function will return nonzero if the packet is an 'egress' | ||
649 | * packet (i.e. one on which @ref netio_populate_buffer() or | ||
650 | * @ref netio_populate_prepend_buffer() have been called, or one | ||
651 | * retrieved by @ref netio_get_buffer()), and zero if the packet | ||
652 | * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(), | ||
653 | * which has not been converted into an 'egress' packet). | ||
654 | * | ||
655 | * @param[in] pkt Packet on which to operate. | ||
656 | * @return Nonzero if the packet has minimal metadata. | ||
657 | */ | ||
658 | static __inline unsigned int | ||
659 | NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt) | ||
660 | { | ||
661 | return pkt->__packet.bits.__minimal; | ||
662 | } | ||
663 | |||
664 | |||
665 | /** Return a handle for a packet's storage. | ||
666 | * @ingroup pktfuncs | ||
667 | * | ||
668 | * @param[in] pkt Packet on which to operate. | ||
669 | * @return A handle for the packet's storage. | ||
670 | */ | ||
671 | static __inline netio_pkt_handle_t | ||
672 | NETIO_PKT_HANDLE(netio_pkt_t* pkt) | ||
673 | { | ||
674 | netio_pkt_handle_t h; | ||
675 | h.word = pkt->__packet.word; | ||
676 | return h; | ||
677 | } | ||
678 | |||
679 | |||
680 | /** A special reserved value indicating the absence of a packet handle. | ||
681 | * | ||
682 | * @ingroup pktfuncs | ||
683 | */ | ||
684 | #define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 }) | ||
685 | |||
686 | |||
687 | /** Test whether a packet handle is valid. | ||
688 | * | ||
689 | * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE | ||
690 | * to indicate no packet at all. This function tests to see if a packet | ||
691 | * handle is a real handle, not this special reserved value. | ||
692 | * | ||
693 | * @ingroup pktfuncs | ||
694 | * | ||
695 | * @param[in] handle Handle on which to operate. | ||
696 | * @return One if the packet handle is valid, else zero. | ||
697 | */ | ||
698 | static __inline unsigned int | ||
699 | NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle) | ||
700 | { | ||
701 | return handle.word != 0; | ||
702 | } | ||
703 | |||
704 | |||
705 | |||
706 | /** Return a pointer to the start of the packet's custom header. | ||
707 | * A custom header may or may not be present, depending upon the IPP; its | ||
708 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
709 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
710 | * the custom header precedes the L2 header in the packet buffer. | ||
711 | * @ingroup ingress | ||
712 | * | ||
713 | * @param[in] handle Handle on which to operate. | ||
714 | * @return A pointer to start of the packet. | ||
715 | */ | ||
716 | static __inline unsigned char* | ||
717 | NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle) | ||
718 | { | ||
719 | return _NETIO_PKT_HANDLE_BASE(handle) + NETIO_PACKET_PADDING; | ||
720 | } | ||
721 | |||
722 | |||
723 | /** Return the length of the packet's custom header. | ||
724 | * A custom header may or may not be present, depending upon the IPP; its | ||
725 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
726 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
727 | * the custom header precedes the L2 header in the packet buffer. | ||
728 | * | ||
729 | * @ingroup ingress | ||
730 | * | ||
731 | * @param[in] mda Pointer to packet's standard metadata. | ||
732 | * @param[in] pkt Packet on which to operate. | ||
733 | * @return The length of the packet's custom header, in bytes. | ||
734 | */ | ||
735 | static __inline netio_size_t | ||
736 | NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
737 | { | ||
738 | /* | ||
739 | * Note that we effectively need to extract a quantity from the flags word | ||
740 | * which is measured in words, and then turn it into bytes by shifting | ||
741 | * it left by 2. We do this all at once by just shifting right two less | ||
742 | * bits, and shifting the mask up two bits. | ||
743 | */ | ||
744 | return ((mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2)) & | ||
745 | (_NETIO_PKT_CUSTOM_LEN_RMASK << 2)); | ||
746 | } | ||
747 | |||
748 | |||
749 | /** Return the length of the packet, starting with the custom header. | ||
750 | * A custom header may or may not be present, depending upon the IPP; its | ||
751 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
752 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
753 | * the custom header precedes the L2 header in the packet buffer. | ||
754 | * @ingroup ingress | ||
755 | * | ||
756 | * @param[in] mda Pointer to packet's standard metadata. | ||
757 | * @param[in] pkt Packet on which to operate. | ||
758 | * @return The length of the packet, in bytes. | ||
759 | */ | ||
760 | static __inline netio_size_t | ||
761 | NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
762 | { | ||
763 | return (__NETIO_PKT_NOTIF_HEADER(pkt).bits.__transfer_size - | ||
764 | NETIO_PACKET_PADDING); | ||
765 | } | ||
766 | |||
767 | |||
768 | /** Return a pointer to the start of the packet's custom header. | ||
769 | * A custom header may or may not be present, depending upon the IPP; its | ||
770 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
771 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
772 | * the custom header precedes the L2 header in the packet buffer. | ||
773 | * @ingroup ingress | ||
774 | * | ||
775 | * @param[in] mda Pointer to packet's standard metadata. | ||
776 | * @param[in] pkt Packet on which to operate. | ||
777 | * @return A pointer to start of the packet. | ||
778 | */ | ||
779 | static __inline unsigned char* | ||
780 | NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
781 | { | ||
782 | return NETIO_PKT_CUSTOM_DATA_H(NETIO_PKT_HANDLE(pkt)); | ||
783 | } | ||
784 | |||
785 | |||
786 | /** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. | ||
787 | * @ingroup ingress | ||
788 | * | ||
789 | * @param[in] mda Pointer to packet's standard metadata. | ||
790 | * @param[in] pkt Packet on which to operate. | ||
791 | * @return The length of the packet's L2 header, in bytes. | ||
792 | */ | ||
793 | static __inline netio_size_t | ||
794 | NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
795 | { | ||
796 | /* | ||
797 | * Note that we effectively need to extract a quantity from the flags word | ||
798 | * which is measured in words, and then turn it into bytes by shifting | ||
799 | * it left by 2. We do this all at once by just shifting right two less | ||
800 | * bits, and shifting the mask up two bits. We then add two bytes. | ||
801 | */ | ||
802 | return ((mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2)) & | ||
803 | (_NETIO_PKT_L2_LEN_RMASK << 2)) + 2; | ||
804 | } | ||
805 | |||
806 | |||
807 | /** Return the length of the packet, starting with the L2 (Ethernet) header. | ||
808 | * @ingroup ingress | ||
809 | * | ||
810 | * @param[in] mda Pointer to packet's standard metadata. | ||
811 | * @param[in] pkt Packet on which to operate. | ||
812 | * @return The length of the packet, in bytes. | ||
813 | */ | ||
814 | static __inline netio_size_t | ||
815 | NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
816 | { | ||
817 | return (NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt) - | ||
818 | NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda,pkt)); | ||
819 | } | ||
820 | |||
821 | |||
822 | /** Return a pointer to the start of the packet's L2 (Ethernet) header. | ||
823 | * @ingroup ingress | ||
824 | * | ||
825 | * @param[in] mda Pointer to packet's standard metadata. | ||
826 | * @param[in] pkt Packet on which to operate. | ||
827 | * @return A pointer to start of the packet. | ||
828 | */ | ||
829 | static __inline unsigned char* | ||
830 | NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
831 | { | ||
832 | return (NETIO_PKT_CUSTOM_DATA_M(mda, pkt) + | ||
833 | NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt)); | ||
834 | } | ||
835 | |||
836 | |||
837 | /** Retrieve the length of the packet, starting with the L3 (generally, | ||
838 | * the IP) header. | ||
839 | * @ingroup ingress | ||
840 | * | ||
841 | * @param[in] mda Pointer to packet's standard metadata. | ||
842 | * @param[in] pkt Packet on which to operate. | ||
843 | * @return Length of the packet's L3 header and data, in bytes. | ||
844 | */ | ||
845 | static __inline netio_size_t | ||
846 | NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
847 | { | ||
848 | return (NETIO_PKT_L2_LENGTH_M(mda, pkt) - | ||
849 | NETIO_PKT_L2_HEADER_LENGTH_M(mda,pkt)); | ||
850 | } | ||
851 | |||
852 | |||
853 | /** Return a pointer to the packet's L3 (generally, the IP) header. | ||
854 | * @ingroup ingress | ||
855 | * | ||
856 | * Note that we guarantee word alignment of the L3 header. | ||
857 | * | ||
858 | * @param[in] mda Pointer to packet's standard metadata. | ||
859 | * @param[in] pkt Packet on which to operate. | ||
860 | * @return A pointer to the packet's L3 header. | ||
861 | */ | ||
862 | static __inline unsigned char* | ||
863 | NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
864 | { | ||
865 | return (NETIO_PKT_L2_DATA_M(mda, pkt) + | ||
866 | NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt)); | ||
867 | } | ||
868 | |||
869 | |||
870 | /** Return the ordinal of the packet. | ||
871 | * @ingroup ingress | ||
872 | * | ||
873 | * Each packet is given an ordinal number when it is delivered by the IPP. | ||
874 | * In the medium term, the ordinal is unique and monotonically increasing, | ||
875 | * being incremented by 1 for each packet; the ordinal of the first packet | ||
876 | * delivered after the IPP starts is zero. (Since the ordinal is of finite | ||
877 | * size, given enough input packets, it will eventually wrap around to zero; | ||
878 | * in the long term, therefore, ordinals are not unique.) The ordinals | ||
879 | * handed out by different IPPs are not disjoint, so two packets from | ||
880 | * different IPPs may have identical ordinals. Packets dropped by the | ||
881 | * IPP or by the I/O shim are not assigned ordinals. | ||
882 | * | ||
883 | * @param[in] mda Pointer to packet's standard metadata. | ||
884 | * @param[in] pkt Packet on which to operate. | ||
885 | * @return The packet's per-IPP packet ordinal. | ||
886 | */ | ||
887 | static __inline unsigned int | ||
888 | NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
889 | { | ||
890 | return mda->__packet_ordinal; | ||
891 | } | ||
892 | |||
893 | |||
894 | /** Return the per-group ordinal of the packet. | ||
895 | * @ingroup ingress | ||
896 | * | ||
897 | * Each packet is given a per-group ordinal number when it is | ||
898 | * delivered by the IPP. By default, the group is the packet's VLAN, | ||
899 | * although IPP can be recompiled to use different values. In | ||
900 | * the medium term, the ordinal is unique and monotonically | ||
901 | * increasing, being incremented by 1 for each packet; the ordinal of | ||
902 | * the first packet distributed to a particular group is zero. | ||
903 | * (Since the ordinal is of finite size, given enough input packets, | ||
904 | * it will eventually wrap around to zero; in the long term, | ||
905 | * therefore, ordinals are not unique.) The ordinals handed out by | ||
906 | * different IPPs are not disjoint, so two packets from different IPPs | ||
907 | * may have identical ordinals; similarly, packets distributed to | ||
908 | * different groups may have identical ordinals. Packets dropped by | ||
909 | * the IPP or by the I/O shim are not assigned ordinals. | ||
910 | * | ||
911 | * @param[in] mda Pointer to packet's standard metadata. | ||
912 | * @param[in] pkt Packet on which to operate. | ||
913 | * @return The packet's per-IPP, per-group ordinal. | ||
914 | */ | ||
915 | static __inline unsigned int | ||
916 | NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
917 | { | ||
918 | return mda->__group_ordinal; | ||
919 | } | ||
920 | |||
921 | |||
922 | /** Return the VLAN ID assigned to the packet. | ||
923 | * @ingroup ingress | ||
924 | * | ||
925 | * This value is usually contained within the packet header. | ||
926 | * | ||
927 | * This value will be zero if the packet does not have a VLAN tag, or if | ||
928 | * this value was not extracted from the packet. | ||
929 | * | ||
930 | * @param[in] mda Pointer to packet's standard metadata. | ||
931 | * @param[in] pkt Packet on which to operate. | ||
932 | * @return The packet's VLAN ID. | ||
933 | */ | ||
934 | static __inline unsigned short | ||
935 | NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
936 | { | ||
937 | int vl = (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK; | ||
938 | unsigned short* pkt_p; | ||
939 | int index; | ||
940 | unsigned short val; | ||
941 | |||
942 | if (vl == _NETIO_PKT_VLAN_NONE) | ||
943 | return 0; | ||
944 | |||
945 | pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); | ||
946 | index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; | ||
947 | |||
948 | val = pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_VLAN_SHIFT) & | ||
949 | _NETIO_PKT_INFO_VLAN_RMASK]; | ||
950 | |||
951 | #ifdef __TILECC__ | ||
952 | return (__insn_bytex(val) >> 16) & 0xFFF; | ||
953 | #else | ||
954 | return (__builtin_bswap32(val) >> 16) & 0xFFF; | ||
955 | #endif | ||
956 | } | ||
957 | |||
958 | |||
959 | /** Return the ethertype of the packet. | ||
960 | * @ingroup ingress | ||
961 | * | ||
962 | * This value is usually contained within the packet header. | ||
963 | * | ||
964 | * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M() | ||
965 | * returns true, and otherwise, may not be well defined. | ||
966 | * | ||
967 | * @param[in] mda Pointer to packet's standard metadata. | ||
968 | * @param[in] pkt Packet on which to operate. | ||
969 | * @return The packet's ethertype. | ||
970 | */ | ||
971 | static __inline unsigned short | ||
972 | NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
973 | { | ||
974 | unsigned short* pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); | ||
975 | int index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; | ||
976 | |||
977 | unsigned short val = | ||
978 | pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) & | ||
979 | _NETIO_PKT_INFO_ETYPE_RMASK]; | ||
980 | |||
981 | return __builtin_bswap32(val) >> 16; | ||
982 | } | ||
983 | |||
984 | |||
985 | /** Return the flow hash computed on the packet. | ||
986 | * @ingroup ingress | ||
987 | * | ||
988 | * For TCP and UDP packets, this hash is calculated by hashing together | ||
989 | * the "5-tuple" values, specifically the source IP address, destination | ||
990 | * IP address, protocol type, source port and destination port. | ||
991 | * The hash value is intended to be helpful for millions of distinct | ||
992 | * flows. | ||
993 | * | ||
994 | * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is | ||
995 | * derived by hashing together the source and destination IP addresses. | ||
996 | * | ||
997 | * For MPLS-encapsulated packets, the flow hash is derived by hashing | ||
998 | * the first MPLS label. | ||
999 | * | ||
1000 | * For all other packets the flow hash is computed from the source | ||
1001 | * and destination Ethernet addresses. | ||
1002 | * | ||
1003 | * The hash is symmetric, meaning it produces the same value if the | ||
1004 | * source and destination are swapped. The only exceptions are | ||
1005 | * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple | ||
1006 | * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 | ||
1007 | * (Encap Security Payload), which use only the destination address | ||
1008 | * since the source address is not meaningful. | ||
1009 | * | ||
1010 | * @param[in] mda Pointer to packet's standard metadata. | ||
1011 | * @param[in] pkt Packet on which to operate. | ||
1012 | * @return The packet's 32-bit flow hash. | ||
1013 | */ | ||
1014 | static __inline unsigned int | ||
1015 | NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1016 | { | ||
1017 | return mda->__flow_hash; | ||
1018 | } | ||
1019 | |||
1020 | |||
1021 | /** Return the first word of "user data" for the packet. | ||
1022 | * | ||
1023 | * The contents of the user data words depend on the IPP. | ||
1024 | * | ||
1025 | * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first | ||
1026 | * word of user data contains the least significant bits of the 64-bit | ||
1027 | * arrival cycle count (see @c get_cycle_count_low()). | ||
1028 | * | ||
1029 | * See the <em>System Programmer's Guide</em> for details. | ||
1030 | * | ||
1031 | * @ingroup ingress | ||
1032 | * | ||
1033 | * @param[in] mda Pointer to packet's standard metadata. | ||
1034 | * @param[in] pkt Packet on which to operate. | ||
1035 | * @return The packet's first word of "user data". | ||
1036 | */ | ||
1037 | static __inline unsigned int | ||
1038 | NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1039 | { | ||
1040 | return mda->__user_data_0; | ||
1041 | } | ||
1042 | |||
1043 | |||
1044 | /** Return the second word of "user data" for the packet. | ||
1045 | * | ||
1046 | * The contents of the user data words depend on the IPP. | ||
1047 | * | ||
1048 | * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second | ||
1049 | * word of user data contains the most significant bits of the 64-bit | ||
1050 | * arrival cycle count (see @c get_cycle_count_high()). | ||
1051 | * | ||
1052 | * See the <em>System Programmer's Guide</em> for details. | ||
1053 | * | ||
1054 | * @ingroup ingress | ||
1055 | * | ||
1056 | * @param[in] mda Pointer to packet's standard metadata. | ||
1057 | * @param[in] pkt Packet on which to operate. | ||
1058 | * @return The packet's second word of "user data". | ||
1059 | */ | ||
1060 | static __inline unsigned int | ||
1061 | NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1062 | { | ||
1063 | return mda->__user_data_1; | ||
1064 | } | ||
1065 | |||
1066 | |||
1067 | /** Determine whether the L4 (TCP/UDP) checksum was calculated. | ||
1068 | * @ingroup ingress | ||
1069 | * | ||
1070 | * @param[in] mda Pointer to packet's standard metadata. | ||
1071 | * @param[in] pkt Packet on which to operate. | ||
1072 | * @return Nonzero if the L4 checksum was calculated. | ||
1073 | */ | ||
1074 | static __inline unsigned int | ||
1075 | NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1076 | { | ||
1077 | return !(mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK); | ||
1078 | } | ||
1079 | |||
1080 | |||
1081 | /** Determine whether the L4 (TCP/UDP) checksum was calculated and found to | ||
1082 | * be correct. | ||
1083 | * @ingroup ingress | ||
1084 | * | ||
1085 | * @param[in] mda Pointer to packet's standard metadata. | ||
1086 | * @param[in] pkt Packet on which to operate. | ||
1087 | * @return Nonzero if the checksum was calculated and is correct. | ||
1088 | */ | ||
1089 | static __inline unsigned int | ||
1090 | NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1091 | { | ||
1092 | return !(mda->__flags & | ||
1093 | (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK)); | ||
1094 | } | ||
1095 | |||
1096 | |||
1097 | /** Determine whether the L3 (IP) checksum was calculated. | ||
1098 | * @ingroup ingress | ||
1099 | * | ||
1100 | * @param[in] mda Pointer to packet's standard metadata. | ||
1101 | * @param[in] pkt Packet on which to operate. | ||
1102 | * @return Nonzero if the L3 (IP) checksum was calculated. | ||
1103 | */ | ||
1104 | static __inline unsigned int | ||
1105 | NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1106 | { | ||
1107 | return !(mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK); | ||
1108 | } | ||
1109 | |||
1110 | |||
1111 | /** Determine whether the L3 (IP) checksum was calculated and found to be | ||
1112 | * correct. | ||
1113 | * @ingroup ingress | ||
1114 | * | ||
1115 | * @param[in] mda Pointer to packet's standard metadata. | ||
1116 | * @param[in] pkt Packet on which to operate. | ||
1117 | * @return Nonzero if the checksum was calculated and is correct. | ||
1118 | */ | ||
1119 | static __inline unsigned int | ||
1120 | NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1121 | { | ||
1122 | return !(mda->__flags & | ||
1123 | (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK)); | ||
1124 | } | ||
1125 | |||
1126 | |||
1127 | /** Determine whether the ethertype was recognized and L3 packet data was | ||
1128 | * processed. | ||
1129 | * @ingroup ingress | ||
1130 | * | ||
1131 | * @param[in] mda Pointer to packet's standard metadata. | ||
1132 | * @param[in] pkt Packet on which to operate. | ||
1133 | * @return Nonzero if the ethertype was recognized and L3 packet data was | ||
1134 | * processed. | ||
1135 | */ | ||
1136 | static __inline unsigned int | ||
1137 | NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1138 | { | ||
1139 | return !(mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK); | ||
1140 | } | ||
1141 | |||
1142 | |||
1143 | /** Retrieve the status of a packet and any errors that may have occurred | ||
1144 | * during ingress processing (length mismatches, CRC errors, etc.). | ||
1145 | * @ingroup ingress | ||
1146 | * | ||
1147 | * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() | ||
1148 | * returns zero are always reported as underlength, as there is no a priori | ||
1149 | * means to determine their length. Normally, applications should use | ||
1150 | * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this | ||
1151 | * function. | ||
1152 | * | ||
1153 | * @param[in] mda Pointer to packet's standard metadata. | ||
1154 | * @param[in] pkt Packet on which to operate. | ||
1155 | * @return The packet's status. | ||
1156 | */ | ||
1157 | static __inline netio_pkt_status_t | ||
1158 | NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1159 | { | ||
1160 | return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; | ||
1161 | } | ||
1162 | |||
1163 | |||
1164 | /** Report whether a packet is bad (i.e., was shorter than expected based on | ||
1165 | * its headers, or had a bad CRC). | ||
1166 | * @ingroup ingress | ||
1167 | * | ||
1168 | * Note that this function does not verify L3 or L4 checksums. | ||
1169 | * | ||
1170 | * @param[in] mda Pointer to packet's standard metadata. | ||
1171 | * @param[in] pkt Packet on which to operate. | ||
1172 | * @return Nonzero if the packet is bad and should be discarded. | ||
1173 | */ | ||
1174 | static __inline unsigned int | ||
1175 | NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1176 | { | ||
1177 | return ((NETIO_PKT_STATUS_M(mda, pkt) & 1) && | ||
1178 | (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt) || | ||
1179 | NETIO_PKT_STATUS_M(mda, pkt) == NETIO_PKT_STATUS_BAD)); | ||
1180 | } | ||
1181 | |||
1182 | |||
1183 | /** Return the length of the packet, starting with the L2 (Ethernet) header. | ||
1184 | * @ingroup egress | ||
1185 | * | ||
1186 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1187 | * @param[in] pkt Packet on which to operate. | ||
1188 | * @return The length of the packet, in bytes. | ||
1189 | */ | ||
1190 | static __inline netio_size_t | ||
1191 | NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) | ||
1192 | { | ||
1193 | return mmd->l2_length; | ||
1194 | } | ||
1195 | |||
1196 | |||
1197 | /** Return the length of the L2 (Ethernet) header. | ||
1198 | * @ingroup egress | ||
1199 | * | ||
1200 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1201 | * @param[in] pkt Packet on which to operate. | ||
1202 | * @return The length of the packet's L2 header, in bytes. | ||
1203 | */ | ||
1204 | static __inline netio_size_t | ||
1205 | NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, | ||
1206 | netio_pkt_t* pkt) | ||
1207 | { | ||
1208 | return mmd->l3_offset - mmd->l2_offset; | ||
1209 | } | ||
1210 | |||
1211 | |||
1212 | /** Return the length of the packet, starting with the L3 (IP) header. | ||
1213 | * @ingroup egress | ||
1214 | * | ||
1215 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1216 | * @param[in] pkt Packet on which to operate. | ||
1217 | * @return Length of the packet's L3 header and data, in bytes. | ||
1218 | */ | ||
1219 | static __inline netio_size_t | ||
1220 | NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) | ||
1221 | { | ||
1222 | return (NETIO_PKT_L2_LENGTH_MM(mmd, pkt) - | ||
1223 | NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt)); | ||
1224 | } | ||
1225 | |||
1226 | |||
1227 | /** Return a pointer to the packet's L3 (generally, the IP) header. | ||
1228 | * @ingroup egress | ||
1229 | * | ||
1230 | * Note that we guarantee word alignment of the L3 header. | ||
1231 | * | ||
1232 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1233 | * @param[in] pkt Packet on which to operate. | ||
1234 | * @return A pointer to the packet's L3 header. | ||
1235 | */ | ||
1236 | static __inline unsigned char* | ||
1237 | NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) | ||
1238 | { | ||
1239 | return _NETIO_PKT_BASE(pkt) + mmd->l3_offset; | ||
1240 | } | ||
1241 | |||
1242 | |||
1243 | /** Return a pointer to the packet's L2 (Ethernet) header. | ||
1244 | * @ingroup egress | ||
1245 | * | ||
1246 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1247 | * @param[in] pkt Packet on which to operate. | ||
1248 | * @return A pointer to start of the packet. | ||
1249 | */ | ||
1250 | static __inline unsigned char* | ||
1251 | NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) | ||
1252 | { | ||
1253 | return _NETIO_PKT_BASE(pkt) + mmd->l2_offset; | ||
1254 | } | ||
1255 | |||
1256 | |||
1257 | /** Retrieve the status of a packet and any errors that may have occurred | ||
1258 | * during ingress processing (length mismatches, CRC errors, etc.). | ||
1259 | * @ingroup ingress | ||
1260 | * | ||
1261 | * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() | ||
1262 | * returns zero are always reported as underlength, as there is no a priori | ||
1263 | * means to determine their length. Normally, applications should use | ||
1264 | * @ref NETIO_PKT_BAD() instead of explicitly checking status with this | ||
1265 | * function. | ||
1266 | * | ||
1267 | * @param[in] pkt Packet on which to operate. | ||
1268 | * @return The packet's status. | ||
1269 | */ | ||
1270 | static __inline netio_pkt_status_t | ||
1271 | NETIO_PKT_STATUS(netio_pkt_t* pkt) | ||
1272 | { | ||
1273 | netio_assert(!pkt->__packet.bits.__minimal); | ||
1274 | |||
1275 | return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; | ||
1276 | } | ||
1277 | |||
1278 | |||
1279 | /** Report whether a packet is bad (i.e., was shorter than expected based on | ||
1280 | * its headers, or had a bad CRC). | ||
1281 | * @ingroup ingress | ||
1282 | * | ||
1283 | * Note that this function does not verify L3 or L4 checksums. | ||
1284 | * | ||
1285 | * @param[in] pkt Packet on which to operate. | ||
1286 | * @return Nonzero if the packet is bad and should be discarded. | ||
1287 | */ | ||
1288 | static __inline unsigned int | ||
1289 | NETIO_PKT_BAD(netio_pkt_t* pkt) | ||
1290 | { | ||
1291 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1292 | |||
1293 | return NETIO_PKT_BAD_M(mda, pkt); | ||
1294 | } | ||
1295 | |||
1296 | |||
1297 | /** Return the length of the packet's custom header. | ||
1298 | * A custom header may or may not be present, depending upon the IPP; its | ||
1299 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
1300 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
1301 | * the custom header precedes the L2 header in the packet buffer. | ||
1302 | * @ingroup pktfuncs | ||
1303 | * | ||
1304 | * @param[in] pkt Packet on which to operate. | ||
1305 | * @return The length of the packet's custom header, in bytes. | ||
1306 | */ | ||
1307 | static __inline netio_size_t | ||
1308 | NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt) | ||
1309 | { | ||
1310 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1311 | |||
1312 | return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); | ||
1313 | } | ||
1314 | |||
1315 | |||
1316 | /** Return the length of the packet, starting with the custom header. | ||
1317 | * A custom header may or may not be present, depending upon the IPP; its | ||
1318 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
1319 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
1320 | * the custom header precedes the L2 header in the packet buffer. | ||
1321 | * @ingroup pktfuncs | ||
1322 | * | ||
1323 | * @param[in] pkt Packet on which to operate. | ||
1324 | * @return The length of the packet, in bytes. | ||
1325 | */ | ||
1326 | static __inline netio_size_t | ||
1327 | NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt) | ||
1328 | { | ||
1329 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1330 | |||
1331 | return NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt); | ||
1332 | } | ||
1333 | |||
1334 | |||
1335 | /** Return a pointer to the packet's custom header. | ||
1336 | * A custom header may or may not be present, depending upon the IPP; its | ||
1337 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
1338 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
1339 | * the custom header precedes the L2 header in the packet buffer. | ||
1340 | * @ingroup pktfuncs | ||
1341 | * | ||
1342 | * @param[in] pkt Packet on which to operate. | ||
1343 | * @return A pointer to start of the packet. | ||
1344 | */ | ||
1345 | static __inline unsigned char* | ||
1346 | NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt) | ||
1347 | { | ||
1348 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1349 | |||
1350 | return NETIO_PKT_CUSTOM_DATA_M(mda, pkt); | ||
1351 | } | ||
1352 | |||
1353 | |||
1354 | /** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. | ||
1355 | * @ingroup pktfuncs | ||
1356 | * | ||
1357 | * @param[in] pkt Packet on which to operate. | ||
1358 | * @return The length of the packet's L2 header, in bytes. | ||
1359 | */ | ||
1360 | static __inline netio_size_t | ||
1361 | NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt) | ||
1362 | { | ||
1363 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
1364 | { | ||
1365 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1366 | |||
1367 | return NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt); | ||
1368 | } | ||
1369 | else | ||
1370 | { | ||
1371 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1372 | |||
1373 | return NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt); | ||
1374 | } | ||
1375 | } | ||
1376 | |||
1377 | |||
1378 | /** Return the length of the packet, starting with the L2 (Ethernet) header. | ||
1379 | * @ingroup pktfuncs | ||
1380 | * | ||
1381 | * @param[in] pkt Packet on which to operate. | ||
1382 | * @return The length of the packet, in bytes. | ||
1383 | */ | ||
1384 | static __inline netio_size_t | ||
1385 | NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt) | ||
1386 | { | ||
1387 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
1388 | { | ||
1389 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1390 | |||
1391 | return NETIO_PKT_L2_LENGTH_MM(mmd, pkt); | ||
1392 | } | ||
1393 | else | ||
1394 | { | ||
1395 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1396 | |||
1397 | return NETIO_PKT_L2_LENGTH_M(mda, pkt); | ||
1398 | } | ||
1399 | } | ||
1400 | |||
1401 | |||
1402 | /** Return a pointer to the packet's L2 (Ethernet) header. | ||
1403 | * @ingroup pktfuncs | ||
1404 | * | ||
1405 | * @param[in] pkt Packet on which to operate. | ||
1406 | * @return A pointer to start of the packet. | ||
1407 | */ | ||
1408 | static __inline unsigned char* | ||
1409 | NETIO_PKT_L2_DATA(netio_pkt_t* pkt) | ||
1410 | { | ||
1411 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
1412 | { | ||
1413 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1414 | |||
1415 | return NETIO_PKT_L2_DATA_MM(mmd, pkt); | ||
1416 | } | ||
1417 | else | ||
1418 | { | ||
1419 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1420 | |||
1421 | return NETIO_PKT_L2_DATA_M(mda, pkt); | ||
1422 | } | ||
1423 | } | ||
1424 | |||
1425 | |||
1426 | /** Retrieve the length of the packet, starting with the L3 (generally, the IP) | ||
1427 | * header. | ||
1428 | * @ingroup pktfuncs | ||
1429 | * | ||
1430 | * @param[in] pkt Packet on which to operate. | ||
1431 | * @return Length of the packet's L3 header and data, in bytes. | ||
1432 | */ | ||
1433 | static __inline netio_size_t | ||
1434 | NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt) | ||
1435 | { | ||
1436 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
1437 | { | ||
1438 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1439 | |||
1440 | return NETIO_PKT_L3_LENGTH_MM(mmd, pkt); | ||
1441 | } | ||
1442 | else | ||
1443 | { | ||
1444 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1445 | |||
1446 | return NETIO_PKT_L3_LENGTH_M(mda, pkt); | ||
1447 | } | ||
1448 | } | ||
1449 | |||
1450 | |||
1451 | /** Return a pointer to the packet's L3 (generally, the IP) header. | ||
1452 | * @ingroup pktfuncs | ||
1453 | * | ||
1454 | * Note that we guarantee word alignment of the L3 header. | ||
1455 | * | ||
1456 | * @param[in] pkt Packet on which to operate. | ||
1457 | * @return A pointer to the packet's L3 header. | ||
1458 | */ | ||
1459 | static __inline unsigned char* | ||
1460 | NETIO_PKT_L3_DATA(netio_pkt_t* pkt) | ||
1461 | { | ||
1462 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
1463 | { | ||
1464 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1465 | |||
1466 | return NETIO_PKT_L3_DATA_MM(mmd, pkt); | ||
1467 | } | ||
1468 | else | ||
1469 | { | ||
1470 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1471 | |||
1472 | return NETIO_PKT_L3_DATA_M(mda, pkt); | ||
1473 | } | ||
1474 | } | ||
1475 | |||
1476 | |||
1477 | /** Return the ordinal of the packet. | ||
1478 | * @ingroup ingress | ||
1479 | * | ||
1480 | * Each packet is given an ordinal number when it is delivered by the IPP. | ||
1481 | * In the medium term, the ordinal is unique and monotonically increasing, | ||
1482 | * being incremented by 1 for each packet; the ordinal of the first packet | ||
1483 | * delivered after the IPP starts is zero. (Since the ordinal is of finite | ||
1484 | * size, given enough input packets, it will eventually wrap around to zero; | ||
1485 | * in the long term, therefore, ordinals are not unique.) The ordinals | ||
1486 | * handed out by different IPPs are not disjoint, so two packets from | ||
1487 | * different IPPs may have identical ordinals. Packets dropped by the | ||
1488 | * IPP or by the I/O shim are not assigned ordinals. | ||
1489 | * | ||
1490 | * | ||
1491 | * @param[in] pkt Packet on which to operate. | ||
1492 | * @return The packet's per-IPP packet ordinal. | ||
1493 | */ | ||
1494 | static __inline unsigned int | ||
1495 | NETIO_PKT_ORDINAL(netio_pkt_t* pkt) | ||
1496 | { | ||
1497 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1498 | |||
1499 | return NETIO_PKT_ORDINAL_M(mda, pkt); | ||
1500 | } | ||
1501 | |||
1502 | |||
1503 | /** Return the per-group ordinal of the packet. | ||
1504 | * @ingroup ingress | ||
1505 | * | ||
1506 | * Each packet is given a per-group ordinal number when it is | ||
1507 | * delivered by the IPP. By default, the group is the packet's VLAN, | ||
1508 | * although IPP can be recompiled to use different values. In | ||
1509 | * the medium term, the ordinal is unique and monotonically | ||
1510 | * increasing, being incremented by 1 for each packet; the ordinal of | ||
1511 | * the first packet distributed to a particular group is zero. | ||
1512 | * (Since the ordinal is of finite size, given enough input packets, | ||
1513 | * it will eventually wrap around to zero; in the long term, | ||
1514 | * therefore, ordinals are not unique.) The ordinals handed out by | ||
1515 | * different IPPs are not disjoint, so two packets from different IPPs | ||
1516 | * may have identical ordinals; similarly, packets distributed to | ||
1517 | * different groups may have identical ordinals. Packets dropped by | ||
1518 | * the IPP or by the I/O shim are not assigned ordinals. | ||
1519 | * | ||
1520 | * @param[in] pkt Packet on which to operate. | ||
1521 | * @return The packet's per-IPP, per-group ordinal. | ||
1522 | */ | ||
1523 | static __inline unsigned int | ||
1524 | NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt) | ||
1525 | { | ||
1526 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1527 | |||
1528 | return NETIO_PKT_GROUP_ORDINAL_M(mda, pkt); | ||
1529 | } | ||
1530 | |||
1531 | |||
1532 | /** Return the VLAN ID assigned to the packet. | ||
1533 | * @ingroup ingress | ||
1534 | * | ||
1535 | * This is usually also contained within the packet header. If the packet | ||
1536 | * does not have a VLAN tag, the VLAN ID returned by this function is zero. | ||
1537 | * | ||
1538 | * @param[in] pkt Packet on which to operate. | ||
1539 | * @return The packet's VLAN ID. | ||
1540 | */ | ||
1541 | static __inline unsigned short | ||
1542 | NETIO_PKT_VLAN_ID(netio_pkt_t* pkt) | ||
1543 | { | ||
1544 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1545 | |||
1546 | return NETIO_PKT_VLAN_ID_M(mda, pkt); | ||
1547 | } | ||
1548 | |||
1549 | |||
1550 | /** Return the ethertype of the packet. | ||
1551 | * @ingroup ingress | ||
1552 | * | ||
1553 | * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() | ||
1554 | * returns true, and otherwise, may not be well defined. | ||
1555 | * | ||
1556 | * @param[in] pkt Packet on which to operate. | ||
1557 | * @return The packet's ethertype. | ||
1558 | */ | ||
1559 | static __inline unsigned short | ||
1560 | NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt) | ||
1561 | { | ||
1562 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1563 | |||
1564 | return NETIO_PKT_ETHERTYPE_M(mda, pkt); | ||
1565 | } | ||
1566 | |||
1567 | |||
1568 | /** Return the flow hash computed on the packet. | ||
1569 | * @ingroup ingress | ||
1570 | * | ||
1571 | * For TCP and UDP packets, this hash is calculated by hashing together | ||
1572 | * the "5-tuple" values, specifically the source IP address, destination | ||
1573 | * IP address, protocol type, source port and destination port. | ||
1574 | * The hash value is intended to be helpful for millions of distinct | ||
1575 | * flows. | ||
1576 | * | ||
1577 | * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is | ||
1578 | * derived by hashing together the source and destination IP addresses. | ||
1579 | * | ||
1580 | * For MPLS-encapsulated packets, the flow hash is derived by hashing | ||
1581 | * the first MPLS label. | ||
1582 | * | ||
1583 | * For all other packets the flow hash is computed from the source | ||
1584 | * and destination Ethernet addresses. | ||
1585 | * | ||
1586 | * The hash is symmetric, meaning it produces the same value if the | ||
1587 | * source and destination are swapped. The only exceptions are | ||
1588 | * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple | ||
1589 | * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 | ||
1590 | * (Encap Security Payload), which use only the destination address | ||
1591 | * since the source address is not meaningful. | ||
1592 | * | ||
1593 | * @param[in] pkt Packet on which to operate. | ||
1594 | * @return The packet's 32-bit flow hash. | ||
1595 | */ | ||
1596 | static __inline unsigned int | ||
1597 | NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt) | ||
1598 | { | ||
1599 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1600 | |||
1601 | return NETIO_PKT_FLOW_HASH_M(mda, pkt); | ||
1602 | } | ||
1603 | |||
1604 | |||
1605 | /** Return the first word of "user data" for the packet. | ||
1606 | * | ||
1607 | * The contents of the user data words depend on the IPP. | ||
1608 | * | ||
1609 | * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first | ||
1610 | * word of user data contains the least significant bits of the 64-bit | ||
1611 | * arrival cycle count (see @c get_cycle_count_low()). | ||
1612 | * | ||
1613 | * See the <em>System Programmer's Guide</em> for details. | ||
1614 | * | ||
1615 | * @ingroup ingress | ||
1616 | * | ||
1617 | * @param[in] pkt Packet on which to operate. | ||
1618 | * @return The packet's first word of "user data". | ||
1619 | */ | ||
1620 | static __inline unsigned int | ||
1621 | NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt) | ||
1622 | { | ||
1623 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1624 | |||
1625 | return NETIO_PKT_USER_DATA_0_M(mda, pkt); | ||
1626 | } | ||
1627 | |||
1628 | |||
1629 | /** Return the second word of "user data" for the packet. | ||
1630 | * | ||
1631 | * The contents of the user data words depend on the IPP. | ||
1632 | * | ||
1633 | * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second | ||
1634 | * word of user data contains the most significant bits of the 64-bit | ||
1635 | * arrival cycle count (see @c get_cycle_count_high()). | ||
1636 | * | ||
1637 | * See the <em>System Programmer's Guide</em> for details. | ||
1638 | * | ||
1639 | * @ingroup ingress | ||
1640 | * | ||
1641 | * @param[in] pkt Packet on which to operate. | ||
1642 | * @return The packet's second word of "user data". | ||
1643 | */ | ||
1644 | static __inline unsigned int | ||
1645 | NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt) | ||
1646 | { | ||
1647 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1648 | |||
1649 | return NETIO_PKT_USER_DATA_1_M(mda, pkt); | ||
1650 | } | ||
1651 | |||
1652 | |||
1653 | /** Determine whether the L4 (TCP/UDP) checksum was calculated. | ||
1654 | * @ingroup ingress | ||
1655 | * | ||
1656 | * @param[in] pkt Packet on which to operate. | ||
1657 | * @return Nonzero if the L4 checksum was calculated. | ||
1658 | */ | ||
1659 | static __inline unsigned int | ||
1660 | NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt) | ||
1661 | { | ||
1662 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1663 | |||
1664 | return NETIO_PKT_L4_CSUM_CALCULATED_M(mda, pkt); | ||
1665 | } | ||
1666 | |||
1667 | |||
1668 | /** Determine whether the L4 (TCP/UDP) checksum was calculated and found to | ||
1669 | * be correct. | ||
1670 | * @ingroup ingress | ||
1671 | * | ||
1672 | * @param[in] pkt Packet on which to operate. | ||
1673 | * @return Nonzero if the checksum was calculated and is correct. | ||
1674 | */ | ||
1675 | static __inline unsigned int | ||
1676 | NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt) | ||
1677 | { | ||
1678 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1679 | |||
1680 | return NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt); | ||
1681 | } | ||
1682 | |||
1683 | |||
1684 | /** Determine whether the L3 (IP) checksum was calculated. | ||
1685 | * @ingroup ingress | ||
1686 | * | ||
1687 | * @param[in] pkt Packet on which to operate. | ||
1688 | * @return Nonzero if the L3 (IP) checksum was calculated. | ||
1689 | */ | ||
1690 | static __inline unsigned int | ||
1691 | NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt) | ||
1692 | { | ||
1693 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1694 | |||
1695 | return NETIO_PKT_L3_CSUM_CALCULATED_M(mda, pkt); | ||
1696 | } | ||
1697 | |||
1698 | |||
1699 | /** Determine whether the L3 (IP) checksum was calculated and found to be | ||
1700 | * correct. | ||
1701 | * @ingroup ingress | ||
1702 | * | ||
1703 | * @param[in] pkt Packet on which to operate. | ||
1704 | * @return Nonzero if the checksum was calculated and is correct. | ||
1705 | */ | ||
1706 | static __inline unsigned int | ||
1707 | NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt) | ||
1708 | { | ||
1709 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1710 | |||
1711 | return NETIO_PKT_L3_CSUM_CORRECT_M(mda, pkt); | ||
1712 | } | ||
1713 | |||
1714 | |||
1715 | /** Determine whether the Ethertype was recognized and L3 packet data was | ||
1716 | * processed. | ||
1717 | * @ingroup ingress | ||
1718 | * | ||
1719 | * @param[in] pkt Packet on which to operate. | ||
1720 | * @return Nonzero if the Ethertype was recognized and L3 packet data was | ||
1721 | * processed. | ||
1722 | */ | ||
1723 | static __inline unsigned int | ||
1724 | NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt) | ||
1725 | { | ||
1726 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1727 | |||
1728 | return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt); | ||
1729 | } | ||
1730 | |||
1731 | |||
1732 | /** Set an egress packet's L2 length, using a metadata pointer to speed the | ||
1733 | * computation. | ||
1734 | * @ingroup egress | ||
1735 | * | ||
1736 | * @param[in,out] mmd Pointer to packet's minimal metadata. | ||
1737 | * @param[in] pkt Packet on which to operate. | ||
1738 | * @param[in] len Packet L2 length, in bytes. | ||
1739 | */ | ||
1740 | static __inline void | ||
1741 | NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt, | ||
1742 | int len) | ||
1743 | { | ||
1744 | mmd->l2_length = len; | ||
1745 | } | ||
1746 | |||
1747 | |||
1748 | /** Set an egress packet's L2 length. | ||
1749 | * @ingroup egress | ||
1750 | * | ||
1751 | * @param[in,out] pkt Packet on which to operate. | ||
1752 | * @param[in] len Packet L2 length, in bytes. | ||
1753 | */ | ||
1754 | static __inline void | ||
1755 | NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len) | ||
1756 | { | ||
1757 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1758 | |||
1759 | NETIO_PKT_SET_L2_LENGTH_MM(mmd, pkt, len); | ||
1760 | } | ||
1761 | |||
1762 | |||
1763 | /** Set an egress packet's L2 header length, using a metadata pointer to | ||
1764 | * speed the computation. | ||
1765 | * @ingroup egress | ||
1766 | * | ||
1767 | * It is not normally necessary to call this routine; only the L2 length, | ||
1768 | * not the header length, is needed to transmit a packet. It may be useful if | ||
1769 | * the egress packet will later be processed by code which expects to use | ||
1770 | * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. | ||
1771 | * | ||
1772 | * @param[in,out] mmd Pointer to packet's minimal metadata. | ||
1773 | * @param[in] pkt Packet on which to operate. | ||
1774 | * @param[in] len Packet L2 header length, in bytes. | ||
1775 | */ | ||
1776 | static __inline void | ||
1777 | NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, | ||
1778 | netio_pkt_t* pkt, int len) | ||
1779 | { | ||
1780 | mmd->l3_offset = mmd->l2_offset + len; | ||
1781 | } | ||
1782 | |||
1783 | |||
1784 | /** Set an egress packet's L2 header length. | ||
1785 | * @ingroup egress | ||
1786 | * | ||
1787 | * It is not normally necessary to call this routine; only the L2 length, | ||
1788 | * not the header length, is needed to transmit a packet. It may be useful if | ||
1789 | * the egress packet will later be processed by code which expects to use | ||
1790 | * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. | ||
1791 | * | ||
1792 | * @param[in,out] pkt Packet on which to operate. | ||
1793 | * @param[in] len Packet L2 header length, in bytes. | ||
1794 | */ | ||
1795 | static __inline void | ||
1796 | NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len) | ||
1797 | { | ||
1798 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1799 | |||
1800 | NETIO_PKT_SET_L2_HEADER_LENGTH_MM(mmd, pkt, len); | ||
1801 | } | ||
1802 | |||
1803 | |||
1804 | /** Set up an egress packet for hardware checksum computation, using a | ||
1805 | * metadata pointer to speed the operation. | ||
1806 | * @ingroup egress | ||
1807 | * | ||
1808 | * NetIO provides the ability to automatically calculate a standard | ||
1809 | * 16-bit Internet checksum on transmitted packets. The application | ||
1810 | * may specify the point in the packet where the checksum starts, the | ||
1811 | * number of bytes to be checksummed, and the two bytes in the packet | ||
1812 | * which will be replaced with the completed checksum. (If the range | ||
1813 | * of bytes to be checksummed includes the bytes to be replaced, the | ||
1814 | * initial values of those bytes will be included in the checksum.) | ||
1815 | * | ||
1816 | * For some protocols, the packet checksum covers data which is not present | ||
1817 | * in the packet, or is at least not contiguous to the main data payload. | ||
1818 | * For instance, the TCP checksum includes a "pseudo-header" which includes | ||
1819 | * the source and destination IP addresses of the packet. To accommodate | ||
1820 | * this, the checksum engine may be "seeded" with an initial value, which | ||
1821 | * the application would need to compute based on the specific protocol's | ||
1822 | * requirements. Note that the seed is given in host byte order (little- | ||
1823 | * endian), not network byte order (big-endian); code written to compute a | ||
1824 | * pseudo-header checksum in network byte order will need to byte-swap it | ||
1825 | * before use as the seed. | ||
1826 | * | ||
1827 | * Note that the checksum is computed as part of the transmission process, | ||
1828 | * so it will not be present in the packet upon completion of this routine. | ||
1829 | * | ||
1830 | * @param[in,out] mmd Pointer to packet's minimal metadata. | ||
1831 | * @param[in] pkt Packet on which to operate. | ||
1832 | * @param[in] start Offset within L2 packet of the first byte to include in | ||
1833 | * the checksum. | ||
1834 | * @param[in] length Number of bytes to include in the checksum. | ||
1835 | * the checksum. | ||
1836 | * @param[in] location Offset within L2 packet of the first of the two bytes | ||
1837 | * to be replaced with the calculated checksum. | ||
1838 | * @param[in] seed Initial value of the running checksum before any of the | ||
1839 | * packet data is added. | ||
1840 | */ | ||
1841 | static __inline void | ||
1842 | NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd, | ||
1843 | netio_pkt_t* pkt, int start, int length, | ||
1844 | int location, uint16_t seed) | ||
1845 | { | ||
1846 | mmd->csum_start = start; | ||
1847 | mmd->csum_length = length; | ||
1848 | mmd->csum_location = location; | ||
1849 | mmd->csum_seed = seed; | ||
1850 | mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK; | ||
1851 | } | ||
1852 | |||
1853 | |||
1854 | /** Set up an egress packet for hardware checksum computation. | ||
1855 | * @ingroup egress | ||
1856 | * | ||
1857 | * NetIO provides the ability to automatically calculate a standard | ||
1858 | * 16-bit Internet checksum on transmitted packets. The application | ||
1859 | * may specify the point in the packet where the checksum starts, the | ||
1860 | * number of bytes to be checksummed, and the two bytes in the packet | ||
1861 | * which will be replaced with the completed checksum. (If the range | ||
1862 | * of bytes to be checksummed includes the bytes to be replaced, the | ||
1863 | * initial values of those bytes will be included in the checksum.) | ||
1864 | * | ||
1865 | * For some protocols, the packet checksum covers data which is not present | ||
1866 | * in the packet, or is at least not contiguous to the main data payload. | ||
1867 | * For instance, the TCP checksum includes a "pseudo-header" which includes | ||
1868 | * the source and destination IP addresses of the packet. To accommodate | ||
1869 | * this, the checksum engine may be "seeded" with an initial value, which | ||
1870 | * the application would need to compute based on the specific protocol's | ||
1871 | * requirements. Note that the seed is given in host byte order (little- | ||
1872 | * endian), not network byte order (big-endian); code written to compute a | ||
1873 | * pseudo-header checksum in network byte order will need to byte-swap it | ||
1874 | * before use as the seed. | ||
1875 | * | ||
1876 | * Note that the checksum is computed as part of the transmission process, | ||
1877 | * so it will not be present in the packet upon completion of this routine. | ||
1878 | * | ||
1879 | * @param[in,out] pkt Packet on which to operate. | ||
1880 | * @param[in] start Offset within L2 packet of the first byte to include in | ||
1881 | * the checksum. | ||
1882 | * @param[in] length Number of bytes to include in the checksum. | ||
1883 | * the checksum. | ||
1884 | * @param[in] location Offset within L2 packet of the first of the two bytes | ||
1885 | * to be replaced with the calculated checksum. | ||
1886 | * @param[in] seed Initial value of the running checksum before any of the | ||
1887 | * packet data is added. | ||
1888 | */ | ||
1889 | static __inline void | ||
1890 | NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length, | ||
1891 | int location, uint16_t seed) | ||
1892 | { | ||
1893 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1894 | |||
1895 | NETIO_PKT_DO_EGRESS_CSUM_MM(mmd, pkt, start, length, location, seed); | ||
1896 | } | ||
1897 | |||
1898 | |||
1899 | /** Return the number of bytes which could be prepended to a packet, using a | ||
1900 | * metadata pointer to speed the operation. | ||
1901 | * See @ref netio_populate_prepend_buffer() to get a full description of | ||
1902 | * prepending. | ||
1903 | * | ||
1904 | * @param[in,out] mda Pointer to packet's standard metadata. | ||
1905 | * @param[in] pkt Packet on which to operate. | ||
1906 | */ | ||
1907 | static __inline int | ||
1908 | NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1909 | { | ||
1910 | return (pkt->__packet.bits.__offset << 6) + | ||
1911 | NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); | ||
1912 | } | ||
1913 | |||
1914 | |||
1915 | /** Return the number of bytes which could be prepended to a packet, using a | ||
1916 | * metadata pointer to speed the operation. | ||
1917 | * See @ref netio_populate_prepend_buffer() to get a full description of | ||
1918 | * prepending. | ||
1919 | * @ingroup egress | ||
1920 | * | ||
1921 | * @param[in,out] mmd Pointer to packet's minimal metadata. | ||
1922 | * @param[in] pkt Packet on which to operate. | ||
1923 | */ | ||
1924 | static __inline int | ||
1925 | NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) | ||
1926 | { | ||
1927 | return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset; | ||
1928 | } | ||
1929 | |||
1930 | |||
1931 | /** Return the number of bytes which could be prepended to a packet. | ||
1932 | * See @ref netio_populate_prepend_buffer() to get a full description of | ||
1933 | * prepending. | ||
1934 | * @ingroup egress | ||
1935 | * | ||
1936 | * @param[in] pkt Packet on which to operate. | ||
1937 | */ | ||
1938 | static __inline int | ||
1939 | NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt) | ||
1940 | { | ||
1941 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
1942 | { | ||
1943 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1944 | |||
1945 | return NETIO_PKT_PREPEND_AVAIL_MM(mmd, pkt); | ||
1946 | } | ||
1947 | else | ||
1948 | { | ||
1949 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1950 | |||
1951 | return NETIO_PKT_PREPEND_AVAIL_M(mda, pkt); | ||
1952 | } | ||
1953 | } | ||
1954 | |||
1955 | |||
1956 | /** Flush a packet's minimal metadata from the cache, using a metadata pointer | ||
1957 | * to speed the operation. | ||
1958 | * @ingroup egress | ||
1959 | * | ||
1960 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1961 | * @param[in] pkt Packet on which to operate. | ||
1962 | */ | ||
1963 | static __inline void | ||
1964 | NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, | ||
1965 | netio_pkt_t* pkt) | ||
1966 | { | ||
1967 | } | ||
1968 | |||
1969 | |||
1970 | /** Invalidate a packet's minimal metadata from the cache, using a metadata | ||
1971 | * pointer to speed the operation. | ||
1972 | * @ingroup egress | ||
1973 | * | ||
1974 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1975 | * @param[in] pkt Packet on which to operate. | ||
1976 | */ | ||
1977 | static __inline void | ||
1978 | NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, | ||
1979 | netio_pkt_t* pkt) | ||
1980 | { | ||
1981 | } | ||
1982 | |||
1983 | |||
1984 | /** Flush and then invalidate a packet's minimal metadata from the cache, | ||
1985 | * using a metadata pointer to speed the operation. | ||
1986 | * @ingroup egress | ||
1987 | * | ||
1988 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1989 | * @param[in] pkt Packet on which to operate. | ||
1990 | */ | ||
1991 | static __inline void | ||
1992 | NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, | ||
1993 | netio_pkt_t* pkt) | ||
1994 | { | ||
1995 | } | ||
1996 | |||
1997 | |||
1998 | /** Flush a packet's metadata from the cache, using a metadata pointer | ||
1999 | * to speed the operation. | ||
2000 | * @ingroup ingress | ||
2001 | * | ||
2002 | * @param[in] mda Pointer to packet's minimal metadata. | ||
2003 | * @param[in] pkt Packet on which to operate. | ||
2004 | */ | ||
2005 | static __inline void | ||
2006 | NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
2007 | { | ||
2008 | } | ||
2009 | |||
2010 | |||
2011 | /** Invalidate a packet's metadata from the cache, using a metadata | ||
2012 | * pointer to speed the operation. | ||
2013 | * @ingroup ingress | ||
2014 | * | ||
2015 | * @param[in] mda Pointer to packet's metadata. | ||
2016 | * @param[in] pkt Packet on which to operate. | ||
2017 | */ | ||
2018 | static __inline void | ||
2019 | NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
2020 | { | ||
2021 | } | ||
2022 | |||
2023 | |||
2024 | /** Flush and then invalidate a packet's metadata from the cache, | ||
2025 | * using a metadata pointer to speed the operation. | ||
2026 | * @ingroup ingress | ||
2027 | * | ||
2028 | * @param[in] mda Pointer to packet's metadata. | ||
2029 | * @param[in] pkt Packet on which to operate. | ||
2030 | */ | ||
2031 | static __inline void | ||
2032 | NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
2033 | { | ||
2034 | } | ||
2035 | |||
2036 | |||
2037 | /** Flush a packet's minimal metadata from the cache. | ||
2038 | * @ingroup egress | ||
2039 | * | ||
2040 | * @param[in] pkt Packet on which to operate. | ||
2041 | */ | ||
2042 | static __inline void | ||
2043 | NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt) | ||
2044 | { | ||
2045 | } | ||
2046 | |||
2047 | |||
2048 | /** Invalidate a packet's minimal metadata from the cache. | ||
2049 | * @ingroup egress | ||
2050 | * | ||
2051 | * @param[in] pkt Packet on which to operate. | ||
2052 | */ | ||
2053 | static __inline void | ||
2054 | NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt) | ||
2055 | { | ||
2056 | } | ||
2057 | |||
2058 | |||
2059 | /** Flush and then invalidate a packet's minimal metadata from the cache. | ||
2060 | * @ingroup egress | ||
2061 | * | ||
2062 | * @param[in] pkt Packet on which to operate. | ||
2063 | */ | ||
2064 | static __inline void | ||
2065 | NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt) | ||
2066 | { | ||
2067 | } | ||
2068 | |||
2069 | |||
2070 | /** Flush a packet's metadata from the cache. | ||
2071 | * @ingroup ingress | ||
2072 | * | ||
2073 | * @param[in] pkt Packet on which to operate. | ||
2074 | */ | ||
2075 | static __inline void | ||
2076 | NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt) | ||
2077 | { | ||
2078 | } | ||
2079 | |||
2080 | |||
2081 | /** Invalidate a packet's metadata from the cache. | ||
2082 | * @ingroup ingress | ||
2083 | * | ||
2084 | * @param[in] pkt Packet on which to operate. | ||
2085 | */ | ||
2086 | static __inline void | ||
2087 | NETIO_PKT_INV_METADATA(netio_pkt_t* pkt) | ||
2088 | { | ||
2089 | } | ||
2090 | |||
2091 | |||
2092 | /** Flush and then invalidate a packet's metadata from the cache. | ||
2093 | * @ingroup ingress | ||
2094 | * | ||
2095 | * @param[in] pkt Packet on which to operate. | ||
2096 | */ | ||
2097 | static __inline void | ||
2098 | NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt) | ||
2099 | { | ||
2100 | } | ||
2101 | |||
2102 | /** Number of NUMA nodes we can distribute buffers to. | ||
2103 | * @ingroup setup */ | ||
2104 | #define NETIO_NUM_NODE_WEIGHTS 16 | ||
2105 | |||
2106 | /** | ||
2107 | * @brief An object for specifying the characteristics of NetIO communication | ||
2108 | * endpoint. | ||
2109 | * | ||
2110 | * @ingroup setup | ||
2111 | * | ||
2112 | * The @ref netio_input_register() function uses this structure to define | ||
2113 | * how an application tile will communicate with an IPP. | ||
2114 | * | ||
2115 | * | ||
2116 | * Future updates to NetIO may add new members to this structure, | ||
2117 | * which can affect the success of the registration operation. Thus, | ||
2118 | * if dynamically initializing the structure, applications are urged to | ||
2119 | * zero it out first, for example: | ||
2120 | * | ||
2121 | * @code | ||
2122 | * netio_input_config_t config; | ||
2123 | * memset(&config, 0, sizeof (config)); | ||
2124 | * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE; | ||
2125 | * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS; | ||
2126 | * config.queue_id = 0; | ||
2127 | * . | ||
2128 | * . | ||
2129 | * . | ||
2130 | * @endcode | ||
2131 | * | ||
2132 | * since that guarantees that any unused structure members, including | ||
2133 | * members which did not exist when the application was first developed, | ||
2134 | * will not have unexpected values. | ||
2135 | * | ||
2136 | * If statically initializing the structure, we strongly recommend use of | ||
2137 | * C99-style named initializers, for example: | ||
2138 | * | ||
2139 | * @code | ||
2140 | * netio_input_config_t config = { | ||
2141 | * .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, | ||
2142 | * .num_receive_packets = NETIO_MAX_RECEIVE_PKTS, | ||
2143 | * .queue_id = 0, | ||
2144 | * }, | ||
2145 | * @endcode | ||
2146 | * | ||
2147 | * instead of the old-style structure initialization: | ||
2148 | * | ||
2149 | * @code | ||
2150 | * // Bad example! Currently equivalent to the above, but don't do this. | ||
2151 | * netio_input_config_t config = { | ||
2152 | * NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0 | ||
2153 | * }, | ||
2154 | * @endcode | ||
2155 | * | ||
2156 | * since the C99 style requires no changes to the code if elements of the | ||
2157 | * config structure are rearranged. (It also makes the initialization much | ||
2158 | * easier to understand.) | ||
2159 | * | ||
2160 | * Except for items which address a particular tile's transmit or receive | ||
2161 | * characteristics, such as the ::NETIO_RECV flag, applications are advised | ||
2162 | * to specify the same set of configuration data on all registrations. | ||
2163 | * This prevents differing results if multiple tiles happen to do their | ||
2164 | * registration operations in a different order on different invocations of | ||
2165 | * the application. This is particularly important for things like link | ||
2166 | * management flags, and buffer size and homing specifications. | ||
2167 | * | ||
2168 | * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO | ||
2169 | * buffer pool is automatically created and mapped into the application's | ||
2170 | * virtual address space at an address chosen by the operating system, | ||
2171 | * using the common memory (cmem) facility in the Tilera Multicore | ||
2172 | * Components library. The cmem facility allows multiple processes to gain | ||
2173 | * access to shared memory which is mapped into each process at an | ||
2174 | * identical virtual address. In order for this to work, the processes | ||
2175 | * must have a common ancestor, which must create the common memory using | ||
2176 | * tmc_cmem_init(). | ||
2177 | * | ||
2178 | * In programs using the iLib process creation API, or in programs which use | ||
2179 | * only one process (which include programs using the pthreads library), | ||
2180 | * tmc_cmem_init() is called automatically. All other applications | ||
2181 | * must call it explicitly, before any child processes which might call | ||
2182 | * netio_input_register() are created. | ||
2183 | */ | ||
2184 | typedef struct | ||
2185 | { | ||
2186 | /** Registration characteristics. | ||
2187 | |||
2188 | This value determines several characteristics of the registration; | ||
2189 | flags for different types of behavior are ORed together to make the | ||
2190 | final flag value. Generally applications should specify exactly | ||
2191 | one flag from each of the following categories: | ||
2192 | |||
2193 | - Whether the application will be receiving packets on this queue | ||
2194 | (::NETIO_RECV or ::NETIO_NO_RECV). | ||
2195 | |||
2196 | - Whether the application will be transmitting packets on this queue, | ||
2197 | and if so, whether it will request egress checksum calculation | ||
2198 | (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT). It is | ||
2199 | legal to call netio_get_buffer() without one of the XMIT flags, | ||
2200 | as long as ::NETIO_RECV is specified; in this case, the retrieved | ||
2201 | buffers must be passed to another tile for transmission. | ||
2202 | |||
2203 | - Whether the application expects any vendor-specific tags in | ||
2204 | its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM, | ||
2205 | or ::NETIO_TAG_MRVL). This must match the configuration of the | ||
2206 | target IPP. | ||
2207 | |||
2208 | To accommodate applications written to previous versions of the NetIO | ||
2209 | interface, none of the flags above are currently required; if omitted, | ||
2210 | NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM | | ||
2211 | ::NETIO_TAG_NONE were used. However, explicit specification of | ||
2212 | the relevant flags allows NetIO to do a better job of resource | ||
2213 | allocation, allows earlier detection of certain configuration errors, | ||
2214 | and may enable advanced features or higher performance in the future, | ||
2215 | so their use is strongly recommended. | ||
2216 | |||
2217 | Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT | ||
2218 | is a special case, intended primarily for use by programs which | ||
2219 | retrieve network statistics or do link management operations. | ||
2220 | When these flags are both specified, the resulting queue may not | ||
2221 | be used with NetIO routines other than netio_get(), netio_set(), | ||
2222 | and netio_input_unregister(). See @ref link for more information | ||
2223 | on link management. | ||
2224 | |||
2225 | Other flags are optional; their use is described below. | ||
2226 | */ | ||
2227 | int flags; | ||
2228 | |||
2229 | /** Interface name. This is a string which identifies the specific | ||
2230 | Ethernet controller hardware to be used. The format of the string | ||
2231 | is a device type and a device index, separated by a slash; so, | ||
2232 | the first 10 Gigabit Ethernet controller is named "xgbe/0", while | ||
2233 | the second 10/100/1000 Megabit Ethernet controller is named "gbe/1". | ||
2234 | */ | ||
2235 | const char* interface; | ||
2236 | |||
2237 | /** Receive packet queue size. This specifies the maximum number | ||
2238 | of ingress packets that can be received on this queue without | ||
2239 | being retrieved by @ref netio_get_packet(). If the IPP's distribution | ||
2240 | algorithm calls for a packet to be sent to this queue, and this | ||
2241 | number of packets are already pending there, the new packet | ||
2242 | will either be discarded, or sent to another tile registered | ||
2243 | for the same queue_id (see @ref drops). This value must | ||
2244 | be at least ::NETIO_MIN_RECEIVE_PKTS, can always be at least | ||
2245 | ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain | ||
2246 | interfaces. | ||
2247 | */ | ||
2248 | int num_receive_packets; | ||
2249 | |||
2250 | /** The queue ID being requested. Legal values for this range from 0 | ||
2251 | to ::NETIO_MAX_QUEUE_ID, inclusive. ::NETIO_MAX_QUEUE_ID is always | ||
2252 | greater than or equal to the number of tiles; this allows one queue | ||
2253 | for each tile, plus at least one additional queue. Some applications | ||
2254 | may wish to use the additional queue as a destination for unwanted | ||
2255 | packets, since packets delivered to queues for which no tiles have | ||
2256 | registered are discarded. | ||
2257 | */ | ||
2258 | unsigned int queue_id; | ||
2259 | |||
2260 | /** Maximum number of small send buffers to be held in the local empty | ||
2261 | buffer cache. This specifies the size of the area which holds | ||
2262 | empty small egress buffers requested from the IPP but not yet | ||
2263 | retrieved via @ref netio_get_buffer(). This value must be greater | ||
2264 | than zero if the application will ever use @ref netio_get_buffer() | ||
2265 | to allocate empty small egress buffers; it may be no larger than | ||
2266 | ::NETIO_MAX_SEND_BUFFERS. See @ref epp for more details on empty | ||
2267 | buffer caching. | ||
2268 | */ | ||
2269 | int num_send_buffers_small_total; | ||
2270 | |||
2271 | /** Number of small send buffers to be preallocated at registration. | ||
2272 | If this value is nonzero, the specified number of empty small egress | ||
2273 | buffers will be requested from the IPP during the netio_input_register | ||
2274 | operation; this may speed the execution of @ref netio_get_buffer(). | ||
2275 | This may be no larger than @ref num_send_buffers_small_total. See @ref | ||
2276 | epp for more details on empty buffer caching. | ||
2277 | */ | ||
2278 | int num_send_buffers_small_prealloc; | ||
2279 | |||
2280 | /** Maximum number of large send buffers to be held in the local empty | ||
2281 | buffer cache. This specifies the size of the area which holds empty | ||
2282 | large egress buffers requested from the IPP but not yet retrieved via | ||
2283 | @ref netio_get_buffer(). This value must be greater than zero if the | ||
2284 | application will ever use @ref netio_get_buffer() to allocate empty | ||
2285 | large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. | ||
2286 | See @ref epp for more details on empty buffer caching. | ||
2287 | */ | ||
2288 | int num_send_buffers_large_total; | ||
2289 | |||
2290 | /** Number of large send buffers to be preallocated at registration. | ||
2291 | If this value is nonzero, the specified number of empty large egress | ||
2292 | buffers will be requested from the IPP during the netio_input_register | ||
2293 | operation; this may speed the execution of @ref netio_get_buffer(). | ||
2294 | This may be no larger than @ref num_send_buffers_large_total. See @ref | ||
2295 | epp for more details on empty buffer caching. | ||
2296 | */ | ||
2297 | int num_send_buffers_large_prealloc; | ||
2298 | |||
2299 | /** Maximum number of jumbo send buffers to be held in the local empty | ||
2300 | buffer cache. This specifies the size of the area which holds empty | ||
2301 | jumbo egress buffers requested from the IPP but not yet retrieved via | ||
2302 | @ref netio_get_buffer(). This value must be greater than zero if the | ||
2303 | application will ever use @ref netio_get_buffer() to allocate empty | ||
2304 | jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. | ||
2305 | See @ref epp for more details on empty buffer caching. | ||
2306 | */ | ||
2307 | int num_send_buffers_jumbo_total; | ||
2308 | |||
2309 | /** Number of jumbo send buffers to be preallocated at registration. | ||
2310 | If this value is nonzero, the specified number of empty jumbo egress | ||
2311 | buffers will be requested from the IPP during the netio_input_register | ||
2312 | operation; this may speed the execution of @ref netio_get_buffer(). | ||
2313 | This may be no larger than @ref num_send_buffers_jumbo_total. See @ref | ||
2314 | epp for more details on empty buffer caching. | ||
2315 | */ | ||
2316 | int num_send_buffers_jumbo_prealloc; | ||
2317 | |||
2318 | /** Total packet buffer size. This determines the total size, in bytes, | ||
2319 | of the NetIO buffer pool. Note that the maximum number of available | ||
2320 | buffers of each size is determined during hypervisor configuration | ||
2321 | (see the <em>System Programmer's Guide</em> for details); this just | ||
2322 | influences how much host memory is allocated for those buffers. | ||
2323 | |||
2324 | The buffer pool is allocated from common memory, which will be | ||
2325 | automatically initialized if needed. If your buffer pool is larger | ||
2326 | than 240 MB, you might need to explicitly call @c tmc_cmem_init(), | ||
2327 | as described in the Application Libraries Reference Manual (UG227). | ||
2328 | |||
2329 | Packet buffers are currently allocated in chunks of 16 MB; this | ||
2330 | value will be rounded up to the next larger multiple of 16 MB. | ||
2331 | If this value is zero, a default of 32 MB will be used; this was | ||
2332 | the value used by previous versions of NetIO. Note that taking this | ||
2333 | default also affects the placement of buffers on Linux NUMA nodes. | ||
2334 | See @ref buffer_node_weights for an explanation of buffer placement. | ||
2335 | |||
2336 | In order to successfully allocate packet buffers, Linux must have | ||
2337 | available huge pages on the relevant Linux NUMA nodes. See the | ||
2338 | <em>System Programmer's Guide</em> for information on configuring | ||
2339 | huge page support in Linux. | ||
2340 | */ | ||
2341 | uint64_t total_buffer_size; | ||
2342 | |||
2343 | /** Buffer placement weighting factors. | ||
2344 | |||
2345 | This array specifies the relative amount of buffering to place | ||
2346 | on each of the available Linux NUMA nodes. This array is | ||
2347 | indexed by the NUMA node, and the values in the array are | ||
2348 | proportional to the amount of buffer space to allocate on that | ||
2349 | node. | ||
2350 | |||
2351 | If memory striping is enabled in the Hypervisor, then there is | ||
2352 | only one logical NUMA node (node 0). In that case, NetIO will by | ||
2353 | default ignore the suggested buffer node weights, and buffers | ||
2354 | will be striped across the physical memory controllers. See | ||
2355 | UG209 System Programmer's Guide for a description of the | ||
2356 | hypervisor option that controls memory striping. | ||
2357 | |||
2358 | If memory striping is disabled, then there are up to four NUMA | ||
2359 | nodes, corresponding to the four DDRAM controllers in the TILE | ||
2360 | processor architecture. See UG100 Tile Processor Architecture | ||
2361 | Overview for a diagram showing the location of each of the DDRAM | ||
2362 | controllers relative to the tile array. | ||
2363 | |||
2364 | For instance, if memory striping is disabled, the following | ||
2365 | configuration strucure: | ||
2366 | |||
2367 | @code | ||
2368 | netio_input_config_t config = { | ||
2369 | . | ||
2370 | . | ||
2371 | . | ||
2372 | .total_buffer_size = 4 * 16 * 1024 * 1024; | ||
2373 | .buffer_node_weights = { 1, 0, 1, 0 }, | ||
2374 | }, | ||
2375 | @endcode | ||
2376 | |||
2377 | would result in 32 MB of buffers being placed on controller 0, and | ||
2378 | 32 MB on controller 2. (Since buffers are allocated in units of | ||
2379 | 16 MB, some sets of weights will not be able to be matched exactly.) | ||
2380 | |||
2381 | For the weights to be effective, @ref total_buffer_size must be | ||
2382 | nonzero. If @ref total_buffer_size is zero, causing the default | ||
2383 | 32 MB of buffer space to be used, then any specified weights will | ||
2384 | be ignored, and buffers will positioned as they were in previous | ||
2385 | versions of NetIO: | ||
2386 | |||
2387 | - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1, | ||
2388 | and the other 16 MB will be placed on controller 2. | ||
2389 | |||
2390 | - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2, | ||
2391 | and the other 16 MB will be placed on controller 3. | ||
2392 | |||
2393 | If @ref total_buffer_size is nonzero, but all weights are zero, | ||
2394 | then all buffer space will be allocated on Linux NUMA node zero. | ||
2395 | |||
2396 | By default, the specified buffer placement is treated as a hint; | ||
2397 | if sufficient free memory is not available on the specified | ||
2398 | controllers, the buffers will be allocated elsewhere. However, | ||
2399 | if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a | ||
2400 | failure to allocate buffer space exactly as requested will cause the | ||
2401 | registration operation to fail with an error of ::NETIO_CANNOT_HOME. | ||
2402 | |||
2403 | Note that maximal network performance cannot be achieved with | ||
2404 | only one memory controller. | ||
2405 | */ | ||
2406 | uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS]; | ||
2407 | |||
2408 | /** Fixed virtual address for packet buffers. Only valid when | ||
2409 | ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the | ||
2410 | description of that flag for details. | ||
2411 | */ | ||
2412 | void* fixed_buffer_va; | ||
2413 | |||
2414 | /** | ||
2415 | Maximum number of outstanding send packet requests. This value is | ||
2416 | only relevant when an EPP is in use; it determines the number of | ||
2417 | slots in the EPP's outgoing packet queue which this tile is allowed | ||
2418 | to consume, and thus the number of packets which may be sent before | ||
2419 | the sending tile must wait for an acknowledgment from the EPP. | ||
2420 | Modifying this value is generally only helpful when using @ref | ||
2421 | netio_send_packet_vector(), where it can help improve performance by | ||
2422 | allowing a single vector send operation to process more packets. | ||
2423 | Typically it is not specified, and the default, which divides the | ||
2424 | outgoing packet slots evenly between all tiles on the chip, is used. | ||
2425 | |||
2426 | If a registration asks for more outgoing packet queue slots than are | ||
2427 | available, ::NETIO_TOOMANY_XMIT will be returned. The total number | ||
2428 | of packet queue slots which are available for all tiles for each EPP | ||
2429 | is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING. | ||
2430 | |||
2431 | |||
2432 | This value is ignored if ::NETIO_XMIT is not specified in flags. | ||
2433 | If you want to specify a large value here for a specific tile, you are | ||
2434 | advised to specify NETIO_NO_XMIT on other, non-transmitting tiles so | ||
2435 | that they do not consume a default number of packet slots. Any tile | ||
2436 | transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING | ||
2437 | slots allocated to it; values less than that will be silently | ||
2438 | increased by the NetIO library. | ||
2439 | */ | ||
2440 | int num_sends_outstanding; | ||
2441 | } | ||
2442 | netio_input_config_t; | ||
2443 | |||
2444 | |||
2445 | /** Registration flags; used in the @ref netio_input_config_t structure. | ||
2446 | * @addtogroup setup | ||
2447 | */ | ||
2448 | /** @{ */ | ||
2449 | |||
2450 | /** Fail a registration request if we can't put packet buffers | ||
2451 | on the specified memory controllers. */ | ||
2452 | #define NETIO_STRICT_HOMING 0x00000002 | ||
2453 | |||
2454 | /** This application expects no tags on its L2 headers. */ | ||
2455 | #define NETIO_TAG_NONE 0x00000004 | ||
2456 | |||
2457 | /** This application expects Marvell extended tags on its L2 headers. */ | ||
2458 | #define NETIO_TAG_MRVL 0x00000008 | ||
2459 | |||
2460 | /** This application expects Broadcom tags on its L2 headers. */ | ||
2461 | #define NETIO_TAG_BRCM 0x00000010 | ||
2462 | |||
2463 | /** This registration may call routines which receive packets. */ | ||
2464 | #define NETIO_RECV 0x00000020 | ||
2465 | |||
2466 | /** This registration may not call routines which receive packets. */ | ||
2467 | #define NETIO_NO_RECV 0x00000040 | ||
2468 | |||
2469 | /** This registration may call routines which transmit packets. */ | ||
2470 | #define NETIO_XMIT 0x00000080 | ||
2471 | |||
2472 | /** This registration may call routines which transmit packets with | ||
2473 | checksum acceleration. */ | ||
2474 | #define NETIO_XMIT_CSUM 0x00000100 | ||
2475 | |||
2476 | /** This registration may not call routines which transmit packets. */ | ||
2477 | #define NETIO_NO_XMIT 0x00000200 | ||
2478 | |||
2479 | /** This registration wants NetIO buffers mapped at an application-specified | ||
2480 | virtual address. | ||
2481 | |||
2482 | NetIO buffers are by default created by the TMC common memory facility, | ||
2483 | which must be configured by a common ancestor of all processes sharing | ||
2484 | a network interface. When this flag is specified, NetIO buffers are | ||
2485 | instead mapped at an address chosen by the application (and specified | ||
2486 | in @ref netio_input_config_t::fixed_buffer_va). This allows multiple | ||
2487 | unrelated but cooperating processes to share a NetIO interface. | ||
2488 | All processes sharing the same interface must specify this flag, | ||
2489 | and all must specify the same fixed virtual address. | ||
2490 | |||
2491 | @ref netio_input_config_t::fixed_buffer_va must be a | ||
2492 | multiple of 16 MB, and the packet buffers will occupy @ref | ||
2493 | netio_input_config_t::total_buffer_size bytes of virtual address | ||
2494 | space, beginning at that address. If any of those virtual addresses | ||
2495 | are currently occupied by other memory objects, like application or | ||
2496 | shared library code or data, @ref netio_input_register() will return | ||
2497 | ::NETIO_FAULT. While it is impossible to provide a fixed_buffer_va | ||
2498 | which will work for all applications, a good first guess might be to | ||
2499 | use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size. | ||
2500 | If that fails, it might be helpful to consult the running application's | ||
2501 | virtual address description file (/proc/<em>pid</em>/maps) to see | ||
2502 | which regions of virtual address space are available. | ||
2503 | */ | ||
2504 | #define NETIO_FIXED_BUFFER_VA 0x00000400 | ||
2505 | |||
2506 | /** This registration call will not complete unless the network link | ||
2507 | is up. The process will wait several seconds for this to happen (the | ||
2508 | precise interval is link-dependent), but if the link does not come up, | ||
2509 | ::NETIO_LINK_DOWN will be returned. This flag is the default if | ||
2510 | ::NETIO_NOREQUIRE_LINK_UP is not specified. Note that this flag by | ||
2511 | itself does not request that the link be brought up; that can be done | ||
2512 | with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the | ||
2513 | latter is the default if no NETIO_AUTO_LINK_xxx flags are specified), | ||
2514 | or by explicitly setting the link's desired state via netio_set(). | ||
2515 | If the link is not brought up by one of those methods, and this flag | ||
2516 | is specified, the registration operation will return ::NETIO_LINK_DOWN. | ||
2517 | This flag is ignored if it is specified along with ::NETIO_NO_XMIT and | ||
2518 | ::NETIO_NO_RECV. See @ref link for more information on link | ||
2519 | management. | ||
2520 | */ | ||
2521 | #define NETIO_REQUIRE_LINK_UP 0x00000800 | ||
2522 | |||
2523 | /** This registration call will complete even if the network link is not up. | ||
2524 | Whenever the link is not up, packets will not be sent or received: | ||
2525 | netio_get_packet() will return ::NETIO_NOPKT once all queued packets | ||
2526 | have been drained, and netio_send_packet() and similar routines will | ||
2527 | return NETIO_QUEUE_FULL once the outgoing packet queue in the EPP | ||
2528 | or the I/O shim is full. See @ref link for more information on link | ||
2529 | management. | ||
2530 | */ | ||
2531 | #define NETIO_NOREQUIRE_LINK_UP 0x00001000 | ||
2532 | |||
2533 | #ifndef __DOXYGEN__ | ||
2534 | /* | ||
2535 | * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags, | ||
2536 | * but should not be used directly by applications, and are thus not | ||
2537 | * documented. | ||
2538 | */ | ||
2539 | #define _NETIO_AUTO_UP 0x00002000 | ||
2540 | #define _NETIO_AUTO_DN 0x00004000 | ||
2541 | #define _NETIO_AUTO_PRESENT 0x00008000 | ||
2542 | #endif | ||
2543 | |||
2544 | /** Set the desired state of the link to up, allowing any speeds which are | ||
2545 | supported by the link hardware, as part of this registration operation. | ||
2546 | Do not take down the link automatically. This is the default if | ||
2547 | no other NETIO_AUTO_LINK_xxx flags are specified. This flag is ignored | ||
2548 | if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. | ||
2549 | See @ref link for more information on link management. | ||
2550 | */ | ||
2551 | #define NETIO_AUTO_LINK_UP (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP) | ||
2552 | |||
2553 | /** Set the desired state of the link to up, allowing any speeds which are | ||
2554 | supported by the link hardware, as part of this registration operation. | ||
2555 | Set the desired state of the link to down the next time no tiles are | ||
2556 | registered for packet reception or transmission. This flag is ignored | ||
2557 | if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. | ||
2558 | See @ref link for more information on link management. | ||
2559 | */ | ||
2560 | #define NETIO_AUTO_LINK_UPDN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \ | ||
2561 | _NETIO_AUTO_DN) | ||
2562 | |||
2563 | /** Set the desired state of the link to down the next time no tiles are | ||
2564 | registered for packet reception or transmission. This flag is ignored | ||
2565 | if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. | ||
2566 | See @ref link for more information on link management. | ||
2567 | */ | ||
2568 | #define NETIO_AUTO_LINK_DN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN) | ||
2569 | |||
2570 | /** Do not bring up the link automatically as part of this registration | ||
2571 | operation. Do not take down the link automatically. This flag | ||
2572 | is ignored if it is specified along with ::NETIO_NO_XMIT and | ||
2573 | ::NETIO_NO_RECV. See @ref link for more information on link management. | ||
2574 | */ | ||
2575 | #define NETIO_AUTO_LINK_NONE _NETIO_AUTO_PRESENT | ||
2576 | |||
2577 | |||
2578 | /** Minimum number of receive packets. */ | ||
2579 | #define NETIO_MIN_RECEIVE_PKTS 16 | ||
2580 | |||
2581 | /** Lower bound on the maximum number of receive packets; may be higher | ||
2582 | than this on some interfaces. */ | ||
2583 | #define NETIO_MAX_RECEIVE_PKTS 128 | ||
2584 | |||
2585 | /** Maximum number of send buffers, per packet size. */ | ||
2586 | #define NETIO_MAX_SEND_BUFFERS 16 | ||
2587 | |||
2588 | /** Number of EPP queue slots, and thus outstanding sends, per EPP. */ | ||
2589 | #define NETIO_TOTAL_SENDS_OUTSTANDING 2015 | ||
2590 | |||
2591 | /** Minimum number of EPP queue slots, and thus outstanding sends, per | ||
2592 | * transmitting tile. */ | ||
2593 | #define NETIO_MIN_SENDS_OUTSTANDING 16 | ||
2594 | |||
2595 | |||
2596 | /**@}*/ | ||
2597 | |||
2598 | #ifndef __DOXYGEN__ | ||
2599 | |||
2600 | /** | ||
2601 | * An object for providing Ethernet packets to a process. | ||
2602 | */ | ||
2603 | struct __netio_queue_impl_t; | ||
2604 | |||
2605 | /** | ||
2606 | * An object for managing the user end of a NetIO queue. | ||
2607 | */ | ||
2608 | struct __netio_queue_user_impl_t; | ||
2609 | |||
2610 | #endif /* !__DOXYGEN__ */ | ||
2611 | |||
2612 | |||
2613 | /** A netio_queue_t describes a NetIO communications endpoint. | ||
2614 | * @ingroup setup | ||
2615 | */ | ||
2616 | typedef struct | ||
2617 | { | ||
2618 | #ifdef __DOXYGEN__ | ||
2619 | uint8_t opaque[8]; /**< This is an opaque structure. */ | ||
2620 | #else | ||
2621 | struct __netio_queue_impl_t* __system_part; /**< The system part. */ | ||
2622 | struct __netio_queue_user_impl_t* __user_part; /**< The user part. */ | ||
2623 | #ifdef _NETIO_PTHREAD | ||
2624 | _netio_percpu_mutex_t lock; /**< Queue lock. */ | ||
2625 | #endif | ||
2626 | #endif | ||
2627 | } | ||
2628 | netio_queue_t; | ||
2629 | |||
2630 | |||
2631 | /** | ||
2632 | * @brief Packet send context. | ||
2633 | * | ||
2634 | * @ingroup egress | ||
2635 | * | ||
2636 | * Packet send context for use with netio_send_packet_prepare and _commit. | ||
2637 | */ | ||
2638 | typedef struct | ||
2639 | { | ||
2640 | #ifdef __DOXYGEN__ | ||
2641 | uint8_t opaque[44]; /**< This is an opaque structure. */ | ||
2642 | #else | ||
2643 | uint8_t flags; /**< Defined below */ | ||
2644 | uint8_t datalen; /**< Number of valid words pointed to by data. */ | ||
2645 | uint32_t request[9]; /**< Request to be sent to the EPP or shim. Note | ||
2646 | that this is smaller than the 11-word maximum | ||
2647 | request size, since some constant values are | ||
2648 | not saved in the context. */ | ||
2649 | uint32_t *data; /**< Data to be sent to the EPP or shim via IDN. */ | ||
2650 | #endif | ||
2651 | } | ||
2652 | netio_send_pkt_context_t; | ||
2653 | |||
2654 | |||
2655 | #ifndef __DOXYGEN__ | ||
2656 | #define SEND_PKT_CTX_USE_EPP 1 /**< We're sending to an EPP. */ | ||
2657 | #define SEND_PKT_CTX_SEND_CSUM 2 /**< Request includes a checksum. */ | ||
2658 | #endif | ||
2659 | |||
2660 | /** | ||
2661 | * @brief Packet vector entry. | ||
2662 | * | ||
2663 | * @ingroup egress | ||
2664 | * | ||
2665 | * This data structure is used with netio_send_packet_vector() to send multiple | ||
2666 | * packets with one NetIO call. The structure should be initialized by | ||
2667 | * calling netio_pkt_vector_set(), rather than by setting the fields | ||
2668 | * directly. | ||
2669 | * | ||
2670 | * This structure is guaranteed to be a power of two in size, no | ||
2671 | * bigger than one L2 cache line, and to be aligned modulo its size. | ||
2672 | */ | ||
2673 | typedef struct | ||
2674 | #ifndef __DOXYGEN__ | ||
2675 | __attribute__((aligned(8))) | ||
2676 | #endif | ||
2677 | { | ||
2678 | /** Reserved for use by the user application. When initialized with | ||
2679 | * the netio_set_pkt_vector_entry() function, this field is guaranteed | ||
2680 | * to be visible to readers only after all other fields are already | ||
2681 | * visible. This way it can be used as a valid flag or generation | ||
2682 | * counter. */ | ||
2683 | uint8_t user_data; | ||
2684 | |||
2685 | /* Structure members below this point should not be accessed directly by | ||
2686 | * applications, as they may change in the future. */ | ||
2687 | |||
2688 | /** Low 8 bits of the packet address to send. The high bits are | ||
2689 | * acquired from the 'handle' field. */ | ||
2690 | uint8_t buffer_address_low; | ||
2691 | |||
2692 | /** Number of bytes to transmit. */ | ||
2693 | uint16_t size; | ||
2694 | |||
2695 | /** The raw handle from a netio_pkt_t. If this is NETIO_PKT_HANDLE_NONE, | ||
2696 | * this vector entry will be skipped and no packet will be transmitted. */ | ||
2697 | netio_pkt_handle_t handle; | ||
2698 | } | ||
2699 | netio_pkt_vector_entry_t; | ||
2700 | |||
2701 | |||
2702 | /** | ||
2703 | * @brief Initialize fields in a packet vector entry. | ||
2704 | * | ||
2705 | * @ingroup egress | ||
2706 | * | ||
2707 | * @param[out] v Pointer to the vector entry to be initialized. | ||
2708 | * @param[in] pkt Packet to be transmitted when the vector entry is passed to | ||
2709 | * netio_send_packet_vector(). Note that the packet's attributes | ||
2710 | * (e.g., its L2 offset and length) are captured at the time this | ||
2711 | * routine is called; subsequent changes in those attributes will not | ||
2712 | * be reflected in the packet which is actually transmitted. | ||
2713 | * Changes in the packet's contents, however, will be so reflected. | ||
2714 | * If this is NULL, no packet will be transmitted. | ||
2715 | * @param[in] user_data User data to be set in the vector entry. | ||
2716 | * This function guarantees that the "user_data" field will become | ||
2717 | * visible to a reader only after all other fields have become visible. | ||
2718 | * This allows a structure in a ring buffer to be written and read | ||
2719 | * by a polling reader without any locks or other synchronization. | ||
2720 | */ | ||
2721 | static __inline void | ||
2722 | netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt, | ||
2723 | uint8_t user_data) | ||
2724 | { | ||
2725 | if (pkt) | ||
2726 | { | ||
2727 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
2728 | { | ||
2729 | netio_pkt_minimal_metadata_t* mmd = | ||
2730 | (netio_pkt_minimal_metadata_t*) &pkt->__metadata; | ||
2731 | v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF; | ||
2732 | v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt); | ||
2733 | } | ||
2734 | else | ||
2735 | { | ||
2736 | netio_pkt_metadata_t* mda = &pkt->__metadata; | ||
2737 | v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF; | ||
2738 | v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt); | ||
2739 | } | ||
2740 | v->handle.word = pkt->__packet.word; | ||
2741 | } | ||
2742 | else | ||
2743 | { | ||
2744 | v->handle.word = 0; /* Set handle to NETIO_PKT_HANDLE_NONE. */ | ||
2745 | } | ||
2746 | |||
2747 | __asm__("" : : : "memory"); | ||
2748 | |||
2749 | v->user_data = user_data; | ||
2750 | } | ||
2751 | |||
2752 | |||
2753 | /** | ||
2754 | * Flags and structures for @ref netio_get() and @ref netio_set(). | ||
2755 | * @ingroup config | ||
2756 | */ | ||
2757 | |||
2758 | /** @{ */ | ||
2759 | /** Parameter class; addr is a NETIO_PARAM_xxx value. */ | ||
2760 | #define NETIO_PARAM 0 | ||
2761 | /** Interface MAC address. This address is only valid with @ref netio_get(). | ||
2762 | * The value is a 6-byte MAC address. Depending upon the overall system | ||
2763 | * design, a MAC address may or may not be available for each interface. */ | ||
2764 | #define NETIO_PARAM_MAC 0 | ||
2765 | |||
2766 | /** Determine whether to suspend output on the receipt of pause frames. | ||
2767 | * If the value is nonzero, the I/O shim will suspend output when a pause | ||
2768 | * frame is received. If the value is zero, pause frames will be ignored. */ | ||
2769 | #define NETIO_PARAM_PAUSE_IN 1 | ||
2770 | |||
2771 | /** Determine whether to send pause frames if the I/O shim packet FIFOs are | ||
2772 | * nearly full. If the value is zero, pause frames are not sent. If | ||
2773 | * the value is nonzero, it is the delay value which will be sent in any | ||
2774 | * pause frames which are output, in units of 512 bit times. */ | ||
2775 | #define NETIO_PARAM_PAUSE_OUT 2 | ||
2776 | |||
2777 | /** Jumbo frame support. The value is a 4-byte integer. If the value is | ||
2778 | * nonzero, the MAC will accept frames of up to 10240 bytes. If the value | ||
2779 | * is zero, the MAC will only accept frames of up to 1544 bytes. */ | ||
2780 | #define NETIO_PARAM_JUMBO 3 | ||
2781 | |||
2782 | /** I/O shim's overflow statistics register. The value is two 16-bit integers. | ||
2783 | * The first 16-bit value (or the low 16 bits, if the value is treated as a | ||
2784 | * 32-bit number) is the count of packets which were completely dropped and | ||
2785 | * not delivered by the shim. The second 16-bit value (or the high 16 bits, | ||
2786 | * if the value is treated as a 32-bit number) is the count of packets | ||
2787 | * which were truncated and thus only partially delivered by the shim. This | ||
2788 | * register is automatically reset to zero after it has been read. | ||
2789 | */ | ||
2790 | #define NETIO_PARAM_OVERFLOW 4 | ||
2791 | |||
2792 | /** IPP statistics. This address is only valid with @ref netio_get(). The | ||
2793 | * value is a netio_stat_t structure. Unlike the I/O shim statistics, the | ||
2794 | * IPP statistics are not all reset to zero on read; see the description | ||
2795 | * of the netio_stat_t for details. */ | ||
2796 | #define NETIO_PARAM_STAT 5 | ||
2797 | |||
2798 | /** Possible link state. The value is a combination of "NETIO_LINK_xxx" | ||
2799 | * flags. With @ref netio_get(), this will indicate which flags are | ||
2800 | * actually supported by the hardware. | ||
2801 | * | ||
2802 | * For historical reasons, specifying this value to netio_set() will have | ||
2803 | * the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is | ||
2804 | * discouraged. | ||
2805 | */ | ||
2806 | #define NETIO_PARAM_LINK_POSSIBLE_STATE 6 | ||
2807 | |||
2808 | /** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags. | ||
2809 | * With @ref netio_set(), this will attempt to immediately bring up the | ||
2810 | * link using whichever of the requested flags are supported by the | ||
2811 | * hardware, or take down the link if the flags are zero; if this is | ||
2812 | * not possible, an error will be returned. Many programs will want | ||
2813 | * to use ::NETIO_PARAM_LINK_DESIRED_STATE instead. | ||
2814 | * | ||
2815 | * For historical reasons, specifying this value to netio_get() will | ||
2816 | * have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE, | ||
2817 | * but this usage is discouraged. | ||
2818 | */ | ||
2819 | #define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE | ||
2820 | |||
2821 | /** Current link state. This address is only valid with @ref netio_get(). | ||
2822 | * The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together. | ||
2823 | * If the link is down, the value ANDed with NETIO_LINK_SPEED will be | ||
2824 | * zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will | ||
2825 | * result in exactly one of the NETIO_LINK_xxx values, indicating the | ||
2826 | * current speed. */ | ||
2827 | #define NETIO_PARAM_LINK_CURRENT_STATE 7 | ||
2828 | |||
2829 | /** Variant symbol for current state, retained for compatibility with | ||
2830 | * pre-MDE-2.1 programs. */ | ||
2831 | #define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE | ||
2832 | |||
2833 | /** Packet Coherence protocol. This address is only valid with @ref netio_get(). | ||
2834 | * The value is nonzero if the interface is configured for cache-coherent DMA. | ||
2835 | */ | ||
2836 | #define NETIO_PARAM_COHERENT 8 | ||
2837 | |||
2838 | /** Desired link state. The value is a conbination of "NETIO_LINK_xxx" | ||
2839 | * flags, which specify the desired state for the link. With @ref | ||
2840 | * netio_set(), this will, in the background, attempt to bring up the link | ||
2841 | * using whichever of the requested flags are reasonable, or take down the | ||
2842 | * link if the flags are zero. The actual link up or down operation may | ||
2843 | * happen after this call completes. If the link state changes in the | ||
2844 | * future, the system will continue to try to get back to the desired link | ||
2845 | * state; for instance, if the link is brought up successfully, and then | ||
2846 | * the network cable is disconnected, the link will go down. However, the | ||
2847 | * desired state of the link is still up, so if the cable is reconnected, | ||
2848 | * the link will be brought up again. | ||
2849 | * | ||
2850 | * With @ref netio_get(), this will indicate the desired state for the | ||
2851 | * link, as set with a previous netio_set() call, or implicitly by a | ||
2852 | * netio_input_register() or netio_input_unregister() operation. This may | ||
2853 | * not reflect the current state of the link; to get that, use | ||
2854 | * ::NETIO_PARAM_LINK_CURRENT_STATE. */ | ||
2855 | #define NETIO_PARAM_LINK_DESIRED_STATE 9 | ||
2856 | |||
2857 | /** NetIO statistics structure. Retrieved using the ::NETIO_PARAM_STAT | ||
2858 | * address passed to @ref netio_get(). */ | ||
2859 | typedef struct | ||
2860 | { | ||
2861 | /** Number of packets which have been received by the IPP and forwarded | ||
2862 | * to a tile's receive queue for processing. This value wraps at its | ||
2863 | * maximum, and is not cleared upon read. */ | ||
2864 | uint32_t packets_received; | ||
2865 | |||
2866 | /** Number of packets which have been dropped by the IPP, because they could | ||
2867 | * not be received, or could not be forwarded to a tile. The former happens | ||
2868 | * when the IPP does not have a free packet buffer of suitable size for an | ||
2869 | * incoming frame. The latter happens when all potential destination tiles | ||
2870 | * for a packet, as defined by the group, bucket, and queue configuration, | ||
2871 | * have full receive queues. This value wraps at its maximum, and is not | ||
2872 | * cleared upon read. */ | ||
2873 | uint32_t packets_dropped; | ||
2874 | |||
2875 | /* | ||
2876 | * Note: the #defines after each of the following four one-byte values | ||
2877 | * denote their location within the third word of the netio_stat_t. They | ||
2878 | * are intended for use only by the IPP implementation and are thus omitted | ||
2879 | * from the Doxygen output. | ||
2880 | */ | ||
2881 | |||
2882 | /** Number of packets dropped because no worker was able to accept a new | ||
2883 | * packet. This value saturates at its maximum, and is cleared upon | ||
2884 | * read. */ | ||
2885 | uint8_t drops_no_worker; | ||
2886 | #ifndef __DOXYGEN__ | ||
2887 | #define NETIO_STAT_DROPS_NO_WORKER 0 | ||
2888 | #endif | ||
2889 | |||
2890 | /** Number of packets dropped because no small buffers were available. | ||
2891 | * This value saturates at its maximum, and is cleared upon read. */ | ||
2892 | uint8_t drops_no_smallbuf; | ||
2893 | #ifndef __DOXYGEN__ | ||
2894 | #define NETIO_STAT_DROPS_NO_SMALLBUF 1 | ||
2895 | #endif | ||
2896 | |||
2897 | /** Number of packets dropped because no large buffers were available. | ||
2898 | * This value saturates at its maximum, and is cleared upon read. */ | ||
2899 | uint8_t drops_no_largebuf; | ||
2900 | #ifndef __DOXYGEN__ | ||
2901 | #define NETIO_STAT_DROPS_NO_LARGEBUF 2 | ||
2902 | #endif | ||
2903 | |||
2904 | /** Number of packets dropped because no jumbo buffers were available. | ||
2905 | * This value saturates at its maximum, and is cleared upon read. */ | ||
2906 | uint8_t drops_no_jumbobuf; | ||
2907 | #ifndef __DOXYGEN__ | ||
2908 | #define NETIO_STAT_DROPS_NO_JUMBOBUF 3 | ||
2909 | #endif | ||
2910 | } | ||
2911 | netio_stat_t; | ||
2912 | |||
2913 | |||
2914 | /** Link can run, should run, or is running at 10 Mbps. */ | ||
2915 | #define NETIO_LINK_10M 0x01 | ||
2916 | |||
2917 | /** Link can run, should run, or is running at 100 Mbps. */ | ||
2918 | #define NETIO_LINK_100M 0x02 | ||
2919 | |||
2920 | /** Link can run, should run, or is running at 1 Gbps. */ | ||
2921 | #define NETIO_LINK_1G 0x04 | ||
2922 | |||
2923 | /** Link can run, should run, or is running at 10 Gbps. */ | ||
2924 | #define NETIO_LINK_10G 0x08 | ||
2925 | |||
2926 | /** Link should run at the highest speed supported by the link and by | ||
2927 | * the device connected to the link. Only usable as a value for | ||
2928 | * the link's desired state; never returned as a value for the current | ||
2929 | * or possible states. */ | ||
2930 | #define NETIO_LINK_ANYSPEED 0x10 | ||
2931 | |||
2932 | /** All legal link speeds. */ | ||
2933 | #define NETIO_LINK_SPEED (NETIO_LINK_10M | \ | ||
2934 | NETIO_LINK_100M | \ | ||
2935 | NETIO_LINK_1G | \ | ||
2936 | NETIO_LINK_10G | \ | ||
2937 | NETIO_LINK_ANYSPEED) | ||
2938 | |||
2939 | |||
2940 | /** MAC register class. Addr is a register offset within the MAC. | ||
2941 | * Registers within the XGbE and GbE MACs are documented in the Tile | ||
2942 | * Processor I/O Device Guide (UG104). MAC registers start at address | ||
2943 | * 0x4000, and do not include the MAC_INTERFACE registers. */ | ||
2944 | #define NETIO_MAC 1 | ||
2945 | |||
2946 | /** MDIO register class (IEEE 802.3 clause 22 format). Addr is the "addr" | ||
2947 | * member of a netio_mdio_addr_t structure. */ | ||
2948 | #define NETIO_MDIO 2 | ||
2949 | |||
2950 | /** MDIO register class (IEEE 802.3 clause 45 format). Addr is the "addr" | ||
2951 | * member of a netio_mdio_addr_t structure. */ | ||
2952 | #define NETIO_MDIO_CLAUSE45 3 | ||
2953 | |||
2954 | /** NetIO MDIO address type. Retrieved or provided using the ::NETIO_MDIO | ||
2955 | * address passed to @ref netio_get() or @ref netio_set(). */ | ||
2956 | typedef union | ||
2957 | { | ||
2958 | struct | ||
2959 | { | ||
2960 | unsigned int reg:16; /**< MDIO register offset. For clause 22 access, | ||
2961 | must be less than 32. */ | ||
2962 | unsigned int phy:5; /**< Which MDIO PHY to access. */ | ||
2963 | unsigned int dev:5; /**< Which MDIO device to access within that PHY. | ||
2964 | Applicable for clause 45 access only; ignored | ||
2965 | for clause 22 access. */ | ||
2966 | } | ||
2967 | bits; /**< Container for bitfields. */ | ||
2968 | uint64_t addr; /**< Value to pass to @ref netio_get() or | ||
2969 | * @ref netio_set(). */ | ||
2970 | } | ||
2971 | netio_mdio_addr_t; | ||
2972 | |||
2973 | /** @} */ | ||
2974 | |||
2975 | #endif /* __NETIO_INTF_H__ */ | ||
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 78e1982cb6c9..0b9ce69b0ee5 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c | |||
@@ -988,8 +988,12 @@ static long __write_once initfree = 1; | |||
988 | /* Select whether to free (1) or mark unusable (0) the __init pages. */ | 988 | /* Select whether to free (1) or mark unusable (0) the __init pages. */ |
989 | static int __init set_initfree(char *str) | 989 | static int __init set_initfree(char *str) |
990 | { | 990 | { |
991 | strict_strtol(str, 0, &initfree); | 991 | long val; |
992 | pr_info("initfree: %s free init pages\n", initfree ? "will" : "won't"); | 992 | if (strict_strtol(str, 0, &val)) { |
993 | initfree = val; | ||
994 | pr_info("initfree: %s free init pages\n", | ||
995 | initfree ? "will" : "won't"); | ||
996 | } | ||
993 | return 1; | 997 | return 1; |
994 | } | 998 | } |
995 | __setup("initfree=", set_initfree); | 999 | __setup("initfree=", set_initfree); |
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index f6668cdaac85..43db398437b7 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig | |||
@@ -2945,6 +2945,18 @@ source "drivers/s390/net/Kconfig" | |||
2945 | 2945 | ||
2946 | source "drivers/net/caif/Kconfig" | 2946 | source "drivers/net/caif/Kconfig" |
2947 | 2947 | ||
2948 | config TILE_NET | ||
2949 | tristate "Tilera GBE/XGBE network driver support" | ||
2950 | depends on TILE | ||
2951 | default y | ||
2952 | select CRC32 | ||
2953 | help | ||
2954 | This is a standard Linux network device driver for the | ||
2955 | on-chip Tilera Gigabit Ethernet and XAUI interfaces. | ||
2956 | |||
2957 | To compile this driver as a module, choose M here: the module | ||
2958 | will be called tile_net. | ||
2959 | |||
2948 | config XEN_NETDEV_FRONTEND | 2960 | config XEN_NETDEV_FRONTEND |
2949 | tristate "Xen network device frontend driver" | 2961 | tristate "Xen network device frontend driver" |
2950 | depends on XEN | 2962 | depends on XEN |
diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 652fc6b98039..b90738d13994 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile | |||
@@ -301,3 +301,4 @@ obj-$(CONFIG_CAIF) += caif/ | |||
301 | 301 | ||
302 | obj-$(CONFIG_OCTEON_MGMT_ETHERNET) += octeon/ | 302 | obj-$(CONFIG_OCTEON_MGMT_ETHERNET) += octeon/ |
303 | obj-$(CONFIG_PCH_GBE) += pch_gbe/ | 303 | obj-$(CONFIG_PCH_GBE) += pch_gbe/ |
304 | obj-$(CONFIG_TILE_NET) += tile/ | ||
diff --git a/drivers/net/tile/Makefile b/drivers/net/tile/Makefile new file mode 100644 index 000000000000..f634f142cab4 --- /dev/null +++ b/drivers/net/tile/Makefile | |||
@@ -0,0 +1,10 @@ | |||
1 | # | ||
2 | # Makefile for the TILE on-chip networking support. | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_TILE_NET) += tile_net.o | ||
6 | ifdef CONFIG_TILEGX | ||
7 | tile_net-objs := tilegx.o mpipe.o iorpc_mpipe.o dma_queue.o | ||
8 | else | ||
9 | tile_net-objs := tilepro.o | ||
10 | endif | ||
diff --git a/drivers/net/tile/tilepro.c b/drivers/net/tile/tilepro.c new file mode 100644 index 000000000000..0e6bac5ec65b --- /dev/null +++ b/drivers/net/tile/tilepro.c | |||
@@ -0,0 +1,2406 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #include <linux/module.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/moduleparam.h> | ||
18 | #include <linux/sched.h> | ||
19 | #include <linux/kernel.h> /* printk() */ | ||
20 | #include <linux/slab.h> /* kmalloc() */ | ||
21 | #include <linux/errno.h> /* error codes */ | ||
22 | #include <linux/types.h> /* size_t */ | ||
23 | #include <linux/interrupt.h> | ||
24 | #include <linux/in.h> | ||
25 | #include <linux/netdevice.h> /* struct device, and other headers */ | ||
26 | #include <linux/etherdevice.h> /* eth_type_trans */ | ||
27 | #include <linux/skbuff.h> | ||
28 | #include <linux/ioctl.h> | ||
29 | #include <linux/cdev.h> | ||
30 | #include <linux/hugetlb.h> | ||
31 | #include <linux/in6.h> | ||
32 | #include <linux/timer.h> | ||
33 | #include <linux/io.h> | ||
34 | #include <asm/checksum.h> | ||
35 | #include <asm/homecache.h> | ||
36 | |||
37 | #include <hv/drv_xgbe_intf.h> | ||
38 | #include <hv/drv_xgbe_impl.h> | ||
39 | #include <hv/hypervisor.h> | ||
40 | #include <hv/netio_intf.h> | ||
41 | |||
42 | /* For TSO */ | ||
43 | #include <linux/ip.h> | ||
44 | #include <linux/tcp.h> | ||
45 | |||
46 | |||
47 | /* There is no singlethread_cpu, so schedule work on the current cpu. */ | ||
48 | #define singlethread_cpu -1 | ||
49 | |||
50 | |||
51 | /* | ||
52 | * First, "tile_net_init_module()" initializes all four "devices" which | ||
53 | * can be used by linux. | ||
54 | * | ||
55 | * Then, "ifconfig DEVICE up" calls "tile_net_open()", which analyzes | ||
56 | * the network cpus, then uses "tile_net_open_aux()" to initialize | ||
57 | * LIPP/LEPP, and then uses "tile_net_open_inner()" to register all | ||
58 | * the tiles, provide buffers to LIPP, allow ingress to start, and | ||
59 | * turn on hypervisor interrupt handling (and NAPI) on all tiles. | ||
60 | * | ||
61 | * If registration fails due to the link being down, then "retry_work" | ||
62 | * is used to keep calling "tile_net_open_inner()" until it succeeds. | ||
63 | * | ||
64 | * If "ifconfig DEVICE down" is called, it uses "tile_net_stop()" to | ||
65 | * stop egress, drain the LIPP buffers, unregister all the tiles, stop | ||
66 | * LIPP/LEPP, and wipe the LEPP queue. | ||
67 | * | ||
68 | * We start out with the ingress interrupt enabled on each CPU. When | ||
69 | * this interrupt fires, we disable it, and call "napi_schedule()". | ||
70 | * This will cause "tile_net_poll()" to be called, which will pull | ||
71 | * packets from the netio queue, filtering them out, or passing them | ||
72 | * to "netif_receive_skb()". If our budget is exhausted, we will | ||
73 | * return, knowing we will be called again later. Otherwise, we | ||
74 | * reenable the ingress interrupt, and call "napi_complete()". | ||
75 | * | ||
76 | * | ||
77 | * NOTE: The use of "native_driver" ensures that EPP exists, and that | ||
78 | * "epp_sendv" is legal, and that "LIPP" is being used. | ||
79 | * | ||
80 | * NOTE: Failing to free completions for an arbitrarily long time | ||
81 | * (which is defined to be illegal) does in fact cause bizarre | ||
82 | * problems. The "egress_timer" helps prevent this from happening. | ||
83 | * | ||
84 | * NOTE: The egress code can be interrupted by the interrupt handler. | ||
85 | */ | ||
86 | |||
87 | |||
88 | /* HACK: Allow use of "jumbo" packets. */ | ||
89 | /* This should be 1500 if "jumbo" is not set in LIPP. */ | ||
90 | /* This should be at most 10226 (10240 - 14) if "jumbo" is set in LIPP. */ | ||
91 | /* ISSUE: This has not been thoroughly tested (except at 1500). */ | ||
92 | #define TILE_NET_MTU 1500 | ||
93 | |||
94 | /* HACK: Define to support GSO. */ | ||
95 | /* ISSUE: This may actually hurt performance of the TCP blaster. */ | ||
96 | /* #define TILE_NET_GSO */ | ||
97 | |||
98 | /* Define this to collapse "duplicate" acks. */ | ||
99 | /* #define IGNORE_DUP_ACKS */ | ||
100 | |||
101 | /* HACK: Define this to verify incoming packets. */ | ||
102 | /* #define TILE_NET_VERIFY_INGRESS */ | ||
103 | |||
104 | /* Use 3000 to enable the Linux Traffic Control (QoS) layer, else 0. */ | ||
105 | #define TILE_NET_TX_QUEUE_LEN 0 | ||
106 | |||
107 | /* Define to dump packets (prints out the whole packet on tx and rx). */ | ||
108 | /* #define TILE_NET_DUMP_PACKETS */ | ||
109 | |||
110 | /* Define to enable debug spew (all PDEBUG's are enabled). */ | ||
111 | /* #define TILE_NET_DEBUG */ | ||
112 | |||
113 | |||
114 | /* Define to activate paranoia checks. */ | ||
115 | /* #define TILE_NET_PARANOIA */ | ||
116 | |||
117 | /* Default transmit lockup timeout period, in jiffies. */ | ||
118 | #define TILE_NET_TIMEOUT (5 * HZ) | ||
119 | |||
120 | /* Default retry interval for bringing up the NetIO interface, in jiffies. */ | ||
121 | #define TILE_NET_RETRY_INTERVAL (5 * HZ) | ||
122 | |||
123 | /* Number of ports (xgbe0, xgbe1, gbe0, gbe1). */ | ||
124 | #define TILE_NET_DEVS 4 | ||
125 | |||
126 | |||
127 | |||
128 | /* Paranoia. */ | ||
129 | #if NET_IP_ALIGN != LIPP_PACKET_PADDING | ||
130 | #error "NET_IP_ALIGN must match LIPP_PACKET_PADDING." | ||
131 | #endif | ||
132 | |||
133 | |||
134 | /* Debug print. */ | ||
135 | #ifdef TILE_NET_DEBUG | ||
136 | #define PDEBUG(fmt, args...) net_printk(fmt, ## args) | ||
137 | #else | ||
138 | #define PDEBUG(fmt, args...) | ||
139 | #endif | ||
140 | |||
141 | |||
142 | MODULE_AUTHOR("Tilera"); | ||
143 | MODULE_LICENSE("GPL"); | ||
144 | |||
145 | |||
146 | #define IS_MULTICAST(mac_addr) \ | ||
147 | (((u8 *)(mac_addr))[0] & 0x01) | ||
148 | |||
149 | #define IS_BROADCAST(mac_addr) \ | ||
150 | (((u16 *)(mac_addr))[0] == 0xffff) | ||
151 | |||
152 | |||
153 | /* | ||
154 | * Queue of incoming packets for a specific cpu and device. | ||
155 | * | ||
156 | * Includes a pointer to the "system" data, and the actual "user" data. | ||
157 | */ | ||
158 | struct tile_netio_queue { | ||
159 | netio_queue_impl_t *__system_part; | ||
160 | netio_queue_user_impl_t __user_part; | ||
161 | |||
162 | }; | ||
163 | |||
164 | |||
165 | /* | ||
166 | * Statistics counters for a specific cpu and device. | ||
167 | */ | ||
168 | struct tile_net_stats_t { | ||
169 | u32 rx_packets; | ||
170 | u32 rx_bytes; | ||
171 | u32 tx_packets; | ||
172 | u32 tx_bytes; | ||
173 | }; | ||
174 | |||
175 | |||
176 | /* | ||
177 | * Info for a specific cpu and device. | ||
178 | * | ||
179 | * ISSUE: There is a "dev" pointer in "napi" as well. | ||
180 | */ | ||
181 | struct tile_net_cpu { | ||
182 | /* The NAPI struct. */ | ||
183 | struct napi_struct napi; | ||
184 | /* Packet queue. */ | ||
185 | struct tile_netio_queue queue; | ||
186 | /* Statistics. */ | ||
187 | struct tile_net_stats_t stats; | ||
188 | /* ISSUE: Is this needed? */ | ||
189 | bool napi_enabled; | ||
190 | /* True if this tile has succcessfully registered with the IPP. */ | ||
191 | bool registered; | ||
192 | /* True if the link was down last time we tried to register. */ | ||
193 | bool link_down; | ||
194 | /* True if "egress_timer" is scheduled. */ | ||
195 | bool egress_timer_scheduled; | ||
196 | /* Number of small sk_buffs which must still be provided. */ | ||
197 | unsigned int num_needed_small_buffers; | ||
198 | /* Number of large sk_buffs which must still be provided. */ | ||
199 | unsigned int num_needed_large_buffers; | ||
200 | /* A timer for handling egress completions. */ | ||
201 | struct timer_list egress_timer; | ||
202 | }; | ||
203 | |||
204 | |||
205 | /* | ||
206 | * Info for a specific device. | ||
207 | */ | ||
208 | struct tile_net_priv { | ||
209 | /* Our network device. */ | ||
210 | struct net_device *dev; | ||
211 | /* The actual egress queue. */ | ||
212 | lepp_queue_t *epp_queue; | ||
213 | /* Protects "epp_queue->cmd_tail" and "epp_queue->comp_tail" */ | ||
214 | spinlock_t cmd_lock; | ||
215 | /* Protects "epp_queue->comp_head". */ | ||
216 | spinlock_t comp_lock; | ||
217 | /* The hypervisor handle for this interface. */ | ||
218 | int hv_devhdl; | ||
219 | /* The intr bit mask that IDs this device. */ | ||
220 | u32 intr_id; | ||
221 | /* True iff "tile_net_open_aux()" has succeeded. */ | ||
222 | int partly_opened; | ||
223 | /* True iff "tile_net_open_inner()" has succeeded. */ | ||
224 | int fully_opened; | ||
225 | /* Effective network cpus. */ | ||
226 | struct cpumask network_cpus_map; | ||
227 | /* Number of network cpus. */ | ||
228 | int network_cpus_count; | ||
229 | /* Credits per network cpu. */ | ||
230 | int network_cpus_credits; | ||
231 | /* Network stats. */ | ||
232 | struct net_device_stats stats; | ||
233 | /* For NetIO bringup retries. */ | ||
234 | struct delayed_work retry_work; | ||
235 | /* Quick access to per cpu data. */ | ||
236 | struct tile_net_cpu *cpu[NR_CPUS]; | ||
237 | }; | ||
238 | |||
239 | |||
240 | /* | ||
241 | * The actual devices (xgbe0, xgbe1, gbe0, gbe1). | ||
242 | */ | ||
243 | static struct net_device *tile_net_devs[TILE_NET_DEVS]; | ||
244 | |||
245 | /* | ||
246 | * The "tile_net_cpu" structures for each device. | ||
247 | */ | ||
248 | static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe0); | ||
249 | static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe1); | ||
250 | static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe0); | ||
251 | static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe1); | ||
252 | |||
253 | |||
254 | /* | ||
255 | * True if "network_cpus" was specified. | ||
256 | */ | ||
257 | static bool network_cpus_used; | ||
258 | |||
259 | /* | ||
260 | * The actual cpus in "network_cpus". | ||
261 | */ | ||
262 | static struct cpumask network_cpus_map; | ||
263 | |||
264 | |||
265 | |||
266 | #ifdef TILE_NET_DEBUG | ||
267 | /* | ||
268 | * printk with extra stuff. | ||
269 | * | ||
270 | * We print the CPU we're running in brackets. | ||
271 | */ | ||
272 | static void net_printk(char *fmt, ...) | ||
273 | { | ||
274 | int i; | ||
275 | int len; | ||
276 | va_list args; | ||
277 | static char buf[256]; | ||
278 | |||
279 | len = sprintf(buf, "tile_net[%2.2d]: ", smp_processor_id()); | ||
280 | va_start(args, fmt); | ||
281 | i = vscnprintf(buf + len, sizeof(buf) - len - 1, fmt, args); | ||
282 | va_end(args); | ||
283 | buf[255] = '\0'; | ||
284 | pr_notice(buf); | ||
285 | } | ||
286 | #endif | ||
287 | |||
288 | |||
289 | #ifdef TILE_NET_DUMP_PACKETS | ||
290 | /* | ||
291 | * Dump a packet. | ||
292 | */ | ||
293 | static void dump_packet(unsigned char *data, unsigned long length, char *s) | ||
294 | { | ||
295 | unsigned long i; | ||
296 | static unsigned int count; | ||
297 | |||
298 | pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n", | ||
299 | data, length, s, count++); | ||
300 | |||
301 | pr_info("\n"); | ||
302 | |||
303 | for (i = 0; i < length; i++) { | ||
304 | if ((i & 0xf) == 0) | ||
305 | sprintf(buf, "%8.8lx:", i); | ||
306 | sprintf(buf + strlen(buf), " %2.2x", data[i]); | ||
307 | if ((i & 0xf) == 0xf || i == length - 1) | ||
308 | pr_info("%s\n", buf); | ||
309 | } | ||
310 | } | ||
311 | #endif | ||
312 | |||
313 | |||
314 | /* | ||
315 | * Provide support for the __netio_fastio1() swint | ||
316 | * (see <hv/drv_xgbe_intf.h> for how it is used). | ||
317 | * | ||
318 | * The fastio swint2 call may clobber all the caller-saved registers. | ||
319 | * It rarely clobbers memory, but we allow for the possibility in | ||
320 | * the signature just to be on the safe side. | ||
321 | * | ||
322 | * Also, gcc doesn't seem to allow an input operand to be | ||
323 | * clobbered, so we fake it with dummy outputs. | ||
324 | * | ||
325 | * This function can't be static because of the way it is declared | ||
326 | * in the netio header. | ||
327 | */ | ||
328 | inline int __netio_fastio1(u32 fastio_index, u32 arg0) | ||
329 | { | ||
330 | long result, clobber_r1, clobber_r10; | ||
331 | asm volatile("swint2" | ||
332 | : "=R00" (result), | ||
333 | "=R01" (clobber_r1), "=R10" (clobber_r10) | ||
334 | : "R10" (fastio_index), "R01" (arg0) | ||
335 | : "memory", "r2", "r3", "r4", | ||
336 | "r5", "r6", "r7", "r8", "r9", | ||
337 | "r11", "r12", "r13", "r14", | ||
338 | "r15", "r16", "r17", "r18", "r19", | ||
339 | "r20", "r21", "r22", "r23", "r24", | ||
340 | "r25", "r26", "r27", "r28", "r29"); | ||
341 | return result; | ||
342 | } | ||
343 | |||
344 | |||
345 | /* | ||
346 | * Provide a linux buffer to LIPP. | ||
347 | */ | ||
348 | static void tile_net_provide_linux_buffer(struct tile_net_cpu *info, | ||
349 | void *va, bool small) | ||
350 | { | ||
351 | struct tile_netio_queue *queue = &info->queue; | ||
352 | |||
353 | /* Convert "va" and "small" to "linux_buffer_t". */ | ||
354 | unsigned int buffer = ((unsigned int)(__pa(va) >> 7) << 1) + small; | ||
355 | |||
356 | __netio_fastio_free_buffer(queue->__user_part.__fastio_index, buffer); | ||
357 | } | ||
358 | |||
359 | |||
360 | /* | ||
361 | * Provide a linux buffer for LIPP. | ||
362 | */ | ||
363 | static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info, | ||
364 | bool small) | ||
365 | { | ||
366 | /* ISSUE: What should we use here? */ | ||
367 | unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100; | ||
368 | |||
369 | /* Round up to ensure to avoid "false sharing" with last cache line. */ | ||
370 | unsigned int buffer_size = | ||
371 | (((small ? LIPP_SMALL_PACKET_SIZE : large_size) + | ||
372 | CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE()); | ||
373 | |||
374 | /* | ||
375 | * ISSUE: Since CPAs are 38 bits, and we can only encode the | ||
376 | * high 31 bits in a "linux_buffer_t", the low 7 bits must be | ||
377 | * zero, and thus, we must align the actual "va" mod 128. | ||
378 | */ | ||
379 | const unsigned long align = 128; | ||
380 | |||
381 | struct sk_buff *skb; | ||
382 | void *va; | ||
383 | |||
384 | struct sk_buff **skb_ptr; | ||
385 | |||
386 | /* Note that "dev_alloc_skb()" adds NET_SKB_PAD more bytes, */ | ||
387 | /* and also "reserves" that many bytes. */ | ||
388 | /* ISSUE: Can we "share" the NET_SKB_PAD bytes with "skb_ptr"? */ | ||
389 | int len = sizeof(*skb_ptr) + align + buffer_size; | ||
390 | |||
391 | while (1) { | ||
392 | |||
393 | /* Allocate (or fail). */ | ||
394 | skb = dev_alloc_skb(len); | ||
395 | if (skb == NULL) | ||
396 | return false; | ||
397 | |||
398 | /* Make room for a back-pointer to 'skb'. */ | ||
399 | skb_reserve(skb, sizeof(*skb_ptr)); | ||
400 | |||
401 | /* Make sure we are aligned. */ | ||
402 | skb_reserve(skb, -(long)skb->data & (align - 1)); | ||
403 | |||
404 | /* This address is given to IPP. */ | ||
405 | va = skb->data; | ||
406 | |||
407 | if (small) | ||
408 | break; | ||
409 | |||
410 | /* ISSUE: This has never been observed! */ | ||
411 | /* Large buffers must not span a huge page. */ | ||
412 | if (((((long)va & ~HPAGE_MASK) + 1535) & HPAGE_MASK) == 0) | ||
413 | break; | ||
414 | pr_err("Leaking unaligned linux buffer at %p.\n", va); | ||
415 | } | ||
416 | |||
417 | /* Skip two bytes to satisfy LIPP assumptions. */ | ||
418 | /* Note that this aligns IP on a 16 byte boundary. */ | ||
419 | /* ISSUE: Do this when the packet arrives? */ | ||
420 | skb_reserve(skb, NET_IP_ALIGN); | ||
421 | |||
422 | /* Save a back-pointer to 'skb'. */ | ||
423 | skb_ptr = va - sizeof(*skb_ptr); | ||
424 | *skb_ptr = skb; | ||
425 | |||
426 | /* Invalidate the packet buffer. */ | ||
427 | if (!hash_default) | ||
428 | __inv_buffer(skb->data, buffer_size); | ||
429 | |||
430 | /* Make sure "skb_ptr" has been flushed. */ | ||
431 | __insn_mf(); | ||
432 | |||
433 | #ifdef TILE_NET_PARANOIA | ||
434 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
435 | if (hash_default) { | ||
436 | HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va); | ||
437 | if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) | ||
438 | panic("Non-coherent ingress buffer!"); | ||
439 | } | ||
440 | #endif | ||
441 | #endif | ||
442 | |||
443 | /* Provide the new buffer. */ | ||
444 | tile_net_provide_linux_buffer(info, va, small); | ||
445 | |||
446 | return true; | ||
447 | } | ||
448 | |||
449 | |||
450 | /* | ||
451 | * Provide linux buffers for LIPP. | ||
452 | */ | ||
453 | static void tile_net_provide_needed_buffers(struct tile_net_cpu *info) | ||
454 | { | ||
455 | while (info->num_needed_small_buffers != 0) { | ||
456 | if (!tile_net_provide_needed_buffer(info, true)) | ||
457 | goto oops; | ||
458 | info->num_needed_small_buffers--; | ||
459 | } | ||
460 | |||
461 | while (info->num_needed_large_buffers != 0) { | ||
462 | if (!tile_net_provide_needed_buffer(info, false)) | ||
463 | goto oops; | ||
464 | info->num_needed_large_buffers--; | ||
465 | } | ||
466 | |||
467 | return; | ||
468 | |||
469 | oops: | ||
470 | |||
471 | /* Add a description to the page allocation failure dump. */ | ||
472 | pr_notice("Could not provide a linux buffer to LIPP.\n"); | ||
473 | } | ||
474 | |||
475 | |||
476 | /* | ||
477 | * Grab some LEPP completions, and store them in "comps", of size | ||
478 | * "comps_size", and return the number of completions which were | ||
479 | * stored, so the caller can free them. | ||
480 | * | ||
481 | * If "pending" is not NULL, it will be set to true if there might | ||
482 | * still be some pending completions caused by this tile, else false. | ||
483 | */ | ||
484 | static unsigned int tile_net_lepp_grab_comps(struct net_device *dev, | ||
485 | struct sk_buff *comps[], | ||
486 | unsigned int comps_size, | ||
487 | bool *pending) | ||
488 | { | ||
489 | struct tile_net_priv *priv = netdev_priv(dev); | ||
490 | |||
491 | lepp_queue_t *eq = priv->epp_queue; | ||
492 | |||
493 | unsigned int n = 0; | ||
494 | |||
495 | unsigned int comp_head; | ||
496 | unsigned int comp_busy; | ||
497 | unsigned int comp_tail; | ||
498 | |||
499 | spin_lock(&priv->comp_lock); | ||
500 | |||
501 | comp_head = eq->comp_head; | ||
502 | comp_busy = eq->comp_busy; | ||
503 | comp_tail = eq->comp_tail; | ||
504 | |||
505 | while (comp_head != comp_busy && n < comps_size) { | ||
506 | comps[n++] = eq->comps[comp_head]; | ||
507 | LEPP_QINC(comp_head); | ||
508 | } | ||
509 | |||
510 | if (pending != NULL) | ||
511 | *pending = (comp_head != comp_tail); | ||
512 | |||
513 | eq->comp_head = comp_head; | ||
514 | |||
515 | spin_unlock(&priv->comp_lock); | ||
516 | |||
517 | return n; | ||
518 | } | ||
519 | |||
520 | |||
521 | /* | ||
522 | * Make sure the egress timer is scheduled. | ||
523 | * | ||
524 | * Note that we use "schedule if not scheduled" logic instead of the more | ||
525 | * obvious "reschedule" logic, because "reschedule" is fairly expensive. | ||
526 | */ | ||
527 | static void tile_net_schedule_egress_timer(struct tile_net_cpu *info) | ||
528 | { | ||
529 | if (!info->egress_timer_scheduled) { | ||
530 | mod_timer_pinned(&info->egress_timer, jiffies + 1); | ||
531 | info->egress_timer_scheduled = true; | ||
532 | } | ||
533 | } | ||
534 | |||
535 | |||
536 | /* | ||
537 | * The "function" for "info->egress_timer". | ||
538 | * | ||
539 | * This timer will reschedule itself as long as there are any pending | ||
540 | * completions expected (on behalf of any tile). | ||
541 | * | ||
542 | * ISSUE: Realistically, will the timer ever stop scheduling itself? | ||
543 | * | ||
544 | * ISSUE: This timer is almost never actually needed, so just use a global | ||
545 | * timer that can run on any tile. | ||
546 | * | ||
547 | * ISSUE: Maybe instead track number of expected completions, and free | ||
548 | * only that many, resetting to zero if "pending" is ever false. | ||
549 | */ | ||
550 | static void tile_net_handle_egress_timer(unsigned long arg) | ||
551 | { | ||
552 | struct tile_net_cpu *info = (struct tile_net_cpu *)arg; | ||
553 | struct net_device *dev = info->napi.dev; | ||
554 | |||
555 | struct sk_buff *olds[32]; | ||
556 | unsigned int wanted = 32; | ||
557 | unsigned int i, nolds = 0; | ||
558 | bool pending; | ||
559 | |||
560 | /* The timer is no longer scheduled. */ | ||
561 | info->egress_timer_scheduled = false; | ||
562 | |||
563 | nolds = tile_net_lepp_grab_comps(dev, olds, wanted, &pending); | ||
564 | |||
565 | for (i = 0; i < nolds; i++) | ||
566 | kfree_skb(olds[i]); | ||
567 | |||
568 | /* Reschedule timer if needed. */ | ||
569 | if (pending) | ||
570 | tile_net_schedule_egress_timer(info); | ||
571 | } | ||
572 | |||
573 | |||
574 | #ifdef IGNORE_DUP_ACKS | ||
575 | |||
576 | /* | ||
577 | * Help detect "duplicate" ACKs. These are sequential packets (for a | ||
578 | * given flow) which are exactly 66 bytes long, sharing everything but | ||
579 | * ID=2@0x12, Hsum=2@0x18, Ack=4@0x2a, WinSize=2@0x30, Csum=2@0x32, | ||
580 | * Tstamps=10@0x38. The ID's are +1, the Hsum's are -1, the Ack's are | ||
581 | * +N, and the Tstamps are usually identical. | ||
582 | * | ||
583 | * NOTE: Apparently truly duplicate acks (with identical "ack" values), | ||
584 | * should not be collapsed, as they are used for some kind of flow control. | ||
585 | */ | ||
586 | static bool is_dup_ack(char *s1, char *s2, unsigned int len) | ||
587 | { | ||
588 | int i; | ||
589 | |||
590 | unsigned long long ignorable = 0; | ||
591 | |||
592 | /* Identification. */ | ||
593 | ignorable |= (1ULL << 0x12); | ||
594 | ignorable |= (1ULL << 0x13); | ||
595 | |||
596 | /* Header checksum. */ | ||
597 | ignorable |= (1ULL << 0x18); | ||
598 | ignorable |= (1ULL << 0x19); | ||
599 | |||
600 | /* ACK. */ | ||
601 | ignorable |= (1ULL << 0x2a); | ||
602 | ignorable |= (1ULL << 0x2b); | ||
603 | ignorable |= (1ULL << 0x2c); | ||
604 | ignorable |= (1ULL << 0x2d); | ||
605 | |||
606 | /* WinSize. */ | ||
607 | ignorable |= (1ULL << 0x30); | ||
608 | ignorable |= (1ULL << 0x31); | ||
609 | |||
610 | /* Checksum. */ | ||
611 | ignorable |= (1ULL << 0x32); | ||
612 | ignorable |= (1ULL << 0x33); | ||
613 | |||
614 | for (i = 0; i < len; i++, ignorable >>= 1) { | ||
615 | |||
616 | if ((ignorable & 1) || (s1[i] == s2[i])) | ||
617 | continue; | ||
618 | |||
619 | #ifdef TILE_NET_DEBUG | ||
620 | /* HACK: Mention non-timestamp diffs. */ | ||
621 | if (i < 0x38 && i != 0x2f && | ||
622 | net_ratelimit()) | ||
623 | pr_info("Diff at 0x%x\n", i); | ||
624 | #endif | ||
625 | |||
626 | return false; | ||
627 | } | ||
628 | |||
629 | #ifdef TILE_NET_NO_SUPPRESS_DUP_ACKS | ||
630 | /* HACK: Do not suppress truly duplicate ACKs. */ | ||
631 | /* ISSUE: Is this actually necessary or helpful? */ | ||
632 | if (s1[0x2a] == s2[0x2a] && | ||
633 | s1[0x2b] == s2[0x2b] && | ||
634 | s1[0x2c] == s2[0x2c] && | ||
635 | s1[0x2d] == s2[0x2d]) { | ||
636 | return false; | ||
637 | } | ||
638 | #endif | ||
639 | |||
640 | return true; | ||
641 | } | ||
642 | |||
643 | #endif | ||
644 | |||
645 | |||
646 | |||
647 | /* | ||
648 | * Like "tile_net_handle_packets()", but just discard packets. | ||
649 | */ | ||
650 | static void tile_net_discard_packets(struct net_device *dev) | ||
651 | { | ||
652 | struct tile_net_priv *priv = netdev_priv(dev); | ||
653 | int my_cpu = smp_processor_id(); | ||
654 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | ||
655 | struct tile_netio_queue *queue = &info->queue; | ||
656 | netio_queue_impl_t *qsp = queue->__system_part; | ||
657 | netio_queue_user_impl_t *qup = &queue->__user_part; | ||
658 | |||
659 | while (qup->__packet_receive_read != | ||
660 | qsp->__packet_receive_queue.__packet_write) { | ||
661 | |||
662 | int index = qup->__packet_receive_read; | ||
663 | |||
664 | int index2_aux = index + sizeof(netio_pkt_t); | ||
665 | int index2 = | ||
666 | ((index2_aux == | ||
667 | qsp->__packet_receive_queue.__last_packet_plus_one) ? | ||
668 | 0 : index2_aux); | ||
669 | |||
670 | netio_pkt_t *pkt = (netio_pkt_t *) | ||
671 | ((unsigned long) &qsp[1] + index); | ||
672 | |||
673 | /* Extract the "linux_buffer_t". */ | ||
674 | unsigned int buffer = pkt->__packet.word; | ||
675 | |||
676 | /* Convert "linux_buffer_t" to "va". */ | ||
677 | void *va = __va((phys_addr_t)(buffer >> 1) << 7); | ||
678 | |||
679 | /* Acquire the associated "skb". */ | ||
680 | struct sk_buff **skb_ptr = va - sizeof(*skb_ptr); | ||
681 | struct sk_buff *skb = *skb_ptr; | ||
682 | |||
683 | kfree_skb(skb); | ||
684 | |||
685 | /* Consume this packet. */ | ||
686 | qup->__packet_receive_read = index2; | ||
687 | } | ||
688 | } | ||
689 | |||
690 | |||
691 | /* | ||
692 | * Handle the next packet. Return true if "processed", false if "filtered". | ||
693 | */ | ||
694 | static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) | ||
695 | { | ||
696 | struct net_device *dev = info->napi.dev; | ||
697 | |||
698 | struct tile_netio_queue *queue = &info->queue; | ||
699 | netio_queue_impl_t *qsp = queue->__system_part; | ||
700 | netio_queue_user_impl_t *qup = &queue->__user_part; | ||
701 | struct tile_net_stats_t *stats = &info->stats; | ||
702 | |||
703 | int filter; | ||
704 | |||
705 | int index2_aux = index + sizeof(netio_pkt_t); | ||
706 | int index2 = | ||
707 | ((index2_aux == | ||
708 | qsp->__packet_receive_queue.__last_packet_plus_one) ? | ||
709 | 0 : index2_aux); | ||
710 | |||
711 | netio_pkt_t *pkt = (netio_pkt_t *)((unsigned long) &qsp[1] + index); | ||
712 | |||
713 | netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt); | ||
714 | |||
715 | /* Extract the packet size. */ | ||
716 | unsigned long len = | ||
717 | (NETIO_PKT_CUSTOM_LENGTH(pkt) + | ||
718 | NET_IP_ALIGN - NETIO_PACKET_PADDING); | ||
719 | |||
720 | /* Extract the "linux_buffer_t". */ | ||
721 | unsigned int buffer = pkt->__packet.word; | ||
722 | |||
723 | /* Extract "small" (vs "large"). */ | ||
724 | bool small = ((buffer & 1) != 0); | ||
725 | |||
726 | /* Convert "linux_buffer_t" to "va". */ | ||
727 | void *va = __va((phys_addr_t)(buffer >> 1) << 7); | ||
728 | |||
729 | /* Extract the packet data pointer. */ | ||
730 | /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */ | ||
731 | unsigned char *buf = va + NET_IP_ALIGN; | ||
732 | |||
733 | #ifdef IGNORE_DUP_ACKS | ||
734 | |||
735 | static int other; | ||
736 | static int final; | ||
737 | static int keep; | ||
738 | static int skip; | ||
739 | |||
740 | #endif | ||
741 | |||
742 | /* Invalidate the packet buffer. */ | ||
743 | if (!hash_default) | ||
744 | __inv_buffer(buf, len); | ||
745 | |||
746 | /* ISSUE: Is this needed? */ | ||
747 | dev->last_rx = jiffies; | ||
748 | |||
749 | #ifdef TILE_NET_DUMP_PACKETS | ||
750 | dump_packet(buf, len, "rx"); | ||
751 | #endif /* TILE_NET_DUMP_PACKETS */ | ||
752 | |||
753 | #ifdef TILE_NET_VERIFY_INGRESS | ||
754 | if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) && | ||
755 | NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) { | ||
756 | /* | ||
757 | * FIXME: This complains about UDP packets | ||
758 | * with a "zero" checksum (bug 6624). | ||
759 | */ | ||
760 | #ifdef TILE_NET_PANIC_ON_BAD | ||
761 | dump_packet(buf, len, "rx"); | ||
762 | panic("Bad L4 checksum."); | ||
763 | #else | ||
764 | pr_warning("Bad L4 checksum on %d byte packet.\n", len); | ||
765 | #endif | ||
766 | } | ||
767 | if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) && | ||
768 | NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) { | ||
769 | dump_packet(buf, len, "rx"); | ||
770 | panic("Bad L3 checksum."); | ||
771 | } | ||
772 | switch (NETIO_PKT_STATUS_M(metadata, pkt)) { | ||
773 | case NETIO_PKT_STATUS_OVERSIZE: | ||
774 | if (len >= 64) { | ||
775 | dump_packet(buf, len, "rx"); | ||
776 | panic("Unexpected OVERSIZE."); | ||
777 | } | ||
778 | break; | ||
779 | case NETIO_PKT_STATUS_BAD: | ||
780 | #ifdef TILE_NET_PANIC_ON_BAD | ||
781 | dump_packet(buf, len, "rx"); | ||
782 | panic("Unexpected BAD packet."); | ||
783 | #else | ||
784 | pr_warning("Unexpected BAD %d byte packet.\n", len); | ||
785 | #endif | ||
786 | } | ||
787 | #endif | ||
788 | |||
789 | filter = 0; | ||
790 | |||
791 | if (!(dev->flags & IFF_UP)) { | ||
792 | /* Filter packets received before we're up. */ | ||
793 | filter = 1; | ||
794 | } else if (!(dev->flags & IFF_PROMISC)) { | ||
795 | /* | ||
796 | * FIXME: Implement HW multicast filter. | ||
797 | */ | ||
798 | if (!IS_MULTICAST(buf) && !IS_BROADCAST(buf)) { | ||
799 | /* Filter packets not for our address. */ | ||
800 | const u8 *mine = dev->dev_addr; | ||
801 | filter = compare_ether_addr(mine, buf); | ||
802 | } | ||
803 | } | ||
804 | |||
805 | #ifdef IGNORE_DUP_ACKS | ||
806 | |||
807 | if (len != 66) { | ||
808 | /* FIXME: Must check "is_tcp_ack(buf, len)" somehow. */ | ||
809 | |||
810 | other++; | ||
811 | |||
812 | } else if (index2 == | ||
813 | qsp->__packet_receive_queue.__packet_write) { | ||
814 | |||
815 | final++; | ||
816 | |||
817 | } else { | ||
818 | |||
819 | netio_pkt_t *pkt2 = (netio_pkt_t *) | ||
820 | ((unsigned long) &qsp[1] + index2); | ||
821 | |||
822 | netio_pkt_metadata_t *metadata2 = | ||
823 | NETIO_PKT_METADATA(pkt2); | ||
824 | |||
825 | /* Extract the packet size. */ | ||
826 | unsigned long len2 = | ||
827 | (NETIO_PKT_CUSTOM_LENGTH(pkt2) + | ||
828 | NET_IP_ALIGN - NETIO_PACKET_PADDING); | ||
829 | |||
830 | if (len2 == 66 && | ||
831 | NETIO_PKT_FLOW_HASH_M(metadata, pkt) == | ||
832 | NETIO_PKT_FLOW_HASH_M(metadata2, pkt2)) { | ||
833 | |||
834 | /* Extract the "linux_buffer_t". */ | ||
835 | unsigned int buffer2 = pkt2->__packet.word; | ||
836 | |||
837 | /* Convert "linux_buffer_t" to "va". */ | ||
838 | void *va2 = | ||
839 | __va((phys_addr_t)(buffer2 >> 1) << 7); | ||
840 | |||
841 | /* Extract the packet data pointer. */ | ||
842 | /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */ | ||
843 | unsigned char *buf2 = va2 + NET_IP_ALIGN; | ||
844 | |||
845 | /* Invalidate the packet buffer. */ | ||
846 | if (!hash_default) | ||
847 | __inv_buffer(buf2, len2); | ||
848 | |||
849 | if (is_dup_ack(buf, buf2, len)) { | ||
850 | skip++; | ||
851 | filter = 1; | ||
852 | } else { | ||
853 | keep++; | ||
854 | } | ||
855 | } | ||
856 | } | ||
857 | |||
858 | if (net_ratelimit()) | ||
859 | pr_info("Other %d Final %d Keep %d Skip %d.\n", | ||
860 | other, final, keep, skip); | ||
861 | |||
862 | #endif | ||
863 | |||
864 | if (filter) { | ||
865 | |||
866 | /* ISSUE: Update "drop" statistics? */ | ||
867 | |||
868 | tile_net_provide_linux_buffer(info, va, small); | ||
869 | |||
870 | } else { | ||
871 | |||
872 | /* Acquire the associated "skb". */ | ||
873 | struct sk_buff **skb_ptr = va - sizeof(*skb_ptr); | ||
874 | struct sk_buff *skb = *skb_ptr; | ||
875 | |||
876 | /* Paranoia. */ | ||
877 | if (skb->data != buf) | ||
878 | panic("Corrupt linux buffer from LIPP! " | ||
879 | "VA=%p, skb=%p, skb->data=%p\n", | ||
880 | va, skb, skb->data); | ||
881 | |||
882 | /* Encode the actual packet length. */ | ||
883 | skb_put(skb, len); | ||
884 | |||
885 | /* NOTE: This call also sets "skb->dev = dev". */ | ||
886 | skb->protocol = eth_type_trans(skb, dev); | ||
887 | |||
888 | /* ISSUE: Discard corrupt packets? */ | ||
889 | /* ISSUE: Discard packets with bad checksums? */ | ||
890 | |||
891 | /* Avoid recomputing TCP/UDP checksums. */ | ||
892 | if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt)) | ||
893 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
894 | |||
895 | netif_receive_skb(skb); | ||
896 | |||
897 | stats->rx_packets++; | ||
898 | stats->rx_bytes += len; | ||
899 | |||
900 | if (small) | ||
901 | info->num_needed_small_buffers++; | ||
902 | else | ||
903 | info->num_needed_large_buffers++; | ||
904 | } | ||
905 | |||
906 | /* Return four credits after every fourth packet. */ | ||
907 | if (--qup->__receive_credit_remaining == 0) { | ||
908 | u32 interval = qup->__receive_credit_interval; | ||
909 | qup->__receive_credit_remaining = interval; | ||
910 | __netio_fastio_return_credits(qup->__fastio_index, interval); | ||
911 | } | ||
912 | |||
913 | /* Consume this packet. */ | ||
914 | qup->__packet_receive_read = index2; | ||
915 | |||
916 | return !filter; | ||
917 | } | ||
918 | |||
919 | |||
920 | /* | ||
921 | * Handle some packets for the given device on the current CPU. | ||
922 | * | ||
923 | * ISSUE: The "rotting packet" race condition occurs if a packet | ||
924 | * arrives after the queue appears to be empty, and before the | ||
925 | * hypervisor interrupt is re-enabled. | ||
926 | */ | ||
927 | static int tile_net_poll(struct napi_struct *napi, int budget) | ||
928 | { | ||
929 | struct net_device *dev = napi->dev; | ||
930 | struct tile_net_priv *priv = netdev_priv(dev); | ||
931 | int my_cpu = smp_processor_id(); | ||
932 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | ||
933 | struct tile_netio_queue *queue = &info->queue; | ||
934 | netio_queue_impl_t *qsp = queue->__system_part; | ||
935 | netio_queue_user_impl_t *qup = &queue->__user_part; | ||
936 | |||
937 | unsigned int work = 0; | ||
938 | |||
939 | while (1) { | ||
940 | int index = qup->__packet_receive_read; | ||
941 | if (index == qsp->__packet_receive_queue.__packet_write) | ||
942 | break; | ||
943 | |||
944 | if (tile_net_poll_aux(info, index)) { | ||
945 | if (++work >= budget) | ||
946 | goto done; | ||
947 | } | ||
948 | } | ||
949 | |||
950 | napi_complete(&info->napi); | ||
951 | |||
952 | /* Re-enable hypervisor interrupts. */ | ||
953 | enable_percpu_irq(priv->intr_id); | ||
954 | |||
955 | /* HACK: Avoid the "rotting packet" problem. */ | ||
956 | if (qup->__packet_receive_read != | ||
957 | qsp->__packet_receive_queue.__packet_write) | ||
958 | napi_schedule(&info->napi); | ||
959 | |||
960 | /* ISSUE: Handle completions? */ | ||
961 | |||
962 | done: | ||
963 | |||
964 | tile_net_provide_needed_buffers(info); | ||
965 | |||
966 | return work; | ||
967 | } | ||
968 | |||
969 | |||
970 | /* | ||
971 | * Handle an ingress interrupt for the given device on the current cpu. | ||
972 | */ | ||
973 | static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr) | ||
974 | { | ||
975 | struct net_device *dev = (struct net_device *)dev_ptr; | ||
976 | struct tile_net_priv *priv = netdev_priv(dev); | ||
977 | int my_cpu = smp_processor_id(); | ||
978 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | ||
979 | |||
980 | /* Disable hypervisor interrupt. */ | ||
981 | disable_percpu_irq(priv->intr_id); | ||
982 | |||
983 | napi_schedule(&info->napi); | ||
984 | |||
985 | return IRQ_HANDLED; | ||
986 | } | ||
987 | |||
988 | |||
989 | /* | ||
990 | * One time initialization per interface. | ||
991 | */ | ||
992 | static int tile_net_open_aux(struct net_device *dev) | ||
993 | { | ||
994 | struct tile_net_priv *priv = netdev_priv(dev); | ||
995 | |||
996 | int ret; | ||
997 | int dummy; | ||
998 | unsigned int epp_lotar; | ||
999 | |||
1000 | /* | ||
1001 | * Find out where EPP memory should be homed. | ||
1002 | */ | ||
1003 | ret = hv_dev_pread(priv->hv_devhdl, 0, | ||
1004 | (HV_VirtAddr)&epp_lotar, sizeof(epp_lotar), | ||
1005 | NETIO_EPP_SHM_OFF); | ||
1006 | if (ret < 0) { | ||
1007 | pr_err("could not read epp_shm_queue lotar.\n"); | ||
1008 | return -EIO; | ||
1009 | } | ||
1010 | |||
1011 | /* | ||
1012 | * Home the page on the EPP. | ||
1013 | */ | ||
1014 | { | ||
1015 | int epp_home = hv_lotar_to_cpu(epp_lotar); | ||
1016 | struct page *page = virt_to_page(priv->epp_queue); | ||
1017 | homecache_change_page_home(page, 0, epp_home); | ||
1018 | } | ||
1019 | |||
1020 | /* | ||
1021 | * Register the EPP shared memory queue. | ||
1022 | */ | ||
1023 | { | ||
1024 | netio_ipp_address_t ea = { | ||
1025 | .va = 0, | ||
1026 | .pa = __pa(priv->epp_queue), | ||
1027 | .pte = hv_pte(0), | ||
1028 | .size = PAGE_SIZE, | ||
1029 | }; | ||
1030 | ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar); | ||
1031 | ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3); | ||
1032 | ret = hv_dev_pwrite(priv->hv_devhdl, 0, | ||
1033 | (HV_VirtAddr)&ea, | ||
1034 | sizeof(ea), | ||
1035 | NETIO_EPP_SHM_OFF); | ||
1036 | if (ret < 0) | ||
1037 | return -EIO; | ||
1038 | } | ||
1039 | |||
1040 | /* | ||
1041 | * Start LIPP/LEPP. | ||
1042 | */ | ||
1043 | if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, | ||
1044 | sizeof(dummy), NETIO_IPP_START_SHIM_OFF) < 0) { | ||
1045 | pr_warning("Failed to start LIPP/LEPP.\n"); | ||
1046 | return -EIO; | ||
1047 | } | ||
1048 | |||
1049 | return 0; | ||
1050 | } | ||
1051 | |||
1052 | |||
1053 | /* | ||
1054 | * Register with hypervisor on each CPU. | ||
1055 | * | ||
1056 | * Strangely, this function does important things even if it "fails", | ||
1057 | * which is especially common if the link is not up yet. Hopefully | ||
1058 | * these things are all "harmless" if done twice! | ||
1059 | */ | ||
1060 | static void tile_net_register(void *dev_ptr) | ||
1061 | { | ||
1062 | struct net_device *dev = (struct net_device *)dev_ptr; | ||
1063 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1064 | int my_cpu = smp_processor_id(); | ||
1065 | struct tile_net_cpu *info; | ||
1066 | |||
1067 | struct tile_netio_queue *queue; | ||
1068 | |||
1069 | /* Only network cpus can receive packets. */ | ||
1070 | int queue_id = | ||
1071 | cpumask_test_cpu(my_cpu, &priv->network_cpus_map) ? 0 : 255; | ||
1072 | |||
1073 | netio_input_config_t config = { | ||
1074 | .flags = 0, | ||
1075 | .num_receive_packets = priv->network_cpus_credits, | ||
1076 | .queue_id = queue_id | ||
1077 | }; | ||
1078 | |||
1079 | int ret = 0; | ||
1080 | netio_queue_impl_t *queuep; | ||
1081 | |||
1082 | PDEBUG("tile_net_register(queue_id %d)\n", queue_id); | ||
1083 | |||
1084 | if (!strcmp(dev->name, "xgbe0")) | ||
1085 | info = &__get_cpu_var(hv_xgbe0); | ||
1086 | else if (!strcmp(dev->name, "xgbe1")) | ||
1087 | info = &__get_cpu_var(hv_xgbe1); | ||
1088 | else if (!strcmp(dev->name, "gbe0")) | ||
1089 | info = &__get_cpu_var(hv_gbe0); | ||
1090 | else if (!strcmp(dev->name, "gbe1")) | ||
1091 | info = &__get_cpu_var(hv_gbe1); | ||
1092 | else | ||
1093 | BUG(); | ||
1094 | |||
1095 | /* Initialize the egress timer. */ | ||
1096 | init_timer(&info->egress_timer); | ||
1097 | info->egress_timer.data = (long)info; | ||
1098 | info->egress_timer.function = tile_net_handle_egress_timer; | ||
1099 | |||
1100 | priv->cpu[my_cpu] = info; | ||
1101 | |||
1102 | /* | ||
1103 | * Register ourselves with the IPP. | ||
1104 | */ | ||
1105 | ret = hv_dev_pwrite(priv->hv_devhdl, 0, | ||
1106 | (HV_VirtAddr)&config, | ||
1107 | sizeof(netio_input_config_t), | ||
1108 | NETIO_IPP_INPUT_REGISTER_OFF); | ||
1109 | PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n", | ||
1110 | ret); | ||
1111 | if (ret < 0) { | ||
1112 | printk(KERN_DEBUG "hv_dev_pwrite NETIO_IPP_INPUT_REGISTER_OFF" | ||
1113 | " failure %d\n", ret); | ||
1114 | info->link_down = (ret == NETIO_LINK_DOWN); | ||
1115 | return; | ||
1116 | } | ||
1117 | |||
1118 | /* | ||
1119 | * Get the pointer to our queue's system part. | ||
1120 | */ | ||
1121 | |||
1122 | ret = hv_dev_pread(priv->hv_devhdl, 0, | ||
1123 | (HV_VirtAddr)&queuep, | ||
1124 | sizeof(netio_queue_impl_t *), | ||
1125 | NETIO_IPP_INPUT_REGISTER_OFF); | ||
1126 | PDEBUG("hv_dev_pread(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n", | ||
1127 | ret); | ||
1128 | PDEBUG("queuep %p\n", queuep); | ||
1129 | if (ret <= 0) { | ||
1130 | /* ISSUE: Shouldn't this be a fatal error? */ | ||
1131 | pr_err("hv_dev_pread NETIO_IPP_INPUT_REGISTER_OFF failure\n"); | ||
1132 | return; | ||
1133 | } | ||
1134 | |||
1135 | queue = &info->queue; | ||
1136 | |||
1137 | queue->__system_part = queuep; | ||
1138 | |||
1139 | memset(&queue->__user_part, 0, sizeof(netio_queue_user_impl_t)); | ||
1140 | |||
1141 | /* This is traditionally "config.num_receive_packets / 2". */ | ||
1142 | queue->__user_part.__receive_credit_interval = 4; | ||
1143 | queue->__user_part.__receive_credit_remaining = | ||
1144 | queue->__user_part.__receive_credit_interval; | ||
1145 | |||
1146 | /* | ||
1147 | * Get a fastio index from the hypervisor. | ||
1148 | * ISSUE: Shouldn't this check the result? | ||
1149 | */ | ||
1150 | ret = hv_dev_pread(priv->hv_devhdl, 0, | ||
1151 | (HV_VirtAddr)&queue->__user_part.__fastio_index, | ||
1152 | sizeof(queue->__user_part.__fastio_index), | ||
1153 | NETIO_IPP_GET_FASTIO_OFF); | ||
1154 | PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret); | ||
1155 | |||
1156 | netif_napi_add(dev, &info->napi, tile_net_poll, 64); | ||
1157 | |||
1158 | /* Now we are registered. */ | ||
1159 | info->registered = true; | ||
1160 | } | ||
1161 | |||
1162 | |||
1163 | /* | ||
1164 | * Unregister with hypervisor on each CPU. | ||
1165 | */ | ||
1166 | static void tile_net_unregister(void *dev_ptr) | ||
1167 | { | ||
1168 | struct net_device *dev = (struct net_device *)dev_ptr; | ||
1169 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1170 | int my_cpu = smp_processor_id(); | ||
1171 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | ||
1172 | |||
1173 | int ret = 0; | ||
1174 | int dummy = 0; | ||
1175 | |||
1176 | /* Do nothing if never registered. */ | ||
1177 | if (info == NULL) | ||
1178 | return; | ||
1179 | |||
1180 | /* Do nothing if already unregistered. */ | ||
1181 | if (!info->registered) | ||
1182 | return; | ||
1183 | |||
1184 | /* | ||
1185 | * Unregister ourselves with LIPP. | ||
1186 | */ | ||
1187 | ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, | ||
1188 | sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF); | ||
1189 | PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_UNREGISTER_OFF) returned %d\n", | ||
1190 | ret); | ||
1191 | if (ret < 0) { | ||
1192 | /* FIXME: Just panic? */ | ||
1193 | pr_err("hv_dev_pwrite NETIO_IPP_INPUT_UNREGISTER_OFF" | ||
1194 | " failure %d\n", ret); | ||
1195 | } | ||
1196 | |||
1197 | /* | ||
1198 | * Discard all packets still in our NetIO queue. Hopefully, | ||
1199 | * once the unregister call is complete, there will be no | ||
1200 | * packets still in flight on the IDN. | ||
1201 | */ | ||
1202 | tile_net_discard_packets(dev); | ||
1203 | |||
1204 | /* Reset state. */ | ||
1205 | info->num_needed_small_buffers = 0; | ||
1206 | info->num_needed_large_buffers = 0; | ||
1207 | |||
1208 | /* Cancel egress timer. */ | ||
1209 | del_timer(&info->egress_timer); | ||
1210 | info->egress_timer_scheduled = false; | ||
1211 | |||
1212 | netif_napi_del(&info->napi); | ||
1213 | |||
1214 | /* Now we are unregistered. */ | ||
1215 | info->registered = false; | ||
1216 | } | ||
1217 | |||
1218 | |||
1219 | /* | ||
1220 | * Helper function for "tile_net_stop()". | ||
1221 | * | ||
1222 | * Also used to handle registration failure in "tile_net_open_inner()", | ||
1223 | * when "fully_opened" is known to be false, and the various extra | ||
1224 | * steps in "tile_net_stop()" are not necessary. ISSUE: It might be | ||
1225 | * simpler if we could just call "tile_net_stop()" anyway. | ||
1226 | */ | ||
1227 | static void tile_net_stop_aux(struct net_device *dev) | ||
1228 | { | ||
1229 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1230 | |||
1231 | int dummy = 0; | ||
1232 | |||
1233 | /* Unregister all tiles, so LIPP will stop delivering packets. */ | ||
1234 | on_each_cpu(tile_net_unregister, (void *)dev, 1); | ||
1235 | |||
1236 | /* Stop LIPP/LEPP. */ | ||
1237 | if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, | ||
1238 | sizeof(dummy), NETIO_IPP_STOP_SHIM_OFF) < 0) | ||
1239 | panic("Failed to stop LIPP/LEPP!\n"); | ||
1240 | |||
1241 | priv->partly_opened = 0; | ||
1242 | } | ||
1243 | |||
1244 | |||
1245 | /* | ||
1246 | * Disable ingress interrupts for the given device on the current cpu. | ||
1247 | */ | ||
1248 | static void tile_net_disable_intr(void *dev_ptr) | ||
1249 | { | ||
1250 | struct net_device *dev = (struct net_device *)dev_ptr; | ||
1251 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1252 | int my_cpu = smp_processor_id(); | ||
1253 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | ||
1254 | |||
1255 | /* Disable hypervisor interrupt. */ | ||
1256 | disable_percpu_irq(priv->intr_id); | ||
1257 | |||
1258 | /* Disable NAPI if needed. */ | ||
1259 | if (info != NULL && info->napi_enabled) { | ||
1260 | napi_disable(&info->napi); | ||
1261 | info->napi_enabled = false; | ||
1262 | } | ||
1263 | } | ||
1264 | |||
1265 | |||
1266 | /* | ||
1267 | * Enable ingress interrupts for the given device on the current cpu. | ||
1268 | */ | ||
1269 | static void tile_net_enable_intr(void *dev_ptr) | ||
1270 | { | ||
1271 | struct net_device *dev = (struct net_device *)dev_ptr; | ||
1272 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1273 | int my_cpu = smp_processor_id(); | ||
1274 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | ||
1275 | |||
1276 | /* Enable hypervisor interrupt. */ | ||
1277 | enable_percpu_irq(priv->intr_id); | ||
1278 | |||
1279 | /* Enable NAPI. */ | ||
1280 | napi_enable(&info->napi); | ||
1281 | info->napi_enabled = true; | ||
1282 | } | ||
1283 | |||
1284 | |||
1285 | /* | ||
1286 | * tile_net_open_inner does most of the work of bringing up the interface. | ||
1287 | * It's called from tile_net_open(), and also from tile_net_retry_open(). | ||
1288 | * The return value is 0 if the interface was brought up, < 0 if | ||
1289 | * tile_net_open() should return the return value as an error, and > 0 if | ||
1290 | * tile_net_open() should return success and schedule a work item to | ||
1291 | * periodically retry the bringup. | ||
1292 | */ | ||
1293 | static int tile_net_open_inner(struct net_device *dev) | ||
1294 | { | ||
1295 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1296 | int my_cpu = smp_processor_id(); | ||
1297 | struct tile_net_cpu *info; | ||
1298 | struct tile_netio_queue *queue; | ||
1299 | unsigned int irq; | ||
1300 | int i; | ||
1301 | |||
1302 | /* | ||
1303 | * First try to register just on the local CPU, and handle any | ||
1304 | * semi-expected "link down" failure specially. Note that we | ||
1305 | * do NOT call "tile_net_stop_aux()", unlike below. | ||
1306 | */ | ||
1307 | tile_net_register(dev); | ||
1308 | info = priv->cpu[my_cpu]; | ||
1309 | if (!info->registered) { | ||
1310 | if (info->link_down) | ||
1311 | return 1; | ||
1312 | return -EAGAIN; | ||
1313 | } | ||
1314 | |||
1315 | /* | ||
1316 | * Now register everywhere else. If any registration fails, | ||
1317 | * even for "link down" (which might not be possible), we | ||
1318 | * clean up using "tile_net_stop_aux()". | ||
1319 | */ | ||
1320 | smp_call_function(tile_net_register, (void *)dev, 1); | ||
1321 | for_each_online_cpu(i) { | ||
1322 | if (!priv->cpu[i]->registered) { | ||
1323 | tile_net_stop_aux(dev); | ||
1324 | return -EAGAIN; | ||
1325 | } | ||
1326 | } | ||
1327 | |||
1328 | queue = &info->queue; | ||
1329 | |||
1330 | /* | ||
1331 | * Set the device intr bit mask. | ||
1332 | * The tile_net_register above sets per tile __intr_id. | ||
1333 | */ | ||
1334 | priv->intr_id = queue->__system_part->__intr_id; | ||
1335 | BUG_ON(!priv->intr_id); | ||
1336 | |||
1337 | /* | ||
1338 | * Register the device interrupt handler. | ||
1339 | * The __ffs() function returns the index into the interrupt handler | ||
1340 | * table from the interrupt bit mask which should have one bit | ||
1341 | * and one bit only set. | ||
1342 | */ | ||
1343 | irq = __ffs(priv->intr_id); | ||
1344 | tile_irq_activate(irq, TILE_IRQ_PERCPU); | ||
1345 | BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt, | ||
1346 | 0, dev->name, (void *)dev) != 0); | ||
1347 | |||
1348 | /* ISSUE: How could "priv->fully_opened" ever be "true" here? */ | ||
1349 | |||
1350 | if (!priv->fully_opened) { | ||
1351 | |||
1352 | int dummy = 0; | ||
1353 | |||
1354 | /* Allocate initial buffers. */ | ||
1355 | |||
1356 | int max_buffers = | ||
1357 | priv->network_cpus_count * priv->network_cpus_credits; | ||
1358 | |||
1359 | info->num_needed_small_buffers = | ||
1360 | min(LIPP_SMALL_BUFFERS, max_buffers); | ||
1361 | |||
1362 | info->num_needed_large_buffers = | ||
1363 | min(LIPP_LARGE_BUFFERS, max_buffers); | ||
1364 | |||
1365 | tile_net_provide_needed_buffers(info); | ||
1366 | |||
1367 | if (info->num_needed_small_buffers != 0 || | ||
1368 | info->num_needed_large_buffers != 0) | ||
1369 | panic("Insufficient memory for buffer stack!"); | ||
1370 | |||
1371 | /* Start LIPP/LEPP and activate "ingress" at the shim. */ | ||
1372 | if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, | ||
1373 | sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0) | ||
1374 | panic("Failed to activate the LIPP Shim!\n"); | ||
1375 | |||
1376 | priv->fully_opened = 1; | ||
1377 | } | ||
1378 | |||
1379 | /* On each tile, enable the hypervisor to trigger interrupts. */ | ||
1380 | /* ISSUE: Do this before starting LIPP/LEPP? */ | ||
1381 | on_each_cpu(tile_net_enable_intr, (void *)dev, 1); | ||
1382 | |||
1383 | /* Start our transmit queue. */ | ||
1384 | netif_start_queue(dev); | ||
1385 | |||
1386 | return 0; | ||
1387 | } | ||
1388 | |||
1389 | |||
1390 | /* | ||
1391 | * Called periodically to retry bringing up the NetIO interface, | ||
1392 | * if it doesn't come up cleanly during tile_net_open(). | ||
1393 | */ | ||
1394 | static void tile_net_open_retry(struct work_struct *w) | ||
1395 | { | ||
1396 | struct delayed_work *dw = | ||
1397 | container_of(w, struct delayed_work, work); | ||
1398 | |||
1399 | struct tile_net_priv *priv = | ||
1400 | container_of(dw, struct tile_net_priv, retry_work); | ||
1401 | |||
1402 | /* | ||
1403 | * Try to bring the NetIO interface up. If it fails, reschedule | ||
1404 | * ourselves to try again later; otherwise, tell Linux we now have | ||
1405 | * a working link. ISSUE: What if the return value is negative? | ||
1406 | */ | ||
1407 | if (tile_net_open_inner(priv->dev)) | ||
1408 | schedule_delayed_work_on(singlethread_cpu, &priv->retry_work, | ||
1409 | TILE_NET_RETRY_INTERVAL); | ||
1410 | else | ||
1411 | netif_carrier_on(priv->dev); | ||
1412 | } | ||
1413 | |||
1414 | |||
1415 | /* | ||
1416 | * Called when a network interface is made active. | ||
1417 | * | ||
1418 | * Returns 0 on success, negative value on failure. | ||
1419 | * | ||
1420 | * The open entry point is called when a network interface is made | ||
1421 | * active by the system (IFF_UP). At this point all resources needed | ||
1422 | * for transmit and receive operations are allocated, the interrupt | ||
1423 | * handler is registered with the OS, the watchdog timer is started, | ||
1424 | * and the stack is notified that the interface is ready. | ||
1425 | * | ||
1426 | * If the actual link is not available yet, then we tell Linux that | ||
1427 | * we have no carrier, and we keep checking until the link comes up. | ||
1428 | */ | ||
1429 | static int tile_net_open(struct net_device *dev) | ||
1430 | { | ||
1431 | int ret = 0; | ||
1432 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1433 | |||
1434 | /* | ||
1435 | * We rely on priv->partly_opened to tell us if this is the | ||
1436 | * first time this interface is being brought up. If it is | ||
1437 | * set, the IPP was already initialized and should not be | ||
1438 | * initialized again. | ||
1439 | */ | ||
1440 | if (!priv->partly_opened) { | ||
1441 | |||
1442 | int count; | ||
1443 | int credits; | ||
1444 | |||
1445 | /* Initialize LIPP/LEPP, and start the Shim. */ | ||
1446 | ret = tile_net_open_aux(dev); | ||
1447 | if (ret < 0) { | ||
1448 | pr_err("tile_net_open_aux failed: %d\n", ret); | ||
1449 | return ret; | ||
1450 | } | ||
1451 | |||
1452 | /* Analyze the network cpus. */ | ||
1453 | |||
1454 | if (network_cpus_used) | ||
1455 | cpumask_copy(&priv->network_cpus_map, | ||
1456 | &network_cpus_map); | ||
1457 | else | ||
1458 | cpumask_copy(&priv->network_cpus_map, cpu_online_mask); | ||
1459 | |||
1460 | |||
1461 | count = cpumask_weight(&priv->network_cpus_map); | ||
1462 | |||
1463 | /* Limit credits to available buffers, and apply min. */ | ||
1464 | credits = max(16, (LIPP_LARGE_BUFFERS / count) & ~1); | ||
1465 | |||
1466 | /* Apply "GBE" max limit. */ | ||
1467 | /* ISSUE: Use higher limit for XGBE? */ | ||
1468 | credits = min(NETIO_MAX_RECEIVE_PKTS, credits); | ||
1469 | |||
1470 | priv->network_cpus_count = count; | ||
1471 | priv->network_cpus_credits = credits; | ||
1472 | |||
1473 | #ifdef TILE_NET_DEBUG | ||
1474 | pr_info("Using %d network cpus, with %d credits each\n", | ||
1475 | priv->network_cpus_count, priv->network_cpus_credits); | ||
1476 | #endif | ||
1477 | |||
1478 | priv->partly_opened = 1; | ||
1479 | } | ||
1480 | |||
1481 | /* | ||
1482 | * Attempt to bring up the link. | ||
1483 | */ | ||
1484 | ret = tile_net_open_inner(dev); | ||
1485 | if (ret <= 0) { | ||
1486 | if (ret == 0) | ||
1487 | netif_carrier_on(dev); | ||
1488 | return ret; | ||
1489 | } | ||
1490 | |||
1491 | /* | ||
1492 | * We were unable to bring up the NetIO interface, but we want to | ||
1493 | * try again in a little bit. Tell Linux that we have no carrier | ||
1494 | * so it doesn't try to use the interface before the link comes up | ||
1495 | * and then remember to try again later. | ||
1496 | */ | ||
1497 | netif_carrier_off(dev); | ||
1498 | schedule_delayed_work_on(singlethread_cpu, &priv->retry_work, | ||
1499 | TILE_NET_RETRY_INTERVAL); | ||
1500 | |||
1501 | return 0; | ||
1502 | } | ||
1503 | |||
1504 | |||
1505 | /* | ||
1506 | * Disables a network interface. | ||
1507 | * | ||
1508 | * Returns 0, this is not allowed to fail. | ||
1509 | * | ||
1510 | * The close entry point is called when an interface is de-activated | ||
1511 | * by the OS. The hardware is still under the drivers control, but | ||
1512 | * needs to be disabled. A global MAC reset is issued to stop the | ||
1513 | * hardware, and all transmit and receive resources are freed. | ||
1514 | * | ||
1515 | * ISSUE: Can this can be called while "tile_net_poll()" is running? | ||
1516 | */ | ||
1517 | static int tile_net_stop(struct net_device *dev) | ||
1518 | { | ||
1519 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1520 | |||
1521 | bool pending = true; | ||
1522 | |||
1523 | PDEBUG("tile_net_stop()\n"); | ||
1524 | |||
1525 | /* ISSUE: Only needed if not yet fully open. */ | ||
1526 | cancel_delayed_work_sync(&priv->retry_work); | ||
1527 | |||
1528 | /* Can't transmit any more. */ | ||
1529 | netif_stop_queue(dev); | ||
1530 | |||
1531 | /* | ||
1532 | * Disable hypervisor interrupts on each tile. | ||
1533 | */ | ||
1534 | on_each_cpu(tile_net_disable_intr, (void *)dev, 1); | ||
1535 | |||
1536 | /* | ||
1537 | * Unregister the interrupt handler. | ||
1538 | * The __ffs() function returns the index into the interrupt handler | ||
1539 | * table from the interrupt bit mask which should have one bit | ||
1540 | * and one bit only set. | ||
1541 | */ | ||
1542 | if (priv->intr_id) | ||
1543 | free_irq(__ffs(priv->intr_id), dev); | ||
1544 | |||
1545 | /* | ||
1546 | * Drain all the LIPP buffers. | ||
1547 | */ | ||
1548 | |||
1549 | while (true) { | ||
1550 | int buffer; | ||
1551 | |||
1552 | /* NOTE: This should never fail. */ | ||
1553 | if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&buffer, | ||
1554 | sizeof(buffer), NETIO_IPP_DRAIN_OFF) < 0) | ||
1555 | break; | ||
1556 | |||
1557 | /* Stop when done. */ | ||
1558 | if (buffer == 0) | ||
1559 | break; | ||
1560 | |||
1561 | { | ||
1562 | /* Convert "linux_buffer_t" to "va". */ | ||
1563 | void *va = __va((phys_addr_t)(buffer >> 1) << 7); | ||
1564 | |||
1565 | /* Acquire the associated "skb". */ | ||
1566 | struct sk_buff **skb_ptr = va - sizeof(*skb_ptr); | ||
1567 | struct sk_buff *skb = *skb_ptr; | ||
1568 | |||
1569 | kfree_skb(skb); | ||
1570 | } | ||
1571 | } | ||
1572 | |||
1573 | /* Stop LIPP/LEPP. */ | ||
1574 | tile_net_stop_aux(dev); | ||
1575 | |||
1576 | |||
1577 | priv->fully_opened = 0; | ||
1578 | |||
1579 | |||
1580 | /* | ||
1581 | * XXX: ISSUE: It appears that, in practice anyway, by the | ||
1582 | * time we get here, there are no pending completions. | ||
1583 | */ | ||
1584 | while (pending) { | ||
1585 | |||
1586 | struct sk_buff *olds[32]; | ||
1587 | unsigned int wanted = 32; | ||
1588 | unsigned int i, nolds = 0; | ||
1589 | |||
1590 | nolds = tile_net_lepp_grab_comps(dev, olds, | ||
1591 | wanted, &pending); | ||
1592 | |||
1593 | /* ISSUE: We have never actually seen this debug spew. */ | ||
1594 | if (nolds != 0) | ||
1595 | pr_info("During tile_net_stop(), grabbed %d comps.\n", | ||
1596 | nolds); | ||
1597 | |||
1598 | for (i = 0; i < nolds; i++) | ||
1599 | kfree_skb(olds[i]); | ||
1600 | } | ||
1601 | |||
1602 | |||
1603 | /* Wipe the EPP queue. */ | ||
1604 | memset(priv->epp_queue, 0, sizeof(lepp_queue_t)); | ||
1605 | |||
1606 | /* Evict the EPP queue. */ | ||
1607 | finv_buffer(priv->epp_queue, PAGE_SIZE); | ||
1608 | |||
1609 | return 0; | ||
1610 | } | ||
1611 | |||
1612 | |||
1613 | /* | ||
1614 | * Prepare the "frags" info for the resulting LEPP command. | ||
1615 | * | ||
1616 | * If needed, flush the memory used by the frags. | ||
1617 | */ | ||
1618 | static unsigned int tile_net_tx_frags(lepp_frag_t *frags, | ||
1619 | struct sk_buff *skb, | ||
1620 | void *b_data, unsigned int b_len) | ||
1621 | { | ||
1622 | unsigned int i, n = 0; | ||
1623 | |||
1624 | struct skb_shared_info *sh = skb_shinfo(skb); | ||
1625 | |||
1626 | phys_addr_t cpa; | ||
1627 | |||
1628 | if (b_len != 0) { | ||
1629 | |||
1630 | if (!hash_default) | ||
1631 | finv_buffer_remote(b_data, b_len); | ||
1632 | |||
1633 | cpa = __pa(b_data); | ||
1634 | frags[n].cpa_lo = cpa; | ||
1635 | frags[n].cpa_hi = cpa >> 32; | ||
1636 | frags[n].length = b_len; | ||
1637 | frags[n].hash_for_home = hash_default; | ||
1638 | n++; | ||
1639 | } | ||
1640 | |||
1641 | for (i = 0; i < sh->nr_frags; i++) { | ||
1642 | |||
1643 | skb_frag_t *f = &sh->frags[i]; | ||
1644 | unsigned long pfn = page_to_pfn(f->page); | ||
1645 | |||
1646 | /* FIXME: Compute "hash_for_home" properly. */ | ||
1647 | /* ISSUE: The hypervisor checks CHIP_HAS_REV1_DMA_PACKETS(). */ | ||
1648 | int hash_for_home = hash_default; | ||
1649 | |||
1650 | /* FIXME: Hmmm. */ | ||
1651 | if (!hash_default) { | ||
1652 | void *va = pfn_to_kaddr(pfn) + f->page_offset; | ||
1653 | BUG_ON(PageHighMem(f->page)); | ||
1654 | finv_buffer_remote(va, f->size); | ||
1655 | } | ||
1656 | |||
1657 | cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset; | ||
1658 | frags[n].cpa_lo = cpa; | ||
1659 | frags[n].cpa_hi = cpa >> 32; | ||
1660 | frags[n].length = f->size; | ||
1661 | frags[n].hash_for_home = hash_for_home; | ||
1662 | n++; | ||
1663 | } | ||
1664 | |||
1665 | return n; | ||
1666 | } | ||
1667 | |||
1668 | |||
1669 | /* | ||
1670 | * This function takes "skb", consisting of a header template and a | ||
1671 | * payload, and hands it to LEPP, to emit as one or more segments, | ||
1672 | * each consisting of a possibly modified header, plus a piece of the | ||
1673 | * payload, via a process known as "tcp segmentation offload". | ||
1674 | * | ||
1675 | * Usually, "data" will contain the header template, of size "sh_len", | ||
1676 | * and "sh->frags" will contain "skb->data_len" bytes of payload, and | ||
1677 | * there will be "sh->gso_segs" segments. | ||
1678 | * | ||
1679 | * Sometimes, if "sendfile()" requires copying, we will be called with | ||
1680 | * "data" containing the header and payload, with "frags" being empty. | ||
1681 | * | ||
1682 | * In theory, "sh->nr_frags" could be 3, but in practice, it seems | ||
1683 | * that this will never actually happen. | ||
1684 | * | ||
1685 | * See "emulate_large_send_offload()" for some reference code, which | ||
1686 | * does not handle checksumming. | ||
1687 | * | ||
1688 | * ISSUE: How do we make sure that high memory DMA does not migrate? | ||
1689 | */ | ||
1690 | static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev) | ||
1691 | { | ||
1692 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1693 | int my_cpu = smp_processor_id(); | ||
1694 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | ||
1695 | struct tile_net_stats_t *stats = &info->stats; | ||
1696 | |||
1697 | struct skb_shared_info *sh = skb_shinfo(skb); | ||
1698 | |||
1699 | unsigned char *data = skb->data; | ||
1700 | |||
1701 | /* The ip header follows the ethernet header. */ | ||
1702 | struct iphdr *ih = ip_hdr(skb); | ||
1703 | unsigned int ih_len = ih->ihl * 4; | ||
1704 | |||
1705 | /* Note that "nh == ih", by definition. */ | ||
1706 | unsigned char *nh = skb_network_header(skb); | ||
1707 | unsigned int eh_len = nh - data; | ||
1708 | |||
1709 | /* The tcp header follows the ip header. */ | ||
1710 | struct tcphdr *th = (struct tcphdr *)(nh + ih_len); | ||
1711 | unsigned int th_len = th->doff * 4; | ||
1712 | |||
1713 | /* The total number of header bytes. */ | ||
1714 | /* NOTE: This may be less than skb_headlen(skb). */ | ||
1715 | unsigned int sh_len = eh_len + ih_len + th_len; | ||
1716 | |||
1717 | /* The number of payload bytes at "skb->data + sh_len". */ | ||
1718 | /* This is non-zero for sendfile() without HIGHDMA. */ | ||
1719 | unsigned int b_len = skb_headlen(skb) - sh_len; | ||
1720 | |||
1721 | /* The total number of payload bytes. */ | ||
1722 | unsigned int d_len = b_len + skb->data_len; | ||
1723 | |||
1724 | /* The maximum payload size. */ | ||
1725 | unsigned int p_len = sh->gso_size; | ||
1726 | |||
1727 | /* The total number of segments. */ | ||
1728 | unsigned int num_segs = sh->gso_segs; | ||
1729 | |||
1730 | /* The temporary copy of the command. */ | ||
1731 | u32 cmd_body[(LEPP_MAX_CMD_SIZE + 3) / 4]; | ||
1732 | lepp_tso_cmd_t *cmd = (lepp_tso_cmd_t *)cmd_body; | ||
1733 | |||
1734 | /* Analyze the "frags". */ | ||
1735 | unsigned int num_frags = | ||
1736 | tile_net_tx_frags(cmd->frags, skb, data + sh_len, b_len); | ||
1737 | |||
1738 | /* The size of the command, including frags and header. */ | ||
1739 | size_t cmd_size = LEPP_TSO_CMD_SIZE(num_frags, sh_len); | ||
1740 | |||
1741 | /* The command header. */ | ||
1742 | lepp_tso_cmd_t cmd_init = { | ||
1743 | .tso = true, | ||
1744 | .header_size = sh_len, | ||
1745 | .ip_offset = eh_len, | ||
1746 | .tcp_offset = eh_len + ih_len, | ||
1747 | .payload_size = p_len, | ||
1748 | .num_frags = num_frags, | ||
1749 | }; | ||
1750 | |||
1751 | unsigned long irqflags; | ||
1752 | |||
1753 | lepp_queue_t *eq = priv->epp_queue; | ||
1754 | |||
1755 | struct sk_buff *olds[4]; | ||
1756 | unsigned int wanted = 4; | ||
1757 | unsigned int i, nolds = 0; | ||
1758 | |||
1759 | unsigned int cmd_head, cmd_tail, cmd_next; | ||
1760 | unsigned int comp_tail; | ||
1761 | |||
1762 | unsigned int free_slots; | ||
1763 | |||
1764 | |||
1765 | /* Paranoia. */ | ||
1766 | BUG_ON(skb->protocol != htons(ETH_P_IP)); | ||
1767 | BUG_ON(ih->protocol != IPPROTO_TCP); | ||
1768 | BUG_ON(skb->ip_summed != CHECKSUM_PARTIAL); | ||
1769 | BUG_ON(num_frags > LEPP_MAX_FRAGS); | ||
1770 | /*--BUG_ON(num_segs != (d_len + (p_len - 1)) / p_len); */ | ||
1771 | BUG_ON(num_segs <= 1); | ||
1772 | |||
1773 | |||
1774 | /* Finish preparing the command. */ | ||
1775 | |||
1776 | /* Copy the command header. */ | ||
1777 | *cmd = cmd_init; | ||
1778 | |||
1779 | /* Copy the "header". */ | ||
1780 | memcpy(&cmd->frags[num_frags], data, sh_len); | ||
1781 | |||
1782 | |||
1783 | /* Prefetch and wait, to minimize time spent holding the spinlock. */ | ||
1784 | prefetch_L1(&eq->comp_tail); | ||
1785 | prefetch_L1(&eq->cmd_tail); | ||
1786 | mb(); | ||
1787 | |||
1788 | |||
1789 | /* Enqueue the command. */ | ||
1790 | |||
1791 | spin_lock_irqsave(&priv->cmd_lock, irqflags); | ||
1792 | |||
1793 | /* | ||
1794 | * Handle completions if needed to make room. | ||
1795 | * HACK: Spin until there is sufficient room. | ||
1796 | */ | ||
1797 | free_slots = lepp_num_free_comp_slots(eq); | ||
1798 | if (free_slots < 1) { | ||
1799 | spin: | ||
1800 | nolds += tile_net_lepp_grab_comps(dev, olds + nolds, | ||
1801 | wanted - nolds, NULL); | ||
1802 | if (lepp_num_free_comp_slots(eq) < 1) | ||
1803 | goto spin; | ||
1804 | } | ||
1805 | |||
1806 | cmd_head = eq->cmd_head; | ||
1807 | cmd_tail = eq->cmd_tail; | ||
1808 | |||
1809 | /* NOTE: The "gotos" below are untested. */ | ||
1810 | |||
1811 | /* Prepare to advance, detecting full queue. */ | ||
1812 | cmd_next = cmd_tail + cmd_size; | ||
1813 | if (cmd_tail < cmd_head && cmd_next >= cmd_head) | ||
1814 | goto spin; | ||
1815 | if (cmd_next > LEPP_CMD_LIMIT) { | ||
1816 | cmd_next = 0; | ||
1817 | if (cmd_next == cmd_head) | ||
1818 | goto spin; | ||
1819 | } | ||
1820 | |||
1821 | /* Copy the command. */ | ||
1822 | memcpy(&eq->cmds[cmd_tail], cmd, cmd_size); | ||
1823 | |||
1824 | /* Advance. */ | ||
1825 | cmd_tail = cmd_next; | ||
1826 | |||
1827 | /* Record "skb" for eventual freeing. */ | ||
1828 | comp_tail = eq->comp_tail; | ||
1829 | eq->comps[comp_tail] = skb; | ||
1830 | LEPP_QINC(comp_tail); | ||
1831 | eq->comp_tail = comp_tail; | ||
1832 | |||
1833 | /* Flush before allowing LEPP to handle the command. */ | ||
1834 | __insn_mf(); | ||
1835 | |||
1836 | eq->cmd_tail = cmd_tail; | ||
1837 | |||
1838 | spin_unlock_irqrestore(&priv->cmd_lock, irqflags); | ||
1839 | |||
1840 | if (nolds == 0) | ||
1841 | nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL); | ||
1842 | |||
1843 | /* Handle completions. */ | ||
1844 | for (i = 0; i < nolds; i++) | ||
1845 | kfree_skb(olds[i]); | ||
1846 | |||
1847 | /* Update stats. */ | ||
1848 | stats->tx_packets += num_segs; | ||
1849 | stats->tx_bytes += (num_segs * sh_len) + d_len; | ||
1850 | |||
1851 | /* Make sure the egress timer is scheduled. */ | ||
1852 | tile_net_schedule_egress_timer(info); | ||
1853 | |||
1854 | return NETDEV_TX_OK; | ||
1855 | } | ||
1856 | |||
1857 | |||
1858 | /* | ||
1859 | * Transmit a packet (called by the kernel via "hard_start_xmit" hook). | ||
1860 | */ | ||
1861 | static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) | ||
1862 | { | ||
1863 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1864 | int my_cpu = smp_processor_id(); | ||
1865 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | ||
1866 | struct tile_net_stats_t *stats = &info->stats; | ||
1867 | |||
1868 | unsigned long irqflags; | ||
1869 | |||
1870 | struct skb_shared_info *sh = skb_shinfo(skb); | ||
1871 | |||
1872 | unsigned int len = skb->len; | ||
1873 | unsigned char *data = skb->data; | ||
1874 | |||
1875 | unsigned int csum_start = skb->csum_start - skb_headroom(skb); | ||
1876 | |||
1877 | lepp_frag_t frags[LEPP_MAX_FRAGS]; | ||
1878 | |||
1879 | unsigned int num_frags; | ||
1880 | |||
1881 | lepp_queue_t *eq = priv->epp_queue; | ||
1882 | |||
1883 | struct sk_buff *olds[4]; | ||
1884 | unsigned int wanted = 4; | ||
1885 | unsigned int i, nolds = 0; | ||
1886 | |||
1887 | unsigned int cmd_size = sizeof(lepp_cmd_t); | ||
1888 | |||
1889 | unsigned int cmd_head, cmd_tail, cmd_next; | ||
1890 | unsigned int comp_tail; | ||
1891 | |||
1892 | lepp_cmd_t cmds[LEPP_MAX_FRAGS]; | ||
1893 | |||
1894 | unsigned int free_slots; | ||
1895 | |||
1896 | |||
1897 | /* | ||
1898 | * This is paranoia, since we think that if the link doesn't come | ||
1899 | * up, telling Linux we have no carrier will keep it from trying | ||
1900 | * to transmit. If it does, though, we can't execute this routine, | ||
1901 | * since data structures we depend on aren't set up yet. | ||
1902 | */ | ||
1903 | if (!info->registered) | ||
1904 | return NETDEV_TX_BUSY; | ||
1905 | |||
1906 | |||
1907 | /* Save the timestamp. */ | ||
1908 | dev->trans_start = jiffies; | ||
1909 | |||
1910 | |||
1911 | #ifdef TILE_NET_PARANOIA | ||
1912 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
1913 | if (hash_default) { | ||
1914 | HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data); | ||
1915 | if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) | ||
1916 | panic("Non-coherent egress buffer!"); | ||
1917 | } | ||
1918 | #endif | ||
1919 | #endif | ||
1920 | |||
1921 | |||
1922 | #ifdef TILE_NET_DUMP_PACKETS | ||
1923 | /* ISSUE: Does not dump the "frags". */ | ||
1924 | dump_packet(data, skb_headlen(skb), "tx"); | ||
1925 | #endif /* TILE_NET_DUMP_PACKETS */ | ||
1926 | |||
1927 | |||
1928 | if (sh->gso_size != 0) | ||
1929 | return tile_net_tx_tso(skb, dev); | ||
1930 | |||
1931 | |||
1932 | /* Prepare the commands. */ | ||
1933 | |||
1934 | num_frags = tile_net_tx_frags(frags, skb, data, skb_headlen(skb)); | ||
1935 | |||
1936 | for (i = 0; i < num_frags; i++) { | ||
1937 | |||
1938 | bool final = (i == num_frags - 1); | ||
1939 | |||
1940 | lepp_cmd_t cmd = { | ||
1941 | .cpa_lo = frags[i].cpa_lo, | ||
1942 | .cpa_hi = frags[i].cpa_hi, | ||
1943 | .length = frags[i].length, | ||
1944 | .hash_for_home = frags[i].hash_for_home, | ||
1945 | .send_completion = final, | ||
1946 | .end_of_packet = final | ||
1947 | }; | ||
1948 | |||
1949 | if (i == 0 && skb->ip_summed == CHECKSUM_PARTIAL) { | ||
1950 | cmd.compute_checksum = 1; | ||
1951 | cmd.checksum_data.bits.start_byte = csum_start; | ||
1952 | cmd.checksum_data.bits.count = len - csum_start; | ||
1953 | cmd.checksum_data.bits.destination_byte = | ||
1954 | csum_start + skb->csum_offset; | ||
1955 | } | ||
1956 | |||
1957 | cmds[i] = cmd; | ||
1958 | } | ||
1959 | |||
1960 | |||
1961 | /* Prefetch and wait, to minimize time spent holding the spinlock. */ | ||
1962 | prefetch_L1(&eq->comp_tail); | ||
1963 | prefetch_L1(&eq->cmd_tail); | ||
1964 | mb(); | ||
1965 | |||
1966 | |||
1967 | /* Enqueue the commands. */ | ||
1968 | |||
1969 | spin_lock_irqsave(&priv->cmd_lock, irqflags); | ||
1970 | |||
1971 | /* | ||
1972 | * Handle completions if needed to make room. | ||
1973 | * HACK: Spin until there is sufficient room. | ||
1974 | */ | ||
1975 | free_slots = lepp_num_free_comp_slots(eq); | ||
1976 | if (free_slots < 1) { | ||
1977 | spin: | ||
1978 | nolds += tile_net_lepp_grab_comps(dev, olds + nolds, | ||
1979 | wanted - nolds, NULL); | ||
1980 | if (lepp_num_free_comp_slots(eq) < 1) | ||
1981 | goto spin; | ||
1982 | } | ||
1983 | |||
1984 | cmd_head = eq->cmd_head; | ||
1985 | cmd_tail = eq->cmd_tail; | ||
1986 | |||
1987 | /* NOTE: The "gotos" below are untested. */ | ||
1988 | |||
1989 | /* Copy the commands, or fail. */ | ||
1990 | for (i = 0; i < num_frags; i++) { | ||
1991 | |||
1992 | /* Prepare to advance, detecting full queue. */ | ||
1993 | cmd_next = cmd_tail + cmd_size; | ||
1994 | if (cmd_tail < cmd_head && cmd_next >= cmd_head) | ||
1995 | goto spin; | ||
1996 | if (cmd_next > LEPP_CMD_LIMIT) { | ||
1997 | cmd_next = 0; | ||
1998 | if (cmd_next == cmd_head) | ||
1999 | goto spin; | ||
2000 | } | ||
2001 | |||
2002 | /* Copy the command. */ | ||
2003 | *(lepp_cmd_t *)&eq->cmds[cmd_tail] = cmds[i]; | ||
2004 | |||
2005 | /* Advance. */ | ||
2006 | cmd_tail = cmd_next; | ||
2007 | } | ||
2008 | |||
2009 | /* Record "skb" for eventual freeing. */ | ||
2010 | comp_tail = eq->comp_tail; | ||
2011 | eq->comps[comp_tail] = skb; | ||
2012 | LEPP_QINC(comp_tail); | ||
2013 | eq->comp_tail = comp_tail; | ||
2014 | |||
2015 | /* Flush before allowing LEPP to handle the command. */ | ||
2016 | __insn_mf(); | ||
2017 | |||
2018 | eq->cmd_tail = cmd_tail; | ||
2019 | |||
2020 | spin_unlock_irqrestore(&priv->cmd_lock, irqflags); | ||
2021 | |||
2022 | if (nolds == 0) | ||
2023 | nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL); | ||
2024 | |||
2025 | /* Handle completions. */ | ||
2026 | for (i = 0; i < nolds; i++) | ||
2027 | kfree_skb(olds[i]); | ||
2028 | |||
2029 | /* HACK: Track "expanded" size for short packets (e.g. 42 < 60). */ | ||
2030 | stats->tx_packets++; | ||
2031 | stats->tx_bytes += ((len >= ETH_ZLEN) ? len : ETH_ZLEN); | ||
2032 | |||
2033 | /* Make sure the egress timer is scheduled. */ | ||
2034 | tile_net_schedule_egress_timer(info); | ||
2035 | |||
2036 | return NETDEV_TX_OK; | ||
2037 | } | ||
2038 | |||
2039 | |||
2040 | /* | ||
2041 | * Deal with a transmit timeout. | ||
2042 | */ | ||
2043 | static void tile_net_tx_timeout(struct net_device *dev) | ||
2044 | { | ||
2045 | PDEBUG("tile_net_tx_timeout()\n"); | ||
2046 | PDEBUG("Transmit timeout at %ld, latency %ld\n", jiffies, | ||
2047 | jiffies - dev->trans_start); | ||
2048 | |||
2049 | /* XXX: ISSUE: This doesn't seem useful for us. */ | ||
2050 | netif_wake_queue(dev); | ||
2051 | } | ||
2052 | |||
2053 | |||
2054 | /* | ||
2055 | * Ioctl commands. | ||
2056 | */ | ||
2057 | static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) | ||
2058 | { | ||
2059 | return -EOPNOTSUPP; | ||
2060 | } | ||
2061 | |||
2062 | |||
2063 | /* | ||
2064 | * Get System Network Statistics. | ||
2065 | * | ||
2066 | * Returns the address of the device statistics structure. | ||
2067 | */ | ||
2068 | static struct net_device_stats *tile_net_get_stats(struct net_device *dev) | ||
2069 | { | ||
2070 | struct tile_net_priv *priv = netdev_priv(dev); | ||
2071 | u32 rx_packets = 0; | ||
2072 | u32 tx_packets = 0; | ||
2073 | u32 rx_bytes = 0; | ||
2074 | u32 tx_bytes = 0; | ||
2075 | int i; | ||
2076 | |||
2077 | for_each_online_cpu(i) { | ||
2078 | if (priv->cpu[i]) { | ||
2079 | rx_packets += priv->cpu[i]->stats.rx_packets; | ||
2080 | rx_bytes += priv->cpu[i]->stats.rx_bytes; | ||
2081 | tx_packets += priv->cpu[i]->stats.tx_packets; | ||
2082 | tx_bytes += priv->cpu[i]->stats.tx_bytes; | ||
2083 | } | ||
2084 | } | ||
2085 | |||
2086 | priv->stats.rx_packets = rx_packets; | ||
2087 | priv->stats.rx_bytes = rx_bytes; | ||
2088 | priv->stats.tx_packets = tx_packets; | ||
2089 | priv->stats.tx_bytes = tx_bytes; | ||
2090 | |||
2091 | return &priv->stats; | ||
2092 | } | ||
2093 | |||
2094 | |||
2095 | /* | ||
2096 | * Change the "mtu". | ||
2097 | * | ||
2098 | * The "change_mtu" method is usually not needed. | ||
2099 | * If you need it, it must be like this. | ||
2100 | */ | ||
2101 | static int tile_net_change_mtu(struct net_device *dev, int new_mtu) | ||
2102 | { | ||
2103 | PDEBUG("tile_net_change_mtu()\n"); | ||
2104 | |||
2105 | /* Check ranges. */ | ||
2106 | if ((new_mtu < 68) || (new_mtu > 1500)) | ||
2107 | return -EINVAL; | ||
2108 | |||
2109 | /* Accept the value. */ | ||
2110 | dev->mtu = new_mtu; | ||
2111 | |||
2112 | return 0; | ||
2113 | } | ||
2114 | |||
2115 | |||
2116 | /* | ||
2117 | * Change the Ethernet Address of the NIC. | ||
2118 | * | ||
2119 | * The hypervisor driver does not support changing MAC address. However, | ||
2120 | * the IPP does not do anything with the MAC address, so the address which | ||
2121 | * gets used on outgoing packets, and which is accepted on incoming packets, | ||
2122 | * is completely up to the NetIO program or kernel driver which is actually | ||
2123 | * handling them. | ||
2124 | * | ||
2125 | * Returns 0 on success, negative on failure. | ||
2126 | */ | ||
2127 | static int tile_net_set_mac_address(struct net_device *dev, void *p) | ||
2128 | { | ||
2129 | struct sockaddr *addr = p; | ||
2130 | |||
2131 | if (!is_valid_ether_addr(addr->sa_data)) | ||
2132 | return -EINVAL; | ||
2133 | |||
2134 | /* ISSUE: Note that "dev_addr" is now a pointer. */ | ||
2135 | memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); | ||
2136 | |||
2137 | return 0; | ||
2138 | } | ||
2139 | |||
2140 | |||
2141 | /* | ||
2142 | * Obtain the MAC address from the hypervisor. | ||
2143 | * This must be done before opening the device. | ||
2144 | */ | ||
2145 | static int tile_net_get_mac(struct net_device *dev) | ||
2146 | { | ||
2147 | struct tile_net_priv *priv = netdev_priv(dev); | ||
2148 | |||
2149 | char hv_dev_name[32]; | ||
2150 | int len; | ||
2151 | |||
2152 | __netio_getset_offset_t offset = { .word = NETIO_IPP_PARAM_OFF }; | ||
2153 | |||
2154 | int ret; | ||
2155 | |||
2156 | /* For example, "xgbe0". */ | ||
2157 | strcpy(hv_dev_name, dev->name); | ||
2158 | len = strlen(hv_dev_name); | ||
2159 | |||
2160 | /* For example, "xgbe/0". */ | ||
2161 | hv_dev_name[len] = hv_dev_name[len - 1]; | ||
2162 | hv_dev_name[len - 1] = '/'; | ||
2163 | len++; | ||
2164 | |||
2165 | /* For example, "xgbe/0/native_hash". */ | ||
2166 | strcpy(hv_dev_name + len, hash_default ? "/native_hash" : "/native"); | ||
2167 | |||
2168 | /* Get the hypervisor handle for this device. */ | ||
2169 | priv->hv_devhdl = hv_dev_open((HV_VirtAddr)hv_dev_name, 0); | ||
2170 | PDEBUG("hv_dev_open(%s) returned %d %p\n", | ||
2171 | hv_dev_name, priv->hv_devhdl, &priv->hv_devhdl); | ||
2172 | if (priv->hv_devhdl < 0) { | ||
2173 | if (priv->hv_devhdl == HV_ENODEV) | ||
2174 | printk(KERN_DEBUG "Ignoring unconfigured device %s\n", | ||
2175 | hv_dev_name); | ||
2176 | else | ||
2177 | printk(KERN_DEBUG "hv_dev_open(%s) returned %d\n", | ||
2178 | hv_dev_name, priv->hv_devhdl); | ||
2179 | return -1; | ||
2180 | } | ||
2181 | |||
2182 | /* | ||
2183 | * Read the hardware address from the hypervisor. | ||
2184 | * ISSUE: Note that "dev_addr" is now a pointer. | ||
2185 | */ | ||
2186 | offset.bits.class = NETIO_PARAM; | ||
2187 | offset.bits.addr = NETIO_PARAM_MAC; | ||
2188 | ret = hv_dev_pread(priv->hv_devhdl, 0, | ||
2189 | (HV_VirtAddr)dev->dev_addr, dev->addr_len, | ||
2190 | offset.word); | ||
2191 | PDEBUG("hv_dev_pread(NETIO_PARAM_MAC) returned %d\n", ret); | ||
2192 | if (ret <= 0) { | ||
2193 | printk(KERN_DEBUG "hv_dev_pread(NETIO_PARAM_MAC) %s failed\n", | ||
2194 | dev->name); | ||
2195 | /* | ||
2196 | * Since the device is configured by the hypervisor but we | ||
2197 | * can't get its MAC address, we are most likely running | ||
2198 | * the simulator, so let's generate a random MAC address. | ||
2199 | */ | ||
2200 | random_ether_addr(dev->dev_addr); | ||
2201 | } | ||
2202 | |||
2203 | return 0; | ||
2204 | } | ||
2205 | |||
2206 | |||
2207 | static struct net_device_ops tile_net_ops = { | ||
2208 | .ndo_open = tile_net_open, | ||
2209 | .ndo_stop = tile_net_stop, | ||
2210 | .ndo_start_xmit = tile_net_tx, | ||
2211 | .ndo_do_ioctl = tile_net_ioctl, | ||
2212 | .ndo_get_stats = tile_net_get_stats, | ||
2213 | .ndo_change_mtu = tile_net_change_mtu, | ||
2214 | .ndo_tx_timeout = tile_net_tx_timeout, | ||
2215 | .ndo_set_mac_address = tile_net_set_mac_address | ||
2216 | }; | ||
2217 | |||
2218 | |||
2219 | /* | ||
2220 | * The setup function. | ||
2221 | * | ||
2222 | * This uses ether_setup() to assign various fields in dev, including | ||
2223 | * setting IFF_BROADCAST and IFF_MULTICAST, then sets some extra fields. | ||
2224 | */ | ||
2225 | static void tile_net_setup(struct net_device *dev) | ||
2226 | { | ||
2227 | PDEBUG("tile_net_setup()\n"); | ||
2228 | |||
2229 | ether_setup(dev); | ||
2230 | |||
2231 | dev->netdev_ops = &tile_net_ops; | ||
2232 | |||
2233 | dev->watchdog_timeo = TILE_NET_TIMEOUT; | ||
2234 | |||
2235 | /* We want lockless xmit. */ | ||
2236 | dev->features |= NETIF_F_LLTX; | ||
2237 | |||
2238 | /* We support hardware tx checksums. */ | ||
2239 | dev->features |= NETIF_F_HW_CSUM; | ||
2240 | |||
2241 | /* We support scatter/gather. */ | ||
2242 | dev->features |= NETIF_F_SG; | ||
2243 | |||
2244 | /* We support TSO. */ | ||
2245 | dev->features |= NETIF_F_TSO; | ||
2246 | |||
2247 | #ifdef TILE_NET_GSO | ||
2248 | /* We support GSO. */ | ||
2249 | dev->features |= NETIF_F_GSO; | ||
2250 | #endif | ||
2251 | |||
2252 | if (hash_default) | ||
2253 | dev->features |= NETIF_F_HIGHDMA; | ||
2254 | |||
2255 | /* ISSUE: We should support NETIF_F_UFO. */ | ||
2256 | |||
2257 | dev->tx_queue_len = TILE_NET_TX_QUEUE_LEN; | ||
2258 | |||
2259 | dev->mtu = TILE_NET_MTU; | ||
2260 | } | ||
2261 | |||
2262 | |||
2263 | /* | ||
2264 | * Allocate the device structure, register the device, and obtain the | ||
2265 | * MAC address from the hypervisor. | ||
2266 | */ | ||
2267 | static struct net_device *tile_net_dev_init(const char *name) | ||
2268 | { | ||
2269 | int ret; | ||
2270 | struct net_device *dev; | ||
2271 | struct tile_net_priv *priv; | ||
2272 | struct page *page; | ||
2273 | |||
2274 | /* | ||
2275 | * Allocate the device structure. This allocates "priv", calls | ||
2276 | * tile_net_setup(), and saves "name". Normally, "name" is a | ||
2277 | * template, instantiated by register_netdev(), but not for us. | ||
2278 | */ | ||
2279 | dev = alloc_netdev(sizeof(*priv), name, tile_net_setup); | ||
2280 | if (!dev) { | ||
2281 | pr_err("alloc_netdev(%s) failed\n", name); | ||
2282 | return NULL; | ||
2283 | } | ||
2284 | |||
2285 | priv = netdev_priv(dev); | ||
2286 | |||
2287 | /* Initialize "priv". */ | ||
2288 | |||
2289 | memset(priv, 0, sizeof(*priv)); | ||
2290 | |||
2291 | /* Save "dev" for "tile_net_open_retry()". */ | ||
2292 | priv->dev = dev; | ||
2293 | |||
2294 | INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry); | ||
2295 | |||
2296 | spin_lock_init(&priv->cmd_lock); | ||
2297 | spin_lock_init(&priv->comp_lock); | ||
2298 | |||
2299 | /* Allocate "epp_queue". */ | ||
2300 | BUG_ON(get_order(sizeof(lepp_queue_t)) != 0); | ||
2301 | page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); | ||
2302 | if (!page) { | ||
2303 | free_netdev(dev); | ||
2304 | return NULL; | ||
2305 | } | ||
2306 | priv->epp_queue = page_address(page); | ||
2307 | |||
2308 | /* Register the network device. */ | ||
2309 | ret = register_netdev(dev); | ||
2310 | if (ret) { | ||
2311 | pr_err("register_netdev %s failed %d\n", dev->name, ret); | ||
2312 | free_page((unsigned long)priv->epp_queue); | ||
2313 | free_netdev(dev); | ||
2314 | return NULL; | ||
2315 | } | ||
2316 | |||
2317 | /* Get the MAC address. */ | ||
2318 | ret = tile_net_get_mac(dev); | ||
2319 | if (ret < 0) { | ||
2320 | unregister_netdev(dev); | ||
2321 | free_page((unsigned long)priv->epp_queue); | ||
2322 | free_netdev(dev); | ||
2323 | return NULL; | ||
2324 | } | ||
2325 | |||
2326 | return dev; | ||
2327 | } | ||
2328 | |||
2329 | |||
2330 | /* | ||
2331 | * Module cleanup. | ||
2332 | */ | ||
2333 | static void tile_net_cleanup(void) | ||
2334 | { | ||
2335 | int i; | ||
2336 | |||
2337 | for (i = 0; i < TILE_NET_DEVS; i++) { | ||
2338 | if (tile_net_devs[i]) { | ||
2339 | struct net_device *dev = tile_net_devs[i]; | ||
2340 | struct tile_net_priv *priv = netdev_priv(dev); | ||
2341 | unregister_netdev(dev); | ||
2342 | finv_buffer(priv->epp_queue, PAGE_SIZE); | ||
2343 | free_page((unsigned long)priv->epp_queue); | ||
2344 | free_netdev(dev); | ||
2345 | } | ||
2346 | } | ||
2347 | } | ||
2348 | |||
2349 | |||
2350 | /* | ||
2351 | * Module initialization. | ||
2352 | */ | ||
2353 | static int tile_net_init_module(void) | ||
2354 | { | ||
2355 | pr_info("Tilera IPP Net Driver\n"); | ||
2356 | |||
2357 | tile_net_devs[0] = tile_net_dev_init("xgbe0"); | ||
2358 | tile_net_devs[1] = tile_net_dev_init("xgbe1"); | ||
2359 | tile_net_devs[2] = tile_net_dev_init("gbe0"); | ||
2360 | tile_net_devs[3] = tile_net_dev_init("gbe1"); | ||
2361 | |||
2362 | return 0; | ||
2363 | } | ||
2364 | |||
2365 | |||
2366 | #ifndef MODULE | ||
2367 | /* | ||
2368 | * The "network_cpus" boot argument specifies the cpus that are dedicated | ||
2369 | * to handle ingress packets. | ||
2370 | * | ||
2371 | * The parameter should be in the form "network_cpus=m-n[,x-y]", where | ||
2372 | * m, n, x, y are integer numbers that represent the cpus that can be | ||
2373 | * neither a dedicated cpu nor a dataplane cpu. | ||
2374 | */ | ||
2375 | static int __init network_cpus_setup(char *str) | ||
2376 | { | ||
2377 | int rc = cpulist_parse_crop(str, &network_cpus_map); | ||
2378 | if (rc != 0) { | ||
2379 | pr_warning("network_cpus=%s: malformed cpu list\n", | ||
2380 | str); | ||
2381 | } else { | ||
2382 | |||
2383 | /* Remove dedicated cpus. */ | ||
2384 | cpumask_and(&network_cpus_map, &network_cpus_map, | ||
2385 | cpu_possible_mask); | ||
2386 | |||
2387 | |||
2388 | if (cpumask_empty(&network_cpus_map)) { | ||
2389 | pr_warning("Ignoring network_cpus='%s'.\n", | ||
2390 | str); | ||
2391 | } else { | ||
2392 | char buf[1024]; | ||
2393 | cpulist_scnprintf(buf, sizeof(buf), &network_cpus_map); | ||
2394 | pr_info("Linux network CPUs: %s\n", buf); | ||
2395 | network_cpus_used = true; | ||
2396 | } | ||
2397 | } | ||
2398 | |||
2399 | return 0; | ||
2400 | } | ||
2401 | __setup("network_cpus=", network_cpus_setup); | ||
2402 | #endif | ||
2403 | |||
2404 | |||
2405 | module_init(tile_net_init_module); | ||
2406 | module_exit(tile_net_cleanup); | ||